//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISC-V uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "RISCVISelLowering.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCV.h"
#include "RISCVConstantPoolValue.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVRegisterInfo.h"
#include "RISCVSelectionDAGInfo.h"
#include "RISCVSubtarget.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "riscv-lower"

STATISTIC(NumTailCalls, "Number of tail calls");

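// Hidden command-line options used to tune individual lowering heuristics
// below; they are omitted from --help and are intended mainly for debugging
// and testing.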
static cl::opt<unsigned> ExtensionMaxWebSize(
    DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
    cl::desc("Give the maximum size (in number of nodes) of the web of "
             "instructions that we will consider for VW expansion"),
    cl::init(18));

static cl::opt<bool>
    AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
                     cl::desc("Allow the formation of VW_W operations (e.g., "
                              "VWADD_W) with splat constants"),
                     cl::init(false));

static cl::opt<unsigned> NumRepeatedDivisors(
    DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
    cl::desc("Set the minimum number of repetitions of a divisor to allow "
             "transformation to multiplications by the reciprocal"),
    cl::init(2));

static cl::opt<int>
    FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
              cl::desc("Give the maximum number of instructions that we will "
                       "use for creating a floating-point immediate value"),
              cl::init(2));

static cl::opt<bool>
    ReassocShlAddiAdd("reassoc-shl-addi-add", cl::Hidden,
                      cl::desc("Swap add and addi in cases where the add may "
                               "be combined with a shift"),
                      cl::init(true));

RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                         const RISCVSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");

  if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
      !Subtarget.hasStdExtF()) {
    errs() << "Hard-float 'f' ABI can't be used for a target that "
              "doesn't support the F instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
             !Subtarget.hasStdExtD()) {
    errs() << "Hard-float 'd' ABI can't be used for a target that "
              "doesn't support the D instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  }

  switch (ABI) {
  default:
    report_fatal_error("Don't know how to lower this ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_ILP32E:
  case RISCVABI::ABI_LP64E:
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64:
  case RISCVABI::ABI_LP64F:
  case RISCVABI::ABI_LP64D:
    break;
  }

  MVT XLenVT = Subtarget.getXLenVT();

  // Set up the register classes.
  addRegisterClass(XLenVT, &RISCV::GPRRegClass);

  if (Subtarget.hasStdExtZfhmin())
    addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtZfbfmin())
    addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtF())
    addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
  if (Subtarget.hasStdExtD())
    addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
  if (Subtarget.hasStdExtZhinxmin())
    addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
  if (Subtarget.hasStdExtZfinx())
    addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
  if (Subtarget.hasStdExtZdinx()) {
    if (Subtarget.is64Bit())
      addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
    else
      addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
  }

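  // Scalable vector value types grouped by element type. These tables drive
  // which RVV register classes are registered and which per-type operation
  // actions are configured below.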
  static const MVT::SimpleValueType BoolVecVTs[] = {
      MVT::nxv1i1,  MVT::nxv2i1,  MVT::nxv4i1, MVT::nxv8i1,
      MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
  static const MVT::SimpleValueType IntVecVTs[] = {
      MVT::nxv1i8,  MVT::nxv2i8,   MVT::nxv4i8,   MVT::nxv8i8,  MVT::nxv16i8,
      MVT::nxv32i8, MVT::nxv64i8,  MVT::nxv1i16,  MVT::nxv2i16, MVT::nxv4i16,
      MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
      MVT::nxv4i32, MVT::nxv8i32,  MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
      MVT::nxv4i64, MVT::nxv8i64};
  static const MVT::SimpleValueType F16VecVTs[] = {
      MVT::nxv1f16, MVT::nxv2f16,  MVT::nxv4f16,
      MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
  static const MVT::SimpleValueType BF16VecVTs[] = {
      MVT::nxv1bf16, MVT::nxv2bf16,  MVT::nxv4bf16,
      MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
  static const MVT::SimpleValueType F32VecVTs[] = {
      MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
  static const MVT::SimpleValueType F64VecVTs[] = {
      MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
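  // Vector tuple types used by RVV segment load/store (vlseg/vsseg); each
  // tuple holds NF vector register groups of a given LMUL, with NF * LMUL
  // limited to eight registers.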
  static const MVT::SimpleValueType VecTupleVTs[] = {
      MVT::riscv_nxv1i8x2,  MVT::riscv_nxv1i8x3,  MVT::riscv_nxv1i8x4,
      MVT::riscv_nxv1i8x5,  MVT::riscv_nxv1i8x6,  MVT::riscv_nxv1i8x7,
      MVT::riscv_nxv1i8x8,  MVT::riscv_nxv2i8x2,  MVT::riscv_nxv2i8x3,
      MVT::riscv_nxv2i8x4,  MVT::riscv_nxv2i8x5,  MVT::riscv_nxv2i8x6,
      MVT::riscv_nxv2i8x7,  MVT::riscv_nxv2i8x8,  MVT::riscv_nxv4i8x2,
      MVT::riscv_nxv4i8x3,  MVT::riscv_nxv4i8x4,  MVT::riscv_nxv4i8x5,
      MVT::riscv_nxv4i8x6,  MVT::riscv_nxv4i8x7,  MVT::riscv_nxv4i8x8,
      MVT::riscv_nxv8i8x2,  MVT::riscv_nxv8i8x3,  MVT::riscv_nxv8i8x4,
      MVT::riscv_nxv8i8x5,  MVT::riscv_nxv8i8x6,  MVT::riscv_nxv8i8x7,
      MVT::riscv_nxv8i8x8,  MVT::riscv_nxv16i8x2, MVT::riscv_nxv16i8x3,
      MVT::riscv_nxv16i8x4, MVT::riscv_nxv32i8x2};

  if (Subtarget.hasVInstructions()) {
    auto addRegClassForRVV = [this](MVT VT) {
      // Disable the smallest fractional LMUL types if ELEN is less than
      // RVVBitsPerBlock.
      unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
      if (VT.getVectorMinNumElements() < MinElts)
        return;

      unsigned Size = VT.getSizeInBits().getKnownMinValue();
      const TargetRegisterClass *RC;
      if (Size <= RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRRegClass;
      else if (Size == 2 * RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRM2RegClass;
      else if (Size == 4 * RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRM4RegClass;
      else if (Size == 8 * RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRM8RegClass;
      else
        llvm_unreachable("Unexpected size");

      addRegisterClass(VT, RC);
    };

    for (MVT VT : BoolVecVTs)
      addRegClassForRVV(VT);
    for (MVT VT : IntVecVTs) {
      if (VT.getVectorElementType() == MVT::i64 &&
          !Subtarget.hasVInstructionsI64())
        continue;
      addRegClassForRVV(VT);
    }

    if (Subtarget.hasVInstructionsF16Minimal() ||
        Subtarget.hasVendorXAndesVPackFPH())
      for (MVT VT : F16VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasVInstructionsBF16Minimal() ||
        Subtarget.hasVendorXAndesVBFHCvt())
      for (MVT VT : BF16VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasVInstructionsF32())
      for (MVT VT : F32VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasVInstructionsF64())
      for (MVT VT : F64VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.useRVVForFixedLengthVectors()) {
      auto addRegClassForFixedVectors = [this](MVT VT) {
        MVT ContainerVT = getContainerForFixedLengthVector(VT);
        unsigned RCID = getRegClassIDForVecVT(ContainerVT);
        const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
        addRegisterClass(VT, TRI.getRegClass(RCID));
      };
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);
    }

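    // Register each vector tuple type with the matching VRN<NF>M<LMUL>
    // register class.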
    addRegisterClass(MVT::riscv_nxv1i8x2, &RISCV::VRN2M1RegClass);
    addRegisterClass(MVT::riscv_nxv1i8x3, &RISCV::VRN3M1RegClass);
    addRegisterClass(MVT::riscv_nxv1i8x4, &RISCV::VRN4M1RegClass);
    addRegisterClass(MVT::riscv_nxv1i8x5, &RISCV::VRN5M1RegClass);
    addRegisterClass(MVT::riscv_nxv1i8x6, &RISCV::VRN6M1RegClass);
    addRegisterClass(MVT::riscv_nxv1i8x7, &RISCV::VRN7M1RegClass);
    addRegisterClass(MVT::riscv_nxv1i8x8, &RISCV::VRN8M1RegClass);
    addRegisterClass(MVT::riscv_nxv2i8x2, &RISCV::VRN2M1RegClass);
    addRegisterClass(MVT::riscv_nxv2i8x3, &RISCV::VRN3M1RegClass);
    addRegisterClass(MVT::riscv_nxv2i8x4, &RISCV::VRN4M1RegClass);
    addRegisterClass(MVT::riscv_nxv2i8x5, &RISCV::VRN5M1RegClass);
    addRegisterClass(MVT::riscv_nxv2i8x6, &RISCV::VRN6M1RegClass);
    addRegisterClass(MVT::riscv_nxv2i8x7, &RISCV::VRN7M1RegClass);
    addRegisterClass(MVT::riscv_nxv2i8x8, &RISCV::VRN8M1RegClass);
    addRegisterClass(MVT::riscv_nxv4i8x2, &RISCV::VRN2M1RegClass);
    addRegisterClass(MVT::riscv_nxv4i8x3, &RISCV::VRN3M1RegClass);
    addRegisterClass(MVT::riscv_nxv4i8x4, &RISCV::VRN4M1RegClass);
    addRegisterClass(MVT::riscv_nxv4i8x5, &RISCV::VRN5M1RegClass);
    addRegisterClass(MVT::riscv_nxv4i8x6, &RISCV::VRN6M1RegClass);
    addRegisterClass(MVT::riscv_nxv4i8x7, &RISCV::VRN7M1RegClass);
    addRegisterClass(MVT::riscv_nxv4i8x8, &RISCV::VRN8M1RegClass);
    addRegisterClass(MVT::riscv_nxv8i8x2, &RISCV::VRN2M1RegClass);
    addRegisterClass(MVT::riscv_nxv8i8x3, &RISCV::VRN3M1RegClass);
    addRegisterClass(MVT::riscv_nxv8i8x4, &RISCV::VRN4M1RegClass);
    addRegisterClass(MVT::riscv_nxv8i8x5, &RISCV::VRN5M1RegClass);
    addRegisterClass(MVT::riscv_nxv8i8x6, &RISCV::VRN6M1RegClass);
    addRegisterClass(MVT::riscv_nxv8i8x7, &RISCV::VRN7M1RegClass);
    addRegisterClass(MVT::riscv_nxv8i8x8, &RISCV::VRN8M1RegClass);
    addRegisterClass(MVT::riscv_nxv16i8x2, &RISCV::VRN2M2RegClass);
    addRegisterClass(MVT::riscv_nxv16i8x3, &RISCV::VRN3M2RegClass);
    addRegisterClass(MVT::riscv_nxv16i8x4, &RISCV::VRN4M2RegClass);
    addRegisterClass(MVT::riscv_nxv32i8x2, &RISCV::VRN2M4RegClass);
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

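  // X2 is the stack pointer (sp) in the standard RISC-V calling convention.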
  setStackPointerRegisterToSaveRestore(RISCV::X2);

  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, XLenVT,
                   MVT::i1, Promote);
  // DAGCombiner can call isLoadExtLegal for types that aren't legal.
  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32,
                   MVT::i1, Promote);

  // TODO: add all necessary setOperationAction calls.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Custom);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, XLenVT, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::SELECT_CC, XLenVT, Expand);

  setCondCodeAction(ISD::SETGT, XLenVT, Custom);
  setCondCodeAction(ISD::SETGE, XLenVT, Expand);
  setCondCodeAction(ISD::SETUGT, XLenVT, Custom);
  setCondCodeAction(ISD::SETUGE, XLenVT, Expand);
  if (!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
    setCondCodeAction(ISD::SETULE, XLenVT, Expand);
    setCondCodeAction(ISD::SETLE, XLenVT, Expand);
  }

  setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);

  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);

  if (!Subtarget.hasVendorXTHeadBb() && !Subtarget.hasVendorXqcibm() &&
      !Subtarget.hasVendorXAndesPerf())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);

  if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb() &&
      !Subtarget.hasVendorXqcibm() && !Subtarget.hasVendorXAndesPerf() &&
      !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
    setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);

  if (Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit()) {
    setOperationAction(ISD::LOAD, MVT::i64, Custom);
    setOperationAction(ISD::STORE, MVT::i64, Custom);
  }

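  // On RV64 only i64 is a legal scalar integer type, so common i32 operations
  // are custom-lowered here so they can later be matched to their *W
  // instruction forms.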
  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);

    setOperationAction(ISD::LOAD, MVT::i32, Custom);
    setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL},
                       MVT::i32, Custom);
    setOperationAction({ISD::UADDO, ISD::USUBO}, MVT::i32, Custom);
    if (!Subtarget.hasStdExtZbb())
      setOperationAction(
          {ISD::SADDSAT, ISD::SSUBSAT, ISD::UADDSAT, ISD::USUBSAT}, MVT::i32,
          Custom);
    setOperationAction(ISD::SADDO, MVT::i32, Custom);
  }
  if (!Subtarget.hasStdExtZmmul()) {
    setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU}, XLenVT, Expand);
  } else if (Subtarget.is64Bit()) {
    setOperationAction(ISD::MUL, MVT::i128, Custom);
    setOperationAction(ISD::MUL, MVT::i32, Custom);
  } else {
    setOperationAction(ISD::MUL, MVT::i64, Custom);
  }

  if (!Subtarget.hasStdExtM()) {
    setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, XLenVT,
                       Expand);
  } else if (Subtarget.is64Bit()) {
    setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM},
                       {MVT::i8, MVT::i16, MVT::i32}, Custom);
  }

  setOperationAction(
      {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, XLenVT,
      Expand);

  setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, XLenVT,
                     Custom);

  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
    if (Subtarget.is64Bit())
      setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
  } else if (Subtarget.hasVendorXTHeadBb()) {
    if (Subtarget.is64Bit())
      setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
    setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Custom);
  } else if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
    setOperationAction(ISD::ROTL, XLenVT, Expand);
  } else {
    setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Expand);
  }

  // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
  // pattern match it directly in isel.
  setOperationAction(ISD::BSWAP, XLenVT,
                     (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
                      Subtarget.hasVendorXTHeadBb())
                         ? Legal
                         : Expand);

  if ((Subtarget.hasVendorXCVbitmanip() || Subtarget.hasVendorXqcibm()) &&
      !Subtarget.is64Bit()) {
    setOperationAction(ISD::BITREVERSE, XLenVT, Legal);
  } else {
    // Zbkb can use rev8+brev8 to implement bitreverse.
    setOperationAction(ISD::BITREVERSE, XLenVT,
                       Subtarget.hasStdExtZbkb() ? Custom : Expand);
    if (Subtarget.hasStdExtZbkb())
      setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
  }

  if (Subtarget.hasStdExtZbb() ||
      (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
    setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT,
                       Legal);
  }

  if (Subtarget.hasStdExtZbb() ||
      (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
    if (Subtarget.is64Bit())
      setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);
  } else {
    setOperationAction(ISD::CTTZ, XLenVT, Expand);
    // TODO: These should be set to LibCall, but this currently breaks
    // the Linux kernel build. See #101786. Lacks i128 tests, too.
    if (Subtarget.is64Bit())
      setOperationAction(ISD::CTPOP, MVT::i128, Expand);
    else
      setOperationAction(ISD::CTPOP, MVT::i32, Expand);
    setOperationAction(ISD::CTPOP, MVT::i64, Expand);
  }

  if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
      (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
    // We need the custom lowering to make sure that the resulting sequence
    // for the 32bit case is efficient on 64bit targets.
    if (Subtarget.is64Bit())
      setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);
  } else {
    setOperationAction(ISD::CTLZ, XLenVT, Expand);
  }

  if (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()) {
    setOperationAction(ISD::ABS, XLenVT, Legal);
  } else if (Subtarget.hasShortForwardBranchOpt()) {
    // We can use PseudoCCSUB to implement ABS.
    setOperationAction(ISD::ABS, XLenVT, Legal);
  } else if (Subtarget.is64Bit()) {
    setOperationAction(ISD::ABS, MVT::i32, Custom);
  }

  if (!Subtarget.useCCMovInsn() && !Subtarget.hasVendorXTHeadCondMov() &&
      !Subtarget.hasVendorXqcicm())
    setOperationAction(ISD::SELECT, XLenVT, Custom);

  if (Subtarget.hasVendorXqcia() && !Subtarget.is64Bit()) {
    setOperationAction(ISD::UADDSAT, MVT::i32, Legal);
    setOperationAction(ISD::SADDSAT, MVT::i32, Legal);
    setOperationAction(ISD::USUBSAT, MVT::i32, Legal);
    setOperationAction(ISD::SSUBSAT, MVT::i32, Legal);
    setOperationAction(ISD::SSHLSAT, MVT::i32, Legal);
    setOperationAction(ISD::USHLSAT, MVT::i32, Legal);
  }

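  // Shared tables of ISD opcodes used to configure the scalar FP actions
  // below; which table applies to f16/bf16/f32/f64 depends on the available
  // F/D/Zfh/Zfa features (or their Zfinx counterparts).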
  static const unsigned FPLegalNodeTypes[] = {
      ISD::FMINNUM,       ISD::FMAXNUM,        ISD::FMINIMUMNUM,
      ISD::FMAXIMUMNUM,   ISD::LRINT,          ISD::LLRINT,
      ISD::LROUND,        ISD::LLROUND,        ISD::STRICT_LRINT,
      ISD::STRICT_LLRINT, ISD::STRICT_LROUND,  ISD::STRICT_LLROUND,
      ISD::STRICT_FMA,    ISD::STRICT_FADD,    ISD::STRICT_FSUB,
      ISD::STRICT_FMUL,   ISD::STRICT_FDIV,    ISD::STRICT_FSQRT,
      ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, ISD::FCANONICALIZE};

  static const ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
      ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
      ISD::SETGE,  ISD::SETNE,  ISD::SETO,   ISD::SETUO};

  static const unsigned FPOpToExpand[] = {
      ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW,
      ISD::FREM};

  static const unsigned FPRndMode[] = {
      ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
      ISD::FROUNDEVEN};

  static const unsigned ZfhminZfbfminPromoteOps[] = {
      ISD::FMINNUM,      ISD::FMAXNUM,       ISD::FMAXIMUMNUM,
      ISD::FMINIMUMNUM,  ISD::FADD,          ISD::FSUB,
      ISD::FMUL,         ISD::FMA,           ISD::FDIV,
      ISD::FSQRT,        ISD::STRICT_FMA,    ISD::STRICT_FADD,
      ISD::STRICT_FSUB,  ISD::STRICT_FMUL,   ISD::STRICT_FDIV,
      ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
      ISD::SETCC,        ISD::FCEIL,         ISD::FFLOOR,
      ISD::FTRUNC,       ISD::FRINT,         ISD::FROUND,
      ISD::FROUNDEVEN,   ISD::FCANONICALIZE};

  if (Subtarget.hasStdExtZfbfmin()) {
    setOperationAction(ISD::BITCAST, MVT::i16, Custom);
    setOperationAction(ISD::ConstantFP, MVT::bf16, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::bf16, Expand);
    setOperationAction(ISD::SELECT, MVT::bf16, Custom);
    setOperationAction(ISD::BR_CC, MVT::bf16, Expand);
    setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
    setOperationAction(ISD::FREM, MVT::bf16, Promote);
    setOperationAction(ISD::FABS, MVT::bf16, Custom);
    setOperationAction(ISD::FNEG, MVT::bf16, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Custom);
    setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, XLenVT, Custom);
    setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, XLenVT, Custom);
  }

  if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
    if (Subtarget.hasStdExtZfhOrZhinx()) {
      setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
      setOperationAction(FPRndMode, MVT::f16,
                         Subtarget.hasStdExtZfa() ? Legal : Custom);
      setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom);
      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16,
                         Subtarget.hasStdExtZfa() ? Legal : Custom);
      if (Subtarget.hasStdExtZfa())
        setOperationAction(ISD::ConstantFP, MVT::f16, Custom);
    } else {
      setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Promote);
      for (auto Op : {ISD::LROUND, ISD::LLROUND, ISD::LRINT, ISD::LLRINT,
                      ISD::STRICT_LROUND, ISD::STRICT_LLROUND,
                      ISD::STRICT_LRINT, ISD::STRICT_LLRINT})
        setOperationAction(Op, MVT::f16, Custom);
      setOperationAction(ISD::FABS, MVT::f16, Custom);
      setOperationAction(ISD::FNEG, MVT::f16, Custom);
      setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
      setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, XLenVT, Custom);
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, XLenVT, Custom);
    }

    setOperationAction(ISD::BITCAST, MVT::i16, Custom);

    setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
    setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
    setOperationAction(ISD::SELECT, MVT::f16, Custom);
    setOperationAction(ISD::BR_CC, MVT::f16, Expand);

    setOperationAction(
        ISD::FNEARBYINT, MVT::f16,
        Subtarget.hasStdExtZfh() && Subtarget.hasStdExtZfa() ? Legal : Promote);
    setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI,
                        ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
                        ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
                        ISD::FLOG10, ISD::FLDEXP, ISD::FFREXP},
                       MVT::f16, Promote);

    // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
    // complete support for all operations in LegalizeDAG.
    setOperationAction({ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR,
                        ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT,
                        ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN,
                        ISD::STRICT_FTRUNC, ISD::STRICT_FLDEXP},
                       MVT::f16, Promote);

    // We need to custom promote this.
    if (Subtarget.is64Bit())
      setOperationAction(ISD::FPOWI, MVT::i32, Custom);
  }

  if (Subtarget.hasStdExtFOrZfinx()) {
    setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
    setOperationAction(FPRndMode, MVT::f32,
                       Subtarget.hasStdExtZfa() ? Legal : Custom);
    setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Custom);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    setOperationAction(FPOpToExpand, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
    setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
    setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom);
    setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
    setOperationAction(ISD::FP_TO_BF16, MVT::f32,
                       Subtarget.isSoftFPABI() ? LibCall : Custom);
    setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
    setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f32, Custom);
    setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f32, Custom);

    if (Subtarget.hasStdExtZfa()) {
      setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
      setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Legal);
    } else {
      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom);
    }
  }

  if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);

  if (Subtarget.hasStdExtDOrZdinx()) {
    setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);

    if (!Subtarget.is64Bit())
      setOperationAction(ISD::BITCAST, MVT::i64, Custom);

    if (Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
        !Subtarget.is64Bit()) {
      setOperationAction(ISD::LOAD, MVT::f64, Custom);
      setOperationAction(ISD::STORE, MVT::f64, Custom);
    }

    if (Subtarget.hasStdExtZfa()) {
      setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
      setOperationAction(FPRndMode, MVT::f64, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Legal);
    } else {
      if (Subtarget.is64Bit())
        setOperationAction(FPRndMode, MVT::f64, Custom);

      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Custom);
    }

    setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
    setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    setOperationAction(FPOpToExpand, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
    setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
    setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom);
    setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
    setOperationAction(ISD::FP_TO_BF16, MVT::f64,
                       Subtarget.isSoftFPABI() ? LibCall : Custom);
    setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
    setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
    setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f64, Custom);
    setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f64, Expand);
  }

  if (Subtarget.is64Bit()) {
    setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT,
                        ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT},
                       MVT::i32, Custom);
    setOperationAction(ISD::LROUND, MVT::i32, Custom);
  }

  if (Subtarget.hasStdExtFOrZfinx()) {
    setOperationAction({ISD::FP_TO_UINT_SAT, ISD::FP_TO_SINT_SAT}, XLenVT,
                       Custom);

    // f16/bf16 require custom handling.
    setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT}, XLenVT,
                       Custom);
    setOperationAction({ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP}, XLenVT,
                       Custom);

    setOperationAction(ISD::GET_ROUNDING, XLenVT, Custom);
    setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
    setOperationAction(ISD::GET_FPENV, XLenVT, Custom);
    setOperationAction(ISD::SET_FPENV, XLenVT, Custom);
    setOperationAction(ISD::RESET_FPENV, MVT::Other, Custom);
  }

  setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
                      ISD::JumpTable},
                     XLenVT, Custom);

  setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);

  if (Subtarget.is64Bit())
    setOperationAction(ISD::Constant, MVT::i64, Custom);

  // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
  // Unfortunately this can't be determined just from the ISA naming string.
  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
                     Subtarget.is64Bit() ? Legal : Custom);
  setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64,
                     Subtarget.is64Bit() ? Legal : Custom);

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
    setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
  }

  setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  if (Subtarget.is64Bit())
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);

  if (Subtarget.hasVendorXMIPSCBOP())
    setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
  else if (Subtarget.hasStdExtZicbop())
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);

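  // Atomics: with the A extension, operations up to XLEN bits are supported
  // natively. Zabha together with Zacas additionally provides 8/16-bit
  // compare-and-swap; otherwise narrower cmpxchg is expanded via a masked
  // 32-bit sequence.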
  if (Subtarget.hasStdExtA()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
    if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
      setMinCmpXchgSizeInBits(8);
    else
      setMinCmpXchgSizeInBits(32);
  } else if (Subtarget.hasForcedAtomics()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
  } else {
    setMaxAtomicSizeInBitsSupported(0);
  }

  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  setBooleanContents(ZeroOrOneBooleanContent);

  if (getTargetMachine().getTargetTriple().isOSLinux()) {
    // Custom lowering of llvm.clear_cache.
    setOperationAction(ISD::CLEAR_CACHE, MVT::Other, Custom);
  }

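  // Lowering configuration for the RVV vector extension: scalable vector
  // types first, then fixed-length vectors mapped onto them.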
  if (Subtarget.hasVInstructions()) {
    setBooleanVectorContents(ZeroOrOneBooleanContent);

    setOperationAction(ISD::VSCALE, XLenVT, Custom);

    // RVV intrinsics may have illegal operands.
    // We also need to custom legalize vmv.x.s.
    setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN,
                        ISD::INTRINSIC_VOID},
                       {MVT::i8, MVT::i16}, Custom);
    if (Subtarget.is64Bit())
      setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},
                         MVT::i32, Custom);
    else
      setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN},
                         MVT::i64, Custom);

    setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},
                       MVT::Other, Custom);

    static const unsigned IntegerVPOps[] = {
        ISD::VP_ADD,         ISD::VP_SUB,         ISD::VP_MUL,
        ISD::VP_SDIV,        ISD::VP_UDIV,        ISD::VP_SREM,
        ISD::VP_UREM,        ISD::VP_AND,         ISD::VP_OR,
        ISD::VP_XOR,         ISD::VP_SRA,         ISD::VP_SRL,
        ISD::VP_SHL,         ISD::VP_REDUCE_ADD,  ISD::VP_REDUCE_AND,
        ISD::VP_REDUCE_OR,   ISD::VP_REDUCE_XOR,  ISD::VP_REDUCE_SMAX,
        ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
        ISD::VP_MERGE,       ISD::VP_SELECT,      ISD::VP_FP_TO_SINT,
        ISD::VP_FP_TO_UINT,  ISD::VP_SETCC,       ISD::VP_SIGN_EXTEND,
        ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE,    ISD::VP_SMIN,
        ISD::VP_SMAX,        ISD::VP_UMIN,        ISD::VP_UMAX,
        ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
        ISD::VP_SADDSAT,     ISD::VP_UADDSAT,     ISD::VP_SSUBSAT,
        ISD::VP_USUBSAT,     ISD::VP_CTTZ_ELTS,   ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
        ISD::EXPERIMENTAL_VP_SPLAT};

    static const unsigned FloatingPointVPOps[] = {
        ISD::VP_FADD,        ISD::VP_FSUB,        ISD::VP_FMUL,
        ISD::VP_FDIV,        ISD::VP_FNEG,        ISD::VP_FABS,
        ISD::VP_FMA,         ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
        ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
        ISD::VP_SELECT,      ISD::VP_SINT_TO_FP,  ISD::VP_UINT_TO_FP,
        ISD::VP_SETCC,       ISD::VP_FP_ROUND,    ISD::VP_FP_EXTEND,
        ISD::VP_SQRT,        ISD::VP_FMINNUM,     ISD::VP_FMAXNUM,
        ISD::VP_FCEIL,       ISD::VP_FFLOOR,      ISD::VP_FROUND,
        ISD::VP_FROUNDEVEN,  ISD::VP_FCOPYSIGN,   ISD::VP_FROUNDTOZERO,
        ISD::VP_FRINT,       ISD::VP_FNEARBYINT,  ISD::VP_IS_FPCLASS,
        ISD::VP_FMINIMUM,    ISD::VP_FMAXIMUM,    ISD::VP_LRINT,
        ISD::VP_LLRINT,      ISD::VP_REDUCE_FMINIMUM,
        ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};

    static const unsigned IntegerVecReduceOps[] = {
        ISD::VECREDUCE_ADD,  ISD::VECREDUCE_AND,  ISD::VECREDUCE_OR,
        ISD::VECREDUCE_XOR,  ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
        ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN};

    static const unsigned FloatingPointVecReduceOps[] = {
        ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN,
        ISD::VECREDUCE_FMAX, ISD::VECREDUCE_FMINIMUM, ISD::VECREDUCE_FMAXIMUM};

    static const unsigned FloatingPointLibCallOps[] = {
        ISD::FREM,  ISD::FPOW,   ISD::FCOS, ISD::FSIN,  ISD::FSINCOS, ISD::FEXP,
        ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2, ISD::FLOG10};

    if (!Subtarget.is64Bit()) {
      // We must custom-lower certain vXi64 operations on RV32 due to the
      // vector element type being illegal.
      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
                         MVT::i64, Custom);

      setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);

      setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
                          ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
                          ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
                          ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
                         MVT::i64, Custom);
    }

    for (MVT VT : BoolVecVTs) {
      if (!isTypeLegal(VT))
        continue;

      setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);

      // Mask VTs are custom-expanded into a series of standard nodes
      setOperationAction({ISD::TRUNCATE, ISD::CONCAT_VECTORS,
                          ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
                          ISD::SCALAR_TO_VECTOR},
                         VT, Custom);

      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
                         Custom);

      setOperationAction(ISD::SELECT, VT, Custom);
      setOperationAction({ISD::SELECT_CC, ISD::VSELECT, ISD::VP_SELECT}, VT,
                         Expand);
      setOperationAction(ISD::VP_MERGE, VT, Custom);

      setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
                         Custom);

      setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);

      setOperationAction(
          {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
          Custom);

      setOperationAction(
          {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
          Custom);

      // RVV has native int->float & float->int conversions where the
      // element type sizes are within one power-of-two of each other. Any
      // wider distances between type sizes have to be lowered as sequences
      // which progressively narrow the gap in stages.
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
                          ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,
                          ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,
                          ISD::STRICT_FP_TO_UINT},
                         VT, Custom);
      setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
                         Custom);

      // Expand all extending loads to types larger than this, and truncating
      // stores from types larger than this.
      for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
        setTruncStoreAction(VT, OtherVT, Expand);
        setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
                         OtherVT, Expand);
      }

      setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
                          ISD::VP_TRUNCATE, ISD::VP_SETCC},
                         VT, Custom);

      setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
      setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);

      setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);

      setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
      setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
      setOperationAction(ISD::EXPERIMENTAL_VP_SPLAT, VT, Custom);

      setOperationPromotedToType(
          ISD::VECTOR_SPLICE, VT,
          MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
    }

    for (MVT VT : IntVecVTs) {
      if (!isTypeLegal(VT))
        continue;

      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);

      // Vectors implement MULHS/MULHU.
      setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT, Expand);

      // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
      if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
        setOperationAction({ISD::MULHU, ISD::MULHS}, VT, Expand);

      setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, VT,
                         Legal);

      setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Custom);

      // Custom-lower extensions and truncations from/to mask types.
      setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND},
                         VT, Custom);

      // RVV has native int->float & float->int conversions where the
      // element type sizes are within one power-of-two of each other. Any
      // wider distances between type sizes have to be lowered as sequences
      // which progressively narrow the gap in stages.
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
                          ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,
                          ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,
                          ISD::STRICT_FP_TO_UINT},
                         VT, Custom);
      setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
                         Custom);
      setOperationAction({ISD::AVGFLOORS, ISD::AVGFLOORU, ISD::AVGCEILS,
                          ISD::AVGCEILU, ISD::SADDSAT, ISD::UADDSAT,
                          ISD::SSUBSAT, ISD::USUBSAT},
                         VT, Legal);

      // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
      // nodes which truncate by one power of two at a time.
      setOperationAction(
          {ISD::TRUNCATE, ISD::TRUNCATE_SSAT_S, ISD::TRUNCATE_USAT_U}, VT,
          Custom);

      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
                         Custom);

      // Custom-lower reduction operations to set up the corresponding custom
      // nodes' operands.
      setOperationAction(IntegerVecReduceOps, VT, Custom);

      setOperationAction(IntegerVPOps, VT, Custom);

      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

      setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
                         VT, Custom);

      setOperationAction(
          {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
           ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
          VT, Custom);

      setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
                          ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
                         VT, Custom);

      setOperationAction(ISD::SELECT, VT, Custom);
      setOperationAction(ISD::SELECT_CC, VT, Expand);

      setOperationAction({ISD::STEP_VECTOR, ISD::VECTOR_REVERSE}, VT, Custom);

      for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
        setTruncStoreAction(VT, OtherVT, Expand);
        setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
                         OtherVT, Expand);
      }

      setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
      setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);

      // Splice
      setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);

      if (Subtarget.hasStdExtZvkb()) {
        setOperationAction(ISD::BSWAP, VT, Legal);
        setOperationAction(ISD::VP_BSWAP, VT, Custom);
      } else {
        setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
        setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand);
      }

      if (Subtarget.hasStdExtZvbb()) {
        setOperationAction(ISD::BITREVERSE, VT, Legal);
        setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
        setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
                            ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
                           VT, Custom);
      } else {
        setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
        setOperationAction({ISD::CTLZ, ISD::CTTZ, ISD::CTPOP}, VT, Expand);
        setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
                            ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
                           VT, Expand);

        // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT
        // is in the range of f32.
        EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
        if (isTypeLegal(FloatVT)) {
          setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
                              ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
                              ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
                             VT, Custom);
        }
      }

      setOperationAction(ISD::VECTOR_COMPRESS, VT, Custom);
    }

    for (MVT VT : VecTupleVTs) {
      if (!isTypeLegal(VT))
        continue;

      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
    }

    // Expand various CCs to best match the RVV ISA, which natively supports
    // UNE but no other unordered comparisons, and supports all ordered
    // comparisons except ONE. Additionally, we expand GT,OGT,GE,OGE for
    // optimization purposes; they are expanded to their swapped-operand CCs
    // (LT,OLT,LE,OLE), and we pattern-match those back to the "original",
    // swapping operands once more. This way we catch both operations and both
    // "vf" and "fv" forms with fewer patterns.
    static const ISD::CondCode VFPCCToExpand[] = {
        ISD::SETO,   ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
        ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
        ISD::SETGT,  ISD::SETOGT, ISD::SETGE,  ISD::SETOGE,
    };

    // TODO: support more ops.
    static const unsigned ZvfhminZvfbfminPromoteOps[] = {
        ISD::FMINNUM,
        ISD::FMAXNUM,
        ISD::FMINIMUMNUM,
        ISD::FMAXIMUMNUM,
        ISD::FADD,
        ISD::FSUB,
        ISD::FMUL,
        ISD::FMA,
        ISD::FDIV,
        ISD::FSQRT,
        ISD::FCEIL,
        ISD::FTRUNC,
        ISD::FFLOOR,
        ISD::FROUND,
        ISD::FROUNDEVEN,
        ISD::FRINT,
        ISD::FNEARBYINT,
        ISD::IS_FPCLASS,
        ISD::SETCC,
        ISD::FMAXIMUM,
        ISD::FMINIMUM,
        ISD::STRICT_FADD,
        ISD::STRICT_FSUB,
        ISD::STRICT_FMUL,
        ISD::STRICT_FDIV,
        ISD::STRICT_FSQRT,
        ISD::STRICT_FMA,
        ISD::VECREDUCE_FMIN,
        ISD::VECREDUCE_FMAX,
        ISD::VECREDUCE_FMINIMUM,
        ISD::VECREDUCE_FMAXIMUM};

    // TODO: support more vp ops.
    static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
        ISD::VP_FADD,
        ISD::VP_FSUB,
        ISD::VP_FMUL,
        ISD::VP_FDIV,
        ISD::VP_FMA,
        ISD::VP_REDUCE_FMIN,
        ISD::VP_REDUCE_FMAX,
        ISD::VP_SQRT,
        ISD::VP_FMINNUM,
        ISD::VP_FMAXNUM,
        ISD::VP_FCEIL,
        ISD::VP_FFLOOR,
        ISD::VP_FROUND,
        ISD::VP_FROUNDEVEN,
        ISD::VP_FROUNDTOZERO,
        ISD::VP_FRINT,
        ISD::VP_FNEARBYINT,
        ISD::VP_SETCC,
        ISD::VP_FMINIMUM,
        ISD::VP_FMAXIMUM,
        ISD::VP_REDUCE_FMINIMUM,
        ISD::VP_REDUCE_FMAXIMUM};

    // Sets common operation actions on RVV floating-point vector types.
    const auto SetCommonVFPActions = [&](MVT VT) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
      // sizes are within one power-of-two of each other. Therefore conversions
      // between vXf16 and vXf64 must be lowered as sequences which convert via
      // vXf32.
      setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
      setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
                         Custom);
      // Expand various condition codes (explained above).
      setCondCodeAction(VFPCCToExpand, VT, Expand);

      setOperationAction(
          {ISD::FMINNUM, ISD::FMAXNUM, ISD::FMAXIMUMNUM, ISD::FMINIMUMNUM}, VT,
          Legal);
      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom);

      setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
                          ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT,
                          ISD::IS_FPCLASS},
                         VT, Custom);

      setOperationAction(FloatingPointVecReduceOps, VT, Custom);

      // Expand FP operations that need libcalls.
      setOperationAction(FloatingPointLibCallOps, VT, Expand);

      setOperationAction(ISD::FCOPYSIGN, VT, Legal);

      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

      setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
                         VT, Custom);

      setOperationAction(
          {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
           ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
          VT, Custom);

      setOperationAction(ISD::SELECT, VT, Custom);
      setOperationAction(ISD::SELECT_CC, VT, Expand);

      setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
                          ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
                         VT, Custom);

      setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
      setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);

      setOperationAction({ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE}, VT, Custom);
      setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
      setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);

      setOperationAction(FloatingPointVPOps, VT, Custom);

      setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT,
                         Custom);
      setOperationAction({ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
                          ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA},
                         VT, Legal);
      setOperationAction({ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
                          ISD::STRICT_FTRUNC, ISD::STRICT_FCEIL,
                          ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
                          ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
                         VT, Custom);

      setOperationAction(ISD::VECTOR_COMPRESS, VT, Custom);
    };

    // Sets common extload/truncstore actions on RVV floating-point vector
    // types.
    const auto SetCommonVFPExtLoadTruncStoreActions =
        [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
          for (auto SmallVT : SmallerVTs) {
            setTruncStoreAction(VT, SmallVT, Expand);
            setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
          }
        };

    // Sets common actions for f16 and bf16 for when there's only
    // zvfhmin/zvfbfmin and we need to promote to f32 for most operations.
    const auto SetCommonPromoteToF32Actions = [&](MVT VT) {
      setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
      setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
                         Custom);
      setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
      setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
                         Custom);
      setOperationAction(ISD::SELECT_CC, VT, Expand);
      setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::CONCAT_VECTORS,
                          ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
                          ISD::VECTOR_DEINTERLEAVE, ISD::VECTOR_INTERLEAVE,
                          ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE,
                          ISD::VECTOR_COMPRESS},
                         VT, Custom);
      setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
      setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
      MVT EltVT = VT.getVectorElementType();
      if (isTypeLegal(EltVT))
        setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
                            ISD::EXTRACT_VECTOR_ELT},
                           VT, Custom);
      else
        setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
                           EltVT, Custom);
      setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
                          ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
                          ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
                          ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
                          ISD::VP_SCATTER},
                         VT, Custom);

      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::FABS, VT, Expand);
      setOperationAction(ISD::FCOPYSIGN, VT, Expand);

      // Expand FP operations that need libcalls.
      setOperationAction(FloatingPointLibCallOps, VT, Expand);

      // Custom split nxv32f16/nxv32bf16 since the promoted type nxv32f32 is
      // not legal.
      if (getLMUL(VT) == RISCVVType::LMUL_8) {
        setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
        setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
      } else {
        MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
        setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
        setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
      }
    };

    if (Subtarget.hasVInstructionsF16()) {
      for (MVT VT : F16VecVTs) {
        if (!isTypeLegal(VT))
          continue;
        SetCommonVFPActions(VT);
      }
    } else if (Subtarget.hasVInstructionsF16Minimal()) {
      for (MVT VT : F16VecVTs) {
        if (!isTypeLegal(VT))
          continue;
        SetCommonPromoteToF32Actions(VT);
      }
    }

    if (Subtarget.hasVInstructionsBF16Minimal()) {
      for (MVT VT : BF16VecVTs) {
        if (!isTypeLegal(VT))
          continue;
        SetCommonPromoteToF32Actions(VT);
      }
    }

    if (Subtarget.hasVInstructionsF32()) {
      for (MVT VT : F32VecVTs) {
        if (!isTypeLegal(VT))
          continue;
        SetCommonVFPActions(VT);
        SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
        SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
      }
    }

    if (Subtarget.hasVInstructionsF64()) {
      for (MVT VT : F64VecVTs) {
        if (!isTypeLegal(VT))
          continue;
        SetCommonVFPActions(VT);
        SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
        SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
        SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
      }
    }

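    // Fixed-length vectors are code generated by operating on them inside
    // scalable containers, so every operation defaults to Expand here and is
    // then selectively overridden with Custom lowering per element type.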
    if (Subtarget.useRVVForFixedLengthVectors()) {
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
        if (!useRVVForFixedLengthVectorVT(VT))
          continue;

        // By default everything must be expanded.
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);
        for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
          setTruncStoreAction(VT, OtherVT, Expand);
          setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
                           OtherVT, Expand);
        }

        // Custom lower fixed vector undefs to scalable vector undefs to avoid
        // expansion to a build_vector of 0s.
        setOperationAction(ISD::UNDEF, VT, Custom);

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
        setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
                           Custom);

        setOperationAction(
            {ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS, ISD::VECTOR_REVERSE}, VT,
            Custom);

        setOperationAction({ISD::VECTOR_INTERLEAVE, ISD::VECTOR_DEINTERLEAVE},
                           VT, Custom);

        setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
                           VT, Custom);

        setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);

        setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);

        setOperationAction(ISD::SETCC, VT, Custom);

        setOperationAction(ISD::SELECT, VT, Custom);

        setOperationAction(
            {ISD::TRUNCATE, ISD::TRUNCATE_SSAT_S, ISD::TRUNCATE_USAT_U}, VT,
            Custom);

        setOperationAction(ISD::BITCAST, VT, Custom);

        setOperationAction(
            {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
            Custom);

        setOperationAction(
            {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
            Custom);

        setOperationAction(
            {
                ISD::SINT_TO_FP,
                ISD::UINT_TO_FP,
                ISD::FP_TO_SINT,
                ISD::FP_TO_UINT,
                ISD::STRICT_SINT_TO_FP,
                ISD::STRICT_UINT_TO_FP,
                ISD::STRICT_FP_TO_SINT,
                ISD::STRICT_FP_TO_UINT,
            },
            VT, Custom);
        setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
                           Custom);

        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);

1312       // Operations below are different between masks and other vectors.
1313 if (VT.getVectorElementType() == MVT::i1) {
1314 setOperationAction(Ops: {ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1315 ISD::OR, ISD::XOR},
1316 VT, Action: Custom);
1317
1318 setOperationAction(Ops: {ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1319 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1320 VT, Action: Custom);
1321
1322 setOperationAction(Op: ISD::VP_MERGE, VT, Action: Custom);
1323
1324 setOperationAction(Op: ISD::EXPERIMENTAL_VP_SPLICE, VT, Action: Custom);
1325 setOperationAction(Op: ISD::EXPERIMENTAL_VP_REVERSE, VT, Action: Custom);
1326 continue;
1327 }
1328
1329 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1330 // it before type legalization for i64 vectors on RV32. It will then be
1331 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1332 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1333 // improvements first.
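// For example, on RV32 a splat of an i64 value X is first combined to
// (splat_vector X) and then type-legalized to (splat_vector_parts Xlo, Xhi)
// carrying the two i32 halves, which the Custom handler then lowers.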
1334 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1335 setOperationAction(Op: ISD::SPLAT_VECTOR, VT, Action: Legal);
1336 setOperationAction(Op: ISD::SPLAT_VECTOR_PARTS, VT, Action: Custom);
1337
1338 // Lower BUILD_VECTOR with i64 type to VID on RV32 if possible.
1339 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::i64, Action: Custom);
1340 }
1341
1342 setOperationAction(
1343 Ops: {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Action: Custom);
1344
1345 setOperationAction(Ops: {ISD::VP_LOAD, ISD::VP_STORE,
1346 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1347 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1348 ISD::VP_SCATTER},
1349 VT, Action: Custom);
1350
1351 setOperationAction(Ops: {ISD::ADD, ISD::MUL, ISD::SUB, ISD::AND, ISD::OR,
1352 ISD::XOR, ISD::SDIV, ISD::SREM, ISD::UDIV,
1353 ISD::UREM, ISD::SHL, ISD::SRA, ISD::SRL},
1354 VT, Action: Custom);
1355
1356 setOperationAction(
1357 Ops: {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::ABS}, VT, Action: Custom);
1358
1359 setOperationAction(Ops: {ISD::ABDS, ISD::ABDU}, VT, Action: Custom);
1360
1361 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1362 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1363 setOperationAction(Ops: {ISD::MULHS, ISD::MULHU}, VT, Action: Custom);
1364
1365 setOperationAction(Ops: {ISD::AVGFLOORS, ISD::AVGFLOORU, ISD::AVGCEILS,
1366 ISD::AVGCEILU, ISD::SADDSAT, ISD::UADDSAT,
1367 ISD::SSUBSAT, ISD::USUBSAT},
1368 VT, Action: Custom);
1369
1370 setOperationAction(Op: ISD::VSELECT, VT, Action: Custom);
1371
1372 setOperationAction(
1373 Ops: {ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}, VT, Action: Custom);
1374
1375 // Custom-lower reduction operations to set up the corresponding custom
1376 // nodes' operands.
1377 setOperationAction(Ops: {ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX,
1378 ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX,
1379 ISD::VECREDUCE_UMIN},
1380 VT, Action: Custom);
1381
1382 setOperationAction(Ops: IntegerVPOps, VT, Action: Custom);
1383
1384 if (Subtarget.hasStdExtZvkb())
1385 setOperationAction(Ops: {ISD::BSWAP, ISD::ROTL, ISD::ROTR}, VT, Action: Custom);
1386
1387 if (Subtarget.hasStdExtZvbb()) {
1388 setOperationAction(Ops: {ISD::BITREVERSE, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
1389 ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTPOP},
1390 VT, Action: Custom);
1391 } else {
1392        // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT
1393        // is in the range of f32.
1394 EVT FloatVT = MVT::getVectorVT(VT: MVT::f32, EC: VT.getVectorElementCount());
1395 if (isTypeLegal(VT: FloatVT))
1396 setOperationAction(
1397 Ops: {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
1398 Action: Custom);
1399 }
1400
1401 setOperationAction(Op: ISD::VECTOR_COMPRESS, VT, Action: Custom);
1402 }
1403
1404 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
1405 // There are no extending loads or truncating stores.
1406 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1407 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand);
1408 setTruncStoreAction(ValVT: VT, MemVT: InnerVT, Action: Expand);
1409 }
1410
1411 if (!useRVVForFixedLengthVectorVT(VT))
1412 continue;
1413
1414 // By default everything must be expanded.
1415 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1416 setOperationAction(Op, VT, Action: Expand);
1417
1418 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1419 // expansion to a build_vector of 0s.
1420 setOperationAction(Op: ISD::UNDEF, VT, Action: Custom);
1421
1422 setOperationAction(Ops: {ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
1423 ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
1424 ISD::EXTRACT_SUBVECTOR, ISD::VECTOR_REVERSE,
1425 ISD::VECTOR_SHUFFLE, ISD::VECTOR_COMPRESS},
1426 VT, Action: Custom);
1427 setOperationAction(Op: ISD::EXPERIMENTAL_VP_SPLICE, VT, Action: Custom);
1428 setOperationAction(Op: ISD::EXPERIMENTAL_VP_REVERSE, VT, Action: Custom);
1429
1430 setOperationAction(Ops: {ISD::VECTOR_INTERLEAVE, ISD::VECTOR_DEINTERLEAVE},
1431 VT, Action: Custom);
1432
1433 setOperationAction(Ops: {ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
1434 ISD::MGATHER, ISD::MSCATTER},
1435 VT, Action: Custom);
1436 setOperationAction(Ops: {ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER,
1437 ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1438 ISD::EXPERIMENTAL_VP_STRIDED_STORE},
1439 VT, Action: Custom);
1440
1441 setOperationAction(Ops: {ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Action: Custom);
1442 setOperationAction(Ops: {ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
1443 Action: Custom);
1444
1445 if (VT.getVectorElementType() == MVT::f16 &&
1446 !Subtarget.hasVInstructionsF16()) {
1447 setOperationAction(Op: ISD::BITCAST, VT, Action: Custom);
1448 setOperationAction(Ops: {ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Action: Custom);
1449 setOperationAction(
1450 Ops: {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1451 Action: Custom);
1452 setOperationAction(Ops: {ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT,
1453 Action: Custom);
1454 if (Subtarget.hasStdExtZfhmin()) {
1455 setOperationAction(Op: ISD::BUILD_VECTOR, VT, Action: Custom);
1456 } else {
1457 // We need to custom legalize f16 build vectors if Zfhmin isn't
1458 // available.
1459 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::f16, Action: Custom);
1460 }
1461 setOperationAction(Op: ISD::FNEG, VT, Action: Expand);
1462 setOperationAction(Op: ISD::FABS, VT, Action: Expand);
1463 setOperationAction(Op: ISD::FCOPYSIGN, VT, Action: Expand);
1464 MVT F32VecVT = MVT::getVectorVT(VT: MVT::f32, EC: VT.getVectorElementCount());
1465          // Don't promote f16 vector operations to f32 if the f32 vector type
1466          // is not legal.
1467 // TODO: could split the f16 vector into two vectors and do promotion.
1468 if (!isTypeLegal(VT: F32VecVT))
1469 continue;
1470 setOperationPromotedToType(Ops: ZvfhminZvfbfminPromoteOps, OrigVT: VT, DestVT: F32VecVT);
1471 setOperationPromotedToType(Ops: ZvfhminZvfbfminPromoteVPOps, OrigVT: VT, DestVT: F32VecVT);
1472 continue;
1473 }
1474
1475 if (VT.getVectorElementType() == MVT::bf16) {
1476 setOperationAction(Op: ISD::BITCAST, VT, Action: Custom);
1477 setOperationAction(Ops: {ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Action: Custom);
1478 if (Subtarget.hasStdExtZfbfmin()) {
1479 setOperationAction(Op: ISD::BUILD_VECTOR, VT, Action: Custom);
1480 } else {
1481 // We need to custom legalize bf16 build vectors if Zfbfmin isn't
1482 // available.
1483 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::bf16, Action: Custom);
1484 }
1485 setOperationAction(
1486 Ops: {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1487 Action: Custom);
1488 MVT F32VecVT = MVT::getVectorVT(VT: MVT::f32, EC: VT.getVectorElementCount());
1489          // Don't promote bf16 vector operations to f32 if the f32 vector type
1490          // is not legal.
1491          // TODO: could split the bf16 vector into two vectors and do promotion.
1492 if (!isTypeLegal(VT: F32VecVT))
1493 continue;
1494 setOperationPromotedToType(Ops: ZvfhminZvfbfminPromoteOps, OrigVT: VT, DestVT: F32VecVT);
1495 // TODO: Promote VP ops to fp32.
1496 continue;
1497 }
1498
1499 setOperationAction(Ops: {ISD::BUILD_VECTOR, ISD::SCALAR_TO_VECTOR}, VT,
1500 Action: Custom);
1501
1502 setOperationAction(Ops: {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV,
1503 ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,
1504 ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM,
1505 ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM, ISD::IS_FPCLASS,
1506 ISD::FMAXIMUM, ISD::FMINIMUM},
1507 VT, Action: Custom);
1508
1509 setOperationAction(Ops: {ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
1510 ISD::FROUNDEVEN, ISD::FRINT, ISD::LRINT,
1511 ISD::LLRINT, ISD::FNEARBYINT},
1512 VT, Action: Custom);
1513
1514 setCondCodeAction(CCs: VFPCCToExpand, VT, Action: Expand);
1515
1516 setOperationAction(Op: ISD::SETCC, VT, Action: Custom);
1517 setOperationAction(Ops: {ISD::VSELECT, ISD::SELECT}, VT, Action: Custom);
1518
1519 setOperationAction(Op: ISD::BITCAST, VT, Action: Custom);
1520
1521 setOperationAction(Ops: FloatingPointVecReduceOps, VT, Action: Custom);
1522
1523 setOperationAction(Ops: FloatingPointVPOps, VT, Action: Custom);
1524
1525 setOperationAction(
1526 Ops: {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
1527 ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA,
1528 ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, ISD::STRICT_FTRUNC,
1529 ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
1530 ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
1531 VT, Action: Custom);
1532 }
1533
1534 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1535 setOperationAction(Ops: ISD::BITCAST, VTs: {MVT::i8, MVT::i16, MVT::i32}, Action: Custom);
1536 if (Subtarget.is64Bit())
1537 setOperationAction(Op: ISD::BITCAST, VT: MVT::i64, Action: Custom);
1538 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1539 setOperationAction(Op: ISD::BITCAST, VT: MVT::f16, Action: Custom);
1540 if (Subtarget.hasStdExtZfbfmin())
1541 setOperationAction(Op: ISD::BITCAST, VT: MVT::bf16, Action: Custom);
1542 if (Subtarget.hasStdExtFOrZfinx())
1543 setOperationAction(Op: ISD::BITCAST, VT: MVT::f32, Action: Custom);
1544 if (Subtarget.hasStdExtDOrZdinx())
1545 setOperationAction(Op: ISD::BITCAST, VT: MVT::f64, Action: Custom);
1546 }
1547 }
1548
1549 if (Subtarget.hasStdExtA())
1550 setOperationAction(Op: ISD::ATOMIC_LOAD_SUB, VT: XLenVT, Action: Expand);
1551
1552 if (Subtarget.hasForcedAtomics()) {
1553 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1554 setOperationAction(
1555 Ops: {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD,
1556 ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,
1557 ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN,
1558 ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX},
1559 VT: XLenVT, Action: LibCall);
1560 }
1561
1562 if (Subtarget.hasVendorXTHeadMemIdx()) {
1563 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1564 setIndexedLoadAction(IdxModes: im, VT: MVT::i8, Action: Legal);
1565 setIndexedStoreAction(IdxModes: im, VT: MVT::i8, Action: Legal);
1566 setIndexedLoadAction(IdxModes: im, VT: MVT::i16, Action: Legal);
1567 setIndexedStoreAction(IdxModes: im, VT: MVT::i16, Action: Legal);
1568 setIndexedLoadAction(IdxModes: im, VT: MVT::i32, Action: Legal);
1569 setIndexedStoreAction(IdxModes: im, VT: MVT::i32, Action: Legal);
1570
1571 if (Subtarget.is64Bit()) {
1572 setIndexedLoadAction(IdxModes: im, VT: MVT::i64, Action: Legal);
1573 setIndexedStoreAction(IdxModes: im, VT: MVT::i64, Action: Legal);
1574 }
1575 }
1576 }
1577
1578 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
1579 setIndexedLoadAction(IdxModes: ISD::POST_INC, VT: MVT::i8, Action: Legal);
1580 setIndexedLoadAction(IdxModes: ISD::POST_INC, VT: MVT::i16, Action: Legal);
1581 setIndexedLoadAction(IdxModes: ISD::POST_INC, VT: MVT::i32, Action: Legal);
1582
1583 setIndexedStoreAction(IdxModes: ISD::POST_INC, VT: MVT::i8, Action: Legal);
1584 setIndexedStoreAction(IdxModes: ISD::POST_INC, VT: MVT::i16, Action: Legal);
1585 setIndexedStoreAction(IdxModes: ISD::POST_INC, VT: MVT::i32, Action: Legal);
1586 }
1587
1588 // zve32x is broken for partial_reduce_umla, but let's not make it worse.
1589 if (Subtarget.hasStdExtZvqdotq() && Subtarget.getELen() >= 64) {
1590 static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
1591 ISD::PARTIAL_REDUCE_UMLA,
1592 ISD::PARTIAL_REDUCE_SUMLA};
1593 setPartialReduceMLAAction(Opcodes: MLAOps, AccVT: MVT::nxv1i32, InputVT: MVT::nxv4i8, Action: Custom);
1594 setPartialReduceMLAAction(Opcodes: MLAOps, AccVT: MVT::nxv2i32, InputVT: MVT::nxv8i8, Action: Custom);
1595 setPartialReduceMLAAction(Opcodes: MLAOps, AccVT: MVT::nxv4i32, InputVT: MVT::nxv16i8, Action: Custom);
1596 setPartialReduceMLAAction(Opcodes: MLAOps, AccVT: MVT::nxv8i32, InputVT: MVT::nxv32i8, Action: Custom);
1597 setPartialReduceMLAAction(Opcodes: MLAOps, AccVT: MVT::nxv16i32, InputVT: MVT::nxv64i8, Action: Custom);
1598
1599 if (Subtarget.useRVVForFixedLengthVectors()) {
1600 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
1601 if (VT.getVectorElementType() != MVT::i32 ||
1602 !useRVVForFixedLengthVectorVT(VT))
1603 continue;
1604 ElementCount EC = VT.getVectorElementCount();
1605 MVT ArgVT = MVT::getVectorVT(VT: MVT::i8, EC: EC.multiplyCoefficientBy(RHS: 4));
1606 setPartialReduceMLAAction(Opcodes: MLAOps, AccVT: VT, InputVT: ArgVT, Action: Custom);
1607 }
1608 }
1609 }
1610
1611 // Function alignments.
1612 const Align FunctionAlignment(Subtarget.hasStdExtZca() ? 2 : 4);
1613 setMinFunctionAlignment(FunctionAlignment);
1614 // Set preferred alignments.
1615 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
1616 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
1617
1618 setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN,
1619 ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::MUL,
1620 ISD::AND, ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT});
1621 setTargetDAGCombine(ISD::SRA);
1622 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
1623
1624 if (Subtarget.hasStdExtFOrZfinx())
1625 setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM, ISD::FMUL});
1626
1627 if (Subtarget.hasStdExtZbb())
1628 setTargetDAGCombine({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN});
1629
1630 if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||
1631 Subtarget.hasVInstructions())
1632 setTargetDAGCombine(ISD::TRUNCATE);
1633
1634 if (Subtarget.hasStdExtZbkb())
1635 setTargetDAGCombine(ISD::BITREVERSE);
1636
1637 if (Subtarget.hasStdExtFOrZfinx())
1638 setTargetDAGCombine({ISD::ZERO_EXTEND, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
1639 ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT});
1640 if (Subtarget.hasVInstructions())
1641 setTargetDAGCombine(
1642 {ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
1643 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA,
1644 ISD::SRL, ISD::SHL, ISD::STORE,
1645 ISD::SPLAT_VECTOR, ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
1646 ISD::VP_STORE, ISD::VP_TRUNCATE, ISD::EXPERIMENTAL_VP_REVERSE,
1647 ISD::MUL, ISD::SDIV, ISD::UDIV,
1648 ISD::SREM, ISD::UREM, ISD::INSERT_VECTOR_ELT,
1649 ISD::ABS, ISD::CTPOP, ISD::VECTOR_SHUFFLE,
1650 ISD::VSELECT, ISD::VECREDUCE_ADD});
1651
1652 if (Subtarget.hasVendorXTHeadMemPair())
1653 setTargetDAGCombine({ISD::LOAD, ISD::STORE});
1654 if (Subtarget.useRVVForFixedLengthVectors())
1655 setTargetDAGCombine(ISD::BITCAST);
1656
1657 // Disable strict node mutation.
1658 IsStrictFPEnabled = true;
1659 EnableExtLdPromotion = true;
1660
1661 // Let the subtarget decide if a predictable select is more expensive than the
1662 // corresponding branch. This information is used in CGP/SelectOpt to decide
1663 // when to convert selects into branches.
1664 PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
1665
1666 MaxStoresPerMemsetOptSize = Subtarget.getMaxStoresPerMemset(/*OptSize=*/true);
1667 MaxStoresPerMemset = Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);
1668
1669 MaxGluedStoresPerMemcpy = Subtarget.getMaxGluedStoresPerMemcpy();
1670 MaxStoresPerMemcpyOptSize = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/true);
1671 MaxStoresPerMemcpy = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/false);
1672
1673 MaxStoresPerMemmoveOptSize =
1674 Subtarget.getMaxStoresPerMemmove(/*OptSize=*/true);
1675 MaxStoresPerMemmove = Subtarget.getMaxStoresPerMemmove(/*OptSize=*/false);
1676
1677 MaxLoadsPerMemcmpOptSize = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/true);
1678 MaxLoadsPerMemcmp = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/false);
1679}
1680
1681EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
1682 LLVMContext &Context,
1683 EVT VT) const {
1684 if (!VT.isVector())
1685 return getPointerTy(DL);
1686 if (Subtarget.hasVInstructions() &&
1687 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1688 return EVT::getVectorVT(Context, VT: MVT::i1, EC: VT.getVectorElementCount());
1689 return VT.changeVectorElementTypeToInteger();
1690}
1691
1692MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1693 return Subtarget.getXLenVT();
1694}
1695
1696// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1697bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1698 unsigned VF,
1699 bool IsScalable) const {
1700 if (!Subtarget.hasVInstructions())
1701 return true;
1702
1703 if (!IsScalable)
1704 return true;
1705
1706 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1707 return true;
1708
1709  // Don't allow VF=1 if those types aren't legal.
1710 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1711 return true;
1712
1713 // VLEN=32 support is incomplete.
1714 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1715 return true;
1716
1717 // The maximum VF is for the smallest element width with LMUL=8.
1718 // VF must be a power of 2.
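// For example, with the usual RVVBitsPerBlock of 64 this gives
// MaxVF = 8 bytes * 8 = 64, i.e. the minimum element count of an SEW=8,
// LMUL=8 vector (nxv64i8).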
1719 unsigned MaxVF = RISCV::RVVBytesPerBlock * 8;
1720 return VF > MaxVF || !isPowerOf2_32(Value: VF);
1721}
1722
1723bool RISCVTargetLowering::shouldExpandCttzElements(EVT VT) const {
1724 return !Subtarget.hasVInstructions() ||
1725 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1726}
1727
1728bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1729 const CallInst &I,
1730 MachineFunction &MF,
1731 unsigned Intrinsic) const {
1732 auto &DL = I.getDataLayout();
1733
1734 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1735 bool IsUnitStrided, bool UsePtrVal = false) {
1736 Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN;
1737 // We can't use ptrVal if the intrinsic can access memory before the
1738 // pointer. This means we can't use it for strided or indexed intrinsics.
1739 if (UsePtrVal)
1740 Info.ptrVal = I.getArgOperand(i: PtrOp);
1741 else
1742 Info.fallbackAddressSpace =
1743 I.getArgOperand(i: PtrOp)->getType()->getPointerAddressSpace();
1744 Type *MemTy;
1745 if (IsStore) {
1746 // Store value is the first operand.
1747 MemTy = I.getArgOperand(i: 0)->getType();
1748 } else {
1749      // Use the return type. If it's a segment load, the return type is a struct.
1750 MemTy = I.getType();
1751 if (MemTy->isStructTy())
1752 MemTy = MemTy->getStructElementType(N: 0);
1753 }
1754 if (!IsUnitStrided)
1755 MemTy = MemTy->getScalarType();
1756
1757 Info.memVT = getValueType(DL, Ty: MemTy);
1758 if (MemTy->isTargetExtTy()) {
1759 // RISC-V vector tuple type's alignment type should be its element type.
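// The code below reads the last intrinsic operand as log2 of the element
// width in bits; e.g. a value of 5 selects i32 (1 << 5 bits) and therefore
// a 4-byte ABI alignment.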
1760 if (cast<TargetExtType>(Val: MemTy)->getName() == "riscv.vector.tuple")
1761 MemTy = Type::getIntNTy(
1762 C&: MemTy->getContext(),
1763 N: 1 << cast<ConstantInt>(Val: I.getArgOperand(i: I.arg_size() - 1))
1764 ->getZExtValue());
1765 Info.align = DL.getABITypeAlign(Ty: MemTy);
1766 } else {
1767 Info.align = Align(DL.getTypeSizeInBits(Ty: MemTy->getScalarType()) / 8);
1768 }
1769 Info.size = MemoryLocation::UnknownSize;
1770 Info.flags |=
1771 IsStore ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
1772 return true;
1773 };
1774
1775 if (I.hasMetadata(KindID: LLVMContext::MD_nontemporal))
1776 Info.flags |= MachineMemOperand::MONonTemporal;
1777
1778 Info.flags |= RISCVTargetLowering::getTargetMMOFlags(I);
1779 switch (Intrinsic) {
1780 default:
1781 return false;
1782 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1783 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1784 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1785 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1786 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1787 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1788 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1789 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1790 case Intrinsic::riscv_masked_cmpxchg_i32:
1791 Info.opc = ISD::INTRINSIC_W_CHAIN;
1792 Info.memVT = MVT::i32;
1793 Info.ptrVal = I.getArgOperand(i: 0);
1794 Info.offset = 0;
1795 Info.align = Align(4);
1796 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
1797 MachineMemOperand::MOVolatile;
1798 return true;
1799 case Intrinsic::riscv_seg2_load_mask:
1800 case Intrinsic::riscv_seg3_load_mask:
1801 case Intrinsic::riscv_seg4_load_mask:
1802 case Intrinsic::riscv_seg5_load_mask:
1803 case Intrinsic::riscv_seg6_load_mask:
1804 case Intrinsic::riscv_seg7_load_mask:
1805 case Intrinsic::riscv_seg8_load_mask:
1806 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1807 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1808 case Intrinsic::riscv_seg2_store_mask:
1809 case Intrinsic::riscv_seg3_store_mask:
1810 case Intrinsic::riscv_seg4_store_mask:
1811 case Intrinsic::riscv_seg5_store_mask:
1812 case Intrinsic::riscv_seg6_store_mask:
1813 case Intrinsic::riscv_seg7_store_mask:
1814 case Intrinsic::riscv_seg8_store_mask:
1815 // Operands are (vec, ..., vec, ptr, mask, vl)
1816 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1817 /*IsStore*/ true,
1818 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1819 case Intrinsic::riscv_vle:
1820 case Intrinsic::riscv_vle_mask:
1821 case Intrinsic::riscv_vleff:
1822 case Intrinsic::riscv_vleff_mask:
1823 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1824 /*IsStore*/ false,
1825 /*IsUnitStrided*/ true,
1826 /*UsePtrVal*/ true);
1827 case Intrinsic::riscv_vse:
1828 case Intrinsic::riscv_vse_mask:
1829 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1830 /*IsStore*/ true,
1831 /*IsUnitStrided*/ true,
1832 /*UsePtrVal*/ true);
1833 case Intrinsic::riscv_vlse:
1834 case Intrinsic::riscv_vlse_mask:
1835 case Intrinsic::riscv_vloxei:
1836 case Intrinsic::riscv_vloxei_mask:
1837 case Intrinsic::riscv_vluxei:
1838 case Intrinsic::riscv_vluxei_mask:
1839 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1840 /*IsStore*/ false,
1841 /*IsUnitStrided*/ false);
1842 case Intrinsic::riscv_vsse:
1843 case Intrinsic::riscv_vsse_mask:
1844 case Intrinsic::riscv_vsoxei:
1845 case Intrinsic::riscv_vsoxei_mask:
1846 case Intrinsic::riscv_vsuxei:
1847 case Intrinsic::riscv_vsuxei_mask:
1848 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1849 /*IsStore*/ true,
1850 /*IsUnitStrided*/ false);
1851 case Intrinsic::riscv_vlseg2:
1852 case Intrinsic::riscv_vlseg3:
1853 case Intrinsic::riscv_vlseg4:
1854 case Intrinsic::riscv_vlseg5:
1855 case Intrinsic::riscv_vlseg6:
1856 case Intrinsic::riscv_vlseg7:
1857 case Intrinsic::riscv_vlseg8:
1858 case Intrinsic::riscv_vlseg2ff:
1859 case Intrinsic::riscv_vlseg3ff:
1860 case Intrinsic::riscv_vlseg4ff:
1861 case Intrinsic::riscv_vlseg5ff:
1862 case Intrinsic::riscv_vlseg6ff:
1863 case Intrinsic::riscv_vlseg7ff:
1864 case Intrinsic::riscv_vlseg8ff:
1865 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1866 /*IsStore*/ false,
1867 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1868 case Intrinsic::riscv_vlseg2_mask:
1869 case Intrinsic::riscv_vlseg3_mask:
1870 case Intrinsic::riscv_vlseg4_mask:
1871 case Intrinsic::riscv_vlseg5_mask:
1872 case Intrinsic::riscv_vlseg6_mask:
1873 case Intrinsic::riscv_vlseg7_mask:
1874 case Intrinsic::riscv_vlseg8_mask:
1875 case Intrinsic::riscv_vlseg2ff_mask:
1876 case Intrinsic::riscv_vlseg3ff_mask:
1877 case Intrinsic::riscv_vlseg4ff_mask:
1878 case Intrinsic::riscv_vlseg5ff_mask:
1879 case Intrinsic::riscv_vlseg6ff_mask:
1880 case Intrinsic::riscv_vlseg7ff_mask:
1881 case Intrinsic::riscv_vlseg8ff_mask:
1882 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1883 /*IsStore*/ false,
1884 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1885 case Intrinsic::riscv_vlsseg2:
1886 case Intrinsic::riscv_vlsseg3:
1887 case Intrinsic::riscv_vlsseg4:
1888 case Intrinsic::riscv_vlsseg5:
1889 case Intrinsic::riscv_vlsseg6:
1890 case Intrinsic::riscv_vlsseg7:
1891 case Intrinsic::riscv_vlsseg8:
1892 case Intrinsic::riscv_vloxseg2:
1893 case Intrinsic::riscv_vloxseg3:
1894 case Intrinsic::riscv_vloxseg4:
1895 case Intrinsic::riscv_vloxseg5:
1896 case Intrinsic::riscv_vloxseg6:
1897 case Intrinsic::riscv_vloxseg7:
1898 case Intrinsic::riscv_vloxseg8:
1899 case Intrinsic::riscv_vluxseg2:
1900 case Intrinsic::riscv_vluxseg3:
1901 case Intrinsic::riscv_vluxseg4:
1902 case Intrinsic::riscv_vluxseg5:
1903 case Intrinsic::riscv_vluxseg6:
1904 case Intrinsic::riscv_vluxseg7:
1905 case Intrinsic::riscv_vluxseg8:
1906 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1907 /*IsStore*/ false,
1908 /*IsUnitStrided*/ false);
1909 case Intrinsic::riscv_vlsseg2_mask:
1910 case Intrinsic::riscv_vlsseg3_mask:
1911 case Intrinsic::riscv_vlsseg4_mask:
1912 case Intrinsic::riscv_vlsseg5_mask:
1913 case Intrinsic::riscv_vlsseg6_mask:
1914 case Intrinsic::riscv_vlsseg7_mask:
1915 case Intrinsic::riscv_vlsseg8_mask:
1916 case Intrinsic::riscv_vloxseg2_mask:
1917 case Intrinsic::riscv_vloxseg3_mask:
1918 case Intrinsic::riscv_vloxseg4_mask:
1919 case Intrinsic::riscv_vloxseg5_mask:
1920 case Intrinsic::riscv_vloxseg6_mask:
1921 case Intrinsic::riscv_vloxseg7_mask:
1922 case Intrinsic::riscv_vloxseg8_mask:
1923 case Intrinsic::riscv_vluxseg2_mask:
1924 case Intrinsic::riscv_vluxseg3_mask:
1925 case Intrinsic::riscv_vluxseg4_mask:
1926 case Intrinsic::riscv_vluxseg5_mask:
1927 case Intrinsic::riscv_vluxseg6_mask:
1928 case Intrinsic::riscv_vluxseg7_mask:
1929 case Intrinsic::riscv_vluxseg8_mask:
1930 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 6,
1931 /*IsStore*/ false,
1932 /*IsUnitStrided*/ false);
1933 case Intrinsic::riscv_vsseg2:
1934 case Intrinsic::riscv_vsseg3:
1935 case Intrinsic::riscv_vsseg4:
1936 case Intrinsic::riscv_vsseg5:
1937 case Intrinsic::riscv_vsseg6:
1938 case Intrinsic::riscv_vsseg7:
1939 case Intrinsic::riscv_vsseg8:
1940 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1941 /*IsStore*/ true,
1942 /*IsUnitStrided*/ false);
1943 case Intrinsic::riscv_vsseg2_mask:
1944 case Intrinsic::riscv_vsseg3_mask:
1945 case Intrinsic::riscv_vsseg4_mask:
1946 case Intrinsic::riscv_vsseg5_mask:
1947 case Intrinsic::riscv_vsseg6_mask:
1948 case Intrinsic::riscv_vsseg7_mask:
1949 case Intrinsic::riscv_vsseg8_mask:
1950 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1951 /*IsStore*/ true,
1952 /*IsUnitStrided*/ false);
1953 case Intrinsic::riscv_vssseg2:
1954 case Intrinsic::riscv_vssseg3:
1955 case Intrinsic::riscv_vssseg4:
1956 case Intrinsic::riscv_vssseg5:
1957 case Intrinsic::riscv_vssseg6:
1958 case Intrinsic::riscv_vssseg7:
1959 case Intrinsic::riscv_vssseg8:
1960 case Intrinsic::riscv_vsoxseg2:
1961 case Intrinsic::riscv_vsoxseg3:
1962 case Intrinsic::riscv_vsoxseg4:
1963 case Intrinsic::riscv_vsoxseg5:
1964 case Intrinsic::riscv_vsoxseg6:
1965 case Intrinsic::riscv_vsoxseg7:
1966 case Intrinsic::riscv_vsoxseg8:
1967 case Intrinsic::riscv_vsuxseg2:
1968 case Intrinsic::riscv_vsuxseg3:
1969 case Intrinsic::riscv_vsuxseg4:
1970 case Intrinsic::riscv_vsuxseg5:
1971 case Intrinsic::riscv_vsuxseg6:
1972 case Intrinsic::riscv_vsuxseg7:
1973 case Intrinsic::riscv_vsuxseg8:
1974 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1975 /*IsStore*/ true,
1976 /*IsUnitStrided*/ false);
1977 case Intrinsic::riscv_vssseg2_mask:
1978 case Intrinsic::riscv_vssseg3_mask:
1979 case Intrinsic::riscv_vssseg4_mask:
1980 case Intrinsic::riscv_vssseg5_mask:
1981 case Intrinsic::riscv_vssseg6_mask:
1982 case Intrinsic::riscv_vssseg7_mask:
1983 case Intrinsic::riscv_vssseg8_mask:
1984 case Intrinsic::riscv_vsoxseg2_mask:
1985 case Intrinsic::riscv_vsoxseg3_mask:
1986 case Intrinsic::riscv_vsoxseg4_mask:
1987 case Intrinsic::riscv_vsoxseg5_mask:
1988 case Intrinsic::riscv_vsoxseg6_mask:
1989 case Intrinsic::riscv_vsoxseg7_mask:
1990 case Intrinsic::riscv_vsoxseg8_mask:
1991 case Intrinsic::riscv_vsuxseg2_mask:
1992 case Intrinsic::riscv_vsuxseg3_mask:
1993 case Intrinsic::riscv_vsuxseg4_mask:
1994 case Intrinsic::riscv_vsuxseg5_mask:
1995 case Intrinsic::riscv_vsuxseg6_mask:
1996 case Intrinsic::riscv_vsuxseg7_mask:
1997 case Intrinsic::riscv_vsuxseg8_mask:
1998 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1999 /*IsStore*/ true,
2000 /*IsUnitStrided*/ false);
2001 }
2002}
2003
2004bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
2005 const AddrMode &AM, Type *Ty,
2006 unsigned AS,
2007 Instruction *I) const {
2008 // No global is ever allowed as a base.
2009 if (AM.BaseGV)
2010 return false;
2011
2012 // None of our addressing modes allows a scalable offset
2013 if (AM.ScalableOffset)
2014 return false;
2015
2016 // RVV instructions only support register addressing.
2017 if (Subtarget.hasVInstructions() && isa<VectorType>(Val: Ty))
2018 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
2019
2020 // Require a 12-bit signed offset.
2021 if (!isInt<12>(x: AM.BaseOffs))
2022 return false;
2023
2024 switch (AM.Scale) {
2025 case 0: // "r+i" or just "i", depending on HasBaseReg.
2026 break;
2027 case 1:
2028 if (!AM.HasBaseReg) // allow "r+i".
2029 break;
2030 return false; // disallow "r+r" or "r+r+i".
2031 default:
2032 return false;
2033 }
2034
2035 return true;
2036}
2037
2038bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
2039 return isInt<12>(x: Imm);
2040}
2041
2042bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
2043 return isInt<12>(x: Imm);
2044}
2045
2046// On RV32, 64-bit integers are split into their high and low parts and held
2047// in two different registers, so the trunc is free since the low register can
2048// just be used.
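// For example, an i64 held in a register pair {hi, lo} truncates to i32 by
// simply reading the low register; no instruction is needed.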
2049// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
2050// isTruncateFree?
2051bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
2052 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
2053 return false;
2054 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
2055 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
2056 return (SrcBits == 64 && DestBits == 32);
2057}
2058
2059bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
2060 // We consider i64->i32 free on RV64 since we have good selection of W
2061 // instructions that make promoting operations back to i64 free in many cases.
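// For example, an i32 add of two values truncated from i64 can select a
// single ADDW, so no explicit truncation instructions are emitted.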
2062 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
2063 !DstVT.isInteger())
2064 return false;
2065 unsigned SrcBits = SrcVT.getSizeInBits();
2066 unsigned DestBits = DstVT.getSizeInBits();
2067 return (SrcBits == 64 && DestBits == 32);
2068}
2069
2070bool RISCVTargetLowering::isTruncateFree(SDValue Val, EVT VT2) const {
2071 EVT SrcVT = Val.getValueType();
2072  // Free truncate from vnsrl and vnsra.
2073 if (Subtarget.hasVInstructions() &&
2074 (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) &&
2075 SrcVT.isVector() && VT2.isVector()) {
2076 unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits();
2077 unsigned DestBits = VT2.getVectorElementType().getSizeInBits();
2078 if (SrcBits == DestBits * 2) {
2079 return true;
2080 }
2081 }
2082 return TargetLowering::isTruncateFree(Val, VT2);
2083}
2084
2085bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
2086 // Zexts are free if they can be combined with a load.
2087 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
2088 // poorly with type legalization of compares preferring sext.
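// For example, a zext of an i8 load folds into a single LBU, so the zext
// itself costs nothing.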
2089 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
2090 EVT MemVT = LD->getMemoryVT();
2091 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
2092 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
2093 LD->getExtensionType() == ISD::ZEXTLOAD))
2094 return true;
2095 }
2096
2097 return TargetLowering::isZExtFree(Val, VT2);
2098}
2099
2100bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
2101 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
2102}
2103
2104bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const {
2105 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(Bitwidth: 32);
2106}
2107
2108bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
2109 return Subtarget.hasStdExtZbb() ||
2110 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
2111}
2112
2113bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
2114 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
2115 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
2116}
2117
2118bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(
2119 const Instruction &AndI) const {
2120 // We expect to be able to match a bit extraction instruction if the Zbs
2121 // extension is supported and the mask is a power of two. However, we
2122 // conservatively return false if the mask would fit in an ANDI instruction,
2123 // on the basis that it's possible the sinking+duplication of the AND in
2124 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
2125 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
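// For example, (X & 1024) == 0 fits an ANDI immediate, so we return false
// here, while (X & 65536) == 0 does not and benefits from BEXTI.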
2126 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
2127 return false;
2128 ConstantInt *Mask = dyn_cast<ConstantInt>(Val: AndI.getOperand(i: 1));
2129 if (!Mask)
2130 return false;
2131 return !Mask->getValue().isSignedIntN(N: 12) && Mask->getValue().isPowerOf2();
2132}
2133
2134bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {
2135 EVT VT = Y.getValueType();
2136
2137 if (VT.isVector())
2138 return false;
2139
2140 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
2141 (!isa<ConstantSDNode>(Val: Y) || cast<ConstantSDNode>(Val&: Y)->isOpaque());
2142}
2143
2144bool RISCVTargetLowering::hasAndNot(SDValue Y) const {
2145 EVT VT = Y.getValueType();
2146
2147 if (!VT.isVector())
2148 return hasAndNotCompare(Y);
2149
2150 return Subtarget.hasStdExtZvkb();
2151}
2152
2153bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
2154 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
2155 if (Subtarget.hasStdExtZbs())
2156 return X.getValueType().isScalarInteger();
2157 auto *C = dyn_cast<ConstantSDNode>(Val&: Y);
2158 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
2159 if (Subtarget.hasVendorXTHeadBs())
2160 return C != nullptr;
2161 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
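// Bit positions up to 10 keep the mask (1 << Y) within ANDI's 12-bit signed
// immediate range (1 << 11 would be 2048, which does not fit).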
2162 return C && C->getAPIntValue().ule(RHS: 10);
2163}
2164
2165bool RISCVTargetLowering::shouldFoldSelectWithIdentityConstant(
2166 unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X,
2167 SDValue Y) const {
2168 if (SelectOpcode != ISD::VSELECT)
2169 return false;
2170
2171 // Only enable for rvv.
2172 if (!VT.isVector() || !Subtarget.hasVInstructions())
2173 return false;
2174
2175 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
2176 return false;
2177
2178 return true;
2179}
2180
2181bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
2182 Type *Ty) const {
2183 assert(Ty->isIntegerTy());
2184
2185 unsigned BitSize = Ty->getIntegerBitWidth();
2186 if (BitSize > Subtarget.getXLen())
2187 return false;
2188
2189 // Fast path, assume 32-bit immediates are cheap.
2190 int64_t Val = Imm.getSExtValue();
2191 if (isInt<32>(x: Val))
2192 return true;
2193
2194 // A constant pool entry may be more aligned than the load we're trying to
2195 // replace. If we don't support unaligned scalar mem, prefer the constant
2196 // pool.
2197 // TODO: Can the caller pass down the alignment?
2198 if (!Subtarget.enableUnalignedScalarMem())
2199 return true;
2200
2201 // Prefer to keep the load if it would require many instructions.
2202 // This uses the same threshold we use for constant pools but doesn't
2203 // check useConstantPoolForLargeInts.
2204 // TODO: Should we keep the load only when we're definitely going to emit a
2205 // constant pool?
2206
2207 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, STI: Subtarget);
2208 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
2209}
2210
2211bool RISCVTargetLowering::
2212 shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
2213 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
2214 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
2215 SelectionDAG &DAG) const {
2216 // One interesting pattern that we'd want to form is 'bit extract':
2217 // ((1 >> Y) & 1) ==/!= 0
2218 // But we also need to be careful not to try to reverse that fold.
2219
2220 // Is this '((1 >> Y) & 1)'?
2221 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
2222 return false; // Keep the 'bit extract' pattern.
2223
2224 // Will this be '((1 >> Y) & 1)' after the transform?
2225 if (NewShiftOpcode == ISD::SRL && CC->isOne())
2226 return true; // Do form the 'bit extract' pattern.
2227
2228 // If 'X' is a constant, and we transform, then we will immediately
2229 // try to undo the fold, thus causing endless combine loop.
2230 // So only do the transform if X is not a constant. This matches the default
2231 // implementation of this function.
2232 return !XC;
2233}
2234
2235bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
2236 unsigned Opc = VecOp.getOpcode();
2237
2238 // Assume target opcodes can't be scalarized.
2239 // TODO - do we have any exceptions?
2240 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opcode: Opc))
2241 return false;
2242
2243 // If the vector op is not supported, try to convert to scalar.
2244 EVT VecVT = VecOp.getValueType();
2245 if (!isOperationLegalOrCustomOrPromote(Op: Opc, VT: VecVT))
2246 return true;
2247
2248 // If the vector op is supported, but the scalar op is not, the transform may
2249 // not be worthwhile.
2250  // Permit the transform if the vector binary operation can be converted to
2251  // a scalar binary operation that is custom lowered with an illegal type.
2252 EVT ScalarVT = VecVT.getScalarType();
2253 return isOperationLegalOrCustomOrPromote(Op: Opc, VT: ScalarVT) ||
2254 isOperationCustom(Op: Opc, VT: ScalarVT);
2255}
2256
2257bool RISCVTargetLowering::isOffsetFoldingLegal(
2258 const GlobalAddressSDNode *GA) const {
2259 // In order to maximise the opportunity for common subexpression elimination,
2260 // keep a separate ADD node for the global address offset instead of folding
2261 // it in the global address node. Later peephole optimisations may choose to
2262 // fold it back in when profitable.
2263 return false;
2264}
2265
2266 // Returns 0-31 if the fli instruction is available for the type and this is
2267 // a legal FP immediate for the type. Returns -1 otherwise.
2268int RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm, EVT VT) const {
2269 if (!Subtarget.hasStdExtZfa())
2270 return -1;
2271
2272 bool IsSupportedVT = false;
2273 if (VT == MVT::f16) {
2274 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2275 } else if (VT == MVT::f32) {
2276 IsSupportedVT = true;
2277 } else if (VT == MVT::f64) {
2278 assert(Subtarget.hasStdExtD() && "Expect D extension");
2279 IsSupportedVT = true;
2280 }
2281
2282 if (!IsSupportedVT)
2283 return -1;
2284
2285 return RISCVLoadFPImm::getLoadFPImm(FPImm: Imm);
2286}
2287
2288bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
2289 bool ForCodeSize) const {
2290 bool IsLegalVT = false;
2291 if (VT == MVT::f16)
2292 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2293 else if (VT == MVT::f32)
2294 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2295 else if (VT == MVT::f64)
2296 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2297 else if (VT == MVT::bf16)
2298 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2299
2300 if (!IsLegalVT)
2301 return false;
2302
2303 if (getLegalZfaFPImm(Imm, VT) >= 0)
2304 return true;
2305
2306  // Cannot create a 64-bit floating-point immediate value for RV32.
2307 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2308 // td can handle +0.0 or -0.0 already.
2309 // -0.0 can be created by fmv + fneg.
2310 return Imm.isZero();
2311 }
2312
2313 // Special case: fmv + fneg
2314 if (Imm.isNegZero())
2315 return true;
2316
2317 // Building an integer and then converting requires a fmv at the end of
2318 // the integer sequence. The fmv is not required for Zfinx.
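// For example, 1.0f (bit pattern 0x3F800000) needs a single LUI, so with
// the trailing FMV the total cost is 2 and it stays within the default
// FPImmCost threshold of 2.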
2319 const int FmvCost = Subtarget.hasStdExtZfinx() ? 0 : 1;
2320 const int Cost =
2321 FmvCost + RISCVMatInt::getIntMatCost(Val: Imm.bitcastToAPInt(),
2322 Size: Subtarget.getXLen(), STI: Subtarget);
2323 return Cost <= FPImmCost;
2324}
2325
2326// TODO: This is very conservative.
2327bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
2328 unsigned Index) const {
2329 if (!isOperationLegalOrCustom(Op: ISD::EXTRACT_SUBVECTOR, VT: ResVT))
2330 return false;
2331
2332 // Extracts from index 0 are just subreg extracts.
2333 if (Index == 0)
2334 return true;
2335
2336 // Only support extracting a fixed from a fixed vector for now.
2337 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2338 return false;
2339
2340 EVT EltVT = ResVT.getVectorElementType();
2341 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2342
2343 // The smallest type we can slide is i8.
2344 // TODO: We can extract index 0 from a mask vector without a slide.
2345 if (EltVT == MVT::i1)
2346 return false;
2347
2348 unsigned ResElts = ResVT.getVectorNumElements();
2349 unsigned SrcElts = SrcVT.getVectorNumElements();
2350
2351 unsigned MinVLen = Subtarget.getRealMinVLen();
2352 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2353
2354 // If we're extracting only data from the first VLEN bits of the source
2355 // then we can always do this with an m1 vslidedown.vx. Restricting the
2356 // Index ensures we can use a vslidedown.vi.
2357 // TODO: We can generalize this when the exact VLEN is known.
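// For example, with a minimum VLEN of 128 (MinVLMAX = 4 for i32 elements),
// extracting a v2i32 at index 2 from a v8i32 stays within the first 128
// bits of the source and is treated as cheap.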
2358 if (Index + ResElts <= MinVLMAX && Index < 31)
2359 return true;
2360
2361  // Conservatively only handle extracting half of a vector.
2362 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2363 // the upper half of a vector until we have more test coverage.
2364 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2365 // a cheap extract. However, this case is important in practice for
2366  // shuffled extracts of longer vectors. How should we resolve this?
2367 return (ResElts * 2) == SrcElts && (Index == 0 || Index == ResElts);
2368}
2369
2370MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
2371 CallingConv::ID CC,
2372 EVT VT) const {
2373 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2374 // We might still end up using a GPR but that will be decided based on ABI.
2375 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2376 !Subtarget.hasStdExtZfhminOrZhinxmin())
2377 return MVT::f32;
2378
2379 MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2380
2381 return PartVT;
2382}
2383
2384unsigned
2385RISCVTargetLowering::getNumRegisters(LLVMContext &Context, EVT VT,
2386 std::optional<MVT> RegisterVT) const {
2387 // Pair inline assembly operand
2388 if (VT == (Subtarget.is64Bit() ? MVT::i128 : MVT::i64) && RegisterVT &&
2389 *RegisterVT == MVT::Untyped)
2390 return 1;
2391
2392 return TargetLowering::getNumRegisters(Context, VT, RegisterVT);
2393}
2394
2395unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
2396 CallingConv::ID CC,
2397 EVT VT) const {
2398 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2399 // We might still end up using a GPR but that will be decided based on ABI.
2400 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2401 !Subtarget.hasStdExtZfhminOrZhinxmin())
2402 return 1;
2403
2404 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2405}
2406
2407unsigned RISCVTargetLowering::getVectorTypeBreakdownForCallingConv(
2408 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2409 unsigned &NumIntermediates, MVT &RegisterVT) const {
2410 unsigned NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv(
2411 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2412
2413 return NumRegs;
2414}
2415
2416// Changes the condition code and swaps operands if necessary, so the SetCC
2417// operation matches one of the comparisons supported directly by branches
2418// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2419// with 1/-1.
2420static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2421 ISD::CondCode &CC, SelectionDAG &DAG,
2422 const RISCVSubtarget &Subtarget) {
2423 // If this is a single bit test that can't be handled by ANDI, shift the
2424 // bit to be tested to the MSB and perform a signed compare with 0.
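// For example, on RV64 ((X & (1 << 32)) == 0) cannot use ANDI, so it is
// rewritten as (X << 31) >= 0 (signed): the tested bit becomes the sign bit.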
2425 if (isIntEqualitySetCC(Code: CC) && isNullConstant(V: RHS) &&
2426 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2427 isa<ConstantSDNode>(Val: LHS.getOperand(i: 1)) &&
2428 // XAndesPerf supports branch on test bit.
2429 !Subtarget.hasVendorXAndesPerf()) {
2430 uint64_t Mask = LHS.getConstantOperandVal(i: 1);
2431 if ((isPowerOf2_64(Value: Mask) || isMask_64(Value: Mask)) && !isInt<12>(x: Mask)) {
2432 unsigned ShAmt = 0;
2433 if (isPowerOf2_64(Value: Mask)) {
2434 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2435 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Value: Mask);
2436 } else {
2437 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Value: Mask);
2438 }
2439
2440 LHS = LHS.getOperand(i: 0);
2441 if (ShAmt != 0)
2442 LHS = DAG.getNode(Opcode: ISD::SHL, DL, VT: LHS.getValueType(), N1: LHS,
2443 N2: DAG.getConstant(Val: ShAmt, DL, VT: LHS.getValueType()));
2444 return;
2445 }
2446 }
2447
2448 if (auto *RHSC = dyn_cast<ConstantSDNode>(Val&: RHS)) {
2449 int64_t C = RHSC->getSExtValue();
2450 switch (CC) {
2451 default: break;
2452 case ISD::SETGT:
2453 // Convert X > -1 to X >= 0.
2454 if (C == -1) {
2455 RHS = DAG.getConstant(Val: 0, DL, VT: RHS.getValueType());
2456 CC = ISD::SETGE;
2457 return;
2458 }
2459 if (Subtarget.hasVendorXqcibi() && C != INT64_MAX && isInt<16>(x: C + 1)) {
2460 // We have a branch immediate instruction for SETGE but not SETGT.
2461 // Convert X > C to X >= C + 1, if (C + 1) is a 16-bit signed immediate.
2462 RHS = DAG.getSignedConstant(Val: C + 1, DL, VT: RHS.getValueType());
2463 CC = ISD::SETGE;
2464 return;
2465 }
2466 break;
2467 case ISD::SETLT:
2468 // Convert X < 1 to 0 >= X.
2469 if (C == 1) {
2470 RHS = LHS;
2471 LHS = DAG.getConstant(Val: 0, DL, VT: RHS.getValueType());
2472 CC = ISD::SETGE;
2473 return;
2474 }
2475 break;
2476 case ISD::SETUGT:
2477 if (Subtarget.hasVendorXqcibi() && C != INT64_MAX && isInt<16>(x: C + 1) &&
2478 C != -1) {
2479 // We have a branch immediate instruction for SETUGE but not SETUGT.
2480 // Convert X > C to X >= C + 1, if (C + 1) is a 16-bit signed immediate.
2481 RHS = DAG.getSignedConstant(Val: C + 1, DL, VT: RHS.getValueType());
2482 CC = ISD::SETUGE;
2483 return;
2484 }
2485 break;
2486 }
2487 }
2488
2489 switch (CC) {
2490 default:
2491 break;
2492 case ISD::SETGT:
2493 case ISD::SETLE:
2494 case ISD::SETUGT:
2495 case ISD::SETULE:
2496 CC = ISD::getSetCCSwappedOperands(Operation: CC);
2497 std::swap(a&: LHS, b&: RHS);
2498 break;
2499 }
2500}
2501
2502RISCVVType::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
2503 if (VT.isRISCVVectorTuple()) {
2504 if (VT.SimpleTy >= MVT::riscv_nxv1i8x2 &&
2505 VT.SimpleTy <= MVT::riscv_nxv1i8x8)
2506 return RISCVVType::LMUL_F8;
2507 if (VT.SimpleTy >= MVT::riscv_nxv2i8x2 &&
2508 VT.SimpleTy <= MVT::riscv_nxv2i8x8)
2509 return RISCVVType::LMUL_F4;
2510 if (VT.SimpleTy >= MVT::riscv_nxv4i8x2 &&
2511 VT.SimpleTy <= MVT::riscv_nxv4i8x8)
2512 return RISCVVType::LMUL_F2;
2513 if (VT.SimpleTy >= MVT::riscv_nxv8i8x2 &&
2514 VT.SimpleTy <= MVT::riscv_nxv8i8x8)
2515 return RISCVVType::LMUL_1;
2516 if (VT.SimpleTy >= MVT::riscv_nxv16i8x2 &&
2517 VT.SimpleTy <= MVT::riscv_nxv16i8x4)
2518 return RISCVVType::LMUL_2;
2519 if (VT.SimpleTy == MVT::riscv_nxv32i8x2)
2520 return RISCVVType::LMUL_4;
2521 llvm_unreachable("Invalid vector tuple type LMUL.");
2522 }
2523
2524 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2525 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2526 if (VT.getVectorElementType() == MVT::i1)
2527 KnownSize *= 8;
2528
2529 switch (KnownSize) {
2530 default:
2531 llvm_unreachable("Invalid LMUL.");
2532 case 8:
2533 return RISCVVType::LMUL_F8;
2534 case 16:
2535 return RISCVVType::LMUL_F4;
2536 case 32:
2537 return RISCVVType::LMUL_F2;
2538 case 64:
2539 return RISCVVType::LMUL_1;
2540 case 128:
2541 return RISCVVType::LMUL_2;
2542 case 256:
2543 return RISCVVType::LMUL_4;
2544 case 512:
2545 return RISCVVType::LMUL_8;
2546 }
2547}
2548
2549unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVVType::VLMUL LMul) {
2550 switch (LMul) {
2551 default:
2552 llvm_unreachable("Invalid LMUL.");
2553 case RISCVVType::LMUL_F8:
2554 case RISCVVType::LMUL_F4:
2555 case RISCVVType::LMUL_F2:
2556 case RISCVVType::LMUL_1:
2557 return RISCV::VRRegClassID;
2558 case RISCVVType::LMUL_2:
2559 return RISCV::VRM2RegClassID;
2560 case RISCVVType::LMUL_4:
2561 return RISCV::VRM4RegClassID;
2562 case RISCVVType::LMUL_8:
2563 return RISCV::VRM8RegClassID;
2564 }
2565}
2566
2567unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2568 RISCVVType::VLMUL LMUL = getLMUL(VT);
2569 if (LMUL == RISCVVType::LMUL_F8 || LMUL == RISCVVType::LMUL_F4 ||
2570 LMUL == RISCVVType::LMUL_F2 || LMUL == RISCVVType::LMUL_1) {
2571 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2572 "Unexpected subreg numbering");
2573 return RISCV::sub_vrm1_0 + Index;
2574 }
2575 if (LMUL == RISCVVType::LMUL_2) {
2576 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2577 "Unexpected subreg numbering");
2578 return RISCV::sub_vrm2_0 + Index;
2579 }
2580 if (LMUL == RISCVVType::LMUL_4) {
2581 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2582 "Unexpected subreg numbering");
2583 return RISCV::sub_vrm4_0 + Index;
2584 }
2585 llvm_unreachable("Invalid vector type.");
2586}
2587
2588unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
2589 if (VT.isRISCVVectorTuple()) {
2590 unsigned NF = VT.getRISCVVectorTupleNumFields();
2591 unsigned RegsPerField =
2592 std::max(a: 1U, b: (unsigned)VT.getSizeInBits().getKnownMinValue() /
2593 (NF * RISCV::RVVBitsPerBlock));
2594 switch (RegsPerField) {
2595 case 1:
2596 if (NF == 2)
2597 return RISCV::VRN2M1RegClassID;
2598 if (NF == 3)
2599 return RISCV::VRN3M1RegClassID;
2600 if (NF == 4)
2601 return RISCV::VRN4M1RegClassID;
2602 if (NF == 5)
2603 return RISCV::VRN5M1RegClassID;
2604 if (NF == 6)
2605 return RISCV::VRN6M1RegClassID;
2606 if (NF == 7)
2607 return RISCV::VRN7M1RegClassID;
2608 if (NF == 8)
2609 return RISCV::VRN8M1RegClassID;
2610 break;
2611 case 2:
2612 if (NF == 2)
2613 return RISCV::VRN2M2RegClassID;
2614 if (NF == 3)
2615 return RISCV::VRN3M2RegClassID;
2616 if (NF == 4)
2617 return RISCV::VRN4M2RegClassID;
2618 break;
2619 case 4:
2620 assert(NF == 2);
2621 return RISCV::VRN2M4RegClassID;
2622 default:
2623 break;
2624 }
2625 llvm_unreachable("Invalid vector tuple type RegClass.");
2626 }
2627
2628 if (VT.getVectorElementType() == MVT::i1)
2629 return RISCV::VRRegClassID;
2630 return getRegClassIDForLMUL(LMul: getLMUL(VT));
2631}
2632
2633// Attempt to decompose a subvector insert/extract between VecVT and
2634// SubVecVT via subregister indices. Returns the subregister index that
2635// can perform the subvector insert/extract with the given element index, as
2636// well as the index corresponding to any leftover subvectors that must be
2637// further inserted/extracted within the register class for SubVecVT.
2638std::pair<unsigned, unsigned>
2639RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2640 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2641 const RISCVRegisterInfo *TRI) {
2642 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2643 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2644 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2645 "Register classes not ordered");
2646 unsigned VecRegClassID = getRegClassIDForVecVT(VT: VecVT);
2647 unsigned SubRegClassID = getRegClassIDForVecVT(VT: SubVecVT);
2648
2649  // If VecVT is a vector tuple type, either it's the tuple type with the same
2650  // RegClass as SubVecVT, or SubVecVT is actually a subvector of VecVT.
2651 if (VecVT.isRISCVVectorTuple()) {
2652 if (VecRegClassID == SubRegClassID)
2653 return {RISCV::NoSubRegister, 0};
2654
2655 assert(SubVecVT.isScalableVector() &&
2656 "Only allow scalable vector subvector.");
2657 assert(getLMUL(VecVT) == getLMUL(SubVecVT) &&
2658 "Invalid vector tuple insert/extract for vector and subvector with "
2659 "different LMUL.");
2660 return {getSubregIndexByMVT(VT: VecVT, Index: InsertExtractIdx), 0};
2661 }
2662
2663 // Try to compose a subregister index that takes us from the incoming
2664  // LMUL>1 register class down to the outgoing one. At each step we halve
2665 // the LMUL:
2666 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2667 // Note that this is not guaranteed to find a subregister index, such as
2668 // when we are extracting from one VR type to another.
2669 unsigned SubRegIdx = RISCV::NoSubRegister;
2670 for (const unsigned RCID :
2671 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2672 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2673 VecVT = VecVT.getHalfNumVectorElementsVT();
2674 bool IsHi =
2675 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2676 SubRegIdx = TRI->composeSubRegIndices(a: SubRegIdx,
2677 b: getSubregIndexByMVT(VT: VecVT, Index: IsHi));
2678 if (IsHi)
2679 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2680 }
2681 return {SubRegIdx, InsertExtractIdx};
2682}
2683
2684// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2685// stores for those types.
2686bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2687 return !Subtarget.useRVVForFixedLengthVectors() ||
2688 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2689}
2690
2691bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
2692 if (!ScalarTy.isSimple())
2693 return false;
2694 switch (ScalarTy.getSimpleVT().SimpleTy) {
2695 case MVT::iPTR:
2696 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2697 case MVT::i8:
2698 case MVT::i16:
2699 case MVT::i32:
2700 return true;
2701 case MVT::i64:
2702 return Subtarget.hasVInstructionsI64();
2703 case MVT::f16:
2704 return Subtarget.hasVInstructionsF16Minimal();
2705 case MVT::bf16:
2706 return Subtarget.hasVInstructionsBF16Minimal();
2707 case MVT::f32:
2708 return Subtarget.hasVInstructionsF32();
2709 case MVT::f64:
2710 return Subtarget.hasVInstructionsF64();
2711 default:
2712 return false;
2713 }
2714}
2715
2717unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2718 return NumRepeatedDivisors;
2719}
2720
2721static SDValue getVLOperand(SDValue Op) {
2722 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2723 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2724 "Unexpected opcode");
2725 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2726 unsigned IntNo = Op.getConstantOperandVal(i: HasChain ? 1 : 0);
2727 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
2728 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntrinsicID: IntNo);
2729 if (!II)
2730 return SDValue();
2731 return Op.getOperand(i: II->VLOperand + 1 + HasChain);
2732}
2733
2734static bool useRVVForFixedLengthVectorVT(MVT VT,
2735 const RISCVSubtarget &Subtarget) {
2736 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2737 if (!Subtarget.useRVVForFixedLengthVectors())
2738 return false;
2739
2740 // We only support a set of vector types with a consistent maximum fixed size
2741 // across all supported vector element types to avoid legalization issues.
2742 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2743 // fixed-length vector type we support is 1024 bytes.
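  // For example, v1024i8 (1024 bytes) is accepted here, while v2048i8 (too
  // many elements) and v1024i16 (2048 bytes) are rejected.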
2744 if (VT.getVectorNumElements() > 1024 || VT.getFixedSizeInBits() > 1024 * 8)
2745 return false;
2746
2747 unsigned MinVLen = Subtarget.getRealMinVLen();
2748
2749 MVT EltVT = VT.getVectorElementType();
2750
2751 // Don't use RVV for vectors we cannot scalarize if required.
2752 switch (EltVT.SimpleTy) {
2753 // i1 is supported but has different rules.
2754 default:
2755 return false;
2756 case MVT::i1:
2757 // Masks can only use a single register.
2758 if (VT.getVectorNumElements() > MinVLen)
2759 return false;
2760 MinVLen /= 8;
2761 break;
2762 case MVT::i8:
2763 case MVT::i16:
2764 case MVT::i32:
2765 break;
2766 case MVT::i64:
2767 if (!Subtarget.hasVInstructionsI64())
2768 return false;
2769 break;
2770 case MVT::f16:
2771 if (!Subtarget.hasVInstructionsF16Minimal())
2772 return false;
2773 break;
2774 case MVT::bf16:
2775 if (!Subtarget.hasVInstructionsBF16Minimal())
2776 return false;
2777 break;
2778 case MVT::f32:
2779 if (!Subtarget.hasVInstructionsF32())
2780 return false;
2781 break;
2782 case MVT::f64:
2783 if (!Subtarget.hasVInstructionsF64())
2784 return false;
2785 break;
2786 }
2787
2788 // Reject elements larger than ELEN.
2789 if (EltVT.getSizeInBits() > Subtarget.getELen())
2790 return false;
2791
2792 unsigned LMul = divideCeil(Numerator: VT.getSizeInBits(), Denominator: MinVLen);
2793 // Don't use RVV for types that don't fit.
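  // For example, with VLEN=128 and the default maximum LMUL of 8, a v64i32
  // (2048 bits) would require LMUL=16 and is rejected.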
2794 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2795 return false;
2796
2797 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2798 // the base fixed length RVV support in place.
2799 if (!VT.isPow2VectorType())
2800 return false;
2801
2802 return true;
2803}
2804
2805bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2806 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2807}
2808
// Return the scalable RVV container type that a fixed-length vector of type VT
// is legalized into.
2810static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
2811 const RISCVSubtarget &Subtarget) {
  // This may be called before legal types are set up.
2813 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2814 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2815 "Expected legal fixed length vector!");
2816
2817 unsigned MinVLen = Subtarget.getRealMinVLen();
2818 unsigned MaxELen = Subtarget.getELen();
2819
2820 MVT EltVT = VT.getVectorElementType();
2821 switch (EltVT.SimpleTy) {
2822 default:
2823 llvm_unreachable("unexpected element type for RVV container");
2824 case MVT::i1:
2825 case MVT::i8:
2826 case MVT::i16:
2827 case MVT::i32:
2828 case MVT::i64:
2829 case MVT::bf16:
2830 case MVT::f16:
2831 case MVT::f32:
2832 case MVT::f64: {
2833 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2834 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2835 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
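    // For example, with a minimum VLEN of 128: v4i32 (one full register) maps
    // to nxv2i32 (LMUL=1), while v2i32 maps to nxv1i32 (LMUL=1/2).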
2836 unsigned NumElts =
2837 (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
2838 NumElts = std::max(a: NumElts, b: RISCV::RVVBitsPerBlock / MaxELen);
2839 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2840 return MVT::getScalableVectorVT(VT: EltVT, NumElements: NumElts);
2841 }
2842 }
2843}
2844
2845static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
2846 const RISCVSubtarget &Subtarget) {
2847 return getContainerForFixedLengthVector(TLI: DAG.getTargetLoweringInfo(), VT,
2848 Subtarget);
2849}
2850
2851MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
2852 return ::getContainerForFixedLengthVector(TLI: *this, VT, Subtarget: getSubtarget());
2853}
2854
2855// Grow V to consume an entire RVV register.
2856static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
2857 const RISCVSubtarget &Subtarget) {
2858 assert(VT.isScalableVector() &&
2859 "Expected to convert into a scalable vector!");
2860 assert(V.getValueType().isFixedLengthVector() &&
2861 "Expected a fixed length vector operand!");
2862 SDLoc DL(V);
2863 return DAG.getInsertSubvector(DL, Vec: DAG.getUNDEF(VT), SubVec: V, Idx: 0);
2864}
2865
2866// Shrink V so it's just big enough to maintain a VT's worth of data.
2867static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
2868 const RISCVSubtarget &Subtarget) {
2869 assert(VT.isFixedLengthVector() &&
2870 "Expected to convert into a fixed length vector!");
2871 assert(V.getValueType().isScalableVector() &&
2872 "Expected a scalable vector operand!");
2873 SDLoc DL(V);
2874 return DAG.getExtractSubvector(DL, VT, Vec: V, Idx: 0);
2875}
2876
/// Return the mask type suitable for masking the provided vector type. This
/// is simply a vector with i1 elements and the same (possibly scalable)
/// length.
2880static MVT getMaskTypeFor(MVT VecVT) {
2881 assert(VecVT.isVector());
2882 ElementCount EC = VecVT.getVectorElementCount();
2883 return MVT::getVectorVT(VT: MVT::i1, EC);
2884}
2885
/// Create an all-ones mask suitable for masking a vector of type VecVT with
/// vector length VL.
2888static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2889 SelectionDAG &DAG) {
2890 MVT MaskVT = getMaskTypeFor(VecVT);
2891 return DAG.getNode(Opcode: RISCVISD::VMSET_VL, DL, VT: MaskVT, Operand: VL);
2892}
2893
2894static std::pair<SDValue, SDValue>
2895getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG,
2896 const RISCVSubtarget &Subtarget) {
2897 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2898 SDValue VL = DAG.getRegister(Reg: RISCV::X0, VT: Subtarget.getXLenVT());
2899 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2900 return {Mask, VL};
2901}
2902
2903static std::pair<SDValue, SDValue>
2904getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2905 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2906 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2907 SDValue VL = DAG.getConstant(Val: NumElts, DL, VT: Subtarget.getXLenVT());
2908 SDValue Mask = getAllOnesMask(VecVT: ContainerVT, VL, DL, DAG);
2909 return {Mask, VL};
2910}
2911
// Gets the two common "VL" operands: an all-ones mask and the vector length.
// VecVT is a vector type, either fixed-length or scalable. If VecVT is
// fixed-length, ContainerVT is the scalable container type it is lowered
// into; otherwise ContainerVT should be the same as VecVT.
2916static std::pair<SDValue, SDValue>
2917getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2918 const RISCVSubtarget &Subtarget) {
2919 if (VecVT.isFixedLengthVector())
2920 return getDefaultVLOps(NumElts: VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2921 Subtarget);
2922 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2923 return getDefaultScalableVLOps(VecVT: ContainerVT, DL, DAG, Subtarget);
2924}
2925
2926SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL,
2927 SelectionDAG &DAG) const {
2928 assert(VecVT.isScalableVector() && "Expected scalable vector");
2929 return DAG.getElementCount(DL, VT: Subtarget.getXLenVT(),
2930 EC: VecVT.getVectorElementCount());
2931}
2932
2933std::pair<unsigned, unsigned>
2934RISCVTargetLowering::computeVLMAXBounds(MVT VecVT,
2935 const RISCVSubtarget &Subtarget) {
2936 assert(VecVT.isScalableVector() && "Expected scalable vector");
2937
2938 unsigned EltSize = VecVT.getScalarSizeInBits();
2939 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2940
2941 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2942 unsigned MaxVLMAX =
2943 RISCVTargetLowering::computeVLMAX(VectorBits: VectorBitsMax, EltSize, MinSize);
2944
2945 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
2946 unsigned MinVLMAX =
2947 RISCVTargetLowering::computeVLMAX(VectorBits: VectorBitsMin, EltSize, MinSize);
2948
2949 return std::make_pair(x&: MinVLMAX, y&: MaxVLMAX);
2950}
2951
// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
// of either are (currently) supported. This can get us into an infinite loop
2954// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2955// as a ..., etc.
2956// Until either (or both) of these can reliably lower any node, reporting that
2957// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2958// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2959// which is not desirable.
2960bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
2961 EVT VT, unsigned DefinedValues) const {
2962 return false;
2963}
2964
2965InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const {
  // TODO: Here we assume the reciprocal throughput of an LMUL_1 operation is
  // 1; in reality it is implementation-defined.
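  // For example, under that assumption an LMUL=4 type costs 4 when DLEN equals
  // VLEN and 8 when DLEN is VLEN/2.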
2968 if (!VT.isVector())
2969 return InstructionCost::getInvalid();
2970 unsigned DLenFactor = Subtarget.getDLenFactor();
2971 unsigned Cost;
2972 if (VT.isScalableVector()) {
2973 unsigned LMul;
2974 bool Fractional;
2975 std::tie(args&: LMul, args&: Fractional) =
2976 RISCVVType::decodeVLMUL(VLMul: RISCVTargetLowering::getLMUL(VT));
2977 if (Fractional)
2978 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2979 else
2980 Cost = (LMul * DLenFactor);
2981 } else {
2982 Cost = divideCeil(Numerator: VT.getSizeInBits(), Denominator: Subtarget.getRealMinVLen() / DLenFactor);
2983 }
2984 return Cost;
2985}
2986
/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
/// may be quadratic in the number of vregs implied by LMUL, and is assumed to
/// be by default. VRGatherCostModel reflects the available options. Note that
/// the operands (index and possibly mask) are handled separately.
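/// For example, assuming DLEN equals VLEN, an LMUL=4 type costs 16 (4 * 4)
/// under the default quadratic model and 8 (4 * log2(4)) under NLog2N.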
2992InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const {
2993 auto LMULCost = getLMULCost(VT);
2994 bool Log2CostModel =
2995 Subtarget.getVRGatherCostModel() == llvm::RISCVSubtarget::NLog2N;
2996 if (Log2CostModel && LMULCost.isValid()) {
2997 unsigned Log = Log2_64(Value: LMULCost.getValue());
2998 if (Log > 0)
2999 return LMULCost * Log;
3000 }
3001 return LMULCost * LMULCost;
3002}
3003
3004/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
3005/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
3006/// or may track the vrgather.vv cost. It is implementation-dependent.
3007InstructionCost RISCVTargetLowering::getVRGatherVICost(MVT VT) const {
3008 return getLMULCost(VT);
3009}
3010
3011/// Return the cost of a vslidedown.vx or vslideup.vx instruction
3012/// for the type VT. (This does not cover the vslide1up or vslide1down
3013/// variants.) Slides may be linear in the number of vregs implied by LMUL,
3014/// or may track the vrgather.vv cost. It is implementation-dependent.
3015InstructionCost RISCVTargetLowering::getVSlideVXCost(MVT VT) const {
3016 return getLMULCost(VT);
3017}
3018
3019/// Return the cost of a vslidedown.vi or vslideup.vi instruction
3020/// for the type VT. (This does not cover the vslide1up or vslide1down
3021/// variants.) Slides may be linear in the number of vregs implied by LMUL,
3022/// or may track the vrgather.vv cost. It is implementation-dependent.
3023InstructionCost RISCVTargetLowering::getVSlideVICost(MVT VT) const {
3024 return getLMULCost(VT);
3025}
3026
3027static SDValue lowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
3028 const RISCVSubtarget &Subtarget) {
3029 // f16 conversions are promoted to f32 when Zfh/Zhinx are not supported.
3030 // bf16 conversions are always promoted to f32.
3031 if ((Op.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3032 Op.getValueType() == MVT::bf16) {
3033 bool IsStrict = Op->isStrictFPOpcode();
3034
3035 SDLoc DL(Op);
3036 if (IsStrict) {
3037 SDValue Val = DAG.getNode(Opcode: Op.getOpcode(), DL, ResultTys: {MVT::f32, MVT::Other},
3038 Ops: {Op.getOperand(i: 0), Op.getOperand(i: 1)});
3039 return DAG.getNode(Opcode: ISD::STRICT_FP_ROUND, DL,
3040 ResultTys: {Op.getValueType(), MVT::Other},
3041 Ops: {Val.getValue(R: 1), Val.getValue(R: 0),
3042 DAG.getIntPtrConstant(Val: 0, DL, /*isTarget=*/true)});
3043 }
3044 return DAG.getNode(
3045 Opcode: ISD::FP_ROUND, DL, VT: Op.getValueType(),
3046 N1: DAG.getNode(Opcode: Op.getOpcode(), DL, VT: MVT::f32, Operand: Op.getOperand(i: 0)),
3047 N2: DAG.getIntPtrConstant(Val: 0, DL, /*isTarget=*/true));
3048 }
3049
3050 // Other operations are legal.
3051 return Op;
3052}
3053
3054static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
3055 const RISCVSubtarget &Subtarget) {
3056 // RISC-V FP-to-int conversions saturate to the destination register size, but
3057 // don't produce 0 for nan. We can use a conversion instruction and fix the
3058 // nan case with a compare and a select.
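  // For an i32 <- f32 saturating conversion this typically becomes an
  // fcvt.w.s with RTZ rounding plus an feq.s-based select that forces the
  // result to 0 for NaN inputs (a sketch of the expected codegen, not a
  // guarantee).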
3059 SDValue Src = Op.getOperand(i: 0);
3060
3061 MVT DstVT = Op.getSimpleValueType();
3062 EVT SatVT = cast<VTSDNode>(Val: Op.getOperand(i: 1))->getVT();
3063
3064 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
3065
3066 if (!DstVT.isVector()) {
3067 // For bf16 or for f16 in absence of Zfh, promote to f32, then saturate
3068 // the result.
3069 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3070 Src.getValueType() == MVT::bf16) {
3071 Src = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: SDLoc(Op), VT: MVT::f32, Operand: Src);
3072 }
3073
3074 unsigned Opc;
3075 if (SatVT == DstVT)
3076 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
3077 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
3078 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
3079 else
3080 return SDValue();
3081 // FIXME: Support other SatVTs by clamping before or after the conversion.
3082
3083 SDLoc DL(Op);
3084 SDValue FpToInt = DAG.getNode(
3085 Opcode: Opc, DL, VT: DstVT, N1: Src,
3086 N2: DAG.getTargetConstant(Val: RISCVFPRndMode::RTZ, DL, VT: Subtarget.getXLenVT()));
3087
3088 if (Opc == RISCVISD::FCVT_WU_RV64)
3089 FpToInt = DAG.getZeroExtendInReg(Op: FpToInt, DL, VT: MVT::i32);
3090
3091 SDValue ZeroInt = DAG.getConstant(Val: 0, DL, VT: DstVT);
3092 return DAG.getSelectCC(DL, LHS: Src, RHS: Src, True: ZeroInt, False: FpToInt,
3093 Cond: ISD::CondCode::SETUO);
3094 }
3095
3096 // Vectors.
3097
3098 MVT DstEltVT = DstVT.getVectorElementType();
3099 MVT SrcVT = Src.getSimpleValueType();
3100 MVT SrcEltVT = SrcVT.getVectorElementType();
3101 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
3102 unsigned DstEltSize = DstEltVT.getSizeInBits();
3103
3104 // Only handle saturating to the destination type.
3105 if (SatVT != DstEltVT)
3106 return SDValue();
3107
3108 MVT DstContainerVT = DstVT;
3109 MVT SrcContainerVT = SrcVT;
3110 if (DstVT.isFixedLengthVector()) {
3111 DstContainerVT = getContainerForFixedLengthVector(DAG, VT: DstVT, Subtarget);
3112 SrcContainerVT = getContainerForFixedLengthVector(DAG, VT: SrcVT, Subtarget);
3113 assert(DstContainerVT.getVectorElementCount() ==
3114 SrcContainerVT.getVectorElementCount() &&
3115 "Expected same element count");
3116 Src = convertToScalableVector(VT: SrcContainerVT, V: Src, DAG, Subtarget);
3117 }
3118
3119 SDLoc DL(Op);
3120
3121 auto [Mask, VL] = getDefaultVLOps(VecVT: DstVT, ContainerVT: DstContainerVT, DL, DAG, Subtarget);
3122
3123 SDValue IsNan = DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: Mask.getValueType(),
3124 Ops: {Src, Src, DAG.getCondCode(Cond: ISD::SETNE),
3125 DAG.getUNDEF(VT: Mask.getValueType()), Mask, VL});
3126
  // If we need to widen by more than 1 step, promote the FP type, then do a
  // widening convert.
3129 if (DstEltSize > (2 * SrcEltSize)) {
3130 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
3131 MVT InterVT = SrcContainerVT.changeVectorElementType(EltVT: MVT::f32);
3132 Src = DAG.getNode(Opcode: RISCVISD::FP_EXTEND_VL, DL, VT: InterVT, N1: Src, N2: Mask, N3: VL);
3133 }
3134
3135 MVT CvtContainerVT = DstContainerVT;
3136 MVT CvtEltVT = DstEltVT;
3137 if (SrcEltSize > (2 * DstEltSize)) {
3138 CvtEltVT = MVT::getIntegerVT(BitWidth: SrcEltVT.getSizeInBits() / 2);
3139 CvtContainerVT = CvtContainerVT.changeVectorElementType(EltVT: CvtEltVT);
3140 }
3141
3142 unsigned RVVOpc =
3143 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
3144 SDValue Res = DAG.getNode(Opcode: RVVOpc, DL, VT: CvtContainerVT, N1: Src, N2: Mask, N3: VL);
3145
3146 while (CvtContainerVT != DstContainerVT) {
3147 CvtEltVT = MVT::getIntegerVT(BitWidth: CvtEltVT.getSizeInBits() / 2);
3148 CvtContainerVT = CvtContainerVT.changeVectorElementType(EltVT: CvtEltVT);
3149 // Rounding mode here is arbitrary since we aren't shifting out any bits.
3150 unsigned ClipOpc = IsSigned ? RISCVISD::TRUNCATE_VECTOR_VL_SSAT
3151 : RISCVISD::TRUNCATE_VECTOR_VL_USAT;
3152 Res = DAG.getNode(Opcode: ClipOpc, DL, VT: CvtContainerVT, N1: Res, N2: Mask, N3: VL);
3153 }
3154
3155 SDValue SplatZero = DAG.getNode(
3156 Opcode: RISCVISD::VMV_V_X_VL, DL, VT: DstContainerVT, N1: DAG.getUNDEF(VT: DstContainerVT),
3157 N2: DAG.getConstant(Val: 0, DL, VT: Subtarget.getXLenVT()), N3: VL);
3158 Res = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: DstContainerVT, N1: IsNan, N2: SplatZero,
3159 N3: Res, N4: DAG.getUNDEF(VT: DstContainerVT), N5: VL);
3160
3161 if (DstVT.isFixedLengthVector())
3162 Res = convertFromScalableVector(VT: DstVT, V: Res, DAG, Subtarget);
3163
3164 return Res;
3165}
3166
3167static SDValue lowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
3168 const RISCVSubtarget &Subtarget) {
3169 bool IsStrict = Op->isStrictFPOpcode();
3170 SDValue SrcVal = Op.getOperand(i: IsStrict ? 1 : 0);
3171
3172 // f16 conversions are promoted to f32 when Zfh/Zhinx is not enabled.
3173 // bf16 conversions are always promoted to f32.
3174 if ((SrcVal.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3175 SrcVal.getValueType() == MVT::bf16) {
3176 SDLoc DL(Op);
3177 if (IsStrict) {
3178 SDValue Ext =
3179 DAG.getNode(Opcode: ISD::STRICT_FP_EXTEND, DL, ResultTys: {MVT::f32, MVT::Other},
3180 Ops: {Op.getOperand(i: 0), SrcVal});
3181 return DAG.getNode(Opcode: Op.getOpcode(), DL, ResultTys: {Op.getValueType(), MVT::Other},
3182 Ops: {Ext.getValue(R: 1), Ext.getValue(R: 0)});
3183 }
3184 return DAG.getNode(Opcode: Op.getOpcode(), DL, VT: Op.getValueType(),
3185 Operand: DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: MVT::f32, Operand: SrcVal));
3186 }
3187
3188 // Other operations are legal.
3189 return Op;
3190}
3191
3192static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {
3193 switch (Opc) {
3194 case ISD::FROUNDEVEN:
3195 case ISD::STRICT_FROUNDEVEN:
3196 case ISD::VP_FROUNDEVEN:
3197 return RISCVFPRndMode::RNE;
3198 case ISD::FTRUNC:
3199 case ISD::STRICT_FTRUNC:
3200 case ISD::VP_FROUNDTOZERO:
3201 return RISCVFPRndMode::RTZ;
3202 case ISD::FFLOOR:
3203 case ISD::STRICT_FFLOOR:
3204 case ISD::VP_FFLOOR:
3205 return RISCVFPRndMode::RDN;
3206 case ISD::FCEIL:
3207 case ISD::STRICT_FCEIL:
3208 case ISD::VP_FCEIL:
3209 return RISCVFPRndMode::RUP;
3210 case ISD::FROUND:
3211 case ISD::STRICT_FROUND:
3212 case ISD::VP_FROUND:
3213 return RISCVFPRndMode::RMM;
3214 case ISD::FRINT:
3215 case ISD::LRINT:
3216 case ISD::LLRINT:
3217 case ISD::STRICT_FRINT:
3218 case ISD::STRICT_LRINT:
3219 case ISD::STRICT_LLRINT:
3220 case ISD::VP_FRINT:
3221 case ISD::VP_LRINT:
3222 case ISD::VP_LLRINT:
3223 return RISCVFPRndMode::DYN;
3224 }
3225
3226 return RISCVFPRndMode::Invalid;
3227}
3228
3229// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
3230// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
3231// the integer domain and back. Taking care to avoid converting values that are
3232// nan or already correct.
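// For example, for an f32 FCEIL, lanes with magnitude below 2^23 are rounded
// via the integer domain with RUP rounding, while lanes that are NaN, or whose
// magnitude is at least 2^23 and therefore already integral, are kept as-is.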
3233static SDValue
3234lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3235 const RISCVSubtarget &Subtarget) {
3236 MVT VT = Op.getSimpleValueType();
3237 assert(VT.isVector() && "Unexpected type");
3238
3239 SDLoc DL(Op);
3240
3241 SDValue Src = Op.getOperand(i: 0);
3242
3243 // Freeze the source since we are increasing the number of uses.
3244 Src = DAG.getFreeze(V: Src);
3245
3246 MVT ContainerVT = VT;
3247 if (VT.isFixedLengthVector()) {
3248 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3249 Src = convertToScalableVector(VT: ContainerVT, V: Src, DAG, Subtarget);
3250 }
3251
3252 SDValue Mask, VL;
3253 if (Op->isVPOpcode()) {
3254 Mask = Op.getOperand(i: 1);
3255 if (VT.isFixedLengthVector())
3256 Mask = convertToScalableVector(VT: getMaskTypeFor(VecVT: ContainerVT), V: Mask, DAG,
3257 Subtarget);
3258 VL = Op.getOperand(i: 2);
3259 } else {
3260 std::tie(args&: Mask, args&: VL) = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
3261 }
3262
3263 // We do the conversion on the absolute value and fix the sign at the end.
3264 SDValue Abs = DAG.getNode(Opcode: RISCVISD::FABS_VL, DL, VT: ContainerVT, N1: Src, N2: Mask, N3: VL);
3265
3266 // Determine the largest integer that can be represented exactly. This and
3267 // values larger than it don't have any fractional bits so don't need to
3268 // be converted.
3269 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3270 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3271 APFloat MaxVal = APFloat(FltSem);
3272 MaxVal.convertFromAPInt(Input: APInt::getOneBitSet(numBits: Precision, BitNo: Precision - 1),
3273 /*IsSigned*/ false, RM: APFloat::rmNearestTiesToEven);
3274 SDValue MaxValNode =
3275 DAG.getConstantFP(Val: MaxVal, DL, VT: ContainerVT.getVectorElementType());
3276 SDValue MaxValSplat = DAG.getNode(Opcode: RISCVISD::VFMV_V_F_VL, DL, VT: ContainerVT,
3277 N1: DAG.getUNDEF(VT: ContainerVT), N2: MaxValNode, N3: VL);
3278
3279 // If abs(Src) was larger than MaxVal or nan, keep it.
3280 MVT SetccVT = MVT::getVectorVT(VT: MVT::i1, EC: ContainerVT.getVectorElementCount());
3281 Mask =
3282 DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: SetccVT,
3283 Ops: {Abs, MaxValSplat, DAG.getCondCode(Cond: ISD::SETOLT),
3284 Mask, Mask, VL});
3285
3286 // Truncate to integer and convert back to FP.
3287 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3288 MVT XLenVT = Subtarget.getXLenVT();
3289 SDValue Truncated;
3290
3291 switch (Op.getOpcode()) {
3292 default:
3293 llvm_unreachable("Unexpected opcode");
3294 case ISD::FRINT:
3295 case ISD::VP_FRINT:
3296 case ISD::FCEIL:
3297 case ISD::VP_FCEIL:
3298 case ISD::FFLOOR:
3299 case ISD::VP_FFLOOR:
3300 case ISD::FROUND:
3301 case ISD::FROUNDEVEN:
3302 case ISD::VP_FROUND:
3303 case ISD::VP_FROUNDEVEN:
3304 case ISD::VP_FROUNDTOZERO: {
3305 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Opc: Op.getOpcode());
3306 assert(FRM != RISCVFPRndMode::Invalid);
3307 Truncated = DAG.getNode(Opcode: RISCVISD::VFCVT_RM_X_F_VL, DL, VT: IntVT, N1: Src, N2: Mask,
3308 N3: DAG.getTargetConstant(Val: FRM, DL, VT: XLenVT), N4: VL);
3309 break;
3310 }
3311 case ISD::FTRUNC:
3312 Truncated = DAG.getNode(Opcode: RISCVISD::VFCVT_RTZ_X_F_VL, DL, VT: IntVT, N1: Src,
3313 N2: Mask, N3: VL);
3314 break;
3315 case ISD::FNEARBYINT:
3316 case ISD::VP_FNEARBYINT:
3317 Truncated = DAG.getNode(Opcode: RISCVISD::VFROUND_NOEXCEPT_VL, DL, VT: ContainerVT, N1: Src,
3318 N2: Mask, N3: VL);
3319 break;
3320 }
3321
3322 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3323 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3324 Truncated = DAG.getNode(Opcode: RISCVISD::SINT_TO_FP_VL, DL, VT: ContainerVT, N1: Truncated,
3325 N2: Mask, N3: VL);
3326
3327 // Restore the original sign so that -0.0 is preserved.
3328 Truncated = DAG.getNode(Opcode: RISCVISD::FCOPYSIGN_VL, DL, VT: ContainerVT, N1: Truncated,
3329 N2: Src, N3: Src, N4: Mask, N5: VL);
3330
3331 if (!VT.isFixedLengthVector())
3332 return Truncated;
3333
3334 return convertFromScalableVector(VT, V: Truncated, DAG, Subtarget);
3335}
3336
// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND,
// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaNs in the source to
// qNaNs and converting the new source to integer and back to FP.
3340static SDValue
3341lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3342 const RISCVSubtarget &Subtarget) {
3343 SDLoc DL(Op);
3344 MVT VT = Op.getSimpleValueType();
3345 SDValue Chain = Op.getOperand(i: 0);
3346 SDValue Src = Op.getOperand(i: 1);
3347
3348 MVT ContainerVT = VT;
3349 if (VT.isFixedLengthVector()) {
3350 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3351 Src = convertToScalableVector(VT: ContainerVT, V: Src, DAG, Subtarget);
3352 }
3353
3354 auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
3355
3356 // Freeze the source since we are increasing the number of uses.
3357 Src = DAG.getFreeze(V: Src);
3358
  // Convert sNaN to qNaN by executing x + x for every unordered element x in
  // Src.
3360 MVT MaskVT = Mask.getSimpleValueType();
3361 SDValue Unorder = DAG.getNode(Opcode: RISCVISD::STRICT_FSETCC_VL, DL,
3362 VTList: DAG.getVTList(VT1: MaskVT, VT2: MVT::Other),
3363 Ops: {Chain, Src, Src, DAG.getCondCode(Cond: ISD::SETUNE),
3364 DAG.getUNDEF(VT: MaskVT), Mask, VL});
3365 Chain = Unorder.getValue(R: 1);
3366 Src = DAG.getNode(Opcode: RISCVISD::STRICT_FADD_VL, DL,
3367 VTList: DAG.getVTList(VT1: ContainerVT, VT2: MVT::Other),
3368 Ops: {Chain, Src, Src, Src, Unorder, VL});
3369 Chain = Src.getValue(R: 1);
3370
3371 // We do the conversion on the absolute value and fix the sign at the end.
3372 SDValue Abs = DAG.getNode(Opcode: RISCVISD::FABS_VL, DL, VT: ContainerVT, N1: Src, N2: Mask, N3: VL);
3373
3374 // Determine the largest integer that can be represented exactly. This and
3375 // values larger than it don't have any fractional bits so don't need to
3376 // be converted.
3377 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3378 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3379 APFloat MaxVal = APFloat(FltSem);
3380 MaxVal.convertFromAPInt(Input: APInt::getOneBitSet(numBits: Precision, BitNo: Precision - 1),
3381 /*IsSigned*/ false, RM: APFloat::rmNearestTiesToEven);
3382 SDValue MaxValNode =
3383 DAG.getConstantFP(Val: MaxVal, DL, VT: ContainerVT.getVectorElementType());
3384 SDValue MaxValSplat = DAG.getNode(Opcode: RISCVISD::VFMV_V_F_VL, DL, VT: ContainerVT,
3385 N1: DAG.getUNDEF(VT: ContainerVT), N2: MaxValNode, N3: VL);
3386
3387 // If abs(Src) was larger than MaxVal or nan, keep it.
3388 Mask = DAG.getNode(
3389 Opcode: RISCVISD::SETCC_VL, DL, VT: MaskVT,
3390 Ops: {Abs, MaxValSplat, DAG.getCondCode(Cond: ISD::SETOLT), Mask, Mask, VL});
3391
3392 // Truncate to integer and convert back to FP.
3393 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3394 MVT XLenVT = Subtarget.getXLenVT();
3395 SDValue Truncated;
3396
3397 switch (Op.getOpcode()) {
3398 default:
3399 llvm_unreachable("Unexpected opcode");
3400 case ISD::STRICT_FCEIL:
3401 case ISD::STRICT_FFLOOR:
3402 case ISD::STRICT_FROUND:
3403 case ISD::STRICT_FROUNDEVEN: {
3404 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Opc: Op.getOpcode());
3405 assert(FRM != RISCVFPRndMode::Invalid);
3406 Truncated = DAG.getNode(
3407 Opcode: RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, VTList: DAG.getVTList(VT1: IntVT, VT2: MVT::Other),
3408 Ops: {Chain, Src, Mask, DAG.getTargetConstant(Val: FRM, DL, VT: XLenVT), VL});
3409 break;
3410 }
3411 case ISD::STRICT_FTRUNC:
3412 Truncated =
3413 DAG.getNode(Opcode: RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,
3414 VTList: DAG.getVTList(VT1: IntVT, VT2: MVT::Other), N1: Chain, N2: Src, N3: Mask, N4: VL);
3415 break;
3416 case ISD::STRICT_FNEARBYINT:
3417 Truncated = DAG.getNode(Opcode: RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL,
3418 VTList: DAG.getVTList(VT1: ContainerVT, VT2: MVT::Other), N1: Chain, N2: Src,
3419 N3: Mask, N4: VL);
3420 break;
3421 }
3422 Chain = Truncated.getValue(R: 1);
3423
3424 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3425 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3426 Truncated = DAG.getNode(Opcode: RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3427 VTList: DAG.getVTList(VT1: ContainerVT, VT2: MVT::Other), N1: Chain,
3428 N2: Truncated, N3: Mask, N4: VL);
3429 Chain = Truncated.getValue(R: 1);
3430 }
3431
3432 // Restore the original sign so that -0.0 is preserved.
3433 Truncated = DAG.getNode(Opcode: RISCVISD::FCOPYSIGN_VL, DL, VT: ContainerVT, N1: Truncated,
3434 N2: Src, N3: Src, N4: Mask, N5: VL);
3435
3436 if (VT.isFixedLengthVector())
3437 Truncated = convertFromScalableVector(VT, V: Truncated, DAG, Subtarget);
3438 return DAG.getMergeValues(Ops: {Truncated, Chain}, dl: DL);
3439}
3440
3441static SDValue
3442lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3443 const RISCVSubtarget &Subtarget) {
3444 MVT VT = Op.getSimpleValueType();
3445 if (VT.isVector())
3446 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3447
3448 if (DAG.shouldOptForSize())
3449 return SDValue();
3450
3451 SDLoc DL(Op);
3452 SDValue Src = Op.getOperand(i: 0);
3453
3454 // Create an integer the size of the mantissa with the MSB set. This and all
3455 // values larger than it don't have any fractional bits so don't need to be
3456 // converted.
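  // For example, this threshold is 2^23 for f32 and 2^52 for f64.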
3457 const fltSemantics &FltSem = VT.getFltSemantics();
3458 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3459 APFloat MaxVal = APFloat(FltSem);
3460 MaxVal.convertFromAPInt(Input: APInt::getOneBitSet(numBits: Precision, BitNo: Precision - 1),
3461 /*IsSigned*/ false, RM: APFloat::rmNearestTiesToEven);
3462 SDValue MaxValNode = DAG.getConstantFP(Val: MaxVal, DL, VT);
3463
3464 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Opc: Op.getOpcode());
3465 return DAG.getNode(Opcode: RISCVISD::FROUND, DL, VT, N1: Src, N2: MaxValNode,
3466 N3: DAG.getTargetConstant(Val: FRM, DL, VT: Subtarget.getXLenVT()));
3467}
3468
3469// Expand vector LRINT and LLRINT by converting to the integer domain.
3470static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG,
3471 const RISCVSubtarget &Subtarget) {
3472 SDLoc DL(Op);
3473 MVT DstVT = Op.getSimpleValueType();
3474 SDValue Src = Op.getOperand(i: 0);
3475 MVT SrcVT = Src.getSimpleValueType();
3476 assert(SrcVT.isVector() && DstVT.isVector() &&
3477 !(SrcVT.isFixedLengthVector() ^ DstVT.isFixedLengthVector()) &&
3478 "Unexpected type");
3479
3480 MVT DstContainerVT = DstVT;
3481 MVT SrcContainerVT = SrcVT;
3482
3483 if (DstVT.isFixedLengthVector()) {
3484 DstContainerVT = getContainerForFixedLengthVector(DAG, VT: DstVT, Subtarget);
3485 SrcContainerVT = getContainerForFixedLengthVector(DAG, VT: SrcVT, Subtarget);
3486 Src = convertToScalableVector(VT: SrcContainerVT, V: Src, DAG, Subtarget);
3487 }
3488
3489 auto [Mask, VL] = getDefaultVLOps(VecVT: SrcVT, ContainerVT: SrcContainerVT, DL, DAG, Subtarget);
3490 SDValue Res =
3491 DAG.getNode(Opcode: RISCVISD::VFCVT_RM_X_F_VL, DL, VT: DstContainerVT, N1: Src, N2: Mask,
3492 N3: DAG.getTargetConstant(Val: matchRoundingOp(Opc: Op.getOpcode()), DL,
3493 VT: Subtarget.getXLenVT()),
3494 N4: VL);
3495
3496 if (!DstVT.isFixedLengthVector())
3497 return Res;
3498
3499 return convertFromScalableVector(VT: DstVT, V: Res, DAG, Subtarget);
3500}
3501
3502static SDValue
3503getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget,
3504 const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op,
3505 SDValue Offset, SDValue Mask, SDValue VL,
3506 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3507 if (Passthru.isUndef())
3508 Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
3509 SDValue PolicyOp = DAG.getTargetConstant(Val: Policy, DL, VT: Subtarget.getXLenVT());
3510 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3511 return DAG.getNode(Opcode: RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3512}
3513
3514static SDValue
3515getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3516 EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask,
3517 SDValue VL,
3518 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3519 if (Passthru.isUndef())
3520 Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
3521 SDValue PolicyOp = DAG.getTargetConstant(Val: Policy, DL, VT: Subtarget.getXLenVT());
3522 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3523 return DAG.getNode(Opcode: RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3524}
3525
3526struct VIDSequence {
3527 int64_t StepNumerator;
3528 unsigned StepDenominator;
3529 int64_t Addend;
3530};
3531
3532static std::optional<APInt> getExactInteger(const APFloat &APF,
3533 uint32_t BitWidth) {
3534 // We will use a SINT_TO_FP to materialize this constant so we should use a
3535 // signed APSInt here.
3536 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3537 // We use an arbitrary rounding mode here. If a floating-point is an exact
3538 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3539 // the rounding mode changes the output value, then it is not an exact
3540 // integer.
3541 RoundingMode ArbitraryRM = RoundingMode::TowardZero;
3542 bool IsExact;
3543 // If it is out of signed integer range, it will return an invalid operation.
3544 // If it is not an exact integer, IsExact is false.
3545 if ((APF.convertToInteger(Result&: ValInt, RM: ArbitraryRM, IsExact: &IsExact) ==
3546 APFloatBase::opInvalidOp) ||
3547 !IsExact)
3548 return std::nullopt;
3549 return ValInt.extractBits(numBits: BitWidth, bitPosition: 0);
3550}
3551
3552// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3553// to the (non-zero) step S and start value X. This can be then lowered as the
3554// RVV sequence (VID * S) + X, for example.
3555// The step S is represented as an integer numerator divided by a positive
3556// denominator. Note that the implementation currently only identifies
3557// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3558// cannot detect 2/3, for example.
3559// Note that this method will also match potentially unappealing index
3560// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
3561// determine whether this is worth generating code for.
3562//
3563// EltSizeInBits is the size of the type that the sequence will be calculated
3564// in, i.e. SEW for build_vectors or XLEN for address calculations.
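// For example, <1, 3, 5, 7> is matched as (VID * 2) + 1, and <0, 0, 1, 1> as
// VID / 2 (step 1/2, addend 0).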
3565static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3566 unsigned EltSizeInBits) {
3567 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3568 if (!cast<BuildVectorSDNode>(Val&: Op)->isConstant())
3569 return std::nullopt;
3570 bool IsInteger = Op.getValueType().isInteger();
3571
3572 std::optional<unsigned> SeqStepDenom;
3573 std::optional<APInt> SeqStepNum;
3574 std::optional<APInt> SeqAddend;
3575 std::optional<std::pair<APInt, unsigned>> PrevElt;
3576 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3577
3578 // First extract the ops into a list of constant integer values. This may not
3579 // be possible for floats if they're not all representable as integers.
3580 SmallVector<std::optional<APInt>> Elts(Op.getNumOperands());
3581 const unsigned OpSize = Op.getScalarValueSizeInBits();
3582 for (auto [Idx, Elt] : enumerate(First: Op->op_values())) {
3583 if (Elt.isUndef()) {
3584 Elts[Idx] = std::nullopt;
3585 continue;
3586 }
3587 if (IsInteger) {
3588 Elts[Idx] = Elt->getAsAPIntVal().trunc(width: OpSize).zext(width: EltSizeInBits);
3589 } else {
3590 auto ExactInteger =
3591 getExactInteger(APF: cast<ConstantFPSDNode>(Val: Elt)->getValueAPF(), BitWidth: OpSize);
3592 if (!ExactInteger)
3593 return std::nullopt;
3594 Elts[Idx] = *ExactInteger;
3595 }
3596 }
3597
3598 for (auto [Idx, Elt] : enumerate(First&: Elts)) {
3599 // Assume undef elements match the sequence; we just have to be careful
3600 // when interpolating across them.
3601 if (!Elt)
3602 continue;
3603
3604 if (PrevElt) {
3605 // Calculate the step since the last non-undef element, and ensure
3606 // it's consistent across the entire sequence.
3607 unsigned IdxDiff = Idx - PrevElt->second;
3608 APInt ValDiff = *Elt - PrevElt->first;
3609
      // A zero value difference means that we're somewhere in the middle of a
      // fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a step
      // change before evaluating the sequence.
3613 if (ValDiff == 0)
3614 continue;
3615
3616 int64_t Remainder = ValDiff.srem(RHS: IdxDiff);
3617 // Normalize the step if it's greater than 1.
3618 if (Remainder != ValDiff.getSExtValue()) {
3619 // The difference must cleanly divide the element span.
3620 if (Remainder != 0)
3621 return std::nullopt;
3622 ValDiff = ValDiff.sdiv(RHS: IdxDiff);
3623 IdxDiff = 1;
3624 }
3625
3626 if (!SeqStepNum)
3627 SeqStepNum = ValDiff;
3628 else if (ValDiff != SeqStepNum)
3629 return std::nullopt;
3630
3631 if (!SeqStepDenom)
3632 SeqStepDenom = IdxDiff;
3633 else if (IdxDiff != *SeqStepDenom)
3634 return std::nullopt;
3635 }
3636
3637 // Record this non-undef element for later.
3638 if (!PrevElt || PrevElt->first != *Elt)
3639 PrevElt = std::make_pair(x&: *Elt, y&: Idx);
3640 }
3641
3642 // We need to have logged a step for this to count as a legal index sequence.
3643 if (!SeqStepNum || !SeqStepDenom)
3644 return std::nullopt;
3645
3646 // Loop back through the sequence and validate elements we might have skipped
3647 // while waiting for a valid step. While doing this, log any sequence addend.
3648 for (auto [Idx, Elt] : enumerate(First&: Elts)) {
3649 if (!Elt)
3650 continue;
3651 APInt ExpectedVal =
3652 (APInt(EltSizeInBits, Idx, /*isSigned=*/false, /*implicitTrunc=*/true) *
3653 *SeqStepNum)
3654 .sdiv(RHS: *SeqStepDenom);
3655
3656 APInt Addend = *Elt - ExpectedVal;
3657 if (!SeqAddend)
3658 SeqAddend = Addend;
3659 else if (Addend != SeqAddend)
3660 return std::nullopt;
3661 }
3662
3663 assert(SeqAddend && "Must have an addend if we have a step");
3664
3665 return VIDSequence{.StepNumerator: SeqStepNum->getSExtValue(), .StepDenominator: *SeqStepDenom,
3666 .Addend: SeqAddend->getSExtValue()};
3667}
3668
3669// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3670// and lower it as a VRGATHER_VX_VL from the source vector.
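// For example, a splat of (extractelement %v, 3) can become a single
// vrgather.vx (or vrgather.vi for a small constant index) selecting lane 3
// from the source vector.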
3671static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3672 SelectionDAG &DAG,
3673 const RISCVSubtarget &Subtarget) {
3674 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3675 return SDValue();
3676 SDValue Src = SplatVal.getOperand(i: 0);
  // Don't perform this optimization for i1 vectors, or if the element types
  // are different.
3679 // FIXME: Support i1 vectors, maybe by promoting to i8?
3680 MVT EltTy = VT.getVectorElementType();
3681 MVT SrcVT = Src.getSimpleValueType();
3682 if (EltTy == MVT::i1 || EltTy != SrcVT.getVectorElementType() ||
3683 !DAG.getTargetLoweringInfo().isTypeLegal(VT: SrcVT))
3684 return SDValue();
3685 SDValue Idx = SplatVal.getOperand(i: 1);
3686 // The index must be a legal type.
3687 if (Idx.getValueType() != Subtarget.getXLenVT())
3688 return SDValue();
3689
3690 // Check that we know Idx lies within VT
3691 if (!TypeSize::isKnownLE(LHS: SrcVT.getSizeInBits(), RHS: VT.getSizeInBits())) {
3692 auto *CIdx = dyn_cast<ConstantSDNode>(Val&: Idx);
3693 if (!CIdx || CIdx->getZExtValue() >= VT.getVectorMinNumElements())
3694 return SDValue();
3695 }
3696
3697 // Convert fixed length vectors to scalable
3698 MVT ContainerVT = VT;
3699 if (VT.isFixedLengthVector())
3700 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3701
3702 MVT SrcContainerVT = SrcVT;
3703 if (SrcVT.isFixedLengthVector()) {
3704 SrcContainerVT = getContainerForFixedLengthVector(DAG, VT: SrcVT, Subtarget);
3705 Src = convertToScalableVector(VT: SrcContainerVT, V: Src, DAG, Subtarget);
3706 }
3707
3708 // Put Vec in a VT sized vector
3709 if (SrcContainerVT.getVectorMinNumElements() <
3710 ContainerVT.getVectorMinNumElements())
3711 Src = DAG.getInsertSubvector(DL, Vec: DAG.getUNDEF(VT: ContainerVT), SubVec: Src, Idx: 0);
3712 else
3713 Src = DAG.getExtractSubvector(DL, VT: ContainerVT, Vec: Src, Idx: 0);
3714
3715 // We checked that Idx fits inside VT earlier
3716 auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
3717 SDValue Gather = DAG.getNode(Opcode: RISCVISD::VRGATHER_VX_VL, DL, VT: ContainerVT, N1: Src,
3718 N2: Idx, N3: DAG.getUNDEF(VT: ContainerVT), N4: Mask, N5: VL);
3719 if (VT.isFixedLengthVector())
3720 Gather = convertFromScalableVector(VT, V: Gather, DAG, Subtarget);
3721 return Gather;
3722}
3723
3724static SDValue lowerBuildVectorViaVID(SDValue Op, SelectionDAG &DAG,
3725 const RISCVSubtarget &Subtarget) {
3726 MVT VT = Op.getSimpleValueType();
3727 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3728
3729 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3730
3731 SDLoc DL(Op);
3732 auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
3733
3734 if (auto SimpleVID = isSimpleVIDSequence(Op, EltSizeInBits: Op.getScalarValueSizeInBits())) {
3735 int64_t StepNumerator = SimpleVID->StepNumerator;
3736 unsigned StepDenominator = SimpleVID->StepDenominator;
3737 int64_t Addend = SimpleVID->Addend;
3738
3739 assert(StepNumerator != 0 && "Invalid step");
3740 bool Negate = false;
3741 int64_t SplatStepVal = StepNumerator;
3742 unsigned StepOpcode = ISD::MUL;
3743 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3744 // anyway as the shift of 63 won't fit in uimm5.
3745 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3746 isPowerOf2_64(Value: std::abs(i: StepNumerator))) {
3747 Negate = StepNumerator < 0;
3748 StepOpcode = ISD::SHL;
3749 SplatStepVal = Log2_64(Value: std::abs(i: StepNumerator));
3750 }
3751
    // Only emit VIDs with suitably-small steps. We use imm5 as a threshold
    // since it's the immediate value many RVV instructions accept. There is
    // no vmul.vi instruction, so ensure the multiply constant can fit in a
    // single addi instruction. For the addend, we allow up to 32 bits.
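    // For example, a step of 4 becomes a vector shift left by 2, while a step
    // of 100 falls back to a multiply whose splat constant is expected to fit
    // in a single addi.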
3756 if (((StepOpcode == ISD::MUL && isInt<12>(x: SplatStepVal)) ||
3757 (StepOpcode == ISD::SHL && isUInt<5>(x: SplatStepVal))) &&
3758 isPowerOf2_32(Value: StepDenominator) &&
3759 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<32>(x: Addend)) {
3760 MVT VIDVT =
3761 VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3762 MVT VIDContainerVT =
3763 getContainerForFixedLengthVector(DAG, VT: VIDVT, Subtarget);
3764 SDValue VID = DAG.getNode(Opcode: RISCVISD::VID_VL, DL, VT: VIDContainerVT, N1: Mask, N2: VL);
3765 // Convert right out of the scalable type so we can use standard ISD
3766 // nodes for the rest of the computation. If we used scalable types with
3767 // these, we'd lose the fixed-length vector info and generate worse
3768 // vsetvli code.
3769 VID = convertFromScalableVector(VT: VIDVT, V: VID, DAG, Subtarget);
3770 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3771 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3772 SDValue SplatStep = DAG.getSignedConstant(Val: SplatStepVal, DL, VT: VIDVT);
3773 VID = DAG.getNode(Opcode: StepOpcode, DL, VT: VIDVT, N1: VID, N2: SplatStep);
3774 }
3775 if (StepDenominator != 1) {
3776 SDValue SplatStep =
3777 DAG.getConstant(Val: Log2_64(Value: StepDenominator), DL, VT: VIDVT);
3778 VID = DAG.getNode(Opcode: ISD::SRL, DL, VT: VIDVT, N1: VID, N2: SplatStep);
3779 }
3780 if (Addend != 0 || Negate) {
3781 SDValue SplatAddend = DAG.getSignedConstant(Val: Addend, DL, VT: VIDVT);
3782 VID = DAG.getNode(Opcode: Negate ? ISD::SUB : ISD::ADD, DL, VT: VIDVT, N1: SplatAddend,
3783 N2: VID);
3784 }
3785 if (VT.isFloatingPoint()) {
3786 // TODO: Use vfwcvt to reduce register pressure.
3787 VID = DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT, Operand: VID);
3788 }
3789 return VID;
3790 }
3791 }
3792
3793 return SDValue();
3794}
3795
3796/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3797/// which constitute a large proportion of the elements. In such cases we can
3798/// splat a vector with the dominant element and make up the shortfall with
/// INSERT_VECTOR_ELTs. Returns an empty SDValue if not profitable.
3800/// Note that this includes vectors of 2 elements by association. The
3801/// upper-most element is the "dominant" one, allowing us to use a splat to
3802/// "insert" the upper element, and an insert of the lower element at position
3803/// 0, which improves codegen.
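/// For example, <2, 2, 2, 2, 2, 2, 2, 7> can be lowered as a splat of 2
/// followed by a single v(f)slide1down insert of the value 7.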
3804static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG,
3805 const RISCVSubtarget &Subtarget) {
3806 MVT VT = Op.getSimpleValueType();
3807 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3808
3809 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3810
3811 SDLoc DL(Op);
3812 auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
3813
3814 MVT XLenVT = Subtarget.getXLenVT();
3815 unsigned NumElts = Op.getNumOperands();
3816
3817 SDValue DominantValue;
3818 unsigned MostCommonCount = 0;
3819 DenseMap<SDValue, unsigned> ValueCounts;
3820 unsigned NumUndefElts =
3821 count_if(Range: Op->op_values(), P: [](const SDValue &V) { return V.isUndef(); });
3822
3823 // Track the number of scalar loads we know we'd be inserting, estimated as
3824 // any non-zero floating-point constant. Other kinds of element are either
3825 // already in registers or are materialized on demand. The threshold at which
  // a vector load is more desirable than several scalar materialization and
3827 // vector-insertion instructions is not known.
3828 unsigned NumScalarLoads = 0;
3829
3830 for (SDValue V : Op->op_values()) {
3831 if (V.isUndef())
3832 continue;
3833
3834 unsigned &Count = ValueCounts[V];
3835 if (0 == Count)
3836 if (auto *CFP = dyn_cast<ConstantFPSDNode>(Val&: V))
3837 NumScalarLoads += !CFP->isExactlyValue(V: +0.0);
3838
3839 // Is this value dominant? In case of a tie, prefer the highest element as
3840 // it's cheaper to insert near the beginning of a vector than it is at the
3841 // end.
3842 if (++Count >= MostCommonCount) {
3843 DominantValue = V;
3844 MostCommonCount = Count;
3845 }
3846 }
3847
3848 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3849 unsigned NumDefElts = NumElts - NumUndefElts;
3850 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3851
3852 // Don't perform this optimization when optimizing for size, since
3853 // materializing elements and inserting them tends to cause code bloat.
3854 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3855 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(N: Op.getNode())) &&
3856 ((MostCommonCount > DominantValueCountThreshold) ||
3857 (ValueCounts.size() <= Log2_32(Value: NumDefElts)))) {
3858 // Start by splatting the most common element.
3859 SDValue Vec = DAG.getSplatBuildVector(VT, DL, Op: DominantValue);
3860
3861 DenseSet<SDValue> Processed{DominantValue};
3862
3863 // We can handle an insert into the last element (of a splat) via
3864 // v(f)slide1down. This is slightly better than the vslideup insert
3865 // lowering as it avoids the need for a vector group temporary. It
3866 // is also better than using vmerge.vx as it avoids the need to
3867 // materialize the mask in a vector register.
3868 if (SDValue LastOp = Op->getOperand(Num: Op->getNumOperands() - 1);
3869 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3870 LastOp != DominantValue) {
3871 Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget);
3872 auto OpCode =
3873 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
3874 if (!VT.isFloatingPoint())
3875 LastOp = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: LastOp);
3876 Vec = DAG.getNode(Opcode: OpCode, DL, VT: ContainerVT, N1: DAG.getUNDEF(VT: ContainerVT), N2: Vec,
3877 N3: LastOp, N4: Mask, N5: VL);
3878 Vec = convertFromScalableVector(VT, V: Vec, DAG, Subtarget);
3879 Processed.insert(V: LastOp);
3880 }
3881
3882 MVT SelMaskTy = VT.changeVectorElementType(EltVT: MVT::i1);
3883 for (const auto &OpIdx : enumerate(First: Op->ops())) {
3884 const SDValue &V = OpIdx.value();
3885 if (V.isUndef() || !Processed.insert(V).second)
3886 continue;
3887 if (ValueCounts[V] == 1) {
3888 Vec = DAG.getInsertVectorElt(DL, Vec, Elt: V, Idx: OpIdx.index());
3889 } else {
3890 // Blend in all instances of this value using a VSELECT, using a
3891 // mask where each bit signals whether that element is the one
3892 // we're after.
3893 SmallVector<SDValue> Ops;
3894 transform(Range: Op->op_values(), d_first: std::back_inserter(x&: Ops), F: [&](SDValue V1) {
3895 return DAG.getConstant(Val: V == V1, DL, VT: XLenVT);
3896 });
3897 Vec = DAG.getNode(Opcode: ISD::VSELECT, DL, VT,
3898 N1: DAG.getBuildVector(VT: SelMaskTy, DL, Ops),
3899 N2: DAG.getSplatBuildVector(VT, DL, Op: V), N3: Vec);
3900 }
3901 }
3902
3903 return Vec;
3904 }
3905
3906 return SDValue();
3907}
3908
3909static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
3910 const RISCVSubtarget &Subtarget) {
3911 MVT VT = Op.getSimpleValueType();
3912 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3913
3914 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3915
3916 SDLoc DL(Op);
3917 auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
3918
3919 MVT XLenVT = Subtarget.getXLenVT();
3920 unsigned NumElts = Op.getNumOperands();
3921
3922 if (VT.getVectorElementType() == MVT::i1) {
3923 if (ISD::isBuildVectorAllZeros(N: Op.getNode())) {
3924 SDValue VMClr = DAG.getNode(Opcode: RISCVISD::VMCLR_VL, DL, VT: ContainerVT, Operand: VL);
3925 return convertFromScalableVector(VT, V: VMClr, DAG, Subtarget);
3926 }
3927
3928 if (ISD::isBuildVectorAllOnes(N: Op.getNode())) {
3929 SDValue VMSet = DAG.getNode(Opcode: RISCVISD::VMSET_VL, DL, VT: ContainerVT, Operand: VL);
3930 return convertFromScalableVector(VT, V: VMSet, DAG, Subtarget);
3931 }
3932
3933 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3934 // scalar integer chunks whose bit-width depends on the number of mask
3935 // bits and XLEN.
3936 // First, determine the most appropriate scalar integer type to use. This
3937 // is at most XLenVT, but may be shrunk to a smaller vector element type
3938 // according to the size of the final vector - use i8 chunks rather than
3939 // XLenVT if we're producing a v8i1. This results in more consistent
3940 // codegen across RV32 and RV64.
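    // For example, a v8i1 constant <1,0,1,1,0,0,0,1> is built as the single
    // i8 chunk 0x8D (element 0 in the least significant bit) and bitcast back.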
3941 unsigned NumViaIntegerBits = std::clamp(val: NumElts, lo: 8u, hi: Subtarget.getXLen());
3942 NumViaIntegerBits = std::min(a: NumViaIntegerBits, b: Subtarget.getELen());
3943 // If we have to use more than one INSERT_VECTOR_ELT then this
3944 // optimization is likely to increase code size; avoid performing it in
3945 // such a case. We can use a load from a constant pool in this case.
3946 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3947 return SDValue();
3948 // Now we can create our integer vector type. Note that it may be larger
3949 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3950 unsigned IntegerViaVecElts = divideCeil(Numerator: NumElts, Denominator: NumViaIntegerBits);
3951 MVT IntegerViaVecVT =
3952 MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: NumViaIntegerBits),
3953 NumElements: IntegerViaVecElts);
3954
3955 uint64_t Bits = 0;
3956 unsigned BitPos = 0, IntegerEltIdx = 0;
3957 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3958
3959 for (unsigned I = 0; I < NumElts;) {
3960 SDValue V = Op.getOperand(i: I);
3961 bool BitValue = !V.isUndef() && V->getAsZExtVal();
3962 Bits |= ((uint64_t)BitValue << BitPos);
3963 ++BitPos;
3964 ++I;
3965
3966 // Once we accumulate enough bits to fill our scalar type or process the
3967 // last element, insert into our vector and clear our accumulated data.
3968 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3969 if (NumViaIntegerBits <= 32)
3970 Bits = SignExtend64<32>(x: Bits);
3971 SDValue Elt = DAG.getSignedConstant(Val: Bits, DL, VT: XLenVT);
3972 Elts[IntegerEltIdx] = Elt;
3973 Bits = 0;
3974 BitPos = 0;
3975 IntegerEltIdx++;
3976 }
3977 }
3978
3979 SDValue Vec = DAG.getBuildVector(VT: IntegerViaVecVT, DL, Ops: Elts);
3980
3981 if (NumElts < NumViaIntegerBits) {
3982 // If we're producing a smaller vector than our minimum legal integer
3983 // type, bitcast to the equivalent (known-legal) mask type, and extract
3984 // our final mask.
3985 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3986 Vec = DAG.getBitcast(VT: MVT::v8i1, V: Vec);
3987 Vec = DAG.getExtractSubvector(DL, VT, Vec, Idx: 0);
3988 } else {
3989 // Else we must have produced an integer type with the same size as the
3990 // mask type; bitcast for the final result.
3991 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3992 Vec = DAG.getBitcast(VT, V: Vec);
3993 }
3994
3995 return Vec;
3996 }
3997
3998 if (SDValue Splat = cast<BuildVectorSDNode>(Val&: Op)->getSplatValue()) {
3999 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
4000 : RISCVISD::VMV_V_X_VL;
4001 if (!VT.isFloatingPoint())
4002 Splat = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: Splat);
4003 Splat =
4004 DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: DAG.getUNDEF(VT: ContainerVT), N2: Splat, N3: VL);
4005 return convertFromScalableVector(VT, V: Splat, DAG, Subtarget);
4006 }
4007
4008 // Try and match index sequences, which we can lower to the vid instruction
4009 // with optional modifications. An all-undef vector is matched by
4010 // getSplatValue, above.
4011 if (SDValue Res = lowerBuildVectorViaVID(Op, DAG, Subtarget))
4012 return Res;
4013
4014 // For very small build_vectors, use a single scalar insert of a constant.
4015 // TODO: Base this on constant rematerialization cost, not size.
4016 const unsigned EltBitSize = VT.getScalarSizeInBits();
4017 if (VT.getSizeInBits() <= 32 &&
4018 ISD::isBuildVectorOfConstantSDNodes(N: Op.getNode())) {
4019 MVT ViaIntVT = MVT::getIntegerVT(BitWidth: VT.getSizeInBits());
4020 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
4021 "Unexpected sequence type");
4022 // If we can use the original VL with the modified element type, this
4023 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
4024 // be moved into InsertVSETVLI?
4025 unsigned ViaVecLen =
4026 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
4027 MVT ViaVecVT = MVT::getVectorVT(VT: ViaIntVT, NumElements: ViaVecLen);
4028
4029 uint64_t EltMask = maskTrailingOnes<uint64_t>(N: EltBitSize);
4030 uint64_t SplatValue = 0;
4031 // Construct the amalgamated value at this larger vector type.
4032 for (const auto &OpIdx : enumerate(First: Op->op_values())) {
4033 const auto &SeqV = OpIdx.value();
4034 if (!SeqV.isUndef())
4035 SplatValue |=
4036 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
4037 }
4038
4039 // On RV64, sign-extend from 32 to 64 bits where possible in order to
    // achieve better constant materialization.
4041 // On RV32, we need to sign-extend to use getSignedConstant.
4042 if (ViaIntVT == MVT::i32)
4043 SplatValue = SignExtend64<32>(x: SplatValue);
4044
4045 SDValue Vec = DAG.getInsertVectorElt(
4046 DL, Vec: DAG.getUNDEF(VT: ViaVecVT),
4047 Elt: DAG.getSignedConstant(Val: SplatValue, DL, VT: XLenVT), Idx: 0);
4048 if (ViaVecLen != 1)
4049 Vec = DAG.getExtractSubvector(DL, VT: MVT::getVectorVT(VT: ViaIntVT, NumElements: 1), Vec, Idx: 0);
4050 return DAG.getBitcast(VT, V: Vec);
4051 }
4052
4054 // Attempt to detect "hidden" splats, which only reveal themselves as splats
4055 // when re-interpreted as a vector with a larger element type. For example,
4056 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
4057 // could be instead splat as
4058 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
4059 // TODO: This optimization could also work on non-constant splats, but it
4060 // would require bit-manipulation instructions to construct the splat value.
4061 SmallVector<SDValue> Sequence;
4062 const auto *BV = cast<BuildVectorSDNode>(Val&: Op);
4063 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
4064 ISD::isBuildVectorOfConstantSDNodes(N: Op.getNode()) &&
4065 BV->getRepeatedSequence(Sequence) &&
4066 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
4067 unsigned SeqLen = Sequence.size();
4068 MVT ViaIntVT = MVT::getIntegerVT(BitWidth: EltBitSize * SeqLen);
4069 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
4070 ViaIntVT == MVT::i64) &&
4071 "Unexpected sequence type");
4072
4073 // If we can use the original VL with the modified element type, this
4074 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
4075 // be moved into InsertVSETVLI?
4076 const unsigned RequiredVL = NumElts / SeqLen;
    const unsigned ViaVecLen =
        (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts)
            ? NumElts
            : RequiredVL;
4080 MVT ViaVecVT = MVT::getVectorVT(VT: ViaIntVT, NumElements: ViaVecLen);
4081
4082 unsigned EltIdx = 0;
4083 uint64_t EltMask = maskTrailingOnes<uint64_t>(N: EltBitSize);
4084 uint64_t SplatValue = 0;
4085 // Construct the amalgamated value which can be splatted as this larger
4086 // vector type.
4087 for (const auto &SeqV : Sequence) {
4088 if (!SeqV.isUndef())
4089 SplatValue |=
4090 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
4091 EltIdx++;
4092 }
4093
4094 // On RV64, sign-extend from 32 to 64 bits where possible in order to
    // achieve better constant materialization.
4096 // On RV32, we need to sign-extend to use getSignedConstant.
4097 if (ViaIntVT == MVT::i32)
4098 SplatValue = SignExtend64<32>(x: SplatValue);
4099
4100 // Since we can't introduce illegal i64 types at this stage, we can only
4101 // perform an i64 splat on RV32 if it is its own sign-extended value. That
4102 // way we can use RVV instructions to splat.
4103 assert((ViaIntVT.bitsLE(XLenVT) ||
4104 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
4105 "Unexpected bitcast sequence");
4106 if (ViaIntVT.bitsLE(VT: XLenVT) || isInt<32>(x: SplatValue)) {
4107 SDValue ViaVL =
4108 DAG.getConstant(Val: ViaVecVT.getVectorNumElements(), DL, VT: XLenVT);
4109 MVT ViaContainerVT =
4110 getContainerForFixedLengthVector(DAG, VT: ViaVecVT, Subtarget);
4111 SDValue Splat =
4112 DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ViaContainerVT,
4113 N1: DAG.getUNDEF(VT: ViaContainerVT),
4114 N2: DAG.getSignedConstant(Val: SplatValue, DL, VT: XLenVT), N3: ViaVL);
4115 Splat = convertFromScalableVector(VT: ViaVecVT, V: Splat, DAG, Subtarget);
4116 if (ViaVecLen != RequiredVL)
4117 Splat = DAG.getExtractSubvector(
4118 DL, VT: MVT::getVectorVT(VT: ViaIntVT, NumElements: RequiredVL), Vec: Splat, Idx: 0);
4119 return DAG.getBitcast(VT, V: Splat);
4120 }
4121 }
4122
  // If the number of sign bits allows, see if we can lower as a <N x i8>.
4124 // Our main goal here is to reduce LMUL (and thus work) required to
4125 // build the constant, but we will also narrow if the resulting
4126 // narrow vector is known to materialize cheaply.
4127 // TODO: We really should be costing the smaller vector. There are
4128 // profitable cases this misses.
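  // For example (illustrative), v4i32 <1, -1, 2, -2> has at most 8
  // significant bits per element, so it can be built as an i8 build_vector
  // and then sign-extended back to i32 elements (a vsext.vf4-style widening).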
4129 if (EltBitSize > 8 && VT.isInteger() &&
4130 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen()) &&
4131 DAG.ComputeMaxSignificantBits(Op) <= 8) {
4132 SDValue Source = DAG.getBuildVector(VT: VT.changeVectorElementType(EltVT: MVT::i8),
4133 DL, Ops: Op->ops());
4134 Source = convertToScalableVector(VT: ContainerVT.changeVectorElementType(EltVT: MVT::i8),
4135 V: Source, DAG, Subtarget);
4136 SDValue Res = DAG.getNode(Opcode: RISCVISD::VSEXT_VL, DL, VT: ContainerVT, N1: Source, N2: Mask, N3: VL);
4137 return convertFromScalableVector(VT, V: Res, DAG, Subtarget);
4138 }
4139
4140 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4141 return Res;
4142
4143 // For constant vectors, use generic constant pool lowering. Otherwise,
4144 // we'd have to materialize constants in GPRs just to move them into the
4145 // vector.
4146 return SDValue();
4147}
4148
4149static unsigned getPACKOpcode(unsigned DestBW,
4150 const RISCVSubtarget &Subtarget) {
4151 switch (DestBW) {
4152 default:
4153 llvm_unreachable("Unsupported pack size");
4154 case 16:
4155 return RISCV::PACKH;
4156 case 32:
4157 return Subtarget.is64Bit() ? RISCV::PACKW : RISCV::PACK;
4158 case 64:
4159 assert(Subtarget.is64Bit());
4160 return RISCV::PACK;
4161 }
4162}
4163
/// Double the element size of the build vector to reduce the number
/// of vslide1down operations in the build vector chain. In the worst
/// case, this trades three scalar operations for one vector operation.
/// Scalar operations are generally lower latency, and for out-of-order
/// cores we also benefit from additional parallelism.
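/// For example (illustrative), with i16 elements on RV64, adjacent pairs of
/// scalars can be packed into i32 values (via packw with Zbkb, or
/// mask/shift/or otherwise), halving the number of vslide1down steps.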
4169static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG,
4170 const RISCVSubtarget &Subtarget) {
4171 SDLoc DL(Op);
4172 MVT VT = Op.getSimpleValueType();
4173 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4174 MVT ElemVT = VT.getVectorElementType();
4175 if (!ElemVT.isInteger())
4176 return SDValue();
4177
4178 // TODO: Relax these architectural restrictions, possibly with costing
4179 // of the actual instructions required.
4180 if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())
4181 return SDValue();
4182
4183 unsigned NumElts = VT.getVectorNumElements();
4184 unsigned ElemSizeInBits = ElemVT.getSizeInBits();
4185 if (ElemSizeInBits >= std::min(a: Subtarget.getELen(), b: Subtarget.getXLen()) ||
4186 NumElts % 2 != 0)
4187 return SDValue();
4188
4189 // Produce [B,A] packed into a type twice as wide. Note that all
4190 // scalars are XLenVT, possibly masked (see below).
4191 MVT XLenVT = Subtarget.getXLenVT();
4192 SDValue Mask = DAG.getConstant(
4193 Val: APInt::getLowBitsSet(numBits: XLenVT.getSizeInBits(), loBitsSet: ElemSizeInBits), DL, VT: XLenVT);
4194 auto pack = [&](SDValue A, SDValue B) {
4195 // Bias the scheduling of the inserted operations to near the
4196 // definition of the element - this tends to reduce register
4197 // pressure overall.
4198 SDLoc ElemDL(B);
4199 if (Subtarget.hasStdExtZbkb())
4200 // Note that we're relying on the high bits of the result being
4201 // don't care. For PACKW, the result is *sign* extended.
4202 return SDValue(
4203 DAG.getMachineNode(Opcode: getPACKOpcode(DestBW: ElemSizeInBits * 2, Subtarget),
4204 dl: ElemDL, VT: XLenVT, Op1: A, Op2: B),
4205 0);
4206
4207 A = DAG.getNode(Opcode: ISD::AND, DL: SDLoc(A), VT: XLenVT, N1: A, N2: Mask);
4208 B = DAG.getNode(Opcode: ISD::AND, DL: SDLoc(B), VT: XLenVT, N1: B, N2: Mask);
4209 SDValue ShtAmt = DAG.getConstant(Val: ElemSizeInBits, DL: ElemDL, VT: XLenVT);
4210 return DAG.getNode(Opcode: ISD::OR, DL: ElemDL, VT: XLenVT, N1: A,
4211 N2: DAG.getNode(Opcode: ISD::SHL, DL: ElemDL, VT: XLenVT, N1: B, N2: ShtAmt),
4212 Flags: SDNodeFlags::Disjoint);
4213 };
4214
4215 SmallVector<SDValue> NewOperands;
4216 NewOperands.reserve(N: NumElts / 2);
4217 for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2)
4218 NewOperands.push_back(Elt: pack(Op.getOperand(i), Op.getOperand(i: i + 1)));
4219 assert(NumElts == NewOperands.size() * 2);
4220 MVT WideVT = MVT::getIntegerVT(BitWidth: ElemSizeInBits * 2);
4221 MVT WideVecVT = MVT::getVectorVT(VT: WideVT, NumElements: NumElts / 2);
4222 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT,
4223 Operand: DAG.getBuildVector(VT: WideVecVT, DL, Ops: NewOperands));
4224}
4225
4226static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
4227 const RISCVSubtarget &Subtarget) {
4228 MVT VT = Op.getSimpleValueType();
4229 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4230
4231 MVT EltVT = VT.getVectorElementType();
4232 MVT XLenVT = Subtarget.getXLenVT();
4233
4234 SDLoc DL(Op);
4235
4236 // Proper support for f16 requires Zvfh. bf16 always requires special
4237 // handling. We need to cast the scalar to integer and create an integer
4238 // build_vector.
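  // For example (illustrative), a v4bf16 build_vector is rebuilt as a v4i16
  // build_vector whose elements are constant-folded bit patterns,
  // FMV_X_ANYEXTH moves, or plain i16 bitcasts of the original scalars,
  // then bitcast back to v4bf16.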
4239 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) || EltVT == MVT::bf16) {
4240 MVT IVT = VT.changeVectorElementType(EltVT: MVT::i16);
4241 SmallVector<SDValue, 16> NewOps(Op.getNumOperands());
4242 for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I) {
4243 SDValue Elem = Op.getOperand(i: I);
4244 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4245 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin())) {
4246 // Called by LegalizeDAG, we need to use XLenVT operations since we
4247 // can't create illegal types.
4248 if (auto *C = dyn_cast<ConstantFPSDNode>(Val&: Elem)) {
4249 // Manually constant fold so the integer build_vector can be lowered
4250 // better. Waiting for DAGCombine will be too late.
4251 APInt V =
4252 C->getValueAPF().bitcastToAPInt().sext(width: XLenVT.getSizeInBits());
4253 NewOps[I] = DAG.getConstant(Val: V, DL, VT: XLenVT);
4254 } else {
4255 NewOps[I] = DAG.getNode(Opcode: RISCVISD::FMV_X_ANYEXTH, DL, VT: XLenVT, Operand: Elem);
4256 }
4257 } else {
4258 // Called by scalar type legalizer, we can use i16.
4259 NewOps[I] = DAG.getBitcast(VT: MVT::i16, V: Op.getOperand(i: I));
4260 }
4261 }
4262 SDValue Res = DAG.getNode(Opcode: ISD::BUILD_VECTOR, DL, VT: IVT, Ops: NewOps);
4263 return DAG.getBitcast(VT, V: Res);
4264 }
4265
4266 if (ISD::isBuildVectorOfConstantSDNodes(N: Op.getNode()) ||
4267 ISD::isBuildVectorOfConstantFPSDNodes(N: Op.getNode()))
4268 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
4269
4270 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4271
4272 auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
4273
4274 if (VT.getVectorElementType() == MVT::i1) {
4275 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
4276 // vector type, we have a legal equivalently-sized i8 type, so we can use
4277 // that.
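    // For example (illustrative), v4i1 <1, 0, undef, 1> becomes a v4i8
    // build_vector masked with AND 1 and then compared SETNE against zero
    // to form the mask value.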
4278 MVT WideVecVT = VT.changeVectorElementType(EltVT: MVT::i8);
4279 SDValue VecZero = DAG.getConstant(Val: 0, DL, VT: WideVecVT);
4280
4281 SDValue WideVec;
4282 if (SDValue Splat = cast<BuildVectorSDNode>(Val&: Op)->getSplatValue()) {
4283 // For a splat, perform a scalar truncate before creating the wider
4284 // vector.
4285 Splat = DAG.getNode(Opcode: ISD::AND, DL, VT: Splat.getValueType(), N1: Splat,
4286 N2: DAG.getConstant(Val: 1, DL, VT: Splat.getValueType()));
4287 WideVec = DAG.getSplatBuildVector(VT: WideVecVT, DL, Op: Splat);
4288 } else {
4289 SmallVector<SDValue, 8> Ops(Op->op_values());
4290 WideVec = DAG.getBuildVector(VT: WideVecVT, DL, Ops);
4291 SDValue VecOne = DAG.getConstant(Val: 1, DL, VT: WideVecVT);
4292 WideVec = DAG.getNode(Opcode: ISD::AND, DL, VT: WideVecVT, N1: WideVec, N2: VecOne);
4293 }
4294
4295 return DAG.getSetCC(DL, VT, LHS: WideVec, RHS: VecZero, Cond: ISD::SETNE);
4296 }
4297
4298 if (SDValue Splat = cast<BuildVectorSDNode>(Val&: Op)->getSplatValue()) {
4299 if (auto Gather = matchSplatAsGather(SplatVal: Splat, VT, DL, DAG, Subtarget))
4300 return Gather;
4301
4302 // Prefer vmv.s.x/vfmv.s.f if legal to reduce work and register
4303 // pressure at high LMUL.
4304 if (all_of(Range: Op->ops().drop_front(),
4305 P: [](const SDUse &U) { return U.get().isUndef(); })) {
4306 unsigned Opc =
4307 VT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
4308 if (!VT.isFloatingPoint())
4309 Splat = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: Splat);
4310 Splat = DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: DAG.getUNDEF(VT: ContainerVT),
4311 N2: Splat, N3: VL);
4312 return convertFromScalableVector(VT, V: Splat, DAG, Subtarget);
4313 }
4314
4315 unsigned Opc =
4316 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
4317 if (!VT.isFloatingPoint())
4318 Splat = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: Splat);
4319 Splat =
4320 DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: DAG.getUNDEF(VT: ContainerVT), N2: Splat, N3: VL);
4321 return convertFromScalableVector(VT, V: Splat, DAG, Subtarget);
4322 }
4323
4324 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4325 return Res;
4326
4327 // If we're compiling for an exact VLEN value, we can split our work per
4328 // register in the register group.
4329 if (const auto VLen = Subtarget.getRealVLen();
4330 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
4331 MVT ElemVT = VT.getVectorElementType();
4332 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4333 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4334 MVT OneRegVT = MVT::getVectorVT(VT: ElemVT, NumElements: ElemsPerVReg);
4335 MVT M1VT = getContainerForFixedLengthVector(DAG, VT: OneRegVT, Subtarget);
4336 assert(M1VT == RISCVTargetLowering::getM1VT(M1VT));
4337
4338 // The following semantically builds up a fixed length concat_vector
4339 // of the component build_vectors. We eagerly lower to scalable and
4340 // insert_subvector here to avoid DAG combining it back to a large
4341 // build_vector.
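    // For example (illustrative), with VLEN=128 a v8i64 build_vector is
    // emitted as four v2i64 build_vectors, each converted to scalable form
    // and inserted into its own register of the resulting register group.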
4342 SmallVector<SDValue> BuildVectorOps(Op->ops());
4343 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4344 SDValue Vec = DAG.getUNDEF(VT: ContainerVT);
4345 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
4346 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(N: i, M: ElemsPerVReg);
4347 SDValue SubBV =
4348 DAG.getNode(Opcode: ISD::BUILD_VECTOR, DL, VT: OneRegVT, Ops: OneVRegOfOps);
4349 SubBV = convertToScalableVector(VT: M1VT, V: SubBV, DAG, Subtarget);
4350 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
4351 Vec = DAG.getInsertSubvector(DL, Vec, SubVec: SubBV, Idx: InsertIdx);
4352 }
4353 return convertFromScalableVector(VT, V: Vec, DAG, Subtarget);
4354 }
4355
4356 // If we're about to resort to vslide1down (or stack usage), pack our
4357 // elements into the widest scalar type we can. This will force a VL/VTYPE
4358 // toggle, but reduces the critical path, the number of vslide1down ops
4359 // required, and possibly enables scalar folds of the values.
4360 if (SDValue Res = lowerBuildVectorViaPacking(Op, DAG, Subtarget))
4361 return Res;
4362
4363 // For m1 vectors, if we have non-undef values in both halves of our vector,
4364 // split the vector into low and high halves, build them separately, then
4365 // use a vselect to combine them. For long vectors, this cuts the critical
4366 // path of the vslide1down sequence in half, and gives us an opportunity
4367 // to special case each half independently. Note that we don't change the
4368 // length of the sub-vectors here, so if both fallback to the generic
4369 // vslide1down path, we should be able to fold the vselect into the final
4370 // vslidedown (for the undef tail) for the first half w/ masking.
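  // For example (illustrative), when the container fits in a single register,
  // v8i32 <a,b,c,d,e,f,g,h> can be built as <a,b,c,d,undef,...> and
  // <undef,...,e,f,g,h> and then combined with a vselect whose mask picks the
  // first half from the former.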
4371 unsigned NumElts = VT.getVectorNumElements();
4372 unsigned NumUndefElts =
4373 count_if(Range: Op->op_values(), P: [](const SDValue &V) { return V.isUndef(); });
4374 unsigned NumDefElts = NumElts - NumUndefElts;
4375 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
4376 ContainerVT.bitsLE(VT: RISCVTargetLowering::getM1VT(VT: ContainerVT))) {
4377 SmallVector<SDValue> SubVecAOps, SubVecBOps;
4378 SmallVector<SDValue> MaskVals;
4379 SDValue UndefElem = DAG.getUNDEF(VT: Op->getOperand(Num: 0)->getValueType(ResNo: 0));
4380 SubVecAOps.reserve(N: NumElts);
4381 SubVecBOps.reserve(N: NumElts);
4382 for (unsigned i = 0; i < NumElts; i++) {
4383 SDValue Elem = Op->getOperand(Num: i);
4384 if (i < NumElts / 2) {
4385 SubVecAOps.push_back(Elt: Elem);
4386 SubVecBOps.push_back(Elt: UndefElem);
4387 } else {
4388 SubVecAOps.push_back(Elt: UndefElem);
4389 SubVecBOps.push_back(Elt: Elem);
4390 }
4391 bool SelectMaskVal = (i < NumElts / 2);
4392 MaskVals.push_back(Elt: DAG.getConstant(Val: SelectMaskVal, DL, VT: XLenVT));
4393 }
4394 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
4395 MaskVals.size() == NumElts);
4396
4397 SDValue SubVecA = DAG.getBuildVector(VT, DL, Ops: SubVecAOps);
4398 SDValue SubVecB = DAG.getBuildVector(VT, DL, Ops: SubVecBOps);
4399 MVT MaskVT = MVT::getVectorVT(VT: MVT::i1, NumElements: NumElts);
4400 SDValue SelectMask = DAG.getBuildVector(VT: MaskVT, DL, Ops: MaskVals);
4401 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT, N1: SelectMask, N2: SubVecA, N3: SubVecB);
4402 }
4403
  // Cap the cost at a value linear in the number of elements in the vector.
  // The default lowering is to use the stack. The vector store + scalar loads
  // sequence is linear in VL. However, at high LMULs vslide1down and
  // vslidedown end up being (at least) linear in LMUL. As a result, using the
  // vslidedown lowering for every element ends up being VL*LMUL.
4409 // TODO: Should we be directly costing the stack alternative? Doing so might
4410 // give us a more accurate upper bound.
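  // For example (illustrative), a fully-defined v8i32 at LMUL=2 has a budget
  // of 8 * 2 = 16 and a per-slide cost of 2, so the eight charged slide steps
  // exactly exhaust (but do not exceed) the budget.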
4411 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4412
4413 // TODO: unify with TTI getSlideCost.
4414 InstructionCost PerSlideCost = 1;
4415 switch (RISCVTargetLowering::getLMUL(VT: ContainerVT)) {
4416 default: break;
4417 case RISCVVType::LMUL_2:
4418 PerSlideCost = 2;
4419 break;
4420 case RISCVVType::LMUL_4:
4421 PerSlideCost = 4;
4422 break;
4423 case RISCVVType::LMUL_8:
4424 PerSlideCost = 8;
4425 break;
4426 }
4427
4428 // TODO: Should we be using the build instseq then cost + evaluate scheme
4429 // we use for integer constants here?
4430 unsigned UndefCount = 0;
4431 for (const SDValue &V : Op->ops()) {
4432 if (V.isUndef()) {
4433 UndefCount++;
4434 continue;
4435 }
4436 if (UndefCount) {
4437 LinearBudget -= PerSlideCost;
4438 UndefCount = 0;
4439 }
4440 LinearBudget -= PerSlideCost;
4441 }
4442 if (UndefCount) {
4443 LinearBudget -= PerSlideCost;
4444 }
4445
4446 if (LinearBudget < 0)
4447 return SDValue();
4448
4449 assert((!VT.isFloatingPoint() ||
4450 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4451 "Illegal type which will result in reserved encoding");
4452
4453 const unsigned Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
4454
4455 SDValue Vec;
4456 UndefCount = 0;
4457 for (SDValue V : Op->ops()) {
4458 if (V.isUndef()) {
4459 UndefCount++;
4460 continue;
4461 }
4462
4463 // Start our sequence with a TA splat in the hopes that hardware is able to
4464 // recognize there's no dependency on the prior value of our temporary
4465 // register.
4466 if (!Vec) {
4467 Vec = DAG.getSplatVector(VT, DL, Op: V);
4468 Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget);
4469 UndefCount = 0;
4470 continue;
4471 }
4472
4473 if (UndefCount) {
4474 const SDValue Offset = DAG.getConstant(Val: UndefCount, DL, VT: Subtarget.getXLenVT());
4475 Vec = getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT, Passthru: DAG.getUNDEF(VT: ContainerVT),
4476 Op: Vec, Offset, Mask, VL, Policy);
4477 UndefCount = 0;
4478 }
4479 auto OpCode =
4480 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
4481 if (!VT.isFloatingPoint())
4482 V = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getXLenVT(), Operand: V);
4483 Vec = DAG.getNode(Opcode: OpCode, DL, VT: ContainerVT, N1: DAG.getUNDEF(VT: ContainerVT), N2: Vec,
4484 N3: V, N4: Mask, N5: VL);
4485 }
4486 if (UndefCount) {
4487 const SDValue Offset = DAG.getConstant(Val: UndefCount, DL, VT: Subtarget.getXLenVT());
4488 Vec = getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT, Passthru: DAG.getUNDEF(VT: ContainerVT),
4489 Op: Vec, Offset, Mask, VL, Policy);
4490 }
4491 return convertFromScalableVector(VT, V: Vec, DAG, Subtarget);
4492}
4493
4494static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4495 SDValue Lo, SDValue Hi, SDValue VL,
4496 SelectionDAG &DAG) {
4497 if (!Passthru)
4498 Passthru = DAG.getUNDEF(VT);
4499 if (isa<ConstantSDNode>(Val: Lo) && isa<ConstantSDNode>(Val: Hi)) {
4500 int32_t LoC = cast<ConstantSDNode>(Val&: Lo)->getSExtValue();
4501 int32_t HiC = cast<ConstantSDNode>(Val&: Hi)->getSExtValue();
4502 // If Hi constant is all the same sign bit as Lo, lower this as a custom
4503 // node in order to try and match RVV vector/scalar instructions.
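    // For example (illustrative), Lo = Hi = -1 (an all-ones splat) or
    // Lo = 5, Hi = 0 both satisfy this and lower to a single vmv.v.x of Lo.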
4504 if ((LoC >> 31) == HiC)
4505 return DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: Passthru, N2: Lo, N3: VL);
4506
4507 // Use vmv.v.x with EEW=32. Use either a vsetivli or vsetvli to change
    // VL. This can temporarily increase VL if VL is less than VLMAX.
4509 if (LoC == HiC) {
4510 SDValue NewVL;
4511 if (isa<ConstantSDNode>(Val: VL) && isUInt<4>(x: VL->getAsZExtVal()))
4512 NewVL = DAG.getNode(Opcode: ISD::ADD, DL, VT: VL.getValueType(), N1: VL, N2: VL);
4513 else
4514 NewVL = DAG.getRegister(Reg: RISCV::X0, VT: MVT::i32);
4515 MVT InterVT =
4516 MVT::getVectorVT(VT: MVT::i32, EC: VT.getVectorElementCount() * 2);
4517 auto InterVec = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: InterVT,
4518 N1: DAG.getUNDEF(VT: InterVT), N2: Lo, N3: NewVL);
4519 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: InterVec);
4520 }
4521 }
4522
4523 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4524 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(i: 0) == Lo &&
4525 isa<ConstantSDNode>(Val: Hi.getOperand(i: 1)) &&
4526 Hi.getConstantOperandVal(i: 1) == 31)
4527 return DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: Passthru, N2: Lo, N3: VL);
4528
4529 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4530 // even if it might be sign extended.
4531 if (Hi.isUndef())
4532 return DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: Passthru, N2: Lo, N3: VL);
4533
4534 // Fall back to a stack store and stride x0 vector load.
4535 return DAG.getNode(Opcode: RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, N1: Passthru, N2: Lo,
4536 N3: Hi, N4: VL);
4537}
4538
4539// Called by type legalization to handle splat of i64 on RV32.
4540// FIXME: We can optimize this when the type has sign or zero bits in one
4541// of the halves.
4542static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4543 SDValue Scalar, SDValue VL,
4544 SelectionDAG &DAG) {
4545 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4546 SDValue Lo, Hi;
4547 std::tie(args&: Lo, args&: Hi) = DAG.SplitScalar(N: Scalar, DL, LoVT: MVT::i32, HiVT: MVT::i32);
4548 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4549}
4550
// This function lowers a splat of a scalar operand Scalar with the vector
4552// length VL. It ensures the final sequence is type legal, which is useful when
4553// lowering a splat after type legalization.
4554static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4555 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4556 const RISCVSubtarget &Subtarget) {
4557 bool HasPassthru = Passthru && !Passthru.isUndef();
4558 if (!HasPassthru && !Passthru)
4559 Passthru = DAG.getUNDEF(VT);
4560
4561 MVT EltVT = VT.getVectorElementType();
4562 MVT XLenVT = Subtarget.getXLenVT();
4563
4564 if (VT.isFloatingPoint()) {
4565 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
4566 EltVT == MVT::bf16) {
4567 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4568 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
4569 Scalar = DAG.getNode(Opcode: RISCVISD::FMV_X_ANYEXTH, DL, VT: XLenVT, Operand: Scalar);
4570 else
4571 Scalar = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::i16, Operand: Scalar);
4572 MVT IVT = VT.changeVectorElementType(EltVT: MVT::i16);
4573 Passthru = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IVT, Operand: Passthru);
4574 SDValue Splat =
4575 lowerScalarSplat(Passthru, Scalar, VL, VT: IVT, DL, DAG, Subtarget);
4576 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Splat);
4577 }
4578 return DAG.getNode(Opcode: RISCVISD::VFMV_V_F_VL, DL, VT, N1: Passthru, N2: Scalar, N3: VL);
4579 }
4580
4581 // Simplest case is that the operand needs to be promoted to XLenVT.
4582 if (Scalar.getValueType().bitsLE(VT: XLenVT)) {
4583 // If the operand is a constant, sign extend to increase our chances
    // of being able to use a .vi instruction. ANY_EXTEND would become a
    // zero extend and the simm5 check in isel would fail.
4586 // FIXME: Should we ignore the upper bits in isel instead?
4587 unsigned ExtOpc =
4588 isa<ConstantSDNode>(Val: Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4589 Scalar = DAG.getNode(Opcode: ExtOpc, DL, VT: XLenVT, Operand: Scalar);
4590 return DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: Passthru, N2: Scalar, N3: VL);
4591 }
4592
4593 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4594 "Unexpected scalar for splat lowering!");
4595
4596 if (isOneConstant(V: VL) && isNullConstant(V: Scalar))
4597 return DAG.getNode(Opcode: RISCVISD::VMV_S_X_VL, DL, VT, N1: Passthru,
4598 N2: DAG.getConstant(Val: 0, DL, VT: XLenVT), N3: VL);
4599
4600 // Otherwise use the more complicated splatting algorithm.
4601 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4602}
4603
4604// This function lowers an insert of a scalar operand Scalar into lane
4605// 0 of the vector regardless of the value of VL. The contents of the
4606// remaining lanes of the result vector are unspecified. VL is assumed
4607// to be non-zero.
4608static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4609 const SDLoc &DL, SelectionDAG &DAG,
4610 const RISCVSubtarget &Subtarget) {
4611 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4612
4613 const MVT XLenVT = Subtarget.getXLenVT();
4614 SDValue Passthru = DAG.getUNDEF(VT);
4615
4616 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4617 isNullConstant(V: Scalar.getOperand(i: 1))) {
4618 SDValue ExtractedVal = Scalar.getOperand(i: 0);
4619 // The element types must be the same.
4620 if (ExtractedVal.getValueType().getVectorElementType() ==
4621 VT.getVectorElementType()) {
4622 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4623 MVT ExtractedContainerVT = ExtractedVT;
4624 if (ExtractedContainerVT.isFixedLengthVector()) {
4625 ExtractedContainerVT = getContainerForFixedLengthVector(
4626 DAG, VT: ExtractedContainerVT, Subtarget);
4627 ExtractedVal = convertToScalableVector(VT: ExtractedContainerVT,
4628 V: ExtractedVal, DAG, Subtarget);
4629 }
4630 if (ExtractedContainerVT.bitsLE(VT))
4631 return DAG.getInsertSubvector(DL, Vec: Passthru, SubVec: ExtractedVal, Idx: 0);
4632 return DAG.getExtractSubvector(DL, VT, Vec: ExtractedVal, Idx: 0);
4633 }
4634 }
4635
4636 if (VT.isFloatingPoint())
4637 return DAG.getNode(Opcode: RISCVISD::VFMV_S_F_VL, DL, VT, N1: DAG.getUNDEF(VT), N2: Scalar,
4638 N3: VL);
4639
4640 // Avoid the tricky legalization cases by falling back to using the
4641 // splat code which already handles it gracefully.
4642 if (!Scalar.getValueType().bitsLE(VT: XLenVT))
4643 return lowerScalarSplat(Passthru: DAG.getUNDEF(VT), Scalar,
4644 VL: DAG.getConstant(Val: 1, DL, VT: XLenVT),
4645 VT, DL, DAG, Subtarget);
4646
4647 // If the operand is a constant, sign extend to increase our chances
  // of being able to use a .vi instruction. ANY_EXTEND would become a
  // zero extend and the simm5 check in isel would fail.
4650 // FIXME: Should we ignore the upper bits in isel instead?
4651 unsigned ExtOpc =
4652 isa<ConstantSDNode>(Val: Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4653 Scalar = DAG.getNode(Opcode: ExtOpc, DL, VT: XLenVT, Operand: Scalar);
4654 return DAG.getNode(Opcode: RISCVISD::VMV_S_X_VL, DL, VT, N1: DAG.getUNDEF(VT), N2: Scalar,
4655 N3: VL);
4656}
4657
4658/// If concat_vector(V1,V2) could be folded away to some existing
4659/// vector source, return it. Note that the source may be larger
/// vector source, return it. Note that the source may be larger
/// than the requested concat_vector (i.e. an extract_subvector
/// might be required).
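/// For example (illustrative):
///   concat_vectors (v4i32 extract_subvector X, 0),
///                  (v4i32 extract_subvector X, 4)
/// folds to X when X is a v8i32 (or larger) source.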
4662static SDValue foldConcatVector(SDValue V1, SDValue V2) {
4663 EVT VT = V1.getValueType();
4664 assert(VT == V2.getValueType() && "argument types must match");
  // Both inputs must be extracts.
4666 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4667 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4668 return SDValue();
4669
4670 // Extracting from the same source.
4671 SDValue Src = V1.getOperand(i: 0);
4672 if (Src != V2.getOperand(i: 0) ||
4673 VT.isScalableVector() != Src.getValueType().isScalableVector())
4674 return SDValue();
4675
4676 // The extracts must extract the two halves of the source.
4677 if (V1.getConstantOperandVal(i: 1) != 0 ||
4678 V2.getConstantOperandVal(i: 1) != VT.getVectorMinNumElements())
4679 return SDValue();
4680
4681 return Src;
4682}
4683
4684// Can this shuffle be performed on exactly one (possibly larger) input?
4685static SDValue getSingleShuffleSrc(MVT VT, SDValue V1, SDValue V2) {
4686
4687 if (V2.isUndef())
4688 return V1;
4689
4690 unsigned NumElts = VT.getVectorNumElements();
4691 // Src needs to have twice the number of elements.
4692 // TODO: Update shuffle lowering to add the extract subvector
4693 if (SDValue Src = foldConcatVector(V1, V2);
4694 Src && Src.getValueType().getVectorNumElements() == (NumElts * 2))
4695 return Src;
4696
4697 return SDValue();
4698}
4699
/// Is this shuffle interleaving contiguous elements from one vector into the
/// even elements and contiguous elements from another vector into the odd
/// elements? \p EvenSrc will contain the index of the element that should be
/// in the first even element. \p OddSrc will contain the index of the element
/// that should be in the first odd element. These can be the first element in
/// a source or the element half way through the source.
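/// For example (illustrative), for v8i8 the mask <0, 8, 1, 9, 2, 10, 3, 11>
/// interleaves the low half of the first source (EvenSrc = 0) with the low
/// half of the second source (OddSrc = 8).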
4706static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4707 int &OddSrc, const RISCVSubtarget &Subtarget) {
4708 // We need to be able to widen elements to the next larger integer type or
4709 // use the zip2a instruction at e64.
4710 if (VT.getScalarSizeInBits() >= Subtarget.getELen() &&
4711 !Subtarget.hasVendorXRivosVizip())
4712 return false;
4713
4714 int Size = Mask.size();
4715 int NumElts = VT.getVectorNumElements();
4716 assert(Size == (int)NumElts && "Unexpected mask size");
4717
4718 SmallVector<unsigned, 2> StartIndexes;
4719 if (!ShuffleVectorInst::isInterleaveMask(Mask, Factor: 2, NumInputElts: Size * 2, StartIndexes))
4720 return false;
4721
4722 EvenSrc = StartIndexes[0];
4723 OddSrc = StartIndexes[1];
4724
4725 // One source should be low half of first vector.
4726 if (EvenSrc != 0 && OddSrc != 0)
4727 return false;
4728
  // Subvectors will be extracted from either the start of the two input
  // vectors, or from the start and middle of the first vector if it's a
  // unary interleave.
4732 // In both cases, HalfNumElts will be extracted.
4733 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4734 // we'll create an illegal extract_subvector.
4735 // FIXME: We could support other values using a slidedown first.
4736 int HalfNumElts = NumElts / 2;
4737 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4738}
4739
4740/// Is this mask representing a masked combination of two slides?
4741static bool isMaskedSlidePair(ArrayRef<int> Mask,
4742 std::array<std::pair<int, int>, 2> &SrcInfo) {
4743 if (!llvm::isMaskedSlidePair(Mask, NumElts: Mask.size(), SrcInfo))
4744 return false;
4745
4746 // Avoid matching vselect idioms
4747 if (SrcInfo[0].second == 0 && SrcInfo[1].second == 0)
4748 return false;
4749 // Prefer vslideup as the second instruction, and identity
4750 // only as the initial instruction.
4751 if ((SrcInfo[0].second > 0 && SrcInfo[1].second < 0) ||
4752 SrcInfo[1].second == 0)
4753 std::swap(x&: SrcInfo[0], y&: SrcInfo[1]);
4754 assert(SrcInfo[0].first != -1 && "Must find one slide");
4755 return true;
4756}
4757
// Exactly matches the semantics of a previously existing custom matcher
// to allow migration to the new matcher without changing output.
4760static bool isElementRotate(const std::array<std::pair<int, int>, 2> &SrcInfo,
4761 unsigned NumElts) {
4762 if (SrcInfo[1].first == -1)
4763 return true;
4764 return SrcInfo[0].second < 0 && SrcInfo[1].second > 0 &&
4765 SrcInfo[1].second - SrcInfo[0].second == (int)NumElts;
4766}
4767
4768static bool isAlternating(const std::array<std::pair<int, int>, 2> &SrcInfo,
4769 ArrayRef<int> Mask, unsigned Factor,
4770 bool RequiredPolarity) {
4771 int NumElts = Mask.size();
4772 for (int i = 0; i != NumElts; ++i) {
4773 int M = Mask[i];
4774 if (M < 0)
4775 continue;
4776 int Src = M >= NumElts;
4777 int Diff = (int)i - (M % NumElts);
4778 bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
4779 assert(C != (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) &&
4780 "Must match exactly one of the two slides");
4781 if (RequiredPolarity != (C == (i / Factor) % 2))
4782 return false;
4783 }
4784 return true;
4785}
4786
4787/// Given a shuffle which can be represented as a pair of two slides,
4788/// see if it is a zipeven idiom. Zipeven is:
4789/// vs2: a0 a1 a2 a3
4790/// vs1: b0 b1 b2 b3
4791/// vd: a0 b0 a2 b2
4792static bool isZipEven(const std::array<std::pair<int, int>, 2> &SrcInfo,
4793 ArrayRef<int> Mask, unsigned &Factor) {
4794 Factor = SrcInfo[1].second;
4795 return SrcInfo[0].second == 0 && isPowerOf2_32(Value: Factor) &&
4796 Mask.size() % Factor == 0 &&
4797 isAlternating(SrcInfo, Mask, Factor, RequiredPolarity: true);
4798}
4799
4800/// Given a shuffle which can be represented as a pair of two slides,
4801/// see if it is a zipodd idiom. Zipodd is:
4802/// vs2: a0 a1 a2 a3
4803/// vs1: b0 b1 b2 b3
4804/// vd: a1 b1 a3 b3
4805/// Note that the operand order is swapped due to the way we canonicalize
/// the slides, so SrcInfo[0] is vs1, and SrcInfo[1] is vs2.
4807static bool isZipOdd(const std::array<std::pair<int, int>, 2> &SrcInfo,
4808 ArrayRef<int> Mask, unsigned &Factor) {
4809 Factor = -SrcInfo[1].second;
4810 return SrcInfo[0].second == 0 && isPowerOf2_32(Value: Factor) &&
4811 Mask.size() % Factor == 0 &&
4812 isAlternating(SrcInfo, Mask, Factor, RequiredPolarity: false);
4813}
4814
// Lower a deinterleave shuffle to SRL and TRUNC. Factor must be
// 2, 4, or 8, and the integer type Factor times larger than VT's
// element type must be a legal element type.
4818// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (Factor=2, Index=0)
4819// -> [p, q, r, s] (Factor=2, Index=1)
4820static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT,
4821 SDValue Src, unsigned Factor,
4822 unsigned Index, SelectionDAG &DAG) {
4823 unsigned EltBits = VT.getScalarSizeInBits();
4824 ElementCount SrcEC = Src.getValueType().getVectorElementCount();
4825 MVT WideSrcVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: EltBits * Factor),
4826 EC: SrcEC.divideCoefficientBy(RHS: Factor));
4827 MVT ResVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: EltBits),
4828 EC: SrcEC.divideCoefficientBy(RHS: Factor));
4829 Src = DAG.getBitcast(VT: WideSrcVT, V: Src);
4830
4831 unsigned Shift = Index * EltBits;
4832 SDValue Res = DAG.getNode(Opcode: ISD::SRL, DL, VT: WideSrcVT, N1: Src,
4833 N2: DAG.getConstant(Val: Shift, DL, VT: WideSrcVT));
4834 Res = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: ResVT, Operand: Res);
4835 MVT CastVT = ResVT.changeVectorElementType(EltVT: VT.getVectorElementType());
4836 Res = DAG.getBitcast(VT: CastVT, V: Res);
4837 return DAG.getInsertSubvector(DL, Vec: DAG.getUNDEF(VT), SubVec: Res, Idx: 0);
4838}
4839
4840/// Match a single source shuffle which is an identity except that some
4841/// particular element is repeated. This can be lowered as a masked
4842/// vrgather.vi/vx. Note that the two source form of this is handled
4843/// by the recursive splitting logic and doesn't need special handling.
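/// For example (illustrative), the v8i8 mask <0, 1, 2, 3, 3, 5, 6, 7> is an
/// identity except that lane 4 repeats element 3; it can be lowered by
/// splatting element 3 and merging that splat into the source on the lanes
/// whose mask value is 3.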
4844static SDValue lowerVECTOR_SHUFFLEAsVRGatherVX(ShuffleVectorSDNode *SVN,
4845 const RISCVSubtarget &Subtarget,
4846 SelectionDAG &DAG) {
4847
4848 SDLoc DL(SVN);
4849 MVT VT = SVN->getSimpleValueType(ResNo: 0);
4850 SDValue V1 = SVN->getOperand(Num: 0);
4851 assert(SVN->getOperand(1).isUndef());
4852 ArrayRef<int> Mask = SVN->getMask();
4853 const unsigned NumElts = VT.getVectorNumElements();
4854 MVT XLenVT = Subtarget.getXLenVT();
4855
4856 std::optional<int> SplatIdx;
4857 for (auto [I, M] : enumerate(First&: Mask)) {
4858 if (M == -1 || I == (unsigned)M)
4859 continue;
4860 if (SplatIdx && *SplatIdx != M)
4861 return SDValue();
4862 SplatIdx = M;
4863 }
4864
4865 if (!SplatIdx)
4866 return SDValue();
4867
4868 SmallVector<SDValue> MaskVals;
4869 for (int MaskIndex : Mask) {
4870 bool SelectMaskVal = MaskIndex == *SplatIdx;
4871 MaskVals.push_back(Elt: DAG.getConstant(Val: SelectMaskVal, DL, VT: XLenVT));
4872 }
4873 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
4874 MVT MaskVT = MVT::getVectorVT(VT: MVT::i1, NumElements: NumElts);
4875 SDValue SelectMask = DAG.getBuildVector(VT: MaskVT, DL, Ops: MaskVals);
4876 SDValue Splat = DAG.getVectorShuffle(VT, dl: DL, N1: V1, N2: DAG.getUNDEF(VT),
4877 Mask: SmallVector<int>(NumElts, *SplatIdx));
4878 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT, N1: SelectMask, N2: Splat, N3: V1);
4879}
4880
4881// Lower the following shuffle to vslidedown.
4882// a)
4883// t49: v8i8 = extract_subvector t13, Constant:i64<0>
4884// t109: v8i8 = extract_subvector t13, Constant:i64<8>
4885// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
4886// b)
4887// t69: v16i16 = extract_subvector t68, Constant:i64<0>
4888// t23: v8i16 = extract_subvector t69, Constant:i64<0>
4889// t29: v4i16 = extract_subvector t23, Constant:i64<4>
4890// t26: v8i16 = extract_subvector t69, Constant:i64<8>
4891// t30: v4i16 = extract_subvector t26, Constant:i64<0>
4892// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
4893static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
4894 SDValue V1, SDValue V2,
4895 ArrayRef<int> Mask,
4896 const RISCVSubtarget &Subtarget,
4897 SelectionDAG &DAG) {
4898 auto findNonEXTRACT_SUBVECTORParent =
4899 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
4900 uint64_t Offset = 0;
4901 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4902 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
           // a scalable vector. But we don't want to match that case.
4904 Parent.getOperand(i: 0).getSimpleValueType().isFixedLengthVector()) {
4905 Offset += Parent.getConstantOperandVal(i: 1);
4906 Parent = Parent.getOperand(i: 0);
4907 }
4908 return std::make_pair(x&: Parent, y&: Offset);
4909 };
4910
4911 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
4912 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
4913
4914 // Extracting from the same source.
4915 SDValue Src = V1Src;
4916 if (Src != V2Src)
4917 return SDValue();
4918
4919 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
4920 SmallVector<int, 16> NewMask(Mask);
4921 for (size_t i = 0; i != NewMask.size(); ++i) {
4922 if (NewMask[i] == -1)
4923 continue;
4924
4925 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
4926 NewMask[i] = NewMask[i] + V1IndexOffset;
4927 } else {
      // Subtracting NewMask.size() is needed. Otherwise, case b) above would
      // give <5,6,7,12> instead of <5,6,7,8>.
4930 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
4931 }
4932 }
4933
4934 // First index must be known and non-zero. It will be used as the slidedown
4935 // amount.
4936 if (NewMask[0] <= 0)
4937 return SDValue();
4938
  // NewMask must also be contiguous.
4940 for (unsigned i = 1; i != NewMask.size(); ++i)
4941 if (NewMask[i - 1] + 1 != NewMask[i])
4942 return SDValue();
4943
4944 MVT XLenVT = Subtarget.getXLenVT();
4945 MVT SrcVT = Src.getSimpleValueType();
4946 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT: SrcVT, Subtarget);
4947 auto [TrueMask, VL] = getDefaultVLOps(VecVT: SrcVT, ContainerVT, DL, DAG, Subtarget);
4948 SDValue Slidedown =
4949 getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT, Passthru: DAG.getUNDEF(VT: ContainerVT),
4950 Op: convertToScalableVector(VT: ContainerVT, V: Src, DAG, Subtarget),
4951 Offset: DAG.getConstant(Val: NewMask[0], DL, VT: XLenVT), Mask: TrueMask, VL);
4952 return DAG.getExtractSubvector(
4953 DL, VT, Vec: convertFromScalableVector(VT: SrcVT, V: Slidedown, DAG, Subtarget), Idx: 0);
4954}
4955
4956// Because vslideup leaves the destination elements at the start intact, we can
4957// use it to perform shuffles that insert subvectors:
4958//
4959// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
4960// ->
4961// vsetvli zero, 8, e8, mf2, ta, ma
4962// vslideup.vi v8, v9, 4
4963//
4964// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
4965// ->
4966// vsetvli zero, 5, e8, mf2, tu, ma
// vslideup.vi v8, v9, 2
4968static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
4969 SDValue V1, SDValue V2,
4970 ArrayRef<int> Mask,
4971 const RISCVSubtarget &Subtarget,
4972 SelectionDAG &DAG) {
4973 unsigned NumElts = VT.getVectorNumElements();
4974 int NumSubElts, Index;
4975 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumSrcElts: NumElts, NumSubElts,
4976 Index))
4977 return SDValue();
4978
4979 bool OpsSwapped = Mask[Index] < (int)NumElts;
4980 SDValue InPlace = OpsSwapped ? V2 : V1;
4981 SDValue ToInsert = OpsSwapped ? V1 : V2;
4982
4983 MVT XLenVT = Subtarget.getXLenVT();
4984 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4985 auto TrueMask = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).first;
4986 // We slide up by the index that the subvector is being inserted at, and set
4987 // VL to the index + the number of elements being inserted.
4988 unsigned Policy =
4989 RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED | RISCVVType::MASK_AGNOSTIC;
  // If we're adding a suffix to the in place vector, i.e. inserting right
4991 // up to the very end of it, then we don't actually care about the tail.
4992 if (NumSubElts + Index >= (int)NumElts)
4993 Policy |= RISCVVType::TAIL_AGNOSTIC;
4994
4995 InPlace = convertToScalableVector(VT: ContainerVT, V: InPlace, DAG, Subtarget);
4996 ToInsert = convertToScalableVector(VT: ContainerVT, V: ToInsert, DAG, Subtarget);
4997 SDValue VL = DAG.getConstant(Val: NumSubElts + Index, DL, VT: XLenVT);
4998
4999 SDValue Res;
5000 // If we're inserting into the lowest elements, use a tail undisturbed
5001 // vmv.v.v.
5002 if (Index == 0)
5003 Res = DAG.getNode(Opcode: RISCVISD::VMV_V_V_VL, DL, VT: ContainerVT, N1: InPlace, N2: ToInsert,
5004 N3: VL);
5005 else
5006 Res = getVSlideup(DAG, Subtarget, DL, VT: ContainerVT, Passthru: InPlace, Op: ToInsert,
5007 Offset: DAG.getConstant(Val: Index, DL, VT: XLenVT), Mask: TrueMask, VL, Policy);
5008 return convertFromScalableVector(VT, V: Res, DAG, Subtarget);
5009}
5010
5011/// Match v(f)slide1up/down idioms. These operations involve sliding
5012/// N-1 elements to make room for an inserted scalar at one end.
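/// For example (illustrative), for v4i32 a shuffle of (splat, V2) with mask
/// <0, 4, 5, 6> slides V2 up by one and inserts the splatted scalar into
/// lane 0 (vslide1up), while <5, 6, 7, 0> is the matching vslide1down form.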
5013static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
5014 SDValue V1, SDValue V2,
5015 ArrayRef<int> Mask,
5016 const RISCVSubtarget &Subtarget,
5017 SelectionDAG &DAG) {
5018 bool OpsSwapped = false;
5019 if (!isa<BuildVectorSDNode>(Val: V1)) {
5020 if (!isa<BuildVectorSDNode>(Val: V2))
5021 return SDValue();
5022 std::swap(a&: V1, b&: V2);
5023 OpsSwapped = true;
5024 }
5025 SDValue Splat = cast<BuildVectorSDNode>(Val&: V1)->getSplatValue();
5026 if (!Splat)
5027 return SDValue();
5028
  // Return true if the mask could describe a slide of Mask.size() - 1
  // elements from concat_vector(V1, V2)[Base:] by Offset positions, i.e.
  // every defined Mask[i] equals Base + i + Offset.
5031 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
5032 const unsigned S = (Offset > 0) ? 0 : -Offset;
5033 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
5034 for (unsigned i = S; i != E; ++i)
5035 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
5036 return false;
5037 return true;
5038 };
5039
5040 const unsigned NumElts = VT.getVectorNumElements();
5041 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
5042 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
5043 return SDValue();
5044
5045 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
  // The inserted lane must come from the splat; an undef scalar is legal but
  // not profitable.
5047 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
5048 return SDValue();
5049
5050 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5051 auto [TrueMask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
5052
5053 // zvfhmin and zvfbfmin don't have vfslide1{down,up}.vf so use fmv.x.h +
5054 // vslide1{down,up}.vx instead.
5055 if (VT.getVectorElementType() == MVT::bf16 ||
5056 (VT.getVectorElementType() == MVT::f16 &&
5057 !Subtarget.hasVInstructionsF16())) {
5058 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
5059 Splat =
5060 DAG.getNode(Opcode: RISCVISD::FMV_X_ANYEXTH, DL, VT: Subtarget.getXLenVT(), Operand: Splat);
5061 V2 = DAG.getBitcast(
5062 VT: IntVT, V: convertToScalableVector(VT: ContainerVT, V: V2, DAG, Subtarget));
5063 SDValue Vec = DAG.getNode(
5064 Opcode: IsVSlidedown ? RISCVISD::VSLIDE1DOWN_VL : RISCVISD::VSLIDE1UP_VL, DL,
5065 VT: IntVT, N1: DAG.getUNDEF(VT: IntVT), N2: V2, N3: Splat, N4: TrueMask, N5: VL);
5066 Vec = DAG.getBitcast(VT: ContainerVT, V: Vec);
5067 return convertFromScalableVector(VT, V: Vec, DAG, Subtarget);
5068 }
5069
5070 auto OpCode = IsVSlidedown ?
5071 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
5072 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
5073 if (!VT.isFloatingPoint())
5074 Splat = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getXLenVT(), Operand: Splat);
5075 auto Vec = DAG.getNode(Opcode: OpCode, DL, VT: ContainerVT,
5076 N1: DAG.getUNDEF(VT: ContainerVT),
5077 N2: convertToScalableVector(VT: ContainerVT, V: V2, DAG, Subtarget),
5078 N3: Splat, N4: TrueMask, N5: VL);
5079 return convertFromScalableVector(VT, V: Vec, DAG, Subtarget);
5080}
5081
// Match a mask which "spreads" the leading elements of a vector evenly
// across the result. Factor is the spread amount, and Index is the
// offset applied (on success, Index < Factor). This is the inverse
// of a deinterleave with the same Factor and Index. This is analogous
// to an interleave, except that all but one lane is undef.
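// For example (illustrative), <0, -1, 1, -1, 2, -1, 3, -1> is a spread with
// Factor = 2 and Index = 0, while <-1, 0, -1, 1, -1, 2, -1, 3> is the same
// spread with Index = 1.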
5087static bool isSpreadMask(ArrayRef<int> Mask, unsigned Factor, unsigned &Index) {
5088 SmallVector<bool> LaneIsUndef(Factor, true);
5089 for (unsigned i = 0; i < Mask.size(); i++)
5090 LaneIsUndef[i % Factor] &= (Mask[i] == -1);
5091
5092 bool Found = false;
5093 for (unsigned i = 0; i < Factor; i++) {
5094 if (LaneIsUndef[i])
5095 continue;
5096 if (Found)
5097 return false;
5098 Index = i;
5099 Found = true;
5100 }
5101 if (!Found)
5102 return false;
5103
5104 for (unsigned i = 0; i < Mask.size() / Factor; i++) {
5105 unsigned j = i * Factor + Index;
5106 if (Mask[j] != -1 && (unsigned)Mask[j] != i)
5107 return false;
5108 }
5109 return true;
5110}
5111
5112static SDValue lowerVZIP(unsigned Opc, SDValue Op0, SDValue Op1,
5113 const SDLoc &DL, SelectionDAG &DAG,
5114 const RISCVSubtarget &Subtarget) {
5115 assert(RISCVISD::RI_VZIPEVEN_VL == Opc || RISCVISD::RI_VZIPODD_VL == Opc ||
5116 RISCVISD::RI_VZIP2A_VL == Opc || RISCVISD::RI_VZIP2B_VL == Opc ||
5117 RISCVISD::RI_VUNZIP2A_VL == Opc || RISCVISD::RI_VUNZIP2B_VL == Opc);
5118 assert(Op0.getSimpleValueType() == Op1.getSimpleValueType());
5119
5120 MVT VT = Op0.getSimpleValueType();
5121 MVT IntVT = VT.changeVectorElementTypeToInteger();
5122 Op0 = DAG.getBitcast(VT: IntVT, V: Op0);
5123 Op1 = DAG.getBitcast(VT: IntVT, V: Op1);
5124
5125 MVT ContainerVT = IntVT;
5126 if (VT.isFixedLengthVector()) {
5127 ContainerVT = getContainerForFixedLengthVector(DAG, VT: IntVT, Subtarget);
5128 Op0 = convertToScalableVector(VT: ContainerVT, V: Op0, DAG, Subtarget);
5129 Op1 = convertToScalableVector(VT: ContainerVT, V: Op1, DAG, Subtarget);
5130 }
5131
5132 MVT InnerVT = ContainerVT;
5133 auto [Mask, VL] = getDefaultVLOps(VecVT: IntVT, ContainerVT: InnerVT, DL, DAG, Subtarget);
5134 if (Op1.isUndef() &&
5135 ContainerVT.bitsGT(VT: RISCVTargetLowering::getM1VT(VT: ContainerVT)) &&
5136 (RISCVISD::RI_VUNZIP2A_VL == Opc || RISCVISD::RI_VUNZIP2B_VL == Opc)) {
5137 InnerVT = ContainerVT.getHalfNumVectorElementsVT();
5138 VL = DAG.getConstant(Val: VT.getVectorNumElements() / 2, DL,
5139 VT: Subtarget.getXLenVT());
5140 Mask = getAllOnesMask(VecVT: InnerVT, VL, DL, DAG);
5141 unsigned HighIdx = InnerVT.getVectorElementCount().getKnownMinValue();
5142 Op1 = DAG.getExtractSubvector(DL, VT: InnerVT, Vec: Op0, Idx: HighIdx);
5143 Op0 = DAG.getExtractSubvector(DL, VT: InnerVT, Vec: Op0, Idx: 0);
5144 }
5145
5146 SDValue Passthru = DAG.getUNDEF(VT: InnerVT);
5147 SDValue Res = DAG.getNode(Opcode: Opc, DL, VT: InnerVT, N1: Op0, N2: Op1, N3: Passthru, N4: Mask, N5: VL);
5148 if (InnerVT.bitsLT(VT: ContainerVT))
5149 Res = DAG.getInsertSubvector(DL, Vec: DAG.getUNDEF(VT: ContainerVT), SubVec: Res, Idx: 0);
5150 if (IntVT.isFixedLengthVector())
5151 Res = convertFromScalableVector(VT: IntVT, V: Res, DAG, Subtarget);
5152 Res = DAG.getBitcast(VT, V: Res);
5153 return Res;
5154}
5155
5156// Given a vector a, b, c, d return a vector Factor times longer
5157// with Factor-1 undef's between elements. Ex:
5158// a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
5159// undef, a, undef, b, undef, c, undef, d (Factor=2, Index=1)
5160static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index,
5161 const SDLoc &DL, SelectionDAG &DAG) {
5162
5163 MVT VT = V.getSimpleValueType();
5164 unsigned EltBits = VT.getScalarSizeInBits();
5165 ElementCount EC = VT.getVectorElementCount();
5166 V = DAG.getBitcast(VT: VT.changeTypeToInteger(), V);
5167
5168 MVT WideVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: EltBits * Factor), EC);
5169
5170 SDValue Result = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: WideVT, Operand: V);
5171 // TODO: On rv32, the constant becomes a splat_vector_parts which does not
5172 // allow the SHL to fold away if Index is 0.
5173 if (Index != 0)
5174 Result = DAG.getNode(Opcode: ISD::SHL, DL, VT: WideVT, N1: Result,
5175 N2: DAG.getConstant(Val: EltBits * Index, DL, VT: WideVT));
5176 // Make sure to use original element type
5177 MVT ResultVT = MVT::getVectorVT(VT: VT.getVectorElementType(),
5178 EC: EC.multiplyCoefficientBy(RHS: Factor));
5179 return DAG.getBitcast(VT: ResultVT, V: Result);
5180}
5181
5182// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
5183// to create an interleaved vector of <[vscale x] n*2 x ty>.
5184// This requires that the size of ty is less than the subtarget's maximum ELEN.
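// For example (illustrative), interleaving <a0, a1> and <b0, b1> of i16
// produces <a0, b0, a1, b1>, computed in i32 lanes as
// (zext a + zext b) + (zext b * 0xffff) == a + (b << 16).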
5185static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
5186 const SDLoc &DL, SelectionDAG &DAG,
5187 const RISCVSubtarget &Subtarget) {
5188
5189 // FIXME: Not only does this optimize the code, it fixes some correctness
5190 // issues because MIR does not have freeze.
5191 if (EvenV.isUndef())
5192 return getWideningSpread(V: OddV, Factor: 2, Index: 1, DL, DAG);
5193 if (OddV.isUndef())
5194 return getWideningSpread(V: EvenV, Factor: 2, Index: 0, DL, DAG);
5195
5196 MVT VecVT = EvenV.getSimpleValueType();
5197 MVT VecContainerVT = VecVT; // <vscale x n x ty>
5198 // Convert fixed vectors to scalable if needed
5199 if (VecContainerVT.isFixedLengthVector()) {
5200 VecContainerVT = getContainerForFixedLengthVector(DAG, VT: VecVT, Subtarget);
5201 EvenV = convertToScalableVector(VT: VecContainerVT, V: EvenV, DAG, Subtarget);
5202 OddV = convertToScalableVector(VT: VecContainerVT, V: OddV, DAG, Subtarget);
5203 }
5204
5205 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
5206
5207 // We're working with a vector of the same size as the resulting
5208 // interleaved vector, but with half the number of elements and
5209 // twice the SEW (Hence the restriction on not using the maximum
5210 // ELEN)
5211 MVT WideVT =
5212 MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: VecVT.getScalarSizeInBits() * 2),
5213 EC: VecVT.getVectorElementCount());
5214 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
5215 if (WideContainerVT.isFixedLengthVector())
5216 WideContainerVT = getContainerForFixedLengthVector(DAG, VT: WideVT, Subtarget);
5217
5218 // Bitcast the input vectors to integers in case they are FP
5219 VecContainerVT = VecContainerVT.changeTypeToInteger();
5220 EvenV = DAG.getBitcast(VT: VecContainerVT, V: EvenV);
5221 OddV = DAG.getBitcast(VT: VecContainerVT, V: OddV);
5222
5223 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT: VecContainerVT, DL, DAG, Subtarget);
5224 SDValue Passthru = DAG.getUNDEF(VT: WideContainerVT);
5225
5226 SDValue Interleaved;
5227 if (Subtarget.hasStdExtZvbb()) {
5228 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
5229 SDValue OffsetVec =
5230 DAG.getConstant(Val: VecVT.getScalarSizeInBits(), DL, VT: VecContainerVT);
5231 Interleaved = DAG.getNode(Opcode: RISCVISD::VWSLL_VL, DL, VT: WideContainerVT, N1: OddV,
5232 N2: OffsetVec, N3: Passthru, N4: Mask, N5: VL);
5233 Interleaved = DAG.getNode(Opcode: RISCVISD::VWADDU_W_VL, DL, VT: WideContainerVT,
5234 N1: Interleaved, N2: EvenV, N3: Passthru, N4: Mask, N5: VL);
5235 } else {
5236 // FIXME: We should freeze the odd vector here. We already handled the case
5237 // of provably undef/poison above.
5238
5239 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
5240 // vwaddu.vv
5241 Interleaved = DAG.getNode(Opcode: RISCVISD::VWADDU_VL, DL, VT: WideContainerVT, N1: EvenV,
5242 N2: OddV, N3: Passthru, N4: Mask, N5: VL);
5243
    // Then multiply OddV by (2^VecVT.getScalarSizeInBits() - 1), i.e. 0xff...ff
5245 SDValue AllOnesVec = DAG.getSplatVector(
5246 VT: VecContainerVT, DL, Op: DAG.getAllOnesConstant(DL, VT: Subtarget.getXLenVT()));
5247 SDValue OddsMul = DAG.getNode(Opcode: RISCVISD::VWMULU_VL, DL, VT: WideContainerVT,
5248 N1: OddV, N2: AllOnesVec, N3: Passthru, N4: Mask, N5: VL);
5249
5250 // Add the two together so we get
5251 // (OddV * 0xff...ff) + (OddV + EvenV)
5252 // = (OddV * 0x100...00) + EvenV
5253 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
    // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
5255 Interleaved = DAG.getNode(Opcode: RISCVISD::ADD_VL, DL, VT: WideContainerVT,
5256 N1: Interleaved, N2: OddsMul, N3: Passthru, N4: Mask, N5: VL);
5257 }
5258
  // Bitcast from <vscale x n x ty*2> to <vscale x 2*n x ty>
5260 MVT ResultContainerVT = MVT::getVectorVT(
5261 VT: VecVT.getVectorElementType(), // Make sure to use original type
5262 EC: VecContainerVT.getVectorElementCount().multiplyCoefficientBy(RHS: 2));
5263 Interleaved = DAG.getBitcast(VT: ResultContainerVT, V: Interleaved);
5264
5265 // Convert back to a fixed vector if needed
5266 MVT ResultVT =
5267 MVT::getVectorVT(VT: VecVT.getVectorElementType(),
5268 EC: VecVT.getVectorElementCount().multiplyCoefficientBy(RHS: 2));
5269 if (ResultVT.isFixedLengthVector())
5270 Interleaved =
5271 convertFromScalableVector(VT: ResultVT, V: Interleaved, DAG, Subtarget);
5272
5273 return Interleaved;
5274}
5275
5276// If we have a vector of bits that we want to reverse, we can use a vbrev on a
5277// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
5278static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
5279 SelectionDAG &DAG,
5280 const RISCVSubtarget &Subtarget) {
5281 SDLoc DL(SVN);
5282 MVT VT = SVN->getSimpleValueType(ResNo: 0);
5283 SDValue V = SVN->getOperand(Num: 0);
5284 unsigned NumElts = VT.getVectorNumElements();
5285
5286 assert(VT.getVectorElementType() == MVT::i1);
5287
5288 if (!ShuffleVectorInst::isReverseMask(Mask: SVN->getMask(),
5289 NumSrcElts: SVN->getMask().size()) ||
5290 !SVN->getOperand(Num: 1).isUndef())
5291 return SDValue();
5292
5293 unsigned ViaEltSize = std::max(a: (uint64_t)8, b: PowerOf2Ceil(A: NumElts));
5294 EVT ViaVT = EVT::getVectorVT(
5295 Context&: *DAG.getContext(), VT: EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: ViaEltSize), NumElements: 1);
5296 EVT ViaBitVT =
5297 EVT::getVectorVT(Context&: *DAG.getContext(), VT: MVT::i1, NumElements: ViaVT.getScalarSizeInBits());
5298
5299 // If we don't have zvbb or the larger element type > ELEN, the operation will
5300 // be illegal.
5301 if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(Op: ISD::BITREVERSE,
5302 VT: ViaVT) ||
5303 !Subtarget.getTargetLowering()->isTypeLegal(VT: ViaBitVT))
5304 return SDValue();
5305
5306 // If the bit vector doesn't fit exactly into the larger element type, we need
5307 // to insert it into the larger vector and then shift up the reversed bits
5308 // afterwards to get rid of the gap introduced.
5309 if (ViaEltSize > NumElts)
5310 V = DAG.getInsertSubvector(DL, Vec: DAG.getUNDEF(VT: ViaBitVT), SubVec: V, Idx: 0);
5311
5312 SDValue Res =
5313 DAG.getNode(Opcode: ISD::BITREVERSE, DL, VT: ViaVT, Operand: DAG.getBitcast(VT: ViaVT, V));
5314
5315 // Shift up the reversed bits if the vector didn't exactly fit into the larger
5316 // element type.
5317 if (ViaEltSize > NumElts)
5318 Res = DAG.getNode(Opcode: ISD::SRL, DL, VT: ViaVT, N1: Res,
5319 N2: DAG.getConstant(Val: ViaEltSize - NumElts, DL, VT: ViaVT));
5320
5321 Res = DAG.getBitcast(VT: ViaBitVT, V: Res);
5322
5323 if (ViaEltSize > NumElts)
5324 Res = DAG.getExtractSubvector(DL, VT, Vec: Res, Idx: 0);
5325 return Res;
5326}
5327
5328static bool isLegalBitRotate(ArrayRef<int> Mask, EVT VT,
5329 const RISCVSubtarget &Subtarget,
5330 MVT &RotateVT, unsigned &RotateAmt) {
5331 unsigned NumElts = VT.getVectorNumElements();
5332 unsigned EltSizeInBits = VT.getScalarSizeInBits();
5333 unsigned NumSubElts;
5334 if (!ShuffleVectorInst::isBitRotateMask(Mask, EltSizeInBits, MinSubElts: 2,
5335 MaxSubElts: NumElts, NumSubElts, RotateAmt))
5336 return false;
5337 RotateVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: EltSizeInBits * NumSubElts),
5338 NumElements: NumElts / NumSubElts);
5339
5340 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
5341 return Subtarget.getTargetLowering()->isTypeLegal(VT: RotateVT);
5342}
5343
5344// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
5345// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
5346// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
5347static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
5348 SelectionDAG &DAG,
5349 const RISCVSubtarget &Subtarget) {
5350 SDLoc DL(SVN);
5351
5352 EVT VT = SVN->getValueType(ResNo: 0);
5353 unsigned RotateAmt;
5354 MVT RotateVT;
5355 if (!isLegalBitRotate(Mask: SVN->getMask(), VT, Subtarget, RotateVT, RotateAmt))
5356 return SDValue();
5357
5358 SDValue Op = DAG.getBitcast(VT: RotateVT, V: SVN->getOperand(Num: 0));
5359
5360 SDValue Rotate;
5361 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
5362 // so canonicalize to vrev8.
5363 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
5364 Rotate = DAG.getNode(Opcode: ISD::BSWAP, DL, VT: RotateVT, Operand: Op);
5365 else
5366 Rotate = DAG.getNode(Opcode: ISD::ROTL, DL, VT: RotateVT, N1: Op,
5367 N2: DAG.getConstant(Val: RotateAmt, DL, VT: RotateVT));
5368
5369 return DAG.getBitcast(VT, V: Rotate);
5370}
5371
5372// If compiling with an exactly known VLEN, see if we can split a
5373// shuffle on m2 or larger into a small number of m1-sized shuffles
5374// which write each destination register exactly once.
5375static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
5376 SelectionDAG &DAG,
5377 const RISCVSubtarget &Subtarget) {
5378 SDLoc DL(SVN);
5379 MVT VT = SVN->getSimpleValueType(ResNo: 0);
5380 SDValue V1 = SVN->getOperand(Num: 0);
5381 SDValue V2 = SVN->getOperand(Num: 1);
5382 ArrayRef<int> Mask = SVN->getMask();
5383
5384 // If we don't know the exact data layout, there's not much we can do. If this
5385 // is already m1 or smaller, there's no point in splitting further.
5386 const auto VLen = Subtarget.getRealVLen();
5387 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
5388 return SDValue();
5389
5390 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
5391 // expansion for.
5392 unsigned RotateAmt;
5393 MVT RotateVT;
5394 if (isLegalBitRotate(Mask, VT, Subtarget, RotateVT, RotateAmt))
5395 return SDValue();
5396
5397 MVT ElemVT = VT.getVectorElementType();
5398 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
5399
5400 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5401 MVT OneRegVT = MVT::getVectorVT(VT: ElemVT, NumElements: ElemsPerVReg);
5402 MVT M1VT = getContainerForFixedLengthVector(DAG, VT: OneRegVT, Subtarget);
5403 assert(M1VT == RISCVTargetLowering::getM1VT(M1VT));
5404 unsigned NumOpElts = M1VT.getVectorMinNumElements();
5405 unsigned NumElts = ContainerVT.getVectorMinNumElements();
5406 unsigned NumOfSrcRegs = NumElts / NumOpElts;
5407 unsigned NumOfDestRegs = NumElts / NumOpElts;
5408 // The following semantically builds up a fixed length concat_vector
5409 // of the component shuffle_vectors. We eagerly lower to scalable here
5410 // to avoid DAG combining it back to a large shuffle_vector again.
5411 V1 = convertToScalableVector(VT: ContainerVT, V: V1, DAG, Subtarget);
5412 V2 = convertToScalableVector(VT: ContainerVT, V: V2, DAG, Subtarget);
5413 SmallVector<SmallVector<std::tuple<unsigned, unsigned, SmallVector<int>>>>
5414 Operands;
5415 processShuffleMasks(
5416 Mask, NumOfSrcRegs, NumOfDestRegs, NumOfUsedRegs: NumOfDestRegs,
5417 NoInputAction: [&]() { Operands.emplace_back(); },
5418 SingleInputAction: [&](ArrayRef<int> SrcSubMask, unsigned SrcVecIdx, unsigned DstVecIdx) {
5419 Operands.emplace_back().emplace_back(Args&: SrcVecIdx, UINT_MAX,
5420 Args: SmallVector<int>(SrcSubMask));
5421 },
5422 ManyInputsAction: [&](ArrayRef<int> SrcSubMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
5423 if (NewReg)
5424 Operands.emplace_back();
5425 Operands.back().emplace_back(Args&: Idx1, Args&: Idx2, Args: SmallVector<int>(SrcSubMask));
5426 });
5427 assert(Operands.size() == NumOfDestRegs && "Whole vector must be processed");
5428 // Note: check that we do not emit too many shuffles here, to prevent code
5429 // size explosion.
5430 // TODO: investigate whether this can be improved by extra analysis of the
5431 // masks to check if the code is more profitable.
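 // In the count below, each single-input sub-shuffle costs one shuffle, a
 // two-input sub-shuffle costs two, and a single-input identity copy costs
 // zero.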
5432 unsigned NumShuffles = std::accumulate(
5433 first: Operands.begin(), last: Operands.end(), init: 0u,
5434 binary_op: [&](unsigned N,
5435 ArrayRef<std::tuple<unsigned, unsigned, SmallVector<int>>> Data) {
5436 if (Data.empty())
5437 return N;
5438 N += Data.size();
5439 for (const auto &P : Data) {
5440 unsigned Idx2 = std::get<1>(t: P);
5441 ArrayRef<int> Mask = std::get<2>(t: P);
5442 if (Idx2 != UINT_MAX)
5443 ++N;
5444 else if (ShuffleVectorInst::isIdentityMask(Mask, NumSrcElts: Mask.size()))
5445 --N;
5446 }
5447 return N;
5448 });
5449 if ((NumOfDestRegs > 2 && NumShuffles > NumOfDestRegs) ||
5450 (NumOfDestRegs <= 2 && NumShuffles >= 4))
5451 return SDValue();
5452 auto ExtractValue = [&, &DAG = DAG](SDValue SrcVec, unsigned ExtractIdx) {
5453 SDValue SubVec = DAG.getExtractSubvector(DL, VT: M1VT, Vec: SrcVec, Idx: ExtractIdx);
5454 SubVec = convertFromScalableVector(VT: OneRegVT, V: SubVec, DAG, Subtarget);
5455 return SubVec;
5456 };
5457 auto PerformShuffle = [&, &DAG = DAG](SDValue SubVec1, SDValue SubVec2,
5458 ArrayRef<int> Mask) {
5459 SDValue SubVec = DAG.getVectorShuffle(VT: OneRegVT, dl: DL, N1: SubVec1, N2: SubVec2, Mask);
5460 return SubVec;
5461 };
5462 SDValue Vec = DAG.getUNDEF(VT: ContainerVT);
5463 for (auto [I, Data] : enumerate(First&: Operands)) {
5464 if (Data.empty())
5465 continue;
5466 SmallDenseMap<unsigned, SDValue, 4> Values;
5467 for (unsigned I : seq<unsigned>(Size: Data.size())) {
5468 const auto &[Idx1, Idx2, _] = Data[I];
5469 // If the shuffle contains a permutation of an odd number of elements,
5470 // Idx1 might already be used in the first iteration.
5471 //
5472 // Idx1 = shuffle Idx1, Idx2
5473 // Idx1 = shuffle Idx1, Idx3
5474 SDValue &V = Values.try_emplace(Key: Idx1).first->getSecond();
5475 if (!V)
5476 V = ExtractValue(Idx1 >= NumOfSrcRegs ? V2 : V1,
5477 (Idx1 % NumOfSrcRegs) * NumOpElts);
5478 if (Idx2 != UINT_MAX) {
5479 SDValue &V = Values.try_emplace(Key: Idx2).first->getSecond();
5480 if (!V)
5481 V = ExtractValue(Idx2 >= NumOfSrcRegs ? V2 : V1,
5482 (Idx2 % NumOfSrcRegs) * NumOpElts);
5483 }
5484 }
5485 SDValue V;
5486 for (const auto &[Idx1, Idx2, Mask] : Data) {
5487 SDValue V1 = Values.at(Val: Idx1);
5488 SDValue V2 = Idx2 == UINT_MAX ? V1 : Values.at(Val: Idx2);
5489 V = PerformShuffle(V1, V2, Mask);
5490 Values[Idx1] = V;
5491 }
5492
5493 unsigned InsertIdx = I * NumOpElts;
5494 V = convertToScalableVector(VT: M1VT, V, DAG, Subtarget);
5495 Vec = DAG.getInsertSubvector(DL, Vec, SubVec: V, Idx: InsertIdx);
5496 }
5497 return convertFromScalableVector(VT, V: Vec, DAG, Subtarget);
5498}
5499
5500// Matches a subset of compress masks with a contiguous prefix of output
5501// elements. This could be extended to allow gaps by deciding which
5502// source elements to spuriously demand.
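// For example, <0, 2, 5, -1> is such a mask: the defined outputs form a
// prefix and pick source elements in strictly increasing order. <1, 0> is
// not (indices decrease), and <-1, 0> is not (undefs may only trail).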
5503static bool isCompressMask(ArrayRef<int> Mask) {
5504 int Last = -1;
5505 bool SawUndef = false;
5506 for (unsigned i = 0; i < Mask.size(); i++) {
5507 if (Mask[i] == -1) {
5508 SawUndef = true;
5509 continue;
5510 }
5511 if (SawUndef)
5512 return false;
5513 if (i > (unsigned)Mask[i])
5514 return false;
5515 if (Mask[i] <= Last)
5516 return false;
5517 Last = Mask[i];
5518 }
5519 return true;
5520}
5521
5522/// Given a shuffle where the indices are disjoint between the two sources,
5523/// e.g.:
5524///
5525/// t2:v4i8 = vector_shuffle t0:v4i8, t1:v4i8, <2, 7, 1, 4>
5526///
5527/// Merge the two sources into one and do a single source shuffle:
5528///
5529/// t2:v4i8 = vselect t1:v4i8, t0:v4i8, <0, 1, 0, 1>
5530/// t3:v4i8 = vector_shuffle t2:v4i8, undef, <2, 3, 1, 0>
5531///
5532/// A vselect will either be merged into a masked instruction or be lowered as a
5533/// vmerge.vvm, which is cheaper than a vrgather.vv.
5534static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN,
5535 SelectionDAG &DAG,
5536 const RISCVSubtarget &Subtarget) {
5537 MVT VT = SVN->getSimpleValueType(ResNo: 0);
5538 MVT XLenVT = Subtarget.getXLenVT();
5539 SDLoc DL(SVN);
5540
5541 const ArrayRef<int> Mask = SVN->getMask();
5542
5543 // Work out which source each lane will come from.
5544 SmallVector<int, 16> Srcs(Mask.size(), -1);
5545
5546 for (int Idx : Mask) {
5547 if (Idx == -1)
5548 continue;
5549 unsigned SrcIdx = Idx % Mask.size();
5550 int Src = (uint32_t)Idx < Mask.size() ? 0 : 1;
5551 if (Srcs[SrcIdx] == -1)
5552 // Mark this source as using this lane.
5553 Srcs[SrcIdx] = Src;
5554 else if (Srcs[SrcIdx] != Src)
5555 // The other source is using this lane: not disjoint.
5556 return SDValue();
5557 }
5558
5559 SmallVector<SDValue> SelectMaskVals;
5560 for (int Lane : Srcs) {
5561 if (Lane == -1)
5562 SelectMaskVals.push_back(Elt: DAG.getUNDEF(VT: XLenVT));
5563 else
5564 SelectMaskVals.push_back(Elt: DAG.getConstant(Val: Lane ? 0 : 1, DL, VT: XLenVT));
5565 }
5566 MVT MaskVT = VT.changeVectorElementType(EltVT: MVT::i1);
5567 SDValue SelectMask = DAG.getBuildVector(VT: MaskVT, DL, Ops: SelectMaskVals);
5568 SDValue Select = DAG.getNode(Opcode: ISD::VSELECT, DL, VT, N1: SelectMask,
5569 N2: SVN->getOperand(Num: 0), N3: SVN->getOperand(Num: 1));
5570
5571 // Move all indices relative to the first source.
5572 SmallVector<int> NewMask(Mask.size());
5573 for (unsigned I = 0; I < Mask.size(); I++) {
5574 if (Mask[I] == -1)
5575 NewMask[I] = -1;
5576 else
5577 NewMask[I] = Mask[I] % Mask.size();
5578 }
5579
5580 return DAG.getVectorShuffle(VT, dl: DL, N1: Select, N2: DAG.getUNDEF(VT), Mask: NewMask);
5581}
5582
5583/// Is this mask local (i.e. elements only move within their local span), and
5584/// repeating (that is, the same rearrangement is being done within each span)?
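/// For example, with Span = 4, <1, 0, 3, 2, 5, 4, 7, 6> is local and repeating
/// (each span applies the same <1, 0, 3, 2> swap), while <1, 0, 3, 2, 4, 5, 6, 7>
/// is not, since the second span uses a different rearrangement.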
5585static bool isLocalRepeatingShuffle(ArrayRef<int> Mask, int Span) {
5586 // Require a prefix from the original mask until the consumer code
5587 // is adjusted to rewrite the mask instead of just taking a prefix.
5588 for (auto [I, M] : enumerate(First&: Mask)) {
5589 if (M == -1)
5590 continue;
5591 if ((M / Span) != (int)(I / Span))
5592 return false;
5593 int SpanIdx = I % Span;
5594 int Expected = M % Span;
5595 if (Mask[SpanIdx] != Expected)
5596 return false;
5597 }
5598 return true;
5599}
5600
5601/// Is this mask only using elements from the first span of the input?
5602static bool isLowSourceShuffle(ArrayRef<int> Mask, int Span) {
5603 return all_of(Range&: Mask, P: [&](const auto &Idx) { return Idx == -1 || Idx < Span; });
5604}
5605
5606/// Return true for a mask which performs an arbitrary shuffle within the first
5607/// span, and then repeats that same result across all remaining spans. Note
5608/// that this doesn't check if all the inputs come from a single span!
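/// For example, with Span = 4, <2, 0, 3, 1, 2, 0, 3, 1> repeats the first
/// span's result <2, 0, 3, 1> across both spans.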
5609static bool isSpanSplatShuffle(ArrayRef<int> Mask, int Span) {
5610 // Require a prefix from the original mask until the consumer code
5611 // is adjusted to rewrite the mask instead of just taking a prefix.
5612 for (auto [I, M] : enumerate(First&: Mask)) {
5613 if (M == -1)
5614 continue;
5615 int SpanIdx = I % Span;
5616 if (Mask[SpanIdx] != M)
5617 return false;
5618 }
5619 return true;
5620}
5621
5622/// Try to widen the element type to get a new mask value for a better
5623/// permutation sequence. This doesn't try to inspect the widened mask for
5624/// profitability; we speculate the widened form is equal or better. This has
5625/// the effect of reducing mask constant sizes (allowing cheaper materialization
5626/// sequences) and index sequence sizes (reducing register pressure and
5627/// materialization cost), at the price of (possibly) an extra VTYPE toggle.
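/// For example, a v8i8 shuffle with mask <2, 3, 0, 1, 6, 7, 4, 5> can be
/// rewritten as a v4i16 shuffle with mask <1, 0, 3, 2>.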
5628static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG) {
5629 SDLoc DL(Op);
5630 MVT VT = Op.getSimpleValueType();
5631 MVT ScalarVT = VT.getVectorElementType();
5632 unsigned ElementSize = ScalarVT.getFixedSizeInBits();
5633 SDValue V0 = Op.getOperand(i: 0);
5634 SDValue V1 = Op.getOperand(i: 1);
5635 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Val&: Op)->getMask();
5636
5637 // Avoid wasted work leading to isTypeLegal check failing below
5638 if (ElementSize > 32)
5639 return SDValue();
5640
5641 SmallVector<int, 8> NewMask;
5642 if (!widenShuffleMaskElts(M: Mask, NewMask))
5643 return SDValue();
5644
5645 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(BitWidth: ElementSize * 2)
5646 : MVT::getIntegerVT(BitWidth: ElementSize * 2);
5647 MVT NewVT = MVT::getVectorVT(VT: NewEltVT, NumElements: VT.getVectorNumElements() / 2);
5648 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT: NewVT))
5649 return SDValue();
5650 V0 = DAG.getBitcast(VT: NewVT, V: V0);
5651 V1 = DAG.getBitcast(VT: NewVT, V: V1);
5652 return DAG.getBitcast(VT, V: DAG.getVectorShuffle(VT: NewVT, dl: DL, N1: V0, N2: V1, Mask: NewMask));
5653}
5654
5655static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
5656 const RISCVSubtarget &Subtarget) {
5657 SDValue V1 = Op.getOperand(i: 0);
5658 SDValue V2 = Op.getOperand(i: 1);
5659 SDLoc DL(Op);
5660 MVT XLenVT = Subtarget.getXLenVT();
5661 MVT VT = Op.getSimpleValueType();
5662 unsigned NumElts = VT.getVectorNumElements();
5663 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Val: Op.getNode());
5664
5665 if (VT.getVectorElementType() == MVT::i1) {
5666 // Lower to a vror.vi of a larger element type if possible before we promote
5667 // i1s to i8s.
5668 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5669 return V;
5670 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
5671 return V;
5672
5673 // Promote i1 shuffle to i8 shuffle.
5674 MVT WidenVT = MVT::getVectorVT(VT: MVT::i8, EC: VT.getVectorElementCount());
5675 V1 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: WidenVT, Operand: V1);
5676 V2 = V2.isUndef() ? DAG.getUNDEF(VT: WidenVT)
5677 : DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: WidenVT, Operand: V2);
5678 SDValue Shuffled = DAG.getVectorShuffle(VT: WidenVT, dl: DL, N1: V1, N2: V2, Mask: SVN->getMask());
5679 return DAG.getSetCC(DL, VT, LHS: Shuffled, RHS: DAG.getConstant(Val: 0, DL, VT: WidenVT),
5680 Cond: ISD::SETNE);
5681 }
5682
5683 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5684
5685 // Store the return value in a single variable instead of structured bindings
5686 // so that we can pass it to GetSlide below, which cannot capture structured
5687 // bindings until C++20.
5688 auto TrueMaskVL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
5689 auto [TrueMask, VL] = TrueMaskVL;
5690
5691 if (SVN->isSplat()) {
5692 const int Lane = SVN->getSplatIndex();
5693 if (Lane >= 0) {
5694 MVT SVT = VT.getVectorElementType();
5695
5696 // Turn splatted vector load into a strided load with an X0 stride.
5697 SDValue V = V1;
5698 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
5699 // with undef.
5700 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
5701 int Offset = Lane;
5702 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
5703 int OpElements =
5704 V.getOperand(i: 0).getSimpleValueType().getVectorNumElements();
5705 V = V.getOperand(i: Offset / OpElements);
5706 Offset %= OpElements;
5707 }
5708
5709 // We need to ensure the load isn't atomic or volatile.
5710 if (ISD::isNormalLoad(N: V.getNode()) && cast<LoadSDNode>(Val&: V)->isSimple()) {
5711 auto *Ld = cast<LoadSDNode>(Val&: V);
5712 Offset *= SVT.getStoreSize();
5713 SDValue NewAddr = DAG.getMemBasePlusOffset(
5714 Base: Ld->getBasePtr(), Offset: TypeSize::getFixed(ExactSize: Offset), DL);
5715
5716 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
5717 if (SVT.isInteger() && SVT.bitsGT(VT: XLenVT)) {
5718 SDVTList VTs = DAG.getVTList(VTs: {ContainerVT, MVT::Other});
5719 SDValue IntID =
5720 DAG.getTargetConstant(Val: Intrinsic::riscv_vlse, DL, VT: XLenVT);
5721 SDValue Ops[] = {Ld->getChain(),
5722 IntID,
5723 DAG.getUNDEF(VT: ContainerVT),
5724 NewAddr,
5725 DAG.getRegister(Reg: RISCV::X0, VT: XLenVT),
5726 VL};
5727 SDValue NewLoad = DAG.getMemIntrinsicNode(
5728 Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, Ops, MemVT: SVT,
5729 MMO: DAG.getMachineFunction().getMachineMemOperand(
5730 MMO: Ld->getMemOperand(), Offset, Size: SVT.getStoreSize()));
5731 DAG.makeEquivalentMemoryOrdering(OldLoad: Ld, NewMemOp: NewLoad);
5732 return convertFromScalableVector(VT, V: NewLoad, DAG, Subtarget);
5733 }
5734
5735 MVT SplatVT = ContainerVT;
5736
5737 // f16 with zvfhmin and bf16 need to use an integer scalar load.
5738 if (SVT == MVT::bf16 ||
5739 (SVT == MVT::f16 && !Subtarget.hasStdExtZfh())) {
5740 SVT = MVT::i16;
5741 SplatVT = ContainerVT.changeVectorElementType(EltVT: SVT);
5742 }
5743
5744 // Otherwise use a scalar load and splat. This will give the best
5745 // opportunity to fold a splat into the operation. ISel can turn it into
5746 // the x0 strided load if we aren't able to fold away the select.
5747 if (SVT.isFloatingPoint())
5748 V = DAG.getLoad(VT: SVT, dl: DL, Chain: Ld->getChain(), Ptr: NewAddr,
5749 PtrInfo: Ld->getPointerInfo().getWithOffset(O: Offset),
5750 Alignment: Ld->getBaseAlign(), MMOFlags: Ld->getMemOperand()->getFlags());
5751 else
5752 V = DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl: DL, VT: XLenVT, Chain: Ld->getChain(), Ptr: NewAddr,
5753 PtrInfo: Ld->getPointerInfo().getWithOffset(O: Offset), MemVT: SVT,
5754 Alignment: Ld->getBaseAlign(),
5755 MMOFlags: Ld->getMemOperand()->getFlags());
5756 DAG.makeEquivalentMemoryOrdering(OldLoad: Ld, NewMemOp: V);
5757
5758 unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
5759 : RISCVISD::VMV_V_X_VL;
5760 SDValue Splat =
5761 DAG.getNode(Opcode: Opc, DL, VT: SplatVT, N1: DAG.getUNDEF(VT: ContainerVT), N2: V, N3: VL);
5762 Splat = DAG.getBitcast(VT: ContainerVT, V: Splat);
5763 return convertFromScalableVector(VT, V: Splat, DAG, Subtarget);
5764 }
5765
5766 V1 = convertToScalableVector(VT: ContainerVT, V: V1, DAG, Subtarget);
5767 assert(Lane < (int)NumElts && "Unexpected lane!");
5768 SDValue Gather = DAG.getNode(Opcode: RISCVISD::VRGATHER_VX_VL, DL, VT: ContainerVT,
5769 N1: V1, N2: DAG.getConstant(Val: Lane, DL, VT: XLenVT),
5770 N3: DAG.getUNDEF(VT: ContainerVT), N4: TrueMask, N5: VL);
5771 return convertFromScalableVector(VT, V: Gather, DAG, Subtarget);
5772 }
5773 }
5774
5775 // For exact VLEN m2 or greater, try to split to m1 operations if we
5776 // can split cleanly.
5777 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
5778 return V;
5779
5780 ArrayRef<int> Mask = SVN->getMask();
5781
5782 if (SDValue V =
5783 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5784 return V;
5785
5786 if (SDValue V =
5787 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5788 return V;
5789
5790 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5791 // available.
5792 if (Subtarget.hasStdExtZvkb())
5793 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5794 return V;
5795
5796 if (ShuffleVectorInst::isReverseMask(Mask, NumSrcElts: NumElts) && V2.isUndef() &&
5797 NumElts != 2)
5798 return DAG.getNode(Opcode: ISD::VECTOR_REVERSE, DL, VT, Operand: V1);
5799
5800 // If this is a deinterleave(2,4,8) and we can widen the vector, then we can
5801 // use shift and truncate to perform the shuffle.
5802 // TODO: For Factor=6, we can perform the first step of the deinterleave via
5803 // shift-and-trunc reducing total cost for everything except an mf8 result.
5804 // TODO: For Factor=4,8, we can do the same when the ratio isn't high enough
5805 // to do the entire operation.
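 // For example, the odd elements (Factor=2, Index=1) of a v8i8 source can be
 // extracted by reinterpreting it as v4i16, shifting each element right by 8,
 // and truncating back to v4i8.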
5806 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
5807 const unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
5808 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5809 for (unsigned Factor = 2; Factor <= MaxFactor; Factor <<= 1) {
5810 unsigned Index = 0;
5811 if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index) &&
5812 1 < count_if(Range&: Mask, P: [](int Idx) { return Idx != -1; })) {
5813 if (SDValue Src = getSingleShuffleSrc(VT, V1, V2))
5814 return getDeinterleaveShiftAndTrunc(DL, VT, Src, Factor, Index, DAG);
5815 if (1 < count_if(Range&: Mask,
5816 P: [&Mask](int Idx) { return Idx < (int)Mask.size(); }) &&
5817 1 < count_if(Range&: Mask, P: [&Mask](int Idx) {
5818 return Idx >= (int)Mask.size();
5819 })) {
5820 // Narrow each source and concatenate them.
5821 // FIXME: For small LMUL it is better to concatenate first.
5822 MVT EltVT = VT.getVectorElementType();
5823 auto EltCnt = VT.getVectorElementCount();
5824 MVT SubVT =
5825 MVT::getVectorVT(VT: EltVT, EC: EltCnt.divideCoefficientBy(RHS: Factor));
5826
5827 SDValue Lo =
5828 getDeinterleaveShiftAndTrunc(DL, VT: SubVT, Src: V1, Factor, Index, DAG);
5829 SDValue Hi =
5830 getDeinterleaveShiftAndTrunc(DL, VT: SubVT, Src: V2, Factor, Index, DAG);
5831
5832 SDValue Concat =
5833 DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL,
5834 VT: SubVT.getDoubleNumVectorElementsVT(), N1: Lo, N2: Hi);
5835 if (Factor == 2)
5836 return Concat;
5837
5838 SDValue Vec = DAG.getUNDEF(VT);
5839 return DAG.getInsertSubvector(DL, Vec, SubVec: Concat, Idx: 0);
5840 }
5841 }
5842 }
5843 }
5844
5845 // If this is a deinterleave(2), try using vunzip{a,b}. This mostly catches
5846 // e64 which can't match above.
5847 unsigned Index = 0;
5848 if (Subtarget.hasVendorXRivosVizip() &&
5849 ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor: 2, Index) &&
5850 1 < count_if(Range&: Mask, P: [](int Idx) { return Idx != -1; })) {
5851 unsigned Opc =
5852 Index == 0 ? RISCVISD::RI_VUNZIP2A_VL : RISCVISD::RI_VUNZIP2B_VL;
5853 if (V2.isUndef())
5854 return lowerVZIP(Opc, Op0: V1, Op1: V2, DL, DAG, Subtarget);
5855 if (auto VLEN = Subtarget.getRealVLen();
5856 VLEN && VT.getSizeInBits().getKnownMinValue() % *VLEN == 0)
5857 return lowerVZIP(Opc, Op0: V1, Op1: V2, DL, DAG, Subtarget);
5858 if (SDValue Src = foldConcatVector(V1, V2)) {
5859 EVT NewVT = VT.getDoubleNumVectorElementsVT();
5860 Src = DAG.getExtractSubvector(DL, VT: NewVT, Vec: Src, Idx: 0);
5861 SDValue Res =
5862 lowerVZIP(Opc, Op0: Src, Op1: DAG.getUNDEF(VT: NewVT), DL, DAG, Subtarget);
5863 return DAG.getExtractSubvector(DL, VT, Vec: Res, Idx: 0);
5864 }
5865 // Deinterleave each source and concatenate them, or concat first, then
5866 // deinterleave.
5867 if (1 < count_if(Range&: Mask,
5868 P: [&Mask](int Idx) { return Idx < (int)Mask.size(); }) &&
5869 1 < count_if(Range&: Mask,
5870 P: [&Mask](int Idx) { return Idx >= (int)Mask.size(); })) {
5871
5872 const unsigned EltSize = VT.getScalarSizeInBits();
5873 const unsigned MinVLMAX = Subtarget.getRealMinVLen() / EltSize;
5874 if (NumElts < MinVLMAX) {
5875 MVT ConcatVT = VT.getDoubleNumVectorElementsVT();
5876 SDValue Concat = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: ConcatVT, N1: V1, N2: V2);
5877 SDValue Res =
5878 lowerVZIP(Opc, Op0: Concat, Op1: DAG.getUNDEF(VT: ConcatVT), DL, DAG, Subtarget);
5879 return DAG.getExtractSubvector(DL, VT, Vec: Res, Idx: 0);
5880 }
5881
5882 SDValue Lo = lowerVZIP(Opc, Op0: V1, Op1: DAG.getUNDEF(VT), DL, DAG, Subtarget);
5883 SDValue Hi = lowerVZIP(Opc, Op0: V2, Op1: DAG.getUNDEF(VT), DL, DAG, Subtarget);
5884
5885 MVT SubVT = VT.getHalfNumVectorElementsVT();
5886 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT,
5887 N1: DAG.getExtractSubvector(DL, VT: SubVT, Vec: Lo, Idx: 0),
5888 N2: DAG.getExtractSubvector(DL, VT: SubVT, Vec: Hi, Idx: 0));
5889 }
5890 }
5891
5892 if (SDValue V =
5893 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
5894 return V;
5895
5896 // Detect an interleave shuffle and lower to
5897 // (vwmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
5898 int EvenSrc, OddSrc;
5899 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget) &&
5900 !(NumElts == 2 &&
5901 ShuffleVectorInst::isSingleSourceMask(Mask, NumSrcElts: Mask.size()))) {
5902 // Extract the halves of the vectors.
5903 MVT HalfVT = VT.getHalfNumVectorElementsVT();
5904
5905 // Recognize if one half is actually undef; the matching above will
5906 // otherwise reuse the even stream for the undef one. This improves
5907 // spread(2) shuffles.
5908 bool LaneIsUndef[2] = {true, true};
5909 for (unsigned i = 0; i < Mask.size(); i++)
5910 LaneIsUndef[i % 2] &= (Mask[i] == -1);
5911
5912 int Size = Mask.size();
5913 SDValue EvenV, OddV;
5914 if (LaneIsUndef[0]) {
5915 EvenV = DAG.getUNDEF(VT: HalfVT);
5916 } else {
5917 assert(EvenSrc >= 0 && "Undef source?");
5918 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
5919 EvenV = DAG.getExtractSubvector(DL, VT: HalfVT, Vec: EvenV, Idx: EvenSrc % Size);
5920 }
5921
5922 if (LaneIsUndef[1]) {
5923 OddV = DAG.getUNDEF(VT: HalfVT);
5924 } else {
5925 assert(OddSrc >= 0 && "Undef source?");
5926 OddV = (OddSrc / Size) == 0 ? V1 : V2;
5927 OddV = DAG.getExtractSubvector(DL, VT: HalfVT, Vec: OddV, Idx: OddSrc % Size);
5928 }
5929
5930 // Prefer vzip2a if available.
5931 // TODO: Extend to matching zip2b if EvenSrc and OddSrc allow.
5932 if (Subtarget.hasVendorXRivosVizip()) {
5933 EvenV = DAG.getInsertSubvector(DL, Vec: DAG.getUNDEF(VT), SubVec: EvenV, Idx: 0);
5934 OddV = DAG.getInsertSubvector(DL, Vec: DAG.getUNDEF(VT), SubVec: OddV, Idx: 0);
5935 return lowerVZIP(Opc: RISCVISD::RI_VZIP2A_VL, Op0: EvenV, Op1: OddV, DL, DAG, Subtarget);
5936 }
5937 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
5938 }
5939
5940 // Recognize a pattern which can be handled via a pair of vslideup/vslidedown
5941 // instructions (in any combination) with masking on the second instruction.
5942 // Also handles masked slides into an identity source, and single slides
5943 // without masking. Avoid matching bit rotates (which are not also element
5944 // rotates) as slide pairs. This is a performance heuristic, not a
5945 // functional check.
5946 std::array<std::pair<int, int>, 2> SrcInfo;
5947 unsigned RotateAmt;
5948 MVT RotateVT;
5949 if (::isMaskedSlidePair(Mask, SrcInfo) &&
5950 (isElementRotate(SrcInfo, NumElts) ||
5951 !isLegalBitRotate(Mask, VT, Subtarget, RotateVT, RotateAmt))) {
5952 SDValue Sources[2];
5953 auto GetSourceFor = [&](const std::pair<int, int> &Info) {
5954 int SrcIdx = Info.first;
5955 assert(SrcIdx == 0 || SrcIdx == 1);
5956 SDValue &Src = Sources[SrcIdx];
5957 if (!Src) {
5958 SDValue SrcV = SrcIdx == 0 ? V1 : V2;
5959 Src = convertToScalableVector(VT: ContainerVT, V: SrcV, DAG, Subtarget);
5960 }
5961 return Src;
5962 };
5963 auto GetSlide = [&](const std::pair<int, int> &Src, SDValue Mask,
5964 SDValue Passthru) {
5965 auto [TrueMask, VL] = TrueMaskVL;
5966 SDValue SrcV = GetSourceFor(Src);
5967 int SlideAmt = Src.second;
5968 if (SlideAmt == 0) {
5969 // Should never be the second operation.
5970 assert(Mask == TrueMask);
5971 return SrcV;
5972 }
5973 if (SlideAmt < 0)
5974 return getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT, Passthru, Op: SrcV,
5975 Offset: DAG.getConstant(Val: -SlideAmt, DL, VT: XLenVT), Mask, VL,
5976 Policy: RISCVVType::TAIL_AGNOSTIC);
5977 return getVSlideup(DAG, Subtarget, DL, VT: ContainerVT, Passthru, Op: SrcV,
5978 Offset: DAG.getConstant(Val: SlideAmt, DL, VT: XLenVT), Mask, VL,
5979 Policy: RISCVVType::TAIL_AGNOSTIC);
5980 };
5981
5982 if (SrcInfo[1].first == -1) {
5983 SDValue Res = DAG.getUNDEF(VT: ContainerVT);
5984 Res = GetSlide(SrcInfo[0], TrueMask, Res);
5985 return convertFromScalableVector(VT, V: Res, DAG, Subtarget);
5986 }
5987
5988 if (Subtarget.hasVendorXRivosVizip()) {
5989 bool TryWiden = false;
5990 unsigned Factor;
5991 if (isZipEven(SrcInfo, Mask, Factor)) {
5992 if (Factor == 1) {
5993 SDValue Src1 = SrcInfo[0].first == 0 ? V1 : V2;
5994 SDValue Src2 = SrcInfo[1].first == 0 ? V1 : V2;
5995 return lowerVZIP(Opc: RISCVISD::RI_VZIPEVEN_VL, Op0: Src1, Op1: Src2, DL, DAG,
5996 Subtarget);
5997 }
5998 TryWiden = true;
5999 }
6000 if (isZipOdd(SrcInfo, Mask, Factor)) {
6001 if (Factor == 1) {
6002 SDValue Src1 = SrcInfo[1].first == 0 ? V1 : V2;
6003 SDValue Src2 = SrcInfo[0].first == 0 ? V1 : V2;
6004 return lowerVZIP(Opc: RISCVISD::RI_VZIPODD_VL, Op0: Src1, Op1: Src2, DL, DAG,
6005 Subtarget);
6006 }
6007 TryWiden = true;
6008 }
6009 // If we found a widening opportunity which would let us form a
6010 // zipeven or zipodd, use the generic code to widen the shuffle
6011 // and recurse through this logic.
6012 if (TryWiden)
6013 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6014 return V;
6015 }
6016
6017 // Build the mask. Note that vslideup unconditionally preserves elements
6018 // below the slide amount in the destination, and thus those elements are
6019 // undefined in the mask. If the mask ends up all true (or undef), it
6020 // will be folded away by general logic.
6021 SmallVector<SDValue> MaskVals;
6022 for (unsigned i = 0; i != Mask.size(); ++i) {
6023 int M = Mask[i];
6024 if (M < 0 || (SrcInfo[1].second > 0 && i < (unsigned)SrcInfo[1].second)) {
6025 MaskVals.push_back(Elt: DAG.getUNDEF(VT: XLenVT));
6026 continue;
6027 }
6028 int Src = M >= (int)NumElts;
6029 int Diff = (int)i - (M % NumElts);
6030 bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
6031 assert(C ^ (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) &&
6032 "Must match exactly one of the two slides");
6033 MaskVals.push_back(Elt: DAG.getConstant(Val: C, DL, VT: XLenVT));
6034 }
6035 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
6036 MVT MaskVT = MVT::getVectorVT(VT: MVT::i1, NumElements: NumElts);
6037 SDValue SelectMask = convertToScalableVector(
6038 VT: ContainerVT.changeVectorElementType(EltVT: MVT::i1),
6039 V: DAG.getBuildVector(VT: MaskVT, DL, Ops: MaskVals), DAG, Subtarget);
6040
6041 SDValue Res = DAG.getUNDEF(VT: ContainerVT);
6042 Res = GetSlide(SrcInfo[0], TrueMask, Res);
6043 Res = GetSlide(SrcInfo[1], SelectMask, Res);
6044 return convertFromScalableVector(VT, V: Res, DAG, Subtarget);
6045 }
6046
6047 // Handle any remaining single source shuffles
6048 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
6049 if (V2.isUndef()) {
6050 // We might be able to express the shuffle as a bitrotate. But even if we
6051 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
6052 // shifts and a vor will have a higher throughput than a vrgather.
6053 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
6054 return V;
6055
6056 if (SDValue V = lowerVECTOR_SHUFFLEAsVRGatherVX(SVN, Subtarget, DAG))
6057 return V;
6058
6059 // Match a spread(4,8) which can be done via extend and shift. Spread(2)
6060 // is fully covered in interleave(2) above, so it is ignored here.
6061 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
6062 unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
6063 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
6064 for (unsigned Factor = 4; Factor <= MaxFactor; Factor <<= 1) {
6065 unsigned Index;
6066 if (isSpreadMask(Mask, Factor, Index)) {
6067 MVT NarrowVT =
6068 MVT::getVectorVT(VT: VT.getVectorElementType(), NumElements: NumElts / Factor);
6069 SDValue Src = DAG.getExtractSubvector(DL, VT: NarrowVT, Vec: V1, Idx: 0);
6070 return getWideningSpread(V: Src, Factor, Index, DL, DAG);
6071 }
6072 }
6073 }
6074
6075 // If only a prefix of the source elements influences a prefix of the
6076 // destination elements, see if we can reduce the required LMUL.
6077 unsigned MinVLen = Subtarget.getRealMinVLen();
6078 unsigned MinVLMAX = MinVLen / VT.getScalarSizeInBits();
6079 if (NumElts > MinVLMAX) {
6080 unsigned MaxIdx = 0;
6081 for (auto [I, M] : enumerate(First&: Mask)) {
6082 if (M == -1)
6083 continue;
6084 MaxIdx = std::max(a: std::max(a: (unsigned)I, b: (unsigned)M), b: MaxIdx);
6085 }
6086 unsigned NewNumElts =
6087 std::max(a: (uint64_t)MinVLMAX, b: PowerOf2Ceil(A: MaxIdx + 1));
6088 if (NewNumElts != NumElts) {
6089 MVT NewVT = MVT::getVectorVT(VT: VT.getVectorElementType(), NumElements: NewNumElts);
6090 V1 = DAG.getExtractSubvector(DL, VT: NewVT, Vec: V1, Idx: 0);
6091 SDValue Res = DAG.getVectorShuffle(VT: NewVT, dl: DL, N1: V1, N2: DAG.getUNDEF(VT: NewVT),
6092 Mask: Mask.take_front(N: NewNumElts));
6093 return DAG.getInsertSubvector(DL, Vec: DAG.getUNDEF(VT), SubVec: Res, Idx: 0);
6094 }
6095 }
6096
6097 // Before hitting generic lowering fallbacks, try to widen the mask
6098 // to a wider SEW.
6099 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6100 return V;
6101
6102 // Can we generate a vcompress instead of a vrgather? These scale better
6103 // at high LMUL, at the cost of not being able to fold a following select
6104 // into them. The mask constants are also smaller than the index vector
6105 // constants, and thus easier to materialize.
6106 if (isCompressMask(Mask)) {
6107 SmallVector<SDValue> MaskVals(NumElts,
6108 DAG.getConstant(Val: false, DL, VT: XLenVT));
6109 for (auto Idx : Mask) {
6110 if (Idx == -1)
6111 break;
6112 assert(Idx >= 0 && (unsigned)Idx < NumElts);
6113 MaskVals[Idx] = DAG.getConstant(Val: true, DL, VT: XLenVT);
6114 }
6115 MVT MaskVT = MVT::getVectorVT(VT: MVT::i1, NumElements: NumElts);
6116 SDValue CompressMask = DAG.getBuildVector(VT: MaskVT, DL, Ops: MaskVals);
6117 return DAG.getNode(Opcode: ISD::VECTOR_COMPRESS, DL, VT, N1: V1, N2: CompressMask,
6118 N3: DAG.getUNDEF(VT));
6119 }
6120
6121 if (VT.getScalarSizeInBits() == 8 &&
6122 any_of(Range&: Mask, P: [&](const auto &Idx) { return Idx > 255; })) {
6123 // On such a vector we're unable to use i8 as the index type.
6124 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
6125 // may involve vector splitting if we're already at LMUL=8, or our
6126 // user-supplied maximum fixed-length LMUL.
6127 return SDValue();
6128 }
6129
6130 // Base case for the two-operand recursion below: handle the worst case
6131 // single source shuffle.
6132 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
6133 MVT IndexVT = VT.changeTypeToInteger();
6134 // Since we can't introduce illegal index types at this stage, use i16 and
6135 // vrgatherei16 if the corresponding index type for plain vrgather is greater
6136 // than XLenVT.
6137 if (IndexVT.getScalarType().bitsGT(VT: XLenVT)) {
6138 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
6139 IndexVT = IndexVT.changeVectorElementType(EltVT: MVT::i16);
6140 }
6141
6142 // If the mask allows, we can do all the index computation in 16 bits. This
6143 // requires less work and less register pressure at high LMUL, and creates
6144 // smaller constants which may be cheaper to materialize.
6145 if (IndexVT.getScalarType().bitsGT(VT: MVT::i16) && isUInt<16>(x: NumElts - 1) &&
6146 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
6147 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
6148 IndexVT = IndexVT.changeVectorElementType(EltVT: MVT::i16);
6149 }
6150
6151 MVT IndexContainerVT =
6152 ContainerVT.changeVectorElementType(EltVT: IndexVT.getScalarType());
6153
6154 V1 = convertToScalableVector(VT: ContainerVT, V: V1, DAG, Subtarget);
6155 SmallVector<SDValue> GatherIndicesLHS;
6156 for (int MaskIndex : Mask) {
6157 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
6158 GatherIndicesLHS.push_back(Elt: IsLHSIndex
6159 ? DAG.getConstant(Val: MaskIndex, DL, VT: XLenVT)
6160 : DAG.getUNDEF(VT: XLenVT));
6161 }
6162 SDValue LHSIndices = DAG.getBuildVector(VT: IndexVT, DL, Ops: GatherIndicesLHS);
6163 LHSIndices =
6164 convertToScalableVector(VT: IndexContainerVT, V: LHSIndices, DAG, Subtarget);
6165 // At m1 and less, there's no point trying any of the high LMUL splitting
6166 // techniques. TODO: Should we reconsider this for DLEN < VLEN?
6167 if (NumElts <= MinVLMAX) {
6168 SDValue Gather = DAG.getNode(Opcode: GatherVVOpc, DL, VT: ContainerVT, N1: V1, N2: LHSIndices,
6169 N3: DAG.getUNDEF(VT: ContainerVT), N4: TrueMask, N5: VL);
6170 return convertFromScalableVector(VT, V: Gather, DAG, Subtarget);
6171 }
6172
6173 const MVT M1VT = RISCVTargetLowering::getM1VT(VT: ContainerVT);
6174 EVT SubIndexVT = M1VT.changeVectorElementType(EltVT: IndexVT.getScalarType());
6175 auto [InnerTrueMask, InnerVL] =
6176 getDefaultScalableVLOps(VecVT: M1VT, DL, DAG, Subtarget);
6177 int N =
6178 ContainerVT.getVectorMinNumElements() / M1VT.getVectorMinNumElements();
6179 assert(isPowerOf2_32(N) && N <= 8);
6180
6181 // If we have a locally repeating mask, then we can reuse the first
6182 // register in the index register group for all registers within the
6183 // source register group. TODO: This generalizes to m2 and m4.
6184 if (isLocalRepeatingShuffle(Mask, Span: MinVLMAX)) {
6185 SDValue SubIndex = DAG.getExtractSubvector(DL, VT: SubIndexVT, Vec: LHSIndices, Idx: 0);
6186 SDValue Gather = DAG.getUNDEF(VT: ContainerVT);
6187 for (int i = 0; i < N; i++) {
6188 unsigned SubIdx = M1VT.getVectorMinNumElements() * i;
6189 SDValue SubV1 = DAG.getExtractSubvector(DL, VT: M1VT, Vec: V1, Idx: SubIdx);
6190 SDValue SubVec =
6191 DAG.getNode(Opcode: GatherVVOpc, DL, VT: M1VT, N1: SubV1, N2: SubIndex,
6192 N3: DAG.getUNDEF(VT: M1VT), N4: InnerTrueMask, N5: InnerVL);
6193 Gather = DAG.getInsertSubvector(DL, Vec: Gather, SubVec, Idx: SubIdx);
6194 }
6195 return convertFromScalableVector(VT, V: Gather, DAG, Subtarget);
6196 }
6197
6198 // If we have a shuffle which only uses the first register in our source
6199 // register group, and repeats the same index across all spans, we can
6200 // use a single vrgather (and possibly some register moves).
6201 // TODO: This can be generalized for m2 or m4, or for any shuffle for
6202 // which we can do a linear number of shuffles to form an m1 which
6203 // contains all the output elements.
6204 if (isLowSourceShuffle(Mask, Span: MinVLMAX) &&
6205 isSpanSplatShuffle(Mask, Span: MinVLMAX)) {
6206 SDValue SubV1 = DAG.getExtractSubvector(DL, VT: M1VT, Vec: V1, Idx: 0);
6207 SDValue SubIndex = DAG.getExtractSubvector(DL, VT: SubIndexVT, Vec: LHSIndices, Idx: 0);
6208 SDValue SubVec = DAG.getNode(Opcode: GatherVVOpc, DL, VT: M1VT, N1: SubV1, N2: SubIndex,
6209 N3: DAG.getUNDEF(VT: M1VT), N4: InnerTrueMask, N5: InnerVL);
6210 SDValue Gather = DAG.getUNDEF(VT: ContainerVT);
6211 for (int i = 0; i < N; i++)
6212 Gather = DAG.getInsertSubvector(DL, Vec: Gather, SubVec,
6213 Idx: M1VT.getVectorMinNumElements() * i);
6214 return convertFromScalableVector(VT, V: Gather, DAG, Subtarget);
6215 }
6216
6217 // If we have a shuffle which only uses the first register in our
6218 // source register group, we can do a linear number of m1 vrgathers
6219 // reusing the same source register (but with different indices)
6220 // TODO: This can be generalized for m2 or m4, or for any shuffle
6221 // for which we can do a vslidedown followed by this expansion.
6222 if (isLowSourceShuffle(Mask, Span: MinVLMAX)) {
6223 SDValue SlideAmt =
6224 DAG.getElementCount(DL, VT: XLenVT, EC: M1VT.getVectorElementCount());
6225 SDValue SubV1 = DAG.getExtractSubvector(DL, VT: M1VT, Vec: V1, Idx: 0);
6226 SDValue Gather = DAG.getUNDEF(VT: ContainerVT);
6227 for (int i = 0; i < N; i++) {
6228 if (i != 0)
6229 LHSIndices = getVSlidedown(DAG, Subtarget, DL, VT: IndexContainerVT,
6230 Passthru: DAG.getUNDEF(VT: IndexContainerVT), Op: LHSIndices,
6231 Offset: SlideAmt, Mask: TrueMask, VL);
6232 SDValue SubIndex =
6233 DAG.getExtractSubvector(DL, VT: SubIndexVT, Vec: LHSIndices, Idx: 0);
6234 SDValue SubVec =
6235 DAG.getNode(Opcode: GatherVVOpc, DL, VT: M1VT, N1: SubV1, N2: SubIndex,
6236 N3: DAG.getUNDEF(VT: M1VT), N4: InnerTrueMask, N5: InnerVL);
6237 Gather = DAG.getInsertSubvector(DL, Vec: Gather, SubVec,
6238 Idx: M1VT.getVectorMinNumElements() * i);
6239 }
6240 return convertFromScalableVector(VT, V: Gather, DAG, Subtarget);
6241 }
6242
6243 // Fall back to a generic vrgather if we can't find anything better.
6244 // On many machines, this will be O(LMUL^2).
6245 SDValue Gather = DAG.getNode(Opcode: GatherVVOpc, DL, VT: ContainerVT, N1: V1, N2: LHSIndices,
6246 N3: DAG.getUNDEF(VT: ContainerVT), N4: TrueMask, N5: VL);
6247 return convertFromScalableVector(VT, V: Gather, DAG, Subtarget);
6248 }
6249
6250 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
6251 // merged with a second vrgather.
6252 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
6253
6254 // Now construct the mask that will be used by the blended vrgather operation.
6255 // Construct the appropriate indices into each vector.
6256 for (int MaskIndex : Mask) {
6257 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
6258 ShuffleMaskLHS.push_back(Elt: IsLHSOrUndefIndex && MaskIndex >= 0
6259 ? MaskIndex : -1);
6260 ShuffleMaskRHS.push_back(Elt: IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
6261 }
6262
6263 // If the mask indices are disjoint between the two sources, we can lower it
6264 // as a vselect + a single source vrgather.vv. Don't do this if we think the
6265 // operands may end up being lowered to something cheaper than a vrgather.vv.
6266 if (!DAG.isSplatValue(V: V2) && !DAG.isSplatValue(V: V1) &&
6267 !ShuffleVectorSDNode::isSplatMask(Mask: ShuffleMaskLHS) &&
6268 !ShuffleVectorSDNode::isSplatMask(Mask: ShuffleMaskRHS) &&
6269 !ShuffleVectorInst::isIdentityMask(Mask: ShuffleMaskLHS, NumSrcElts: NumElts) &&
6270 !ShuffleVectorInst::isIdentityMask(Mask: ShuffleMaskRHS, NumSrcElts: NumElts))
6271 if (SDValue V = lowerDisjointIndicesShuffle(SVN, DAG, Subtarget))
6272 return V;
6273
6274 // Before hitting generic lowering fallbacks, try to widen the mask
6275 // to a wider SEW.
6276 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6277 return V;
6278
6279 // Try to pick a profitable operand order.
6280 bool SwapOps = DAG.isSplatValue(V: V2) && !DAG.isSplatValue(V: V1);
6281 SwapOps = SwapOps ^ ShuffleVectorInst::isIdentityMask(Mask: ShuffleMaskRHS, NumSrcElts: NumElts);
6282
6283 // Recursively invoke lowering for each operand if we had two
6284 // independent single source shuffles, and then combine the result via a
6285 // vselect. Note that the vselect will likely be folded back into the
6286 // second permute (vrgather, or other) by the post-isel combine.
6287 V1 = DAG.getVectorShuffle(VT, dl: DL, N1: V1, N2: DAG.getUNDEF(VT), Mask: ShuffleMaskLHS);
6288 V2 = DAG.getVectorShuffle(VT, dl: DL, N1: V2, N2: DAG.getUNDEF(VT), Mask: ShuffleMaskRHS);
6289
6290 SmallVector<SDValue> MaskVals;
6291 for (int MaskIndex : Mask) {
6292 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
6293 MaskVals.push_back(Elt: DAG.getConstant(Val: SelectMaskVal, DL, VT: XLenVT));
6294 }
6295
6296 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
6297 MVT MaskVT = MVT::getVectorVT(VT: MVT::i1, NumElements: NumElts);
6298 SDValue SelectMask = DAG.getBuildVector(VT: MaskVT, DL, Ops: MaskVals);
6299
6300 if (SwapOps)
6301 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT, N1: SelectMask, N2: V1, N3: V2);
6302 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT, N1: SelectMask, N2: V2, N3: V1);
6303}
6304
6305bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
6306 // Only support legal VTs for other shuffles for now.
6307 if (!isTypeLegal(VT))
6308 return false;
6309
6310 // Support splats for any type. These should type legalize well.
6311 if (ShuffleVectorSDNode::isSplatMask(Mask: M))
6312 return true;
6313
6314 const unsigned NumElts = M.size();
6315 MVT SVT = VT.getSimpleVT();
6316
6317 // Not for i1 vectors.
6318 if (SVT.getScalarType() == MVT::i1)
6319 return false;
6320
6321 std::array<std::pair<int, int>, 2> SrcInfo;
6322 int Dummy1, Dummy2;
6323 return ShuffleVectorInst::isReverseMask(Mask: M, NumSrcElts: NumElts) ||
6324 (::isMaskedSlidePair(Mask: M, SrcInfo) &&
6325 isElementRotate(SrcInfo, NumElts)) ||
6326 isInterleaveShuffle(Mask: M, VT: SVT, EvenSrc&: Dummy1, OddSrc&: Dummy2, Subtarget);
6327}
6328
6329// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
6330// the exponent.
6331SDValue
6332RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
6333 SelectionDAG &DAG) const {
6334 MVT VT = Op.getSimpleValueType();
6335 unsigned EltSize = VT.getScalarSizeInBits();
6336 SDValue Src = Op.getOperand(i: 0);
6337 SDLoc DL(Op);
6338 MVT ContainerVT = VT;
6339
6340 SDValue Mask, VL;
6341 if (Op->isVPOpcode()) {
6342 Mask = Op.getOperand(i: 1);
6343 if (VT.isFixedLengthVector())
6344 Mask = convertToScalableVector(VT: getMaskTypeFor(VecVT: ContainerVT), V: Mask, DAG,
6345 Subtarget);
6346 VL = Op.getOperand(i: 2);
6347 }
6348
6349 // We choose an FP type that can represent the value exactly if possible.
6350 // Otherwise, use round-towards-zero conversion for a correct result exponent.
6351 // TODO: Use f16 for i8 when possible?
6352 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
6353 if (!isTypeLegal(VT: MVT::getVectorVT(VT: FloatEltVT, EC: VT.getVectorElementCount())))
6354 FloatEltVT = MVT::f32;
6355 MVT FloatVT = MVT::getVectorVT(VT: FloatEltVT, EC: VT.getVectorElementCount());
6356
6357 // Legal types should have been checked in the RISCVTargetLowering
6358 // constructor.
6359 // TODO: Splitting may make sense in some cases.
6360 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
6361 "Expected legal float type!");
6362
6363 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
6364 // The trailing zero count is equal to log2 of this single bit value.
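 // For example, with Src = 0b101000, Src & -Src = 0b1000, and log2(0b1000) = 3
 // is the trailing zero count.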
6365 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
6366 SDValue Neg = DAG.getNegative(Val: Src, DL, VT);
6367 Src = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Src, N2: Neg);
6368 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
6369 SDValue Neg = DAG.getNode(Opcode: ISD::VP_SUB, DL, VT, N1: DAG.getConstant(Val: 0, DL, VT),
6370 N2: Src, N3: Mask, N4: VL);
6371 Src = DAG.getNode(Opcode: ISD::VP_AND, DL, VT, N1: Src, N2: Neg, N3: Mask, N4: VL);
6372 }
6373
6374 // We have a legal FP type, convert to it.
6375 SDValue FloatVal;
6376 if (FloatVT.bitsGT(VT)) {
6377 if (Op->isVPOpcode())
6378 FloatVal = DAG.getNode(Opcode: ISD::VP_UINT_TO_FP, DL, VT: FloatVT, N1: Src, N2: Mask, N3: VL);
6379 else
6380 FloatVal = DAG.getNode(Opcode: ISD::UINT_TO_FP, DL, VT: FloatVT, Operand: Src);
6381 } else {
6382 // Use RTZ to avoid rounding influencing exponent of FloatVal.
6383 if (VT.isFixedLengthVector()) {
6384 ContainerVT = getContainerForFixedLengthVector(VT);
6385 Src = convertToScalableVector(VT: ContainerVT, V: Src, DAG, Subtarget);
6386 }
6387 if (!Op->isVPOpcode())
6388 std::tie(args&: Mask, args&: VL) = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
6389 SDValue RTZRM =
6390 DAG.getTargetConstant(Val: RISCVFPRndMode::RTZ, DL, VT: Subtarget.getXLenVT());
6391 MVT ContainerFloatVT =
6392 MVT::getVectorVT(VT: FloatEltVT, EC: ContainerVT.getVectorElementCount());
6393 FloatVal = DAG.getNode(Opcode: RISCVISD::VFCVT_RM_F_XU_VL, DL, VT: ContainerFloatVT,
6394 N1: Src, N2: Mask, N3: RTZRM, N4: VL);
6395 if (VT.isFixedLengthVector())
6396 FloatVal = convertFromScalableVector(VT: FloatVT, V: FloatVal, DAG, Subtarget);
6397 }
6398 // Bitcast to integer and shift the exponent to the LSB.
6399 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
6400 SDValue Bitcast = DAG.getBitcast(VT: IntVT, V: FloatVal);
6401 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
6402
6403 SDValue Exp;
6404 // Restore to the original type. The truncate after the SRL is selected as vnsrl.
6405 if (Op->isVPOpcode()) {
6406 Exp = DAG.getNode(Opcode: ISD::VP_SRL, DL, VT: IntVT, N1: Bitcast,
6407 N2: DAG.getConstant(Val: ShiftAmt, DL, VT: IntVT), N3: Mask, N4: VL);
6408 Exp = DAG.getVPZExtOrTrunc(DL, VT, Op: Exp, Mask, EVL: VL);
6409 } else {
6410 Exp = DAG.getNode(Opcode: ISD::SRL, DL, VT: IntVT, N1: Bitcast,
6411 N2: DAG.getConstant(Val: ShiftAmt, DL, VT: IntVT));
6412 if (IntVT.bitsLT(VT))
6413 Exp = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT, Operand: Exp);
6414 else if (IntVT.bitsGT(VT))
6415 Exp = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Exp);
6416 }
6417
6418 // The exponent contains log2 of the value in biased form.
6419 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
6420 // For trailing zeros, we just need to subtract the bias.
6421 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
6422 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Exp,
6423 N2: DAG.getConstant(Val: ExponentBias, DL, VT));
6424 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
6425 return DAG.getNode(Opcode: ISD::VP_SUB, DL, VT, N1: Exp,
6426 N2: DAG.getConstant(Val: ExponentBias, DL, VT), N3: Mask, N4: VL);
6427
6428 // For leading zeros, we need to remove the bias and convert from log2 to
6429 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
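 // For example, for an i32 element equal to 1 << 20, the f32 exponent field is
 // 127 + 20, and (127 + 31) - (127 + 20) = 11 is the leading zero count.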
6430 unsigned Adjust = ExponentBias + (EltSize - 1);
6431 SDValue Res;
6432 if (Op->isVPOpcode())
6433 Res = DAG.getNode(Opcode: ISD::VP_SUB, DL, VT, N1: DAG.getConstant(Val: Adjust, DL, VT), N2: Exp,
6434 N3: Mask, N4: VL);
6435 else
6436 Res = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: DAG.getConstant(Val: Adjust, DL, VT), N2: Exp);
6437
6438 // For a zero input, the result above equals Adjust, which is greater than
6439 // EltSize. Hence, we can do min(Res, EltSize) for CTLZ.
6440 if (Op.getOpcode() == ISD::CTLZ)
6441 Res = DAG.getNode(Opcode: ISD::UMIN, DL, VT, N1: Res, N2: DAG.getConstant(Val: EltSize, DL, VT));
6442 else if (Op.getOpcode() == ISD::VP_CTLZ)
6443 Res = DAG.getNode(Opcode: ISD::VP_UMIN, DL, VT, N1: Res,
6444 N2: DAG.getConstant(Val: EltSize, DL, VT), N3: Mask, N4: VL);
6445 return Res;
6446}
6447
6448SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,
6449 SelectionDAG &DAG) const {
6450 SDLoc DL(Op);
6451 MVT XLenVT = Subtarget.getXLenVT();
6452 SDValue Source = Op->getOperand(Num: 0);
6453 MVT SrcVT = Source.getSimpleValueType();
6454 SDValue Mask = Op->getOperand(Num: 1);
6455 SDValue EVL = Op->getOperand(Num: 2);
6456
6457 if (SrcVT.isFixedLengthVector()) {
6458 MVT ContainerVT = getContainerForFixedLengthVector(VT: SrcVT);
6459 Source = convertToScalableVector(VT: ContainerVT, V: Source, DAG, Subtarget);
6460 Mask = convertToScalableVector(VT: getMaskTypeFor(VecVT: ContainerVT), V: Mask, DAG,
6461 Subtarget);
6462 SrcVT = ContainerVT;
6463 }
6464
6465 // Convert to boolean vector.
6466 if (SrcVT.getScalarType() != MVT::i1) {
6467 SDValue AllZero = DAG.getConstant(Val: 0, DL, VT: SrcVT);
6468 SrcVT = MVT::getVectorVT(VT: MVT::i1, EC: SrcVT.getVectorElementCount());
6469 Source = DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: SrcVT,
6470 Ops: {Source, AllZero, DAG.getCondCode(Cond: ISD::SETNE),
6471 DAG.getUNDEF(VT: SrcVT), Mask, EVL});
6472 }
6473
6474 SDValue Res = DAG.getNode(Opcode: RISCVISD::VFIRST_VL, DL, VT: XLenVT, N1: Source, N2: Mask, N3: EVL);
6475 if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
6476 // In this case, we can interpret poison as -1, so there is nothing more to do.
6477 return Res;
6478
6479 // Convert -1 to VL.
6480 SDValue SetCC =
6481 DAG.getSetCC(DL, VT: XLenVT, LHS: Res, RHS: DAG.getConstant(Val: 0, DL, VT: XLenVT), Cond: ISD::SETLT);
6482 Res = DAG.getSelect(DL, VT: XLenVT, Cond: SetCC, LHS: EVL, RHS: Res);
6483 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: Op.getValueType(), Operand: Res);
6484}
6485
6486// While RVV has alignment restrictions, we should always be able to load as a
6487// legal equivalently-sized byte-typed vector instead. This method is
6488// responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
6489// the load is already correctly aligned, it returns SDValue().
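// For example, an underaligned load of nxv2i32 becomes a load of nxv8i8
// followed by a bitcast back to nxv2i32.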
6490SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
6491 SelectionDAG &DAG) const {
6492 auto *Load = cast<LoadSDNode>(Val&: Op);
6493 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
6494
6495 if (allowsMemoryAccessForAlignment(Context&: *DAG.getContext(), DL: DAG.getDataLayout(),
6496 VT: Load->getMemoryVT(),
6497 MMO: *Load->getMemOperand()))
6498 return SDValue();
6499
6500 SDLoc DL(Op);
6501 MVT VT = Op.getSimpleValueType();
6502 unsigned EltSizeBits = VT.getScalarSizeInBits();
6503 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
6504 "Unexpected unaligned RVV load type");
6505 MVT NewVT =
6506 MVT::getVectorVT(VT: MVT::i8, EC: VT.getVectorElementCount() * (EltSizeBits / 8));
6507 assert(NewVT.isValid() &&
6508 "Expecting equally-sized RVV vector types to be legal");
6509 SDValue L = DAG.getLoad(VT: NewVT, dl: DL, Chain: Load->getChain(), Ptr: Load->getBasePtr(),
6510 PtrInfo: Load->getPointerInfo(), Alignment: Load->getBaseAlign(),
6511 MMOFlags: Load->getMemOperand()->getFlags());
6512 return DAG.getMergeValues(Ops: {DAG.getBitcast(VT, V: L), L.getValue(R: 1)}, dl: DL);
6513}
6514
6515// While RVV has alignment restrictions, we should always be able to store as a
6516// legal equivalently-sized byte-typed vector instead. This method is
6517// responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
6518// returns SDValue() if the store is already correctly aligned.
6519SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
6520 SelectionDAG &DAG) const {
6521 auto *Store = cast<StoreSDNode>(Val&: Op);
6522 assert(Store && Store->getValue().getValueType().isVector() &&
6523 "Expected vector store");
6524
6525 if (allowsMemoryAccessForAlignment(Context&: *DAG.getContext(), DL: DAG.getDataLayout(),
6526 VT: Store->getMemoryVT(),
6527 MMO: *Store->getMemOperand()))
6528 return SDValue();
6529
6530 SDLoc DL(Op);
6531 SDValue StoredVal = Store->getValue();
6532 MVT VT = StoredVal.getSimpleValueType();
6533 unsigned EltSizeBits = VT.getScalarSizeInBits();
6534 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
6535 "Unexpected unaligned RVV store type");
6536 MVT NewVT =
6537 MVT::getVectorVT(VT: MVT::i8, EC: VT.getVectorElementCount() * (EltSizeBits / 8));
6538 assert(NewVT.isValid() &&
6539 "Expecting equally-sized RVV vector types to be legal");
6540 StoredVal = DAG.getBitcast(VT: NewVT, V: StoredVal);
6541 return DAG.getStore(Chain: Store->getChain(), dl: DL, Val: StoredVal, Ptr: Store->getBasePtr(),
6542 PtrInfo: Store->getPointerInfo(), Alignment: Store->getBaseAlign(),
6543 MMOFlags: Store->getMemOperand()->getFlags());
6544}
6545
6546static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
6547 const RISCVSubtarget &Subtarget) {
6548 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
6549
6550 int64_t Imm = cast<ConstantSDNode>(Val&: Op)->getSExtValue();
6551
6552 // All simm32 constants should be handled by isel.
6553 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2, making
6554 // this check redundant, but small immediates are common, so checking for them
6555 // here should improve compile time.
6556 if (isInt<32>(x: Imm))
6557 return Op;
6558
6559 // We only need to cost the immediate if constant pool lowering is enabled.
6560 if (!Subtarget.useConstantPoolForLargeInts())
6561 return Op;
6562
6563 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val: Imm, STI: Subtarget);
6564 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
6565 return Op;
6566
6567 // Optimizations below are disabled for opt size. If we're optimizing for
6568 // size, use a constant pool.
6569 if (DAG.shouldOptForSize())
6570 return SDValue();
6571
6572 // Special case: see if we can build the constant as (ADD (SLLI X, C), X);
6573 // do that if it will avoid a constant pool.
6574 // It will require an extra temporary register though.
6575 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where the
6576 // low and high 32 bits are the same and bits 31 and 63 are set.
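 // For example, 0x1234567812345678 can be built as X = 0x12345678 followed by
 // (ADD (SLLI X, 32), X), avoiding a constant pool load.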
6577 unsigned ShiftAmt, AddOpc;
6578 RISCVMatInt::InstSeq SeqLo =
6579 RISCVMatInt::generateTwoRegInstSeq(Val: Imm, STI: Subtarget, ShiftAmt, AddOpc);
6580 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
6581 return Op;
6582
6583 return SDValue();
6584}
6585
6586SDValue RISCVTargetLowering::lowerConstantFP(SDValue Op,
6587 SelectionDAG &DAG) const {
6588 MVT VT = Op.getSimpleValueType();
6589 const APFloat &Imm = cast<ConstantFPSDNode>(Val&: Op)->getValueAPF();
6590
6591 // Can this constant be selected by a Zfa FLI instruction?
6592 bool Negate = false;
6593 int Index = getLegalZfaFPImm(Imm, VT);
6594
6595 // If the constant is negative, try negating.
6596 if (Index < 0 && Imm.isNegative()) {
6597 Index = getLegalZfaFPImm(Imm: -Imm, VT);
6598 Negate = true;
6599 }
6600
6601 // If we couldn't find a FLI lowering, fall back to generic code.
6602 if (Index < 0)
6603 return SDValue();
6604
6605  // Emit an FLI+FNEG. We use a custom node to hide it from constant folding.
6606 SDLoc DL(Op);
6607 SDValue Const =
6608 DAG.getNode(Opcode: RISCVISD::FLI, DL, VT,
6609 Operand: DAG.getTargetConstant(Val: Index, DL, VT: Subtarget.getXLenVT()));
6610 if (!Negate)
6611 return Const;
6612
6613 return DAG.getNode(Opcode: ISD::FNEG, DL, VT, Operand: Const);
6614}
6615
6616static SDValue LowerPREFETCH(SDValue Op, const RISCVSubtarget &Subtarget,
6617 SelectionDAG &DAG) {
6618
6619 unsigned IsData = Op.getConstantOperandVal(i: 4);
6620
6621  // The MIPS P8700 only supports data prefetches for now; drop the rest.
6622 if (Subtarget.hasVendorXMIPSCBOP() && !IsData)
6623 return Op.getOperand(i: 0);
6624 return Op;
6625}
6626
6627static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
6628 const RISCVSubtarget &Subtarget) {
6629 SDLoc dl(Op);
6630 AtomicOrdering FenceOrdering =
6631 static_cast<AtomicOrdering>(Op.getConstantOperandVal(i: 1));
6632 SyncScope::ID FenceSSID =
6633 static_cast<SyncScope::ID>(Op.getConstantOperandVal(i: 2));
6634
6635 if (Subtarget.hasStdExtZtso()) {
6636 // The only fence that needs an instruction is a sequentially-consistent
6637 // cross-thread fence.
6638 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
6639 FenceSSID == SyncScope::System)
6640 return Op;
6641
6642 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6643 return DAG.getNode(Opcode: ISD::MEMBARRIER, DL: dl, VT: MVT::Other, Operand: Op.getOperand(i: 0));
6644 }
6645
6646 // singlethread fences only synchronize with signal handlers on the same
6647 // thread and thus only need to preserve instruction order, not actually
6648 // enforce memory ordering.
6649 if (FenceSSID == SyncScope::SingleThread)
6650 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6651 return DAG.getNode(Opcode: ISD::MEMBARRIER, DL: dl, VT: MVT::Other, Operand: Op.getOperand(i: 0));
6652
6653 return Op;
6654}
6655
6656SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
6657 SelectionDAG &DAG) const {
6658 SDLoc DL(Op);
6659 MVT VT = Op.getSimpleValueType();
6660 MVT XLenVT = Subtarget.getXLenVT();
6661 unsigned Check = Op.getConstantOperandVal(i: 1);
6662 unsigned TDCMask = 0;
6663 if (Check & fcSNan)
6664 TDCMask |= RISCV::FPMASK_Signaling_NaN;
6665 if (Check & fcQNan)
6666 TDCMask |= RISCV::FPMASK_Quiet_NaN;
6667 if (Check & fcPosInf)
6668 TDCMask |= RISCV::FPMASK_Positive_Infinity;
6669 if (Check & fcNegInf)
6670 TDCMask |= RISCV::FPMASK_Negative_Infinity;
6671 if (Check & fcPosNormal)
6672 TDCMask |= RISCV::FPMASK_Positive_Normal;
6673 if (Check & fcNegNormal)
6674 TDCMask |= RISCV::FPMASK_Negative_Normal;
6675 if (Check & fcPosSubnormal)
6676 TDCMask |= RISCV::FPMASK_Positive_Subnormal;
6677 if (Check & fcNegSubnormal)
6678 TDCMask |= RISCV::FPMASK_Negative_Subnormal;
6679 if (Check & fcPosZero)
6680 TDCMask |= RISCV::FPMASK_Positive_Zero;
6681 if (Check & fcNegZero)
6682 TDCMask |= RISCV::FPMASK_Negative_Zero;
6683
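  // For example, a query for fcNan sets both the quiet and signaling NaN bits,
  // and the lowering below tests (fclass(x) & mask) != 0; a single-class query
  // such as fcPosInf produces a one-bit mask, which the vector paths turn into
  // a direct equality compare.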
6684 bool IsOneBitMask = isPowerOf2_32(Value: TDCMask);
6685
6686 SDValue TDCMaskV = DAG.getConstant(Val: TDCMask, DL, VT: XLenVT);
6687
6688 if (VT.isVector()) {
6689 SDValue Op0 = Op.getOperand(i: 0);
6690 MVT VT0 = Op.getOperand(i: 0).getSimpleValueType();
6691
6692 if (VT.isScalableVector()) {
6693 MVT DstVT = VT0.changeVectorElementTypeToInteger();
6694 auto [Mask, VL] = getDefaultScalableVLOps(VecVT: VT0, DL, DAG, Subtarget);
6695 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6696 Mask = Op.getOperand(i: 2);
6697 VL = Op.getOperand(i: 3);
6698 }
6699 SDValue FPCLASS = DAG.getNode(Opcode: RISCVISD::FCLASS_VL, DL, VT: DstVT, N1: Op0, N2: Mask,
6700 N3: VL, Flags: Op->getFlags());
6701 if (IsOneBitMask)
6702 return DAG.getSetCC(DL, VT, LHS: FPCLASS,
6703 RHS: DAG.getConstant(Val: TDCMask, DL, VT: DstVT),
6704 Cond: ISD::CondCode::SETEQ);
6705 SDValue AND = DAG.getNode(Opcode: ISD::AND, DL, VT: DstVT, N1: FPCLASS,
6706 N2: DAG.getConstant(Val: TDCMask, DL, VT: DstVT));
6707 return DAG.getSetCC(DL, VT, LHS: AND, RHS: DAG.getConstant(Val: 0, DL, VT: DstVT),
6708 Cond: ISD::SETNE);
6709 }
6710
6711 MVT ContainerVT0 = getContainerForFixedLengthVector(VT: VT0);
6712 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6713 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
6714 auto [Mask, VL] = getDefaultVLOps(VecVT: VT0, ContainerVT: ContainerVT0, DL, DAG, Subtarget);
6715 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6716 Mask = Op.getOperand(i: 2);
6717 MVT MaskContainerVT =
6718 getContainerForFixedLengthVector(VT: Mask.getSimpleValueType());
6719 Mask = convertToScalableVector(VT: MaskContainerVT, V: Mask, DAG, Subtarget);
6720 VL = Op.getOperand(i: 3);
6721 }
6722 Op0 = convertToScalableVector(VT: ContainerVT0, V: Op0, DAG, Subtarget);
6723
6724 SDValue FPCLASS = DAG.getNode(Opcode: RISCVISD::FCLASS_VL, DL, VT: ContainerDstVT, N1: Op0,
6725 N2: Mask, N3: VL, Flags: Op->getFlags());
6726
6727 TDCMaskV = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerDstVT,
6728 N1: DAG.getUNDEF(VT: ContainerDstVT), N2: TDCMaskV, N3: VL);
6729 if (IsOneBitMask) {
6730 SDValue VMSEQ =
6731 DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: ContainerVT,
6732 Ops: {FPCLASS, TDCMaskV, DAG.getCondCode(Cond: ISD::SETEQ),
6733 DAG.getUNDEF(VT: ContainerVT), Mask, VL});
6734 return convertFromScalableVector(VT, V: VMSEQ, DAG, Subtarget);
6735 }
6736 SDValue AND = DAG.getNode(Opcode: RISCVISD::AND_VL, DL, VT: ContainerDstVT, N1: FPCLASS,
6737 N2: TDCMaskV, N3: DAG.getUNDEF(VT: ContainerDstVT), N4: Mask, N5: VL);
6738
6739 SDValue SplatZero = DAG.getConstant(Val: 0, DL, VT: XLenVT);
6740 SplatZero = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerDstVT,
6741 N1: DAG.getUNDEF(VT: ContainerDstVT), N2: SplatZero, N3: VL);
6742
6743 SDValue VMSNE = DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: ContainerVT,
6744 Ops: {AND, SplatZero, DAG.getCondCode(Cond: ISD::SETNE),
6745 DAG.getUNDEF(VT: ContainerVT), Mask, VL});
6746 return convertFromScalableVector(VT, V: VMSNE, DAG, Subtarget);
6747 }
6748
6749 SDValue FCLASS = DAG.getNode(Opcode: RISCVISD::FCLASS, DL, VT: XLenVT, Operand: Op.getOperand(i: 0));
6750 SDValue AND = DAG.getNode(Opcode: ISD::AND, DL, VT: XLenVT, N1: FCLASS, N2: TDCMaskV);
6751 SDValue Res = DAG.getSetCC(DL, VT: XLenVT, LHS: AND, RHS: DAG.getConstant(Val: 0, DL, VT: XLenVT),
6752 Cond: ISD::CondCode::SETNE);
6753 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Res);
6754}
6755
6756// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
6757// operations propagate NaNs.
6758static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
6759 const RISCVSubtarget &Subtarget) {
6760 SDLoc DL(Op);
6761 MVT VT = Op.getSimpleValueType();
6762
6763 SDValue X = Op.getOperand(i: 0);
6764 SDValue Y = Op.getOperand(i: 1);
6765
6766 if (!VT.isVector()) {
6767 MVT XLenVT = Subtarget.getXLenVT();
6768
6769    // If X is a NaN, replace Y with X. If Y is a NaN, replace X with Y. This
6770    // ensures that when one input is a NaN, the other will also be a NaN,
6771    // allowing the NaN to propagate. If both inputs are NaN, this swaps the
6772    // inputs, which is harmless.
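    // For example, fmaximum(NaN, 3.0): X != X, so NewY becomes NaN and we emit
    // fmax(NaN, NaN), which returns NaN; the bare fmax would have returned 3.0.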
6773
6774 SDValue NewY = Y;
6775 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Op: X)) {
6776 SDValue XIsNonNan = DAG.getSetCC(DL, VT: XLenVT, LHS: X, RHS: X, Cond: ISD::SETOEQ);
6777 NewY = DAG.getSelect(DL, VT, Cond: XIsNonNan, LHS: Y, RHS: X);
6778 }
6779
6780 SDValue NewX = X;
6781 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Op: Y)) {
6782 SDValue YIsNonNan = DAG.getSetCC(DL, VT: XLenVT, LHS: Y, RHS: Y, Cond: ISD::SETOEQ);
6783 NewX = DAG.getSelect(DL, VT, Cond: YIsNonNan, LHS: X, RHS: Y);
6784 }
6785
6786 unsigned Opc =
6787 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
6788 return DAG.getNode(Opcode: Opc, DL, VT, N1: NewX, N2: NewY);
6789 }
6790
6791  // Check for known-non-NaN inputs before converting fixed vectors to scalable.
6792 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Op: X);
6793 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Op: Y);
6794
6795 MVT ContainerVT = VT;
6796 if (VT.isFixedLengthVector()) {
6797 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
6798 X = convertToScalableVector(VT: ContainerVT, V: X, DAG, Subtarget);
6799 Y = convertToScalableVector(VT: ContainerVT, V: Y, DAG, Subtarget);
6800 }
6801
6802 SDValue Mask, VL;
6803 if (Op->isVPOpcode()) {
6804 Mask = Op.getOperand(i: 2);
6805 if (VT.isFixedLengthVector())
6806 Mask = convertToScalableVector(VT: getMaskTypeFor(VecVT: ContainerVT), V: Mask, DAG,
6807 Subtarget);
6808 VL = Op.getOperand(i: 3);
6809 } else {
6810 std::tie(args&: Mask, args&: VL) = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
6811 }
6812
6813 SDValue NewY = Y;
6814 if (!XIsNeverNan) {
6815 SDValue XIsNonNan = DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: Mask.getValueType(),
6816 Ops: {X, X, DAG.getCondCode(Cond: ISD::SETOEQ),
6817 DAG.getUNDEF(VT: ContainerVT), Mask, VL});
6818 NewY = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: ContainerVT, N1: XIsNonNan, N2: Y, N3: X,
6819 N4: DAG.getUNDEF(VT: ContainerVT), N5: VL);
6820 }
6821
6822 SDValue NewX = X;
6823 if (!YIsNeverNan) {
6824 SDValue YIsNonNan = DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: Mask.getValueType(),
6825 Ops: {Y, Y, DAG.getCondCode(Cond: ISD::SETOEQ),
6826 DAG.getUNDEF(VT: ContainerVT), Mask, VL});
6827 NewX = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: ContainerVT, N1: YIsNonNan, N2: X, N3: Y,
6828 N4: DAG.getUNDEF(VT: ContainerVT), N5: VL);
6829 }
6830
6831 unsigned Opc =
6832 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
6833 ? RISCVISD::VFMAX_VL
6834 : RISCVISD::VFMIN_VL;
6835 SDValue Res = DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: NewX, N2: NewY,
6836 N3: DAG.getUNDEF(VT: ContainerVT), N4: Mask, N5: VL);
6837 if (VT.isFixedLengthVector())
6838 Res = convertFromScalableVector(VT, V: Res, DAG, Subtarget);
6839 return Res;
6840}
6841
6842static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG,
6843 const RISCVSubtarget &Subtarget) {
6844 bool IsFABS = Op.getOpcode() == ISD::FABS;
6845 assert((IsFABS || Op.getOpcode() == ISD::FNEG) &&
6846 "Wrong opcode for lowering FABS or FNEG.");
6847
6848 MVT XLenVT = Subtarget.getXLenVT();
6849 MVT VT = Op.getSimpleValueType();
6850 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
6851
6852 SDLoc DL(Op);
6853 SDValue Fmv =
6854 DAG.getNode(Opcode: RISCVISD::FMV_X_ANYEXTH, DL, VT: XLenVT, Operand: Op.getOperand(i: 0));
6855
6856 APInt Mask = IsFABS ? APInt::getSignedMaxValue(numBits: 16) : APInt::getSignMask(BitWidth: 16);
6857 Mask = Mask.sext(width: Subtarget.getXLen());
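  // On RV64 this gives 0x0000000000007fff for FABS (clear bit 15, the f16/bf16
  // sign bit) and 0xffffffffffff8000 for FNEG (flip bit 15); any upper bits we
  // disturb are dropped by FMV_H_X below.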
6858
6859 unsigned LogicOpc = IsFABS ? ISD::AND : ISD::XOR;
6860 SDValue Logic =
6861 DAG.getNode(Opcode: LogicOpc, DL, VT: XLenVT, N1: Fmv, N2: DAG.getConstant(Val: Mask, DL, VT: XLenVT));
6862 return DAG.getNode(Opcode: RISCVISD::FMV_H_X, DL, VT, Operand: Logic);
6863}
6864
6865static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG,
6866 const RISCVSubtarget &Subtarget) {
6867 assert(Op.getOpcode() == ISD::FCOPYSIGN && "Unexpected opcode");
6868
6869 MVT XLenVT = Subtarget.getXLenVT();
6870 MVT VT = Op.getSimpleValueType();
6871 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
6872
6873 SDValue Mag = Op.getOperand(i: 0);
6874 SDValue Sign = Op.getOperand(i: 1);
6875
6876 SDLoc DL(Op);
6877
6878 // Get sign bit into an integer value.
6879 unsigned SignSize = Sign.getValueSizeInBits();
6880 SDValue SignAsInt = [&]() {
6881 if (SignSize == Subtarget.getXLen())
6882 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: XLenVT, Operand: Sign);
6883 switch (SignSize) {
6884 case 16:
6885 return DAG.getNode(Opcode: RISCVISD::FMV_X_ANYEXTH, DL, VT: XLenVT, Operand: Sign);
6886 case 32:
6887 return DAG.getNode(Opcode: RISCVISD::FMV_X_ANYEXTW_RV64, DL, VT: XLenVT, Operand: Sign);
6888 case 64: {
6889 assert(XLenVT == MVT::i32 && "Unexpected type");
6890 // Copy the upper word to integer.
6891 SignSize = 32;
6892 return DAG.getNode(Opcode: RISCVISD::SplitF64, DL, ResultTys: {MVT::i32, MVT::i32}, Ops: Sign)
6893 .getValue(R: 1);
6894 }
6895 default:
6896 llvm_unreachable("Unexpected sign size");
6897 }
6898 }();
6899
6900  // Get the sign bit into the right position for MagAsInt.
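  // For example, with an f32 Sign and an f16 Mag, SignSize is 32, so we shift
  // right by 16 to move the sign from bit 31 down to bit 15.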
6901 if (int ShiftAmount = (int)SignSize - (int)Mag.getValueSizeInBits())
6902 SignAsInt = DAG.getNode(Opcode: ShiftAmount > 0 ? ISD::SRL : ISD::SHL, DL, VT: XLenVT,
6903 N1: SignAsInt,
6904 N2: DAG.getConstant(Val: std::abs(x: ShiftAmount), DL, VT: XLenVT));
6905
6906 // Mask the sign bit and any bits above it. The extra bits will be dropped
6907 // when we convert back to FP.
6908 SDValue SignMask = DAG.getConstant(
6909 Val: APInt::getSignMask(BitWidth: 16).sext(width: Subtarget.getXLen()), DL, VT: XLenVT);
6910 SDValue SignBit = DAG.getNode(Opcode: ISD::AND, DL, VT: XLenVT, N1: SignAsInt, N2: SignMask);
6911
6912 // Transform Mag value to integer, and clear the sign bit.
6913 SDValue MagAsInt = DAG.getNode(Opcode: RISCVISD::FMV_X_ANYEXTH, DL, VT: XLenVT, Operand: Mag);
6914 SDValue ClearSignMask = DAG.getConstant(
6915 Val: APInt::getSignedMaxValue(numBits: 16).sext(width: Subtarget.getXLen()), DL, VT: XLenVT);
6916 SDValue ClearedSign =
6917 DAG.getNode(Opcode: ISD::AND, DL, VT: XLenVT, N1: MagAsInt, N2: ClearSignMask);
6918
6919 SDValue CopiedSign = DAG.getNode(Opcode: ISD::OR, DL, VT: XLenVT, N1: ClearedSign, N2: SignBit,
6920 Flags: SDNodeFlags::Disjoint);
6921
6922 return DAG.getNode(Opcode: RISCVISD::FMV_H_X, DL, VT, Operand: CopiedSign);
6923}
6924
6925/// Get the RISC-V target-specific VL opcode for a given SDNode.
6926static unsigned getRISCVVLOp(SDValue Op) {
6927#define OP_CASE(NODE) \
6928 case ISD::NODE: \
6929 return RISCVISD::NODE##_VL;
6930#define VP_CASE(NODE) \
6931 case ISD::VP_##NODE: \
6932 return RISCVISD::NODE##_VL;
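// For example, OP_CASE(ADD) expands to "case ISD::ADD: return RISCVISD::ADD_VL;"
// and VP_CASE(ADD) to "case ISD::VP_ADD: return RISCVISD::ADD_VL;".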
6933 // clang-format off
6934 switch (Op.getOpcode()) {
6935 default:
6936    llvm_unreachable("no RISC-V-specific VL op for this SDNode");
6937 OP_CASE(ADD)
6938 OP_CASE(SUB)
6939 OP_CASE(MUL)
6940 OP_CASE(MULHS)
6941 OP_CASE(MULHU)
6942 OP_CASE(SDIV)
6943 OP_CASE(SREM)
6944 OP_CASE(UDIV)
6945 OP_CASE(UREM)
6946 OP_CASE(SHL)
6947 OP_CASE(SRA)
6948 OP_CASE(SRL)
6949 OP_CASE(ROTL)
6950 OP_CASE(ROTR)
6951 OP_CASE(BSWAP)
6952 OP_CASE(CTTZ)
6953 OP_CASE(CTLZ)
6954 OP_CASE(CTPOP)
6955 OP_CASE(BITREVERSE)
6956 OP_CASE(SADDSAT)
6957 OP_CASE(UADDSAT)
6958 OP_CASE(SSUBSAT)
6959 OP_CASE(USUBSAT)
6960 OP_CASE(AVGFLOORS)
6961 OP_CASE(AVGFLOORU)
6962 OP_CASE(AVGCEILS)
6963 OP_CASE(AVGCEILU)
6964 OP_CASE(FADD)
6965 OP_CASE(FSUB)
6966 OP_CASE(FMUL)
6967 OP_CASE(FDIV)
6968 OP_CASE(FNEG)
6969 OP_CASE(FABS)
6970 OP_CASE(FSQRT)
6971 OP_CASE(SMIN)
6972 OP_CASE(SMAX)
6973 OP_CASE(UMIN)
6974 OP_CASE(UMAX)
6975 OP_CASE(STRICT_FADD)
6976 OP_CASE(STRICT_FSUB)
6977 OP_CASE(STRICT_FMUL)
6978 OP_CASE(STRICT_FDIV)
6979 OP_CASE(STRICT_FSQRT)
6980 VP_CASE(ADD) // VP_ADD
6981 VP_CASE(SUB) // VP_SUB
6982 VP_CASE(MUL) // VP_MUL
6983 VP_CASE(SDIV) // VP_SDIV
6984 VP_CASE(SREM) // VP_SREM
6985 VP_CASE(UDIV) // VP_UDIV
6986 VP_CASE(UREM) // VP_UREM
6987 VP_CASE(SHL) // VP_SHL
6988 VP_CASE(FADD) // VP_FADD
6989 VP_CASE(FSUB) // VP_FSUB
6990 VP_CASE(FMUL) // VP_FMUL
6991 VP_CASE(FDIV) // VP_FDIV
6992 VP_CASE(FNEG) // VP_FNEG
6993 VP_CASE(FABS) // VP_FABS
6994 VP_CASE(SMIN) // VP_SMIN
6995 VP_CASE(SMAX) // VP_SMAX
6996 VP_CASE(UMIN) // VP_UMIN
6997 VP_CASE(UMAX) // VP_UMAX
6998 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
6999 VP_CASE(SETCC) // VP_SETCC
7000 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
7001 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
7002 VP_CASE(BITREVERSE) // VP_BITREVERSE
7003 VP_CASE(SADDSAT) // VP_SADDSAT
7004 VP_CASE(UADDSAT) // VP_UADDSAT
7005 VP_CASE(SSUBSAT) // VP_SSUBSAT
7006 VP_CASE(USUBSAT) // VP_USUBSAT
7007 VP_CASE(BSWAP) // VP_BSWAP
7008 VP_CASE(CTLZ) // VP_CTLZ
7009 VP_CASE(CTTZ) // VP_CTTZ
7010 VP_CASE(CTPOP) // VP_CTPOP
7011 case ISD::CTLZ_ZERO_UNDEF:
7012 case ISD::VP_CTLZ_ZERO_UNDEF:
7013 return RISCVISD::CTLZ_VL;
7014 case ISD::CTTZ_ZERO_UNDEF:
7015 case ISD::VP_CTTZ_ZERO_UNDEF:
7016 return RISCVISD::CTTZ_VL;
7017 case ISD::FMA:
7018 case ISD::VP_FMA:
7019 return RISCVISD::VFMADD_VL;
7020 case ISD::STRICT_FMA:
7021 return RISCVISD::STRICT_VFMADD_VL;
7022 case ISD::AND:
7023 case ISD::VP_AND:
7024 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7025 return RISCVISD::VMAND_VL;
7026 return RISCVISD::AND_VL;
7027 case ISD::OR:
7028 case ISD::VP_OR:
7029 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7030 return RISCVISD::VMOR_VL;
7031 return RISCVISD::OR_VL;
7032 case ISD::XOR:
7033 case ISD::VP_XOR:
7034 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7035 return RISCVISD::VMXOR_VL;
7036 return RISCVISD::XOR_VL;
7037 case ISD::VP_SELECT:
7038 case ISD::VP_MERGE:
7039 return RISCVISD::VMERGE_VL;
7040 case ISD::VP_SRA:
7041 return RISCVISD::SRA_VL;
7042 case ISD::VP_SRL:
7043 return RISCVISD::SRL_VL;
7044 case ISD::VP_SQRT:
7045 return RISCVISD::FSQRT_VL;
7046 case ISD::VP_SIGN_EXTEND:
7047 return RISCVISD::VSEXT_VL;
7048 case ISD::VP_ZERO_EXTEND:
7049 return RISCVISD::VZEXT_VL;
7050 case ISD::VP_FP_TO_SINT:
7051 return RISCVISD::VFCVT_RTZ_X_F_VL;
7052 case ISD::VP_FP_TO_UINT:
7053 return RISCVISD::VFCVT_RTZ_XU_F_VL;
7054 case ISD::FMINNUM:
7055 case ISD::FMINIMUMNUM:
7056 case ISD::VP_FMINNUM:
7057 return RISCVISD::VFMIN_VL;
7058 case ISD::FMAXNUM:
7059 case ISD::FMAXIMUMNUM:
7060 case ISD::VP_FMAXNUM:
7061 return RISCVISD::VFMAX_VL;
7062 case ISD::LRINT:
7063 case ISD::VP_LRINT:
7064 case ISD::LLRINT:
7065 case ISD::VP_LLRINT:
7066 return RISCVISD::VFCVT_RM_X_F_VL;
7067 }
7068 // clang-format on
7069#undef OP_CASE
7070#undef VP_CASE
7071}
7072
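// nxv32f16 and nxv32bf16 already occupy a full LMUL=8 register group, so when
// their arithmetic must be promoted to f32 (no native f16/bf16 vector
// arithmetic available), the promoted type would need an illegal LMUL=16;
// such ops are split in half first.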
7073static bool isPromotedOpNeedingSplit(SDValue Op,
7074 const RISCVSubtarget &Subtarget) {
7075 return (Op.getValueType() == MVT::nxv32f16 &&
7076 (Subtarget.hasVInstructionsF16Minimal() &&
7077 !Subtarget.hasVInstructionsF16())) ||
7078 Op.getValueType() == MVT::nxv32bf16;
7079}
7080
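// Split a vector operation into operations on the low and high halves of its
// operands (non-vector operands are used by both halves), then concatenate the
// results back together.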
7081static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
7082 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT: Op.getValueType());
7083 SDLoc DL(Op);
7084
7085 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7086 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7087
7088 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7089 if (!Op.getOperand(i: j).getValueType().isVector()) {
7090 LoOperands[j] = Op.getOperand(i: j);
7091 HiOperands[j] = Op.getOperand(i: j);
7092 continue;
7093 }
7094 std::tie(args&: LoOperands[j], args&: HiOperands[j]) =
7095 DAG.SplitVector(N: Op.getOperand(i: j), DL);
7096 }
7097
7098 SDValue LoRes =
7099 DAG.getNode(Opcode: Op.getOpcode(), DL, VT: LoVT, Ops: LoOperands, Flags: Op->getFlags());
7100 SDValue HiRes =
7101 DAG.getNode(Opcode: Op.getOpcode(), DL, VT: HiVT, Ops: HiOperands, Flags: Op->getFlags());
7102
7103 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: Op.getValueType(), N1: LoRes, N2: HiRes);
7104}
7105
7106static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) {
7107 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
7108 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT: Op.getValueType());
7109 SDLoc DL(Op);
7110
7111 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7112 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7113
7114 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7115 if (ISD::getVPExplicitVectorLengthIdx(Opcode: Op.getOpcode()) == j) {
7116 std::tie(args&: LoOperands[j], args&: HiOperands[j]) =
7117 DAG.SplitEVL(N: Op.getOperand(i: j), VecVT: Op.getValueType(), DL);
7118 continue;
7119 }
7120 if (!Op.getOperand(i: j).getValueType().isVector()) {
7121 LoOperands[j] = Op.getOperand(i: j);
7122 HiOperands[j] = Op.getOperand(i: j);
7123 continue;
7124 }
7125 std::tie(args&: LoOperands[j], args&: HiOperands[j]) =
7126 DAG.SplitVector(N: Op.getOperand(i: j), DL);
7127 }
7128
7129 SDValue LoRes =
7130 DAG.getNode(Opcode: Op.getOpcode(), DL, VT: LoVT, Ops: LoOperands, Flags: Op->getFlags());
7131 SDValue HiRes =
7132 DAG.getNode(Opcode: Op.getOpcode(), DL, VT: HiVT, Ops: HiOperands, Flags: Op->getFlags());
7133
7134 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: Op.getValueType(), N1: LoRes, N2: HiRes);
7135}
7136
7137static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {
7138 SDLoc DL(Op);
7139
7140 auto [Lo, Hi] = DAG.SplitVector(N: Op.getOperand(i: 1), DL);
7141 auto [MaskLo, MaskHi] = DAG.SplitVector(N: Op.getOperand(i: 2), DL);
7142 auto [EVLLo, EVLHi] =
7143 DAG.SplitEVL(N: Op.getOperand(i: 3), VecVT: Op.getOperand(i: 1).getValueType(), DL);
7144
7145 SDValue ResLo =
7146 DAG.getNode(Opcode: Op.getOpcode(), DL, VT: Op.getValueType(),
7147 Ops: {Op.getOperand(i: 0), Lo, MaskLo, EVLLo}, Flags: Op->getFlags());
7148 return DAG.getNode(Opcode: Op.getOpcode(), DL, VT: Op.getValueType(),
7149 Ops: {ResLo, Hi, MaskHi, EVLHi}, Flags: Op->getFlags());
7150}
7151
7152static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) {
7153
7154 assert(Op->isStrictFPOpcode());
7155
7156 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT: Op->getValueType(ResNo: 0));
7157
7158 SDVTList LoVTs = DAG.getVTList(VT1: LoVT, VT2: Op->getValueType(ResNo: 1));
7159 SDVTList HiVTs = DAG.getVTList(VT1: HiVT, VT2: Op->getValueType(ResNo: 1));
7160
7161 SDLoc DL(Op);
7162
7163 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7164 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7165
7166 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7167 if (!Op.getOperand(i: j).getValueType().isVector()) {
7168 LoOperands[j] = Op.getOperand(i: j);
7169 HiOperands[j] = Op.getOperand(i: j);
7170 continue;
7171 }
7172 std::tie(args&: LoOperands[j], args&: HiOperands[j]) =
7173 DAG.SplitVector(N: Op.getOperand(i: j), DL);
7174 }
7175
7176 SDValue LoRes =
7177 DAG.getNode(Opcode: Op.getOpcode(), DL, VTList: LoVTs, Ops: LoOperands, Flags: Op->getFlags());
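  // Thread the low half's output chain into the high half's chain operand so
  // the two strict FP operations stay ordered.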
7178 HiOperands[0] = LoRes.getValue(R: 1);
7179 SDValue HiRes =
7180 DAG.getNode(Opcode: Op.getOpcode(), DL, VTList: HiVTs, Ops: HiOperands, Flags: Op->getFlags());
7181
7182 SDValue V = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: Op->getValueType(ResNo: 0),
7183 N1: LoRes.getValue(R: 0), N2: HiRes.getValue(R: 0));
7184 return DAG.getMergeValues(Ops: {V, HiRes.getValue(R: 1)}, dl: DL);
7185}
7186
7187SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
7188 SelectionDAG &DAG) const {
7189 switch (Op.getOpcode()) {
7190 default:
7191 report_fatal_error(reason: "unimplemented operand");
7192 case ISD::PREFETCH:
7193 return LowerPREFETCH(Op, Subtarget, DAG);
7194 case ISD::ATOMIC_FENCE:
7195 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
7196 case ISD::GlobalAddress:
7197 return lowerGlobalAddress(Op, DAG);
7198 case ISD::BlockAddress:
7199 return lowerBlockAddress(Op, DAG);
7200 case ISD::ConstantPool:
7201 return lowerConstantPool(Op, DAG);
7202 case ISD::JumpTable:
7203 return lowerJumpTable(Op, DAG);
7204 case ISD::GlobalTLSAddress:
7205 return lowerGlobalTLSAddress(Op, DAG);
7206 case ISD::Constant:
7207 return lowerConstant(Op, DAG, Subtarget);
7208 case ISD::ConstantFP:
7209 return lowerConstantFP(Op, DAG);
7210 case ISD::SELECT:
7211 return lowerSELECT(Op, DAG);
7212 case ISD::BRCOND:
7213 return lowerBRCOND(Op, DAG);
7214 case ISD::VASTART:
7215 return lowerVASTART(Op, DAG);
7216 case ISD::FRAMEADDR:
7217 return lowerFRAMEADDR(Op, DAG);
7218 case ISD::RETURNADDR:
7219 return lowerRETURNADDR(Op, DAG);
7220 case ISD::SHL_PARTS:
7221 return lowerShiftLeftParts(Op, DAG);
7222 case ISD::SRA_PARTS:
7223 return lowerShiftRightParts(Op, DAG, IsSRA: true);
7224 case ISD::SRL_PARTS:
7225 return lowerShiftRightParts(Op, DAG, IsSRA: false);
7226 case ISD::ROTL:
7227 case ISD::ROTR:
7228 if (Op.getValueType().isFixedLengthVector()) {
7229 assert(Subtarget.hasStdExtZvkb());
7230 return lowerToScalableOp(Op, DAG);
7231 }
7232 assert(Subtarget.hasVendorXTHeadBb() &&
7233 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
7234 "Unexpected custom legalization");
7235 // XTHeadBb only supports rotate by constant.
7236 if (!isa<ConstantSDNode>(Val: Op.getOperand(i: 1)))
7237 return SDValue();
7238 return Op;
7239 case ISD::BITCAST: {
7240 SDLoc DL(Op);
7241 EVT VT = Op.getValueType();
7242 SDValue Op0 = Op.getOperand(i: 0);
7243 EVT Op0VT = Op0.getValueType();
7244 MVT XLenVT = Subtarget.getXLenVT();
7245 if (Op0VT == MVT::i16 &&
7246 ((VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
7247 (VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
7248 SDValue NewOp0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: Op0);
7249 return DAG.getNode(Opcode: RISCVISD::FMV_H_X, DL, VT, Operand: NewOp0);
7250 }
7251 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
7252 Subtarget.hasStdExtFOrZfinx()) {
7253 SDValue NewOp0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op0);
7254 return DAG.getNode(Opcode: RISCVISD::FMV_W_X_RV64, DL, VT: MVT::f32, Operand: NewOp0);
7255 }
7256 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit() &&
7257 Subtarget.hasStdExtDOrZdinx()) {
7258 SDValue Lo, Hi;
7259 std::tie(args&: Lo, args&: Hi) = DAG.SplitScalar(N: Op0, DL, LoVT: MVT::i32, HiVT: MVT::i32);
7260 return DAG.getNode(Opcode: RISCVISD::BuildPairF64, DL, VT: MVT::f64, N1: Lo, N2: Hi);
7261 }
7262
7263 // Consider other scalar<->scalar casts as legal if the types are legal.
7264 // Otherwise expand them.
7265 if (!VT.isVector() && !Op0VT.isVector()) {
7266 if (isTypeLegal(VT) && isTypeLegal(VT: Op0VT))
7267 return Op;
7268 return SDValue();
7269 }
7270
7271 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
7272 "Unexpected types");
7273
7274 if (VT.isFixedLengthVector()) {
7275 // We can handle fixed length vector bitcasts with a simple replacement
7276 // in isel.
7277 if (Op0VT.isFixedLengthVector())
7278 return Op;
7279 // When bitcasting from scalar to fixed-length vector, insert the scalar
7280 // into a one-element vector of the result type, and perform a vector
7281 // bitcast.
7282 if (!Op0VT.isVector()) {
7283 EVT BVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: Op0VT, NumElements: 1);
7284 if (!isTypeLegal(VT: BVT))
7285 return SDValue();
7286 return DAG.getBitcast(
7287 VT, V: DAG.getInsertVectorElt(DL, Vec: DAG.getUNDEF(VT: BVT), Elt: Op0, Idx: 0));
7288 }
7289 return SDValue();
7290 }
7291 // Custom-legalize bitcasts from fixed-length vector types to scalar types
7292 // thus: bitcast the vector to a one-element vector type whose element type
7293 // is the same as the result type, and extract the first element.
7294 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
7295 EVT BVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT, NumElements: 1);
7296 if (!isTypeLegal(VT: BVT))
7297 return SDValue();
7298 SDValue BVec = DAG.getBitcast(VT: BVT, V: Op0);
7299 return DAG.getExtractVectorElt(DL, VT, Vec: BVec, Idx: 0);
7300 }
7301 return SDValue();
7302 }
7303 case ISD::INTRINSIC_WO_CHAIN:
7304 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
7305 case ISD::INTRINSIC_W_CHAIN:
7306 return LowerINTRINSIC_W_CHAIN(Op, DAG);
7307 case ISD::INTRINSIC_VOID:
7308 return LowerINTRINSIC_VOID(Op, DAG);
7309 case ISD::IS_FPCLASS:
7310 return LowerIS_FPCLASS(Op, DAG);
7311 case ISD::BITREVERSE: {
7312 MVT VT = Op.getSimpleValueType();
7313 if (VT.isFixedLengthVector()) {
7314 assert(Subtarget.hasStdExtZvbb());
7315 return lowerToScalableOp(Op, DAG);
7316 }
7317 SDLoc DL(Op);
7318 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
7319 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
7320 // Expand bitreverse to a bswap(rev8) followed by brev8.
7321 SDValue BSwap = DAG.getNode(Opcode: ISD::BSWAP, DL, VT, Operand: Op.getOperand(i: 0));
7322 return DAG.getNode(Opcode: RISCVISD::BREV8, DL, VT, Operand: BSwap);
7323 }
7324 case ISD::TRUNCATE:
7325 case ISD::TRUNCATE_SSAT_S:
7326 case ISD::TRUNCATE_USAT_U:
7327 // Only custom-lower vector truncates
7328 if (!Op.getSimpleValueType().isVector())
7329 return Op;
7330 return lowerVectorTruncLike(Op, DAG);
7331 case ISD::ANY_EXTEND:
7332 case ISD::ZERO_EXTEND:
7333 if (Op.getOperand(i: 0).getValueType().isVector() &&
7334 Op.getOperand(i: 0).getValueType().getVectorElementType() == MVT::i1)
7335 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ ExtTrueVal: 1);
7336 return lowerFixedLengthVectorExtendToRVV(Op, DAG, ExtendOpc: RISCVISD::VZEXT_VL);
7337 case ISD::SIGN_EXTEND:
7338 if (Op.getOperand(i: 0).getValueType().isVector() &&
7339 Op.getOperand(i: 0).getValueType().getVectorElementType() == MVT::i1)
7340 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ ExtTrueVal: -1);
7341 return lowerFixedLengthVectorExtendToRVV(Op, DAG, ExtendOpc: RISCVISD::VSEXT_VL);
7342 case ISD::SPLAT_VECTOR_PARTS:
7343 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
7344 case ISD::INSERT_VECTOR_ELT:
7345 return lowerINSERT_VECTOR_ELT(Op, DAG);
7346 case ISD::EXTRACT_VECTOR_ELT:
7347 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
7348 case ISD::SCALAR_TO_VECTOR: {
7349 MVT VT = Op.getSimpleValueType();
7350 SDLoc DL(Op);
7351 SDValue Scalar = Op.getOperand(i: 0);
7352 if (VT.getVectorElementType() == MVT::i1) {
7353 MVT WideVT = VT.changeVectorElementType(EltVT: MVT::i8);
7354 SDValue V = DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL, VT: WideVT, Operand: Scalar);
7355 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: V);
7356 }
7357 MVT ContainerVT = VT;
7358 if (VT.isFixedLengthVector())
7359 ContainerVT = getContainerForFixedLengthVector(VT);
7360 SDValue VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second;
7361
7362 SDValue V;
7363 if (VT.isFloatingPoint()) {
7364 V = DAG.getNode(Opcode: RISCVISD::VFMV_S_F_VL, DL, VT: ContainerVT,
7365 N1: DAG.getUNDEF(VT: ContainerVT), N2: Scalar, N3: VL);
7366 } else {
7367 Scalar = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getXLenVT(), Operand: Scalar);
7368 V = DAG.getNode(Opcode: RISCVISD::VMV_S_X_VL, DL, VT: ContainerVT,
7369 N1: DAG.getUNDEF(VT: ContainerVT), N2: Scalar, N3: VL);
7370 }
7371 if (VT.isFixedLengthVector())
7372 V = convertFromScalableVector(VT, V, DAG, Subtarget);
7373 return V;
7374 }
7375 case ISD::VSCALE: {
7376 MVT XLenVT = Subtarget.getXLenVT();
7377 MVT VT = Op.getSimpleValueType();
7378 SDLoc DL(Op);
7379 SDValue Res = DAG.getNode(Opcode: RISCVISD::READ_VLENB, DL, VT: XLenVT);
7380    // We define our scalable vector types for LMUL=1 to use a 64-bit known
7381    // minimum size, e.g. <vscale x 2 x i32>. VLENB is in bytes, so we
7382    // calculate vscale as VLENB / 8.
7383 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
7384 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
7385 report_fatal_error(reason: "Support for VLEN==32 is incomplete.");
7386 // We assume VLENB is a multiple of 8. We manually choose the best shift
7387 // here because SimplifyDemandedBits isn't always able to simplify it.
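    // For example, vscale * 4 becomes VLENB >> 1, vscale * 16 becomes
    // VLENB << 1, and vscale * 24 (a multiple of 8) becomes VLENB * 3.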
7388 uint64_t Val = Op.getConstantOperandVal(i: 0);
7389 if (isPowerOf2_64(Value: Val)) {
7390 uint64_t Log2 = Log2_64(Value: Val);
7391 if (Log2 < 3) {
7392 SDNodeFlags Flags;
7393 Flags.setExact(true);
7394 Res = DAG.getNode(Opcode: ISD::SRL, DL, VT: XLenVT, N1: Res,
7395 N2: DAG.getConstant(Val: 3 - Log2, DL, VT: XLenVT), Flags);
7396 } else if (Log2 > 3) {
7397 Res = DAG.getNode(Opcode: ISD::SHL, DL, VT: XLenVT, N1: Res,
7398 N2: DAG.getConstant(Val: Log2 - 3, DL, VT: XLenVT));
7399 }
7400 } else if ((Val % 8) == 0) {
7401 // If the multiplier is a multiple of 8, scale it down to avoid needing
7402 // to shift the VLENB value.
7403 Res = DAG.getNode(Opcode: ISD::MUL, DL, VT: XLenVT, N1: Res,
7404 N2: DAG.getConstant(Val: Val / 8, DL, VT: XLenVT));
7405 } else {
7406 SDNodeFlags Flags;
7407 Flags.setExact(true);
7408 SDValue VScale = DAG.getNode(Opcode: ISD::SRL, DL, VT: XLenVT, N1: Res,
7409 N2: DAG.getConstant(Val: 3, DL, VT: XLenVT), Flags);
7410 Res = DAG.getNode(Opcode: ISD::MUL, DL, VT: XLenVT, N1: VScale,
7411 N2: DAG.getConstant(Val, DL, VT: XLenVT));
7412 }
7413 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Res);
7414 }
7415 case ISD::FPOWI: {
7416 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
7417 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
7418 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
7419 Op.getOperand(i: 1).getValueType() == MVT::i32) {
7420 SDLoc DL(Op);
7421 SDValue Op0 = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: MVT::f32, Operand: Op.getOperand(i: 0));
7422 SDValue Powi =
7423 DAG.getNode(Opcode: ISD::FPOWI, DL, VT: MVT::f32, N1: Op0, N2: Op.getOperand(i: 1));
7424 return DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT: MVT::f16, N1: Powi,
7425 N2: DAG.getIntPtrConstant(Val: 0, DL, /*isTarget=*/true));
7426 }
7427 return SDValue();
7428 }
7429 case ISD::FMAXIMUM:
7430 case ISD::FMINIMUM:
7431 if (isPromotedOpNeedingSplit(Op, Subtarget))
7432 return SplitVectorOp(Op, DAG);
7433 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7434 case ISD::FP_EXTEND:
7435 case ISD::FP_ROUND:
7436 return lowerVectorFPExtendOrRoundLike(Op, DAG);
7437 case ISD::STRICT_FP_ROUND:
7438 case ISD::STRICT_FP_EXTEND:
7439 return lowerStrictFPExtendOrRoundLike(Op, DAG);
7440 case ISD::SINT_TO_FP:
7441 case ISD::UINT_TO_FP:
7442 if (Op.getValueType().isVector() &&
7443 ((Op.getValueType().getScalarType() == MVT::f16 &&
7444 (Subtarget.hasVInstructionsF16Minimal() &&
7445 !Subtarget.hasVInstructionsF16())) ||
7446 Op.getValueType().getScalarType() == MVT::bf16)) {
7447 if (isPromotedOpNeedingSplit(Op, Subtarget))
7448 return SplitVectorOp(Op, DAG);
7449 // int -> f32
7450 SDLoc DL(Op);
7451 MVT NVT =
7452 MVT::getVectorVT(VT: MVT::f32, EC: Op.getValueType().getVectorElementCount());
7453 SDValue NC = DAG.getNode(Opcode: Op.getOpcode(), DL, VT: NVT, Ops: Op->ops());
7454 // f32 -> [b]f16
7455 return DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT: Op.getValueType(), N1: NC,
7456 N2: DAG.getIntPtrConstant(Val: 0, DL, /*isTarget=*/true));
7457 }
7458 [[fallthrough]];
7459 case ISD::FP_TO_SINT:
7460 case ISD::FP_TO_UINT:
7461 if (SDValue Op1 = Op.getOperand(i: 0);
7462 Op1.getValueType().isVector() &&
7463 ((Op1.getValueType().getScalarType() == MVT::f16 &&
7464 (Subtarget.hasVInstructionsF16Minimal() &&
7465 !Subtarget.hasVInstructionsF16())) ||
7466 Op1.getValueType().getScalarType() == MVT::bf16)) {
7467 if (isPromotedOpNeedingSplit(Op: Op1, Subtarget))
7468 return SplitVectorOp(Op, DAG);
7469 // [b]f16 -> f32
7470 SDLoc DL(Op);
7471 MVT NVT = MVT::getVectorVT(VT: MVT::f32,
7472 EC: Op1.getValueType().getVectorElementCount());
7473 SDValue WidenVec = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: NVT, Operand: Op1);
7474 // f32 -> int
7475 return DAG.getNode(Opcode: Op.getOpcode(), DL, VT: Op.getValueType(), Operand: WidenVec);
7476 }
7477 [[fallthrough]];
7478 case ISD::STRICT_FP_TO_SINT:
7479 case ISD::STRICT_FP_TO_UINT:
7480 case ISD::STRICT_SINT_TO_FP:
7481 case ISD::STRICT_UINT_TO_FP: {
7482    // RVV can only do fp<->int conversions to types half or double the size
7483    // of the source. We custom-lower any conversion that would need two hops
7484    // into a sequence of single-hop operations.
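    // For example, nxv4f16 -> nxv4i64 is emitted as an fp_extend to nxv4f32
    // followed by a single f32 -> i64 conversion, and nxv4i8 -> nxv4f32 as a
    // sign/zero extend to nxv4i16 followed by an i16 -> f32 conversion.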
7485 MVT VT = Op.getSimpleValueType();
7486 if (VT.isScalarInteger())
7487 return lowerFP_TO_INT(Op, DAG, Subtarget);
7488 bool IsStrict = Op->isStrictFPOpcode();
7489 SDValue Src = Op.getOperand(i: 0 + IsStrict);
7490 MVT SrcVT = Src.getSimpleValueType();
7491 if (SrcVT.isScalarInteger())
7492 return lowerINT_TO_FP(Op, DAG, Subtarget);
7493 if (!VT.isVector())
7494 return Op;
7495 SDLoc DL(Op);
7496 MVT EltVT = VT.getVectorElementType();
7497 MVT SrcEltVT = SrcVT.getVectorElementType();
7498 unsigned EltSize = EltVT.getSizeInBits();
7499 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
7500 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
7501 "Unexpected vector element types");
7502
7503 bool IsInt2FP = SrcEltVT.isInteger();
7504 // Widening conversions
7505 if (EltSize > (2 * SrcEltSize)) {
7506 if (IsInt2FP) {
7507 // Do a regular integer sign/zero extension then convert to float.
7508 MVT IVecVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: EltSize / 2),
7509 EC: VT.getVectorElementCount());
7510 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
7511 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
7512 ? ISD::ZERO_EXTEND
7513 : ISD::SIGN_EXTEND;
7514 SDValue Ext = DAG.getNode(Opcode: ExtOpcode, DL, VT: IVecVT, Operand: Src);
7515 if (IsStrict)
7516 return DAG.getNode(Opcode: Op.getOpcode(), DL, VTList: Op->getVTList(),
7517 N1: Op.getOperand(i: 0), N2: Ext);
7518 return DAG.getNode(Opcode: Op.getOpcode(), DL, VT, Operand: Ext);
7519 }
7520 // FP2Int
7521 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
7522 // Do one doubling fp_extend then complete the operation by converting
7523 // to int.
7524 MVT InterimFVT = MVT::getVectorVT(VT: MVT::f32, EC: VT.getVectorElementCount());
7525 if (IsStrict) {
7526 auto [FExt, Chain] =
7527 DAG.getStrictFPExtendOrRound(Op: Src, Chain: Op.getOperand(i: 0), DL, VT: InterimFVT);
7528 return DAG.getNode(Opcode: Op.getOpcode(), DL, VTList: Op->getVTList(), N1: Chain, N2: FExt);
7529 }
7530 SDValue FExt = DAG.getFPExtendOrRound(Op: Src, DL, VT: InterimFVT);
7531 return DAG.getNode(Opcode: Op.getOpcode(), DL, VT, Operand: FExt);
7532 }
7533
7534 // Narrowing conversions
7535 if (SrcEltSize > (2 * EltSize)) {
7536 if (IsInt2FP) {
7537 // One narrowing int_to_fp, then an fp_round.
7538 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
7539 MVT InterimFVT = MVT::getVectorVT(VT: MVT::f32, EC: VT.getVectorElementCount());
7540 if (IsStrict) {
7541 SDValue Int2FP = DAG.getNode(Opcode: Op.getOpcode(), DL,
7542 VTList: DAG.getVTList(VT1: InterimFVT, VT2: MVT::Other),
7543 N1: Op.getOperand(i: 0), N2: Src);
7544 SDValue Chain = Int2FP.getValue(R: 1);
7545 return DAG.getStrictFPExtendOrRound(Op: Int2FP, Chain, DL, VT).first;
7546 }
7547 SDValue Int2FP = DAG.getNode(Opcode: Op.getOpcode(), DL, VT: InterimFVT, Operand: Src);
7548 return DAG.getFPExtendOrRound(Op: Int2FP, DL, VT);
7549 }
7550 // FP2Int
7551 // One narrowing fp_to_int, then truncate the integer. If the float isn't
7552 // representable by the integer, the result is poison.
7553 MVT IVecVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: SrcEltSize / 2),
7554 EC: VT.getVectorElementCount());
7555 if (IsStrict) {
7556 SDValue FP2Int =
7557 DAG.getNode(Opcode: Op.getOpcode(), DL, VTList: DAG.getVTList(VT1: IVecVT, VT2: MVT::Other),
7558 N1: Op.getOperand(i: 0), N2: Src);
7559 SDValue Res = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: FP2Int);
7560 return DAG.getMergeValues(Ops: {Res, FP2Int.getValue(R: 1)}, dl: DL);
7561 }
7562 SDValue FP2Int = DAG.getNode(Opcode: Op.getOpcode(), DL, VT: IVecVT, Operand: Src);
7563 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: FP2Int);
7564 }
7565
7566    // Scalable vectors can exit here: isel patterns handle conversions between
7567    // equally-sized types as well as those that halve or double the element size.
7568 if (!VT.isFixedLengthVector())
7569 return Op;
7570
7571 // For fixed-length vectors we lower to a custom "VL" node.
7572 unsigned RVVOpc = 0;
7573 switch (Op.getOpcode()) {
7574 default:
7575 llvm_unreachable("Impossible opcode");
7576 case ISD::FP_TO_SINT:
7577 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
7578 break;
7579 case ISD::FP_TO_UINT:
7580 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
7581 break;
7582 case ISD::SINT_TO_FP:
7583 RVVOpc = RISCVISD::SINT_TO_FP_VL;
7584 break;
7585 case ISD::UINT_TO_FP:
7586 RVVOpc = RISCVISD::UINT_TO_FP_VL;
7587 break;
7588 case ISD::STRICT_FP_TO_SINT:
7589 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
7590 break;
7591 case ISD::STRICT_FP_TO_UINT:
7592 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
7593 break;
7594 case ISD::STRICT_SINT_TO_FP:
7595 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
7596 break;
7597 case ISD::STRICT_UINT_TO_FP:
7598 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
7599 break;
7600 }
7601
7602 MVT ContainerVT = getContainerForFixedLengthVector(VT);
7603 MVT SrcContainerVT = getContainerForFixedLengthVector(VT: SrcVT);
7604 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
7605 "Expected same element count");
7606
7607 auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
7608
7609 Src = convertToScalableVector(VT: SrcContainerVT, V: Src, DAG, Subtarget);
7610 if (IsStrict) {
7611 Src = DAG.getNode(Opcode: RVVOpc, DL, VTList: DAG.getVTList(VT1: ContainerVT, VT2: MVT::Other),
7612 N1: Op.getOperand(i: 0), N2: Src, N3: Mask, N4: VL);
7613 SDValue SubVec = convertFromScalableVector(VT, V: Src, DAG, Subtarget);
7614 return DAG.getMergeValues(Ops: {SubVec, Src.getValue(R: 1)}, dl: DL);
7615 }
7616 Src = DAG.getNode(Opcode: RVVOpc, DL, VT: ContainerVT, N1: Src, N2: Mask, N3: VL);
7617 return convertFromScalableVector(VT, V: Src, DAG, Subtarget);
7618 }
7619 case ISD::FP_TO_SINT_SAT:
7620 case ISD::FP_TO_UINT_SAT:
7621 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
7622 case ISD::FP_TO_BF16: {
7623 // Custom lower to ensure the libcall return is passed in an FPR on hard
7624 // float ABIs.
7625 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
7626 SDLoc DL(Op);
7627 MakeLibCallOptions CallOptions;
7628 RTLIB::Libcall LC =
7629 RTLIB::getFPROUND(OpVT: Op.getOperand(i: 0).getValueType(), RetVT: MVT::bf16);
7630 SDValue Res =
7631 makeLibCall(DAG, LC, RetVT: MVT::f32, Ops: Op.getOperand(i: 0), CallOptions, dl: DL).first;
7632 if (Subtarget.is64Bit())
7633 return DAG.getNode(Opcode: RISCVISD::FMV_X_ANYEXTW_RV64, DL, VT: MVT::i64, Operand: Res);
7634 return DAG.getBitcast(VT: MVT::i32, V: Res);
7635 }
7636 case ISD::BF16_TO_FP: {
7637 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
7638 MVT VT = Op.getSimpleValueType();
7639 SDLoc DL(Op);
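    // A bf16 value is the high half of an f32 with the same sign and exponent,
    // so the extension is exact: shift the 16-bit pattern into the upper half
    // and reinterpret it as f32.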
7640 Op = DAG.getNode(
7641 Opcode: ISD::SHL, DL, VT: Op.getOperand(i: 0).getValueType(), N1: Op.getOperand(i: 0),
7642 N2: DAG.getShiftAmountConstant(Val: 16, VT: Op.getOperand(i: 0).getValueType(), DL));
7643 SDValue Res = Subtarget.is64Bit()
7644 ? DAG.getNode(Opcode: RISCVISD::FMV_W_X_RV64, DL, VT: MVT::f32, Operand: Op)
7645 : DAG.getBitcast(VT: MVT::f32, V: Op);
7646 // fp_extend if the target VT is bigger than f32.
7647 if (VT != MVT::f32)
7648 return DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT, Operand: Res);
7649 return Res;
7650 }
7651 case ISD::STRICT_FP_TO_FP16:
7652 case ISD::FP_TO_FP16: {
7653 // Custom lower to ensure the libcall return is passed in an FPR on hard
7654 // float ABIs.
7655 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7656 SDLoc DL(Op);
7657 MakeLibCallOptions CallOptions;
7658 bool IsStrict = Op->isStrictFPOpcode();
7659 SDValue Op0 = IsStrict ? Op.getOperand(i: 1) : Op.getOperand(i: 0);
7660 SDValue Chain = IsStrict ? Op.getOperand(i: 0) : SDValue();
7661 RTLIB::Libcall LC = RTLIB::getFPROUND(OpVT: Op0.getValueType(), RetVT: MVT::f16);
7662 SDValue Res;
7663 std::tie(args&: Res, args&: Chain) =
7664 makeLibCall(DAG, LC, RetVT: MVT::f32, Ops: Op0, CallOptions, dl: DL, Chain);
7665 if (Subtarget.is64Bit())
7666 return DAG.getNode(Opcode: RISCVISD::FMV_X_ANYEXTW_RV64, DL, VT: MVT::i64, Operand: Res);
7667 SDValue Result = DAG.getBitcast(VT: MVT::i32, V: IsStrict ? Res.getValue(R: 0) : Res);
7668 if (IsStrict)
7669 return DAG.getMergeValues(Ops: {Result, Chain}, dl: DL);
7670 return Result;
7671 }
7672 case ISD::STRICT_FP16_TO_FP:
7673 case ISD::FP16_TO_FP: {
7674 // Custom lower to ensure the libcall argument is passed in an FPR on hard
7675 // float ABIs.
7676 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7677 SDLoc DL(Op);
7678 MakeLibCallOptions CallOptions;
7679 bool IsStrict = Op->isStrictFPOpcode();
7680 SDValue Op0 = IsStrict ? Op.getOperand(i: 1) : Op.getOperand(i: 0);
7681 SDValue Chain = IsStrict ? Op.getOperand(i: 0) : SDValue();
7682 SDValue Arg = Subtarget.is64Bit()
7683 ? DAG.getNode(Opcode: RISCVISD::FMV_W_X_RV64, DL, VT: MVT::f32, Operand: Op0)
7684 : DAG.getBitcast(VT: MVT::f32, V: Op0);
7685 SDValue Res;
7686 std::tie(args&: Res, args&: Chain) = makeLibCall(DAG, LC: RTLIB::FPEXT_F16_F32, RetVT: MVT::f32, Ops: Arg,
7687 CallOptions, dl: DL, Chain);
7688 if (IsStrict)
7689 return DAG.getMergeValues(Ops: {Res, Chain}, dl: DL);
7690 return Res;
7691 }
7692 case ISD::FTRUNC:
7693 case ISD::FCEIL:
7694 case ISD::FFLOOR:
7695 case ISD::FNEARBYINT:
7696 case ISD::FRINT:
7697 case ISD::FROUND:
7698 case ISD::FROUNDEVEN:
7699 if (isPromotedOpNeedingSplit(Op, Subtarget))
7700 return SplitVectorOp(Op, DAG);
7701 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7702 case ISD::LRINT:
7703 case ISD::LLRINT:
7704 if (Op.getValueType().isVector())
7705 return lowerVectorXRINT(Op, DAG, Subtarget);
7706 [[fallthrough]];
7707 case ISD::LROUND:
7708 case ISD::LLROUND: {
7709 assert(Op.getOperand(0).getValueType() == MVT::f16 &&
7710 "Unexpected custom legalisation");
7711 SDLoc DL(Op);
7712 SDValue Ext = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: MVT::f32, Operand: Op.getOperand(i: 0));
7713 return DAG.getNode(Opcode: Op.getOpcode(), DL, VT: Op.getValueType(), Operand: Ext);
7714 }
7715 case ISD::STRICT_LRINT:
7716 case ISD::STRICT_LLRINT:
7717 case ISD::STRICT_LROUND:
7718 case ISD::STRICT_LLROUND: {
7719 assert(Op.getOperand(1).getValueType() == MVT::f16 &&
7720 "Unexpected custom legalisation");
7721 SDLoc DL(Op);
7722 SDValue Ext = DAG.getNode(Opcode: ISD::STRICT_FP_EXTEND, DL, ResultTys: {MVT::f32, MVT::Other},
7723 Ops: {Op.getOperand(i: 0), Op.getOperand(i: 1)});
7724 return DAG.getNode(Opcode: Op.getOpcode(), DL, ResultTys: {Op.getValueType(), MVT::Other},
7725 Ops: {Ext.getValue(R: 1), Ext.getValue(R: 0)});
7726 }
7727 case ISD::VECREDUCE_ADD:
7728 case ISD::VECREDUCE_UMAX:
7729 case ISD::VECREDUCE_SMAX:
7730 case ISD::VECREDUCE_UMIN:
7731 case ISD::VECREDUCE_SMIN:
7732 return lowerVECREDUCE(Op, DAG);
7733 case ISD::VECREDUCE_AND:
7734 case ISD::VECREDUCE_OR:
7735 case ISD::VECREDUCE_XOR:
7736 if (Op.getOperand(i: 0).getValueType().getVectorElementType() == MVT::i1)
7737 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
7738 return lowerVECREDUCE(Op, DAG);
7739 case ISD::VECREDUCE_FADD:
7740 case ISD::VECREDUCE_SEQ_FADD:
7741 case ISD::VECREDUCE_FMIN:
7742 case ISD::VECREDUCE_FMAX:
7743 case ISD::VECREDUCE_FMAXIMUM:
7744 case ISD::VECREDUCE_FMINIMUM:
7745 return lowerFPVECREDUCE(Op, DAG);
7746 case ISD::VP_REDUCE_ADD:
7747 case ISD::VP_REDUCE_UMAX:
7748 case ISD::VP_REDUCE_SMAX:
7749 case ISD::VP_REDUCE_UMIN:
7750 case ISD::VP_REDUCE_SMIN:
7751 case ISD::VP_REDUCE_FADD:
7752 case ISD::VP_REDUCE_SEQ_FADD:
7753 case ISD::VP_REDUCE_FMIN:
7754 case ISD::VP_REDUCE_FMAX:
7755 case ISD::VP_REDUCE_FMINIMUM:
7756 case ISD::VP_REDUCE_FMAXIMUM:
7757 if (isPromotedOpNeedingSplit(Op: Op.getOperand(i: 1), Subtarget))
7758 return SplitVectorReductionOp(Op, DAG);
7759 return lowerVPREDUCE(Op, DAG);
7760 case ISD::VP_REDUCE_AND:
7761 case ISD::VP_REDUCE_OR:
7762 case ISD::VP_REDUCE_XOR:
7763 if (Op.getOperand(i: 1).getValueType().getVectorElementType() == MVT::i1)
7764 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
7765 return lowerVPREDUCE(Op, DAG);
7766 case ISD::VP_CTTZ_ELTS:
7767 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
7768 return lowerVPCttzElements(Op, DAG);
7769 case ISD::UNDEF: {
7770 MVT ContainerVT = getContainerForFixedLengthVector(VT: Op.getSimpleValueType());
7771 return convertFromScalableVector(VT: Op.getSimpleValueType(),
7772 V: DAG.getUNDEF(VT: ContainerVT), DAG, Subtarget);
7773 }
7774 case ISD::INSERT_SUBVECTOR:
7775 return lowerINSERT_SUBVECTOR(Op, DAG);
7776 case ISD::EXTRACT_SUBVECTOR:
7777 return lowerEXTRACT_SUBVECTOR(Op, DAG);
7778 case ISD::VECTOR_DEINTERLEAVE:
7779 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
7780 case ISD::VECTOR_INTERLEAVE:
7781 return lowerVECTOR_INTERLEAVE(Op, DAG);
7782 case ISD::STEP_VECTOR:
7783 return lowerSTEP_VECTOR(Op, DAG);
7784 case ISD::VECTOR_REVERSE:
7785 return lowerVECTOR_REVERSE(Op, DAG);
7786 case ISD::VECTOR_SPLICE:
7787 return lowerVECTOR_SPLICE(Op, DAG);
7788 case ISD::BUILD_VECTOR: {
7789 MVT VT = Op.getSimpleValueType();
7790 MVT EltVT = VT.getVectorElementType();
7791 if (!Subtarget.is64Bit() && EltVT == MVT::i64)
7792 return lowerBuildVectorViaVID(Op, DAG, Subtarget);
7793 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
7794 }
7795 case ISD::SPLAT_VECTOR: {
7796 MVT VT = Op.getSimpleValueType();
7797 MVT EltVT = VT.getVectorElementType();
7798 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
7799 EltVT == MVT::bf16) {
7800 SDLoc DL(Op);
7801 SDValue Elt;
7802 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
7803 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
7804 Elt = DAG.getNode(Opcode: RISCVISD::FMV_X_ANYEXTH, DL, VT: Subtarget.getXLenVT(),
7805 Operand: Op.getOperand(i: 0));
7806 else
7807 Elt = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::i16, Operand: Op.getOperand(i: 0));
7808 MVT IVT = VT.changeVectorElementType(EltVT: MVT::i16);
7809 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT,
7810 Operand: DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL, VT: IVT, Operand: Elt));
7811 }
7812
7813 if (EltVT == MVT::i1)
7814 return lowerVectorMaskSplat(Op, DAG);
7815 return SDValue();
7816 }
7817 case ISD::VECTOR_SHUFFLE:
7818 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
7819 case ISD::CONCAT_VECTORS: {
7820 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
7821 // better than going through the stack, as the default expansion does.
7822 SDLoc DL(Op);
7823 MVT VT = Op.getSimpleValueType();
7824 MVT ContainerVT = VT;
7825 if (VT.isFixedLengthVector())
7826 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
7827
7828 // Recursively split concat_vectors with more than 2 operands:
7829 //
7830 // concat_vector op1, op2, op3, op4
7831 // ->
7832 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
7833 //
7834 // This reduces the length of the chain of vslideups and allows us to
7835 // perform the vslideups at a smaller LMUL, limited to MF2.
7836 if (Op.getNumOperands() > 2 &&
7837 ContainerVT.bitsGE(VT: RISCVTargetLowering::getM1VT(VT: ContainerVT))) {
7838 MVT HalfVT = VT.getHalfNumVectorElementsVT();
7839 assert(isPowerOf2_32(Op.getNumOperands()));
7840 size_t HalfNumOps = Op.getNumOperands() / 2;
7841 SDValue Lo = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: HalfVT,
7842 Ops: Op->ops().take_front(N: HalfNumOps));
7843 SDValue Hi = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: HalfVT,
7844 Ops: Op->ops().drop_front(N: HalfNumOps));
7845 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT, N1: Lo, N2: Hi);
7846 }
7847
7848 unsigned NumOpElts =
7849 Op.getOperand(i: 0).getSimpleValueType().getVectorMinNumElements();
7850 SDValue Vec = DAG.getUNDEF(VT);
7851 for (const auto &OpIdx : enumerate(First: Op->ops())) {
7852 SDValue SubVec = OpIdx.value();
7853 // Don't insert undef subvectors.
7854 if (SubVec.isUndef())
7855 continue;
7856 Vec = DAG.getInsertSubvector(DL, Vec, SubVec, Idx: OpIdx.index() * NumOpElts);
7857 }
7858 return Vec;
7859 }
7860 case ISD::LOAD: {
7861 auto *Load = cast<LoadSDNode>(Val&: Op);
7862 EVT VT = Load->getValueType(ResNo: 0);
7863 if (VT == MVT::f64) {
7864 assert(Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
7865 !Subtarget.is64Bit() && "Unexpected custom legalisation");
7866
7867 // Replace a double precision load with two i32 loads and a BuildPairF64.
7868 SDLoc DL(Op);
7869 SDValue BasePtr = Load->getBasePtr();
7870 SDValue Chain = Load->getChain();
7871
7872 SDValue Lo =
7873 DAG.getLoad(VT: MVT::i32, dl: DL, Chain, Ptr: BasePtr, PtrInfo: Load->getPointerInfo(),
7874 Alignment: Load->getBaseAlign(), MMOFlags: Load->getMemOperand()->getFlags());
7875 BasePtr = DAG.getObjectPtrOffset(SL: DL, Ptr: BasePtr, Offset: TypeSize::getFixed(ExactSize: 4));
7876 SDValue Hi = DAG.getLoad(
7877 VT: MVT::i32, dl: DL, Chain, Ptr: BasePtr, PtrInfo: Load->getPointerInfo().getWithOffset(O: 4),
7878 Alignment: Load->getBaseAlign(), MMOFlags: Load->getMemOperand()->getFlags());
7879 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, N1: Lo.getValue(R: 1),
7880 N2: Hi.getValue(R: 1));
7881
7882 SDValue Pair = DAG.getNode(Opcode: RISCVISD::BuildPairF64, DL, VT: MVT::f64, N1: Lo, N2: Hi);
7883 return DAG.getMergeValues(Ops: {Pair, Chain}, dl: DL);
7884 }
7885
7886 // Handle normal vector tuple load.
7887 if (VT.isRISCVVectorTuple()) {
7888 SDLoc DL(Op);
7889 MVT XLenVT = Subtarget.getXLenVT();
7890 unsigned NF = VT.getRISCVVectorTupleNumFields();
7891 unsigned Sz = VT.getSizeInBits().getKnownMinValue();
7892 unsigned NumElts = Sz / (NF * 8);
7893 int Log2LMUL = Log2_64(Value: NumElts) - 3;
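      // Each of the NF fields is one register group of NumElts i8 elements.
      // For example, a 2-field LMUL=2 tuple has a known-min size of 256 bits,
      // so NumElts = 16 (i.e. nxv16i8), Log2LMUL = 1, and the per-field stride
      // computed below is VLENB << 1 bytes.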
7894
7895 auto Flag = SDNodeFlags();
7896 Flag.setNoUnsignedWrap(true);
7897 SDValue Ret = DAG.getUNDEF(VT);
7898 SDValue BasePtr = Load->getBasePtr();
7899 SDValue VROffset = DAG.getNode(Opcode: RISCVISD::READ_VLENB, DL, VT: XLenVT);
7900 VROffset =
7901 DAG.getNode(Opcode: ISD::SHL, DL, VT: XLenVT, N1: VROffset,
7902 N2: DAG.getConstant(Val: std::max(a: Log2LMUL, b: 0), DL, VT: XLenVT));
7903 SmallVector<SDValue, 8> OutChains;
7904
7905 // Load NF vector registers and combine them to a vector tuple.
7906 for (unsigned i = 0; i < NF; ++i) {
7907 SDValue LoadVal = DAG.getLoad(
7908 VT: MVT::getScalableVectorVT(VT: MVT::i8, NumElements: NumElts), dl: DL, Chain: Load->getChain(),
7909 Ptr: BasePtr, PtrInfo: MachinePointerInfo(Load->getAddressSpace()), Alignment: Align(8));
7910 OutChains.push_back(Elt: LoadVal.getValue(R: 1));
7911 Ret = DAG.getNode(Opcode: RISCVISD::TUPLE_INSERT, DL, VT, N1: Ret, N2: LoadVal,
7912 N3: DAG.getVectorIdxConstant(Val: i, DL));
7913 BasePtr = DAG.getNode(Opcode: ISD::ADD, DL, VT: XLenVT, N1: BasePtr, N2: VROffset, Flags: Flag);
7914 }
7915 return DAG.getMergeValues(
7916 Ops: {Ret, DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains)}, dl: DL);
7917 }
7918
7919 if (auto V = expandUnalignedRVVLoad(Op, DAG))
7920 return V;
7921 if (Op.getValueType().isFixedLengthVector())
7922 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
7923 return Op;
7924 }
7925 case ISD::STORE: {
7926 auto *Store = cast<StoreSDNode>(Val&: Op);
7927 SDValue StoredVal = Store->getValue();
7928 EVT VT = StoredVal.getValueType();
7929 if (VT == MVT::f64) {
7930 assert(Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
7931 !Subtarget.is64Bit() && "Unexpected custom legalisation");
7932
7933 // Replace a double precision store with a SplitF64 and i32 stores.
7934      SDLoc DL(Op);
7935 SDValue BasePtr = Store->getBasePtr();
7936 SDValue Chain = Store->getChain();
7937 SDValue Split = DAG.getNode(Opcode: RISCVISD::SplitF64, DL,
7938 VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32), N: StoredVal);
7939
7940 SDValue Lo = DAG.getStore(Chain, dl: DL, Val: Split.getValue(R: 0), Ptr: BasePtr,
7941 PtrInfo: Store->getPointerInfo(), Alignment: Store->getBaseAlign(),
7942 MMOFlags: Store->getMemOperand()->getFlags());
7943 BasePtr = DAG.getObjectPtrOffset(SL: DL, Ptr: BasePtr, Offset: TypeSize::getFixed(ExactSize: 4));
7944 SDValue Hi = DAG.getStore(Chain, dl: DL, Val: Split.getValue(R: 1), Ptr: BasePtr,
7945 PtrInfo: Store->getPointerInfo().getWithOffset(O: 4),
7946 Alignment: Store->getBaseAlign(),
7947 MMOFlags: Store->getMemOperand()->getFlags());
7948 return DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, N1: Lo, N2: Hi);
7949 }
7950 if (VT == MVT::i64) {
7951 assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
7952 "Unexpected custom legalisation");
7953 if (Store->isTruncatingStore())
7954 return SDValue();
7955
7956 if (!Subtarget.enableUnalignedScalarMem() && Store->getAlign() < 8)
7957 return SDValue();
7958
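      // Split the i64 value into i32 halves and emit a single Zilsd SD (store
      // a 64-bit value from a register pair) via a target memory intrinsic.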
7959 SDLoc DL(Op);
7960 SDValue Lo = DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL, VT: MVT::i32, N1: StoredVal,
7961 N2: DAG.getTargetConstant(Val: 0, DL, VT: MVT::i32));
7962 SDValue Hi = DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL, VT: MVT::i32, N1: StoredVal,
7963 N2: DAG.getTargetConstant(Val: 1, DL, VT: MVT::i32));
7964
7965 return DAG.getMemIntrinsicNode(
7966 Opcode: RISCVISD::SD_RV32, dl: DL, VTList: DAG.getVTList(VT: MVT::Other),
7967 Ops: {Store->getChain(), Lo, Hi, Store->getBasePtr()}, MemVT: MVT::i64,
7968 MMO: Store->getMemOperand());
7969 }
7970 // Handle normal vector tuple store.
7971 if (VT.isRISCVVectorTuple()) {
7972 SDLoc DL(Op);
7973 MVT XLenVT = Subtarget.getXLenVT();
7974 unsigned NF = VT.getRISCVVectorTupleNumFields();
7975 unsigned Sz = VT.getSizeInBits().getKnownMinValue();
7976 unsigned NumElts = Sz / (NF * 8);
7977 int Log2LMUL = Log2_64(Value: NumElts) - 3;
7978
7979 auto Flag = SDNodeFlags();
7980 Flag.setNoUnsignedWrap(true);
7981 SDValue Ret;
7982 SDValue Chain = Store->getChain();
7983 SDValue BasePtr = Store->getBasePtr();
7984 SDValue VROffset = DAG.getNode(Opcode: RISCVISD::READ_VLENB, DL, VT: XLenVT);
7985 VROffset =
7986 DAG.getNode(Opcode: ISD::SHL, DL, VT: XLenVT, N1: VROffset,
7987 N2: DAG.getConstant(Val: std::max(a: Log2LMUL, b: 0), DL, VT: XLenVT));
7988
7989 // Extract subregisters in a vector tuple and store them individually.
7990 for (unsigned i = 0; i < NF; ++i) {
7991 auto Extract = DAG.getNode(Opcode: RISCVISD::TUPLE_EXTRACT, DL,
7992 VT: MVT::getScalableVectorVT(VT: MVT::i8, NumElements: NumElts),
7993 N1: StoredVal, N2: DAG.getVectorIdxConstant(Val: i, DL));
7994 Ret = DAG.getStore(Chain, dl: DL, Val: Extract, Ptr: BasePtr,
7995 PtrInfo: MachinePointerInfo(Store->getAddressSpace()),
7996 Alignment: Store->getBaseAlign(),
7997 MMOFlags: Store->getMemOperand()->getFlags());
7998 Chain = Ret.getValue(R: 0);
7999 BasePtr = DAG.getNode(Opcode: ISD::ADD, DL, VT: XLenVT, N1: BasePtr, N2: VROffset, Flags: Flag);
8000 }
8001 return Ret;
8002 }
8003
8004 if (auto V = expandUnalignedRVVStore(Op, DAG))
8005 return V;
8006 if (Op.getOperand(i: 1).getValueType().isFixedLengthVector())
8007 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
8008 return Op;
8009 }
8010 case ISD::MLOAD:
8011 case ISD::VP_LOAD:
8012 return lowerMaskedLoad(Op, DAG);
8013 case ISD::MSTORE:
8014 case ISD::VP_STORE:
8015 return lowerMaskedStore(Op, DAG);
8016 case ISD::VECTOR_COMPRESS:
8017 return lowerVectorCompress(Op, DAG);
8018 case ISD::SELECT_CC: {
8019 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
8020 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
8021 // into separate SETCC+SELECT just like LegalizeDAG.
8022 SDValue Tmp1 = Op.getOperand(i: 0);
8023 SDValue Tmp2 = Op.getOperand(i: 1);
8024 SDValue True = Op.getOperand(i: 2);
8025 SDValue False = Op.getOperand(i: 3);
8026 EVT VT = Op.getValueType();
8027 SDValue CC = Op.getOperand(i: 4);
8028 EVT CmpVT = Tmp1.getValueType();
8029 EVT CCVT =
8030 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: CmpVT);
8031 SDLoc DL(Op);
8032 SDValue Cond =
8033 DAG.getNode(Opcode: ISD::SETCC, DL, VT: CCVT, N1: Tmp1, N2: Tmp2, N3: CC, Flags: Op->getFlags());
8034 return DAG.getSelect(DL, VT, Cond, LHS: True, RHS: False);
8035 }
8036 case ISD::SETCC: {
8037 MVT OpVT = Op.getOperand(i: 0).getSimpleValueType();
8038 if (OpVT.isScalarInteger()) {
8039 MVT VT = Op.getSimpleValueType();
8040 SDValue LHS = Op.getOperand(i: 0);
8041 SDValue RHS = Op.getOperand(i: 1);
8042 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: Op.getOperand(i: 2))->get();
8043 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
8044 "Unexpected CondCode");
8045
8046 SDLoc DL(Op);
8047
8048 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
8049 // convert this to the equivalent of (set(u)ge X, C+1) by using
8050 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
8051 // in a register.
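        // For example, with C = 5: (setgt X, 5) -> (xori (slti X, 6), 1),
        // since X > 5 is equivalent to !(X < 6).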
8052 if (isa<ConstantSDNode>(Val: RHS)) {
8053 int64_t Imm = cast<ConstantSDNode>(Val&: RHS)->getSExtValue();
8054 if (Imm != 0 && isInt<12>(x: (uint64_t)Imm + 1)) {
8055 // If this is an unsigned compare and the constant is -1, incrementing
8056 // the constant would change behavior. The result should be false.
8057 if (CCVal == ISD::SETUGT && Imm == -1)
8058 return DAG.getConstant(Val: 0, DL, VT);
8059 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
8060 CCVal = ISD::getSetCCSwappedOperands(Operation: CCVal);
8061 SDValue SetCC = DAG.getSetCC(
8062 DL, VT, LHS, RHS: DAG.getSignedConstant(Val: Imm + 1, DL, VT: OpVT), Cond: CCVal);
8063 return DAG.getLogicalNOT(DL, Val: SetCC, VT);
8064 }
8065 }
8066
8067 // Not a constant we could handle, swap the operands and condition code to
8068 // SETLT/SETULT.
8069 CCVal = ISD::getSetCCSwappedOperands(Operation: CCVal);
8070 return DAG.getSetCC(DL, VT, LHS: RHS, RHS: LHS, Cond: CCVal);
8071 }
8072
8073 if (isPromotedOpNeedingSplit(Op: Op.getOperand(i: 0), Subtarget))
8074 return SplitVectorOp(Op, DAG);
8075
8076 return lowerFixedLengthVectorSetccToRVV(Op, DAG);
8077 }
8078 case ISD::ADD:
8079 case ISD::SUB:
8080 case ISD::MUL:
8081 case ISD::MULHS:
8082 case ISD::MULHU:
8083 case ISD::AND:
8084 case ISD::OR:
8085 case ISD::XOR:
8086 case ISD::SDIV:
8087 case ISD::SREM:
8088 case ISD::UDIV:
8089 case ISD::UREM:
8090 case ISD::BSWAP:
8091 case ISD::CTPOP:
8092 return lowerToScalableOp(Op, DAG);
8093 case ISD::SHL:
8094 case ISD::SRA:
8095 case ISD::SRL:
8096 if (Op.getSimpleValueType().isFixedLengthVector())
8097 return lowerToScalableOp(Op, DAG);
8098 // This can be called for an i32 shift amount that needs to be promoted.
8099 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
8100 "Unexpected custom legalisation");
8101 return SDValue();
8102 case ISD::FABS:
8103 case ISD::FNEG:
8104 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
8105 return lowerFABSorFNEG(Op, DAG, Subtarget);
8106 [[fallthrough]];
8107 case ISD::FADD:
8108 case ISD::FSUB:
8109 case ISD::FMUL:
8110 case ISD::FDIV:
8111 case ISD::FSQRT:
8112 case ISD::FMA:
8113 case ISD::FMINNUM:
8114 case ISD::FMAXNUM:
8115 case ISD::FMINIMUMNUM:
8116 case ISD::FMAXIMUMNUM:
8117 if (isPromotedOpNeedingSplit(Op, Subtarget))
8118 return SplitVectorOp(Op, DAG);
8119 [[fallthrough]];
8120 case ISD::AVGFLOORS:
8121 case ISD::AVGFLOORU:
8122 case ISD::AVGCEILS:
8123 case ISD::AVGCEILU:
8124 case ISD::SMIN:
8125 case ISD::SMAX:
8126 case ISD::UMIN:
8127 case ISD::UMAX:
8128 case ISD::UADDSAT:
8129 case ISD::USUBSAT:
8130 case ISD::SADDSAT:
8131 case ISD::SSUBSAT:
8132 return lowerToScalableOp(Op, DAG);
8133 case ISD::ABDS:
8134 case ISD::ABDU: {
8135 SDLoc dl(Op);
8136 EVT VT = Op->getValueType(ResNo: 0);
8137 SDValue LHS = DAG.getFreeze(V: Op->getOperand(Num: 0));
8138 SDValue RHS = DAG.getFreeze(V: Op->getOperand(Num: 1));
8139 bool IsSigned = Op->getOpcode() == ISD::ABDS;
8140
8141 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
8142 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
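    // e.g. abds(3, -5) = smax(3, -5) - smin(3, -5) = 3 - (-5) = 8.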
8143 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
8144 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
8145 SDValue Max = DAG.getNode(Opcode: MaxOpc, DL: dl, VT, N1: LHS, N2: RHS);
8146 SDValue Min = DAG.getNode(Opcode: MinOpc, DL: dl, VT, N1: LHS, N2: RHS);
8147 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Max, N2: Min);
8148 }
8149 case ISD::ABS:
8150 case ISD::VP_ABS:
8151 return lowerABS(Op, DAG);
8152 case ISD::CTLZ:
8153 case ISD::CTLZ_ZERO_UNDEF:
8154 case ISD::CTTZ:
8155 case ISD::CTTZ_ZERO_UNDEF:
8156 if (Subtarget.hasStdExtZvbb())
8157 return lowerToScalableOp(Op, DAG);
8158 assert(Op.getOpcode() != ISD::CTTZ);
8159 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8160 case ISD::VSELECT:
8161 return lowerFixedLengthVectorSelectToRVV(Op, DAG);
8162 case ISD::FCOPYSIGN:
8163 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
8164 return lowerFCOPYSIGN(Op, DAG, Subtarget);
8165 if (isPromotedOpNeedingSplit(Op, Subtarget))
8166 return SplitVectorOp(Op, DAG);
8167 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
8168 case ISD::STRICT_FADD:
8169 case ISD::STRICT_FSUB:
8170 case ISD::STRICT_FMUL:
8171 case ISD::STRICT_FDIV:
8172 case ISD::STRICT_FSQRT:
8173 case ISD::STRICT_FMA:
8174 if (isPromotedOpNeedingSplit(Op, Subtarget))
8175 return SplitStrictFPVectorOp(Op, DAG);
8176 return lowerToScalableOp(Op, DAG);
8177 case ISD::STRICT_FSETCC:
8178 case ISD::STRICT_FSETCCS:
8179 return lowerVectorStrictFSetcc(Op, DAG);
8180 case ISD::STRICT_FCEIL:
8181 case ISD::STRICT_FRINT:
8182 case ISD::STRICT_FFLOOR:
8183 case ISD::STRICT_FTRUNC:
8184 case ISD::STRICT_FNEARBYINT:
8185 case ISD::STRICT_FROUND:
8186 case ISD::STRICT_FROUNDEVEN:
8187 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
8188 case ISD::MGATHER:
8189 case ISD::VP_GATHER:
8190 return lowerMaskedGather(Op, DAG);
8191 case ISD::MSCATTER:
8192 case ISD::VP_SCATTER:
8193 return lowerMaskedScatter(Op, DAG);
8194 case ISD::GET_ROUNDING:
8195 return lowerGET_ROUNDING(Op, DAG);
8196 case ISD::SET_ROUNDING:
8197 return lowerSET_ROUNDING(Op, DAG);
8198 case ISD::GET_FPENV:
8199 return lowerGET_FPENV(Op, DAG);
8200 case ISD::SET_FPENV:
8201 return lowerSET_FPENV(Op, DAG);
8202 case ISD::RESET_FPENV:
8203 return lowerRESET_FPENV(Op, DAG);
8204 case ISD::EH_DWARF_CFA:
8205 return lowerEH_DWARF_CFA(Op, DAG);
8206 case ISD::VP_MERGE:
8207 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
8208 return lowerVPMergeMask(Op, DAG);
8209 [[fallthrough]];
8210 case ISD::VP_SELECT:
8211 case ISD::VP_ADD:
8212 case ISD::VP_SUB:
8213 case ISD::VP_MUL:
8214 case ISD::VP_SDIV:
8215 case ISD::VP_UDIV:
8216 case ISD::VP_SREM:
8217 case ISD::VP_UREM:
8218 case ISD::VP_UADDSAT:
8219 case ISD::VP_USUBSAT:
8220 case ISD::VP_SADDSAT:
8221 case ISD::VP_SSUBSAT:
8222 case ISD::VP_LRINT:
8223 case ISD::VP_LLRINT:
8224 return lowerVPOp(Op, DAG);
8225 case ISD::VP_AND:
8226 case ISD::VP_OR:
8227 case ISD::VP_XOR:
8228 return lowerLogicVPOp(Op, DAG);
8229 case ISD::VP_FADD:
8230 case ISD::VP_FSUB:
8231 case ISD::VP_FMUL:
8232 case ISD::VP_FDIV:
8233 case ISD::VP_FNEG:
8234 case ISD::VP_FABS:
8235 case ISD::VP_SQRT:
8236 case ISD::VP_FMA:
8237 case ISD::VP_FMINNUM:
8238 case ISD::VP_FMAXNUM:
8239 case ISD::VP_FCOPYSIGN:
8240 if (isPromotedOpNeedingSplit(Op, Subtarget))
8241 return SplitVPOp(Op, DAG);
8242 [[fallthrough]];
8243 case ISD::VP_SRA:
8244 case ISD::VP_SRL:
8245 case ISD::VP_SHL:
8246 return lowerVPOp(Op, DAG);
8247 case ISD::VP_IS_FPCLASS:
8248 return LowerIS_FPCLASS(Op, DAG);
8249 case ISD::VP_SIGN_EXTEND:
8250 case ISD::VP_ZERO_EXTEND:
8251 if (Op.getOperand(i: 0).getSimpleValueType().getVectorElementType() == MVT::i1)
8252 return lowerVPExtMaskOp(Op, DAG);
8253 return lowerVPOp(Op, DAG);
8254 case ISD::VP_TRUNCATE:
8255 return lowerVectorTruncLike(Op, DAG);
8256 case ISD::VP_FP_EXTEND:
8257 case ISD::VP_FP_ROUND:
8258 return lowerVectorFPExtendOrRoundLike(Op, DAG);
8259 case ISD::VP_SINT_TO_FP:
8260 case ISD::VP_UINT_TO_FP:
8261 if (Op.getValueType().isVector() &&
8262 ((Op.getValueType().getScalarType() == MVT::f16 &&
8263 (Subtarget.hasVInstructionsF16Minimal() &&
8264 !Subtarget.hasVInstructionsF16())) ||
8265 Op.getValueType().getScalarType() == MVT::bf16)) {
8266 if (isPromotedOpNeedingSplit(Op, Subtarget))
8267 return SplitVectorOp(Op, DAG);
8268 // int -> f32
8269 SDLoc DL(Op);
8270 MVT NVT =
8271 MVT::getVectorVT(VT: MVT::f32, EC: Op.getValueType().getVectorElementCount());
8272 auto NC = DAG.getNode(Opcode: Op.getOpcode(), DL, VT: NVT, Ops: Op->ops());
8273 // f32 -> [b]f16
8274 return DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT: Op.getValueType(), N1: NC,
8275 N2: DAG.getIntPtrConstant(Val: 0, DL, /*isTarget=*/true));
8276 }
8277 [[fallthrough]];
8278 case ISD::VP_FP_TO_SINT:
8279 case ISD::VP_FP_TO_UINT:
8280 if (SDValue Op1 = Op.getOperand(i: 0);
8281 Op1.getValueType().isVector() &&
8282 ((Op1.getValueType().getScalarType() == MVT::f16 &&
8283 (Subtarget.hasVInstructionsF16Minimal() &&
8284 !Subtarget.hasVInstructionsF16())) ||
8285 Op1.getValueType().getScalarType() == MVT::bf16)) {
8286 if (isPromotedOpNeedingSplit(Op: Op1, Subtarget))
8287 return SplitVectorOp(Op, DAG);
8288 // [b]f16 -> f32
8289 SDLoc DL(Op);
8290 MVT NVT = MVT::getVectorVT(VT: MVT::f32,
8291 EC: Op1.getValueType().getVectorElementCount());
8292 SDValue WidenVec = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: NVT, Operand: Op1);
8293 // f32 -> int
8294 return DAG.getNode(Opcode: Op.getOpcode(), DL, VT: Op.getValueType(),
8295 Ops: {WidenVec, Op.getOperand(i: 1), Op.getOperand(i: 2)});
8296 }
8297 return lowerVPFPIntConvOp(Op, DAG);
8298 case ISD::VP_SETCC:
8299 if (isPromotedOpNeedingSplit(Op: Op.getOperand(i: 0), Subtarget))
8300 return SplitVPOp(Op, DAG);
8301 if (Op.getOperand(i: 0).getSimpleValueType().getVectorElementType() == MVT::i1)
8302 return lowerVPSetCCMaskOp(Op, DAG);
8303 [[fallthrough]];
8304 case ISD::VP_SMIN:
8305 case ISD::VP_SMAX:
8306 case ISD::VP_UMIN:
8307 case ISD::VP_UMAX:
8308 case ISD::VP_BITREVERSE:
8309 case ISD::VP_BSWAP:
8310 return lowerVPOp(Op, DAG);
8311 case ISD::VP_CTLZ:
8312 case ISD::VP_CTLZ_ZERO_UNDEF:
8313 if (Subtarget.hasStdExtZvbb())
8314 return lowerVPOp(Op, DAG);
8315 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8316 case ISD::VP_CTTZ:
8317 case ISD::VP_CTTZ_ZERO_UNDEF:
8318 if (Subtarget.hasStdExtZvbb())
8319 return lowerVPOp(Op, DAG);
8320 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8321 case ISD::VP_CTPOP:
8322 return lowerVPOp(Op, DAG);
8323 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
8324 return lowerVPStridedLoad(Op, DAG);
8325 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
8326 return lowerVPStridedStore(Op, DAG);
8327 case ISD::VP_FCEIL:
8328 case ISD::VP_FFLOOR:
8329 case ISD::VP_FRINT:
8330 case ISD::VP_FNEARBYINT:
8331 case ISD::VP_FROUND:
8332 case ISD::VP_FROUNDEVEN:
8333 case ISD::VP_FROUNDTOZERO:
8334 if (isPromotedOpNeedingSplit(Op, Subtarget))
8335 return SplitVPOp(Op, DAG);
8336 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
8337 case ISD::VP_FMAXIMUM:
8338 case ISD::VP_FMINIMUM:
8339 if (isPromotedOpNeedingSplit(Op, Subtarget))
8340 return SplitVPOp(Op, DAG);
8341 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
8342 case ISD::EXPERIMENTAL_VP_SPLICE:
8343 return lowerVPSpliceExperimental(Op, DAG);
8344 case ISD::EXPERIMENTAL_VP_REVERSE:
8345 return lowerVPReverseExperimental(Op, DAG);
8346 case ISD::EXPERIMENTAL_VP_SPLAT:
8347 return lowerVPSplatExperimental(Op, DAG);
8348 case ISD::CLEAR_CACHE: {
8349 assert(getTargetMachine().getTargetTriple().isOSLinux() &&
8350 "llvm.clear_cache only needs custom lower on Linux targets");
8351 SDLoc DL(Op);
8352 SDValue Flags = DAG.getConstant(Val: 0, DL, VT: Subtarget.getXLenVT());
8353 return emitFlushICache(DAG, InChain: Op.getOperand(i: 0), Start: Op.getOperand(i: 1),
8354 End: Op.getOperand(i: 2), Flags, DL);
8355 }
8356 case ISD::DYNAMIC_STACKALLOC:
8357 return lowerDYNAMIC_STACKALLOC(Op, DAG);
8358 case ISD::INIT_TRAMPOLINE:
8359 return lowerINIT_TRAMPOLINE(Op, DAG);
8360 case ISD::ADJUST_TRAMPOLINE:
8361 return lowerADJUST_TRAMPOLINE(Op, DAG);
8362 case ISD::PARTIAL_REDUCE_UMLA:
8363 case ISD::PARTIAL_REDUCE_SMLA:
8364 case ISD::PARTIAL_REDUCE_SUMLA:
8365 return lowerPARTIAL_REDUCE_MLA(Op, DAG);
8366 }
8367}
8368
8369SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain,
8370 SDValue Start, SDValue End,
8371 SDValue Flags, SDLoc DL) const {
8372 MakeLibCallOptions CallOptions;
8373 std::pair<SDValue, SDValue> CallResult =
8374 makeLibCall(DAG, LC: RTLIB::RISCV_FLUSH_ICACHE, RetVT: MVT::isVoid,
8375 Ops: {Start, End, Flags}, CallOptions, dl: DL, Chain: InChain);
8376
8377 // This function returns void so only the out chain matters.
8378 return CallResult.second;
8379}
8380
8381SDValue RISCVTargetLowering::lowerINIT_TRAMPOLINE(SDValue Op,
8382 SelectionDAG &DAG) const {
8383 if (!Subtarget.is64Bit())
8384 llvm::report_fatal_error(reason: "Trampolines only implemented for RV64");
8385
8386 // Create an MCCodeEmitter to encode instructions.
8387 TargetLoweringObjectFile *TLO = getTargetMachine().getObjFileLowering();
8388 assert(TLO);
8389 MCContext &MCCtx = TLO->getContext();
8390
8391 std::unique_ptr<MCCodeEmitter> CodeEmitter(
8392 createRISCVMCCodeEmitter(MCII: *getTargetMachine().getMCInstrInfo(), Ctx&: MCCtx));
8393
8394 SDValue Root = Op.getOperand(i: 0);
8395 SDValue Trmp = Op.getOperand(i: 1); // trampoline
8396 SDLoc dl(Op);
8397
8398 const Value *TrmpAddr = cast<SrcValueSDNode>(Val: Op.getOperand(i: 4))->getValue();
8399
8400 // We store in the trampoline buffer the following instructions and data.
8401 // Offset:
8402 // 0: auipc t2, 0
8403 // 4: ld t0, 24(t2)
8404 // 8: ld t2, 16(t2)
8405 // 12: jalr t0
8406 // 16: <StaticChainOffset>
8407 // 24: <FunctionAddressOffset>
8408 // 32:
8409 // Offset with branch control flow protection enabled:
8410 // 0: lpad <imm20>
8411 // 4: auipc t3, 0
8412 // 8: ld t2, 28(t3)
8413 // 12: ld t3, 20(t3)
8414 // 16: jalr t2
8415 // 20: <StaticChainOffset>
8416 // 28: <FunctionAddressOffset>
8417 // 36:
8418
8419 const bool HasCFBranch =
8420 Subtarget.hasStdExtZicfilp() &&
8421 DAG.getMachineFunction().getFunction().getParent()->getModuleFlag(
8422 Key: "cf-protection-branch");
8423 const unsigned StaticChainIdx = HasCFBranch ? 5 : 4;
8424 const unsigned StaticChainOffset = StaticChainIdx * 4;
8425 const unsigned FunctionAddressOffset = StaticChainOffset + 8;
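  // With branch CFI enabled this yields StaticChainOffset = 20 and
  // FunctionAddressOffset = 28; otherwise 16 and 24, matching the layouts
  // above.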
8426
8427 const MCSubtargetInfo *STI = getTargetMachine().getMCSubtargetInfo();
8428 assert(STI);
8429 auto GetEncoding = [&](const MCInst &MC) {
8430 SmallVector<char, 4> CB;
8431 SmallVector<MCFixup> Fixups;
8432 CodeEmitter->encodeInstruction(Inst: MC, CB, Fixups, STI: *STI);
8433 uint32_t Encoding = support::endian::read32le(P: CB.data());
8434 return Encoding;
8435 };
8436
8437 SmallVector<SDValue> OutChains;
8438
8439 SmallVector<uint32_t> Encodings;
8440 if (!HasCFBranch) {
8441 Encodings.append(
8442 IL: {// auipc t2, 0
8443 // Loads the current PC into t2.
8444 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(Reg: RISCV::X7).addImm(Val: 0)),
8445 // ld t0, 24(t2)
8446 // Loads the function address into t0. Note that we are using offsets
8447 // pc-relative to the first instruction of the trampoline.
8448 GetEncoding(MCInstBuilder(RISCV::LD)
8449 .addReg(Reg: RISCV::X5)
8450 .addReg(Reg: RISCV::X7)
8451 .addImm(Val: FunctionAddressOffset)),
8452 // ld t2, 16(t2)
8453 // Load the value of the static chain.
8454 GetEncoding(MCInstBuilder(RISCV::LD)
8455 .addReg(Reg: RISCV::X7)
8456 .addReg(Reg: RISCV::X7)
8457 .addImm(Val: StaticChainOffset)),
8458 // jalr t0
8459 // Jump to the function.
8460 GetEncoding(MCInstBuilder(RISCV::JALR)
8461 .addReg(Reg: RISCV::X0)
8462 .addReg(Reg: RISCV::X5)
8463 .addImm(Val: 0))});
8464 } else {
8465 Encodings.append(
8466 IL: {// auipc x0, <imm20> (lpad <imm20>)
8467 // Landing pad.
8468 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(Reg: RISCV::X0).addImm(Val: 0)),
8469 // auipc t3, 0
8470 // Loads the current PC into t3.
8471 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(Reg: RISCV::X28).addImm(Val: 0)),
8472 // ld t2, (FunctionAddressOffset - 4)(t3)
8473 // Loads the function address into t2. Note that we are using offsets
8474 // pc-relative to the SECOND instruction of the trampoline.
8475 GetEncoding(MCInstBuilder(RISCV::LD)
8476 .addReg(Reg: RISCV::X7)
8477 .addReg(Reg: RISCV::X28)
8478 .addImm(Val: FunctionAddressOffset - 4)),
8479 // ld t3, (StaticChainOffset - 4)(t3)
8480 // Load the value of the static chain.
8481 GetEncoding(MCInstBuilder(RISCV::LD)
8482 .addReg(Reg: RISCV::X28)
8483 .addReg(Reg: RISCV::X28)
8484 .addImm(Val: StaticChainOffset - 4)),
8485 // jalr t2
8486 // Software-guarded jump to the function.
8487 GetEncoding(MCInstBuilder(RISCV::JALR)
8488 .addReg(Reg: RISCV::X0)
8489 .addReg(Reg: RISCV::X7)
8490 .addImm(Val: 0))});
8491 }
8492
8493 // Store encoded instructions.
8494 for (auto [Idx, Encoding] : llvm::enumerate(First&: Encodings)) {
8495 SDValue Addr = Idx > 0 ? DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::i64, N1: Trmp,
8496 N2: DAG.getConstant(Val: Idx * 4, DL: dl, VT: MVT::i64))
8497 : Trmp;
8498 OutChains.push_back(Elt: DAG.getTruncStore(
8499 Chain: Root, dl, Val: DAG.getConstant(Val: Encoding, DL: dl, VT: MVT::i64), Ptr: Addr,
8500 PtrInfo: MachinePointerInfo(TrmpAddr, Idx * 4), SVT: MVT::i32));
8501 }
8502
8503 // Now store the variable part of the trampoline.
8504 SDValue FunctionAddress = Op.getOperand(i: 2);
8505 SDValue StaticChain = Op.getOperand(i: 3);
8506
8507 // Store the given static chain and function pointer in the trampoline buffer.
8508 struct OffsetValuePair {
8509 const unsigned Offset;
8510 const SDValue Value;
8511 SDValue Addr = SDValue(); // Used to cache the address.
8512 } OffsetValues[] = {
8513 {.Offset: StaticChainOffset, .Value: StaticChain},
8514 {.Offset: FunctionAddressOffset, .Value: FunctionAddress},
8515 };
8516 for (auto &OffsetValue : OffsetValues) {
8517 SDValue Addr =
8518 DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::i64, N1: Trmp,
8519 N2: DAG.getConstant(Val: OffsetValue.Offset, DL: dl, VT: MVT::i64));
8520 OffsetValue.Addr = Addr;
8521 OutChains.push_back(
8522 Elt: DAG.getStore(Chain: Root, dl, Val: OffsetValue.Value, Ptr: Addr,
8523 PtrInfo: MachinePointerInfo(TrmpAddr, OffsetValue.Offset)));
8524 }
8525
8526 assert(OutChains.size() == StaticChainIdx + 2 &&
8527 "Size of OutChains mismatch");
8528 SDValue StoreToken = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: OutChains);
8529
8530 // The end of the trampoline instructions is the same as the static chain
8531 // address that we computed earlier.
8532 SDValue EndOfTrmp = OffsetValues[0].Addr;
8533
8534 // Call clear cache on the trampoline instructions.
8535 SDValue Chain = DAG.getNode(Opcode: ISD::CLEAR_CACHE, DL: dl, VT: MVT::Other, N1: StoreToken,
8536 N2: Trmp, N3: EndOfTrmp);
8537
8538 return Chain;
8539}
8540
8541SDValue RISCVTargetLowering::lowerADJUST_TRAMPOLINE(SDValue Op,
8542 SelectionDAG &DAG) const {
8543 if (!Subtarget.is64Bit())
8544 llvm::report_fatal_error(reason: "Trampolines only implemented for RV64");
8545
8546 return Op.getOperand(i: 0);
8547}
8548
8549SDValue RISCVTargetLowering::lowerPARTIAL_REDUCE_MLA(SDValue Op,
8550 SelectionDAG &DAG) const {
8551 // Currently, only the vqdot, vqdotu and vqdotsu cases (from zvqdotq) should
8552 // be legal. TODO: There are many other sub-cases we could potentially lower;
8553 // are any of them worthwhile? E.g. via vredsum, vwredsum, vwwmaccu, etc.
8554 SDLoc DL(Op);
8555 MVT VT = Op.getSimpleValueType();
8556 SDValue Accum = Op.getOperand(i: 0);
8557 assert(Accum.getSimpleValueType() == VT &&
8558 VT.getVectorElementType() == MVT::i32);
8559 SDValue A = Op.getOperand(i: 1);
8560 SDValue B = Op.getOperand(i: 2);
8561 MVT ArgVT = A.getSimpleValueType();
8562 assert(ArgVT == B.getSimpleValueType() &&
8563 ArgVT.getVectorElementType() == MVT::i8);
8564 (void)ArgVT;
8565
8566 // The zvqdotq pseudos are defined with sources and destination both
8567 // being i32. This cast is needed for correctness to avoid incorrect
8568 // .vx matching of i8 splats.
8569 A = DAG.getBitcast(VT, V: A);
8570 B = DAG.getBitcast(VT, V: B);
8571
8572 MVT ContainerVT = VT;
8573 if (VT.isFixedLengthVector()) {
8574 ContainerVT = getContainerForFixedLengthVector(VT);
8575 Accum = convertToScalableVector(VT: ContainerVT, V: Accum, DAG, Subtarget);
8576 A = convertToScalableVector(VT: ContainerVT, V: A, DAG, Subtarget);
8577 B = convertToScalableVector(VT: ContainerVT, V: B, DAG, Subtarget);
8578 }
8579
8580 unsigned Opc;
8581 switch (Op.getOpcode()) {
8582 case ISD::PARTIAL_REDUCE_SMLA:
8583 Opc = RISCVISD::VQDOT_VL;
8584 break;
8585 case ISD::PARTIAL_REDUCE_UMLA:
8586 Opc = RISCVISD::VQDOTU_VL;
8587 break;
8588 case ISD::PARTIAL_REDUCE_SUMLA:
8589 Opc = RISCVISD::VQDOTSU_VL;
8590 break;
8591 default:
8592 llvm_unreachable("Unexpected opcode");
8593 }
8594 auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
8595 SDValue Res = DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, Ops: {A, B, Accum, Mask, VL});
8596 if (VT.isFixedLengthVector())
8597 Res = convertFromScalableVector(VT, V: Res, DAG, Subtarget);
8598 return Res;
8599}
8600
8601static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
8602 SelectionDAG &DAG, unsigned Flags) {
8603 return DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: Flags);
8604}
8605
8606static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
8607 SelectionDAG &DAG, unsigned Flags) {
8608 return DAG.getTargetBlockAddress(BA: N->getBlockAddress(), VT: Ty, Offset: N->getOffset(),
8609 TargetFlags: Flags);
8610}
8611
8612static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
8613 SelectionDAG &DAG, unsigned Flags) {
8614 return DAG.getTargetConstantPool(C: N->getConstVal(), VT: Ty, Align: N->getAlign(),
8615 Offset: N->getOffset(), TargetFlags: Flags);
8616}
8617
8618static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
8619 SelectionDAG &DAG, unsigned Flags) {
8620 return DAG.getTargetJumpTable(JTI: N->getIndex(), VT: Ty, TargetFlags: Flags);
8621}
8622
8623static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL,
8624 EVT Ty, SelectionDAG &DAG) {
8625 RISCVConstantPoolValue *CPV = RISCVConstantPoolValue::Create(GV: N->getGlobal());
8626 SDValue CPAddr = DAG.getTargetConstantPool(C: CPV, VT: Ty, Align: Align(8));
8627 SDValue LC = DAG.getNode(Opcode: RISCVISD::LLA, DL, VT: Ty, Operand: CPAddr);
8628 return DAG.getLoad(
8629 VT: Ty, dl: DL, Chain: DAG.getEntryNode(), Ptr: LC,
8630 PtrInfo: MachinePointerInfo::getConstantPool(MF&: DAG.getMachineFunction()));
8631}
8632
8633static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL,
8634 EVT Ty, SelectionDAG &DAG) {
8635 RISCVConstantPoolValue *CPV =
8636 RISCVConstantPoolValue::Create(C&: *DAG.getContext(), S: N->getSymbol());
8637 SDValue CPAddr = DAG.getTargetConstantPool(C: CPV, VT: Ty, Align: Align(8));
8638 SDValue LC = DAG.getNode(Opcode: RISCVISD::LLA, DL, VT: Ty, Operand: CPAddr);
8639 return DAG.getLoad(
8640 VT: Ty, dl: DL, Chain: DAG.getEntryNode(), Ptr: LC,
8641 PtrInfo: MachinePointerInfo::getConstantPool(MF&: DAG.getMachineFunction()));
8642}
8643
8644template <class NodeTy>
8645SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
8646 bool IsLocal, bool IsExternWeak) const {
8647 SDLoc DL(N);
8648 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
8649
8650 // When HWASAN is used and tagging of global variables is enabled,
8651 // they should be accessed via the GOT, since the tagged address of a global
8652 // is incompatible with existing code models. This also applies to non-pic
8653 // mode.
8654 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
8655 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8656 if (IsLocal && !Subtarget.allowTaggedGlobals())
8657 // Use PC-relative addressing to access the symbol. This generates the
8658 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
8659 // %pcrel_lo(auipc)).
8660 return DAG.getNode(Opcode: RISCVISD::LLA, DL, VT: Ty, Operand: Addr);
8661
8662 // Use PC-relative addressing to access the GOT for this symbol, then load
8663 // the address from the GOT. This generates the pattern (PseudoLGA sym),
8664 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
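    // E.g., with an illustrative register choice and RV64 shown, this
    // materializes as:
    //   auipc a0, %got_pcrel_hi(sym)
    //   ld    a0, %pcrel_lo(auipc)(a0)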
8665 SDValue Load =
8666 SDValue(DAG.getMachineNode(Opcode: RISCV::PseudoLGA, dl: DL, VT: Ty, Op1: Addr), 0);
8667 MachineFunction &MF = DAG.getMachineFunction();
8668 MachineMemOperand *MemOp = MF.getMachineMemOperand(
8669 PtrInfo: MachinePointerInfo::getGOT(MF),
8670 f: MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
8671 MachineMemOperand::MOInvariant,
8672 MemTy: LLT(Ty.getSimpleVT()), base_alignment: Align(Ty.getFixedSizeInBits() / 8));
8673 DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Load.getNode()), NewMemRefs: {MemOp});
8674 return Load;
8675 }
8676
8677 switch (getTargetMachine().getCodeModel()) {
8678 default:
8679 report_fatal_error(reason: "Unsupported code model for lowering");
8680 case CodeModel::Small: {
8681 // Generate a sequence for accessing addresses within the first 2 GiB of
8682 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
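    // E.g., with an illustrative register choice, this materializes as:
    //   lui  a0, %hi(sym)
    //   addi a0, a0, %lo(sym)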
8683 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
8684 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
8685 SDValue MNHi = DAG.getNode(Opcode: RISCVISD::HI, DL, VT: Ty, Operand: AddrHi);
8686 return DAG.getNode(Opcode: RISCVISD::ADD_LO, DL, VT: Ty, N1: MNHi, N2: AddrLo);
8687 }
8688 case CodeModel::Medium: {
8689 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8690 if (IsExternWeak) {
8691 // An extern weak symbol may be undefined, i.e. have value 0, which may
8692 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
8693 // symbol. This generates the pattern (PseudoLGA sym), which expands to
8694 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
8695 SDValue Load =
8696 SDValue(DAG.getMachineNode(Opcode: RISCV::PseudoLGA, dl: DL, VT: Ty, Op1: Addr), 0);
8697 MachineFunction &MF = DAG.getMachineFunction();
8698 MachineMemOperand *MemOp = MF.getMachineMemOperand(
8699 PtrInfo: MachinePointerInfo::getGOT(MF),
8700 f: MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
8701 MachineMemOperand::MOInvariant,
8702 MemTy: LLT(Ty.getSimpleVT()), base_alignment: Align(Ty.getFixedSizeInBits() / 8));
8703 DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Load.getNode()), NewMemRefs: {MemOp});
8704 return Load;
8705 }
8706
8707 // Generate a sequence for accessing addresses within any 2GiB range within
8708 // the address space. This generates the pattern (PseudoLLA sym), which
8709 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
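    // E.g., with an illustrative register choice, this materializes as:
    //   auipc a0, %pcrel_hi(sym)
    //   addi  a0, a0, %pcrel_lo(auipc)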
8710 return DAG.getNode(Opcode: RISCVISD::LLA, DL, VT: Ty, Operand: Addr);
8711 }
8712 case CodeModel::Large: {
8713 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N))
8714 return getLargeGlobalAddress(N: G, DL, Ty, DAG);
8715
8716 // Use PC-relative addressing for other node types.
8717 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8718 return DAG.getNode(Opcode: RISCVISD::LLA, DL, VT: Ty, Operand: Addr);
8719 }
8720 }
8721}
8722
8723SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
8724 SelectionDAG &DAG) const {
8725 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Val&: Op);
8726 assert(N->getOffset() == 0 && "unexpected offset in global node");
8727 const GlobalValue *GV = N->getGlobal();
8728 return getAddr(N, DAG, IsLocal: GV->isDSOLocal(), IsExternWeak: GV->hasExternalWeakLinkage());
8729}
8730
8731SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
8732 SelectionDAG &DAG) const {
8733 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Val&: Op);
8734
8735 return getAddr(N, DAG);
8736}
8737
8738SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
8739 SelectionDAG &DAG) const {
8740 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Val&: Op);
8741
8742 return getAddr(N, DAG);
8743}
8744
8745SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
8746 SelectionDAG &DAG) const {
8747 JumpTableSDNode *N = cast<JumpTableSDNode>(Val&: Op);
8748
8749 return getAddr(N, DAG);
8750}
8751
8752SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
8753 SelectionDAG &DAG,
8754 bool UseGOT) const {
8755 SDLoc DL(N);
8756 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
8757 const GlobalValue *GV = N->getGlobal();
8758 MVT XLenVT = Subtarget.getXLenVT();
8759
8760 if (UseGOT) {
8761 // Use PC-relative addressing to access the GOT for this TLS symbol, then
8762 // load the address from the GOT and add the thread pointer. This generates
8763 // the pattern (PseudoLA_TLS_IE sym), which expands to
8764 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
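    // E.g., with an illustrative register choice and RV64 shown, this
    // materializes as:
    //   auipc a0, %tls_ie_pcrel_hi(sym)
    //   ld    a0, %pcrel_lo(auipc)(a0)
    //   add   a0, a0, tp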
8765 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: 0);
8766 SDValue Load =
8767 SDValue(DAG.getMachineNode(Opcode: RISCV::PseudoLA_TLS_IE, dl: DL, VT: Ty, Op1: Addr), 0);
8768 MachineFunction &MF = DAG.getMachineFunction();
8769 MachineMemOperand *MemOp = MF.getMachineMemOperand(
8770 PtrInfo: MachinePointerInfo::getGOT(MF),
8771 f: MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
8772 MachineMemOperand::MOInvariant,
8773 MemTy: LLT(Ty.getSimpleVT()), base_alignment: Align(Ty.getFixedSizeInBits() / 8));
8774 DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Load.getNode()), NewMemRefs: {MemOp});
8775
8776 // Add the thread pointer.
8777 SDValue TPReg = DAG.getRegister(Reg: RISCV::X4, VT: XLenVT);
8778 return DAG.getNode(Opcode: ISD::ADD, DL, VT: Ty, N1: Load, N2: TPReg);
8779 }
8780
8781 // Generate a sequence for accessing the address relative to the thread
8782 // pointer, with the appropriate adjustment for the thread pointer offset.
8783 // This generates the pattern
8784 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
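  // E.g., with an illustrative register choice, this materializes as:
  //   lui  a0, %tprel_hi(sym)
  //   add  a0, a0, tp, %tprel_add(sym)
  //   addi a0, a0, %tprel_lo(sym)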
8785 SDValue AddrHi =
8786 DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: RISCVII::MO_TPREL_HI);
8787 SDValue AddrAdd =
8788 DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: RISCVII::MO_TPREL_ADD);
8789 SDValue AddrLo =
8790 DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: RISCVII::MO_TPREL_LO);
8791
8792 SDValue MNHi = DAG.getNode(Opcode: RISCVISD::HI, DL, VT: Ty, Operand: AddrHi);
8793 SDValue TPReg = DAG.getRegister(Reg: RISCV::X4, VT: XLenVT);
8794 SDValue MNAdd =
8795 DAG.getNode(Opcode: RISCVISD::ADD_TPREL, DL, VT: Ty, N1: MNHi, N2: TPReg, N3: AddrAdd);
8796 return DAG.getNode(Opcode: RISCVISD::ADD_LO, DL, VT: Ty, N1: MNAdd, N2: AddrLo);
8797}
8798
8799SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
8800 SelectionDAG &DAG) const {
8801 SDLoc DL(N);
8802 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
8803 IntegerType *CallTy = Type::getIntNTy(C&: *DAG.getContext(), N: Ty.getSizeInBits());
8804 const GlobalValue *GV = N->getGlobal();
8805
8806 // Use a PC-relative addressing mode to access the global dynamic GOT address.
8807 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
8808 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
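  // E.g., with an illustrative register choice, this materializes as:
  //   auipc a0, %tls_gd_pcrel_hi(sym)
  //   addi  a0, a0, %pcrel_lo(auipc)
  // with the result then passed to the __tls_get_addr call built below.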
8809 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: 0);
8810 SDValue Load =
8811 SDValue(DAG.getMachineNode(Opcode: RISCV::PseudoLA_TLS_GD, dl: DL, VT: Ty, Op1: Addr), 0);
8812
8813 // Prepare argument list to generate call.
8814 ArgListTy Args;
8815 ArgListEntry Entry;
8816 Entry.Node = Load;
8817 Entry.Ty = CallTy;
8818 Args.push_back(x: Entry);
8819
8820 // Setup call to __tls_get_addr.
8821 TargetLowering::CallLoweringInfo CLI(DAG);
8822 CLI.setDebugLoc(DL)
8823 .setChain(DAG.getEntryNode())
8824 .setLibCallee(CC: CallingConv::C, ResultType: CallTy,
8825 Target: DAG.getExternalSymbol(Sym: "__tls_get_addr", VT: Ty),
8826 ArgsList: std::move(Args));
8827
8828 return LowerCallTo(CLI).first;
8829}
8830
8831SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
8832 SelectionDAG &DAG) const {
8833 SDLoc DL(N);
8834 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
8835 const GlobalValue *GV = N->getGlobal();
8836
8837 // Use a PC-relative addressing mode to access the global dynamic GOT address.
8838 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
8839 //
8840 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
8841 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
8842 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
8843 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
8844 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: 0);
8845 return SDValue(DAG.getMachineNode(Opcode: RISCV::PseudoLA_TLSDESC, dl: DL, VT: Ty, Op1: Addr), 0);
8846}
8847
8848SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
8849 SelectionDAG &DAG) const {
8850 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Val&: Op);
8851 assert(N->getOffset() == 0 && "unexpected offset in global node");
8852
8853 if (DAG.getTarget().useEmulatedTLS())
8854 return LowerToTLSEmulatedModel(GA: N, DAG);
8855
8856 TLSModel::Model Model = getTargetMachine().getTLSModel(GV: N->getGlobal());
8857
8858 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
8859 CallingConv::GHC)
8860 report_fatal_error(reason: "In GHC calling convention TLS is not supported");
8861
8862 SDValue Addr;
8863 switch (Model) {
8864 case TLSModel::LocalExec:
8865 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
8866 break;
8867 case TLSModel::InitialExec:
8868 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
8869 break;
8870 case TLSModel::LocalDynamic:
8871 case TLSModel::GeneralDynamic:
8872 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
8873 : getDynamicTLSAddr(N, DAG);
8874 break;
8875 }
8876
8877 return Addr;
8878}
8879
8880// Return true if Val is equal to (setcc LHS, RHS, CC).
8881// Return false if Val is the inverse of (setcc LHS, RHS, CC).
8882// Otherwise, return std::nullopt.
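// For example, if Val is (setcc a, b, setlt), then (a, b, setlt) returns true,
// (a, b, setge) returns false, and (b, a, setgt) returns true via the
// swapped-operands check.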
8883static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
8884 ISD::CondCode CC, SDValue Val) {
8885 assert(Val->getOpcode() == ISD::SETCC);
8886 SDValue LHS2 = Val.getOperand(i: 0);
8887 SDValue RHS2 = Val.getOperand(i: 1);
8888 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val: Val.getOperand(i: 2))->get();
8889
8890 if (LHS == LHS2 && RHS == RHS2) {
8891 if (CC == CC2)
8892 return true;
8893 if (CC == ISD::getSetCCInverse(Operation: CC2, Type: LHS2.getValueType()))
8894 return false;
8895 } else if (LHS == RHS2 && RHS == LHS2) {
8896 CC2 = ISD::getSetCCSwappedOperands(Operation: CC2);
8897 if (CC == CC2)
8898 return true;
8899 if (CC == ISD::getSetCCInverse(Operation: CC2, Type: LHS2.getValueType()))
8900 return false;
8901 }
8902
8903 return std::nullopt;
8904}
8905
8906static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
8907 const RISCVSubtarget &Subtarget) {
8908 SDValue CondV = N->getOperand(Num: 0);
8909 SDValue TrueV = N->getOperand(Num: 1);
8910 SDValue FalseV = N->getOperand(Num: 2);
8911 MVT VT = N->getSimpleValueType(ResNo: 0);
8912 SDLoc DL(N);
8913
8914 if (!Subtarget.hasConditionalMoveFusion()) {
8915 // (select c, -1, y) -> -c | y
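    // (The select condition is a 0/1 boolean here, so its negation is either 0
    //  or all-ones and acts as a mask; the same reasoning applies to the cases
    //  below.)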
8916 if (isAllOnesConstant(V: TrueV)) {
8917 SDValue Neg = DAG.getNegative(Val: CondV, DL, VT);
8918 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Neg, N2: DAG.getFreeze(V: FalseV));
8919 }
8920 // (select c, y, -1) -> (c-1) | y
8921 if (isAllOnesConstant(V: FalseV)) {
8922 SDValue Neg = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: CondV,
8923 N2: DAG.getAllOnesConstant(DL, VT));
8924 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Neg, N2: DAG.getFreeze(V: TrueV));
8925 }
8926
8927 // (select c, 0, y) -> (c-1) & y
8928 if (isNullConstant(V: TrueV)) {
8929 SDValue Neg = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: CondV,
8930 N2: DAG.getAllOnesConstant(DL, VT));
8931 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Neg, N2: DAG.getFreeze(V: FalseV));
8932 }
8933 // (select c, y, 0) -> -c & y
8934 if (isNullConstant(V: FalseV)) {
8935 SDValue Neg = DAG.getNegative(Val: CondV, DL, VT);
8936 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Neg, N2: DAG.getFreeze(V: TrueV));
8937 }
8938 }
8939
8940 // select c, ~x, x --> xor -c, x
8941 if (isa<ConstantSDNode>(Val: TrueV) && isa<ConstantSDNode>(Val: FalseV)) {
8942 const APInt &TrueVal = TrueV->getAsAPIntVal();
8943 const APInt &FalseVal = FalseV->getAsAPIntVal();
8944 if (~TrueVal == FalseVal) {
8945 SDValue Neg = DAG.getNegative(Val: CondV, DL, VT);
8946 return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Neg, N2: FalseV);
8947 }
8948 }
8949
8950 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
8951 // when both truev and falsev are also setcc.
8952 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
8953 FalseV.getOpcode() == ISD::SETCC) {
8954 SDValue LHS = CondV.getOperand(i: 0);
8955 SDValue RHS = CondV.getOperand(i: 1);
8956 ISD::CondCode CC = cast<CondCodeSDNode>(Val: CondV.getOperand(i: 2))->get();
8957
8958 // (select x, x, y) -> x | y
8959 // (select !x, x, y) -> x & y
8960 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, Val: TrueV)) {
8961 return DAG.getNode(Opcode: *MatchResult ? ISD::OR : ISD::AND, DL, VT, N1: TrueV,
8962 N2: DAG.getFreeze(V: FalseV));
8963 }
8964 // (select x, y, x) -> x & y
8965 // (select !x, y, x) -> x | y
8966 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, Val: FalseV)) {
8967 return DAG.getNode(Opcode: *MatchResult ? ISD::AND : ISD::OR, DL, VT,
8968 N1: DAG.getFreeze(V: TrueV), N2: FalseV);
8969 }
8970 }
8971
8972 return SDValue();
8973}
8974
8975// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
8976// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
8977// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
8978// being `0` or `-1`. In such cases we can replace `select` with `and`.
8979// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
8980// than `c0`?
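// For example, (and (select cond, x, 0), c1) becomes
// (select cond, (and x, c1), 0), and the resulting select can then be lowered
// with an `and` as described above.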
8981static SDValue
8982foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
8983 const RISCVSubtarget &Subtarget) {
8984 if (Subtarget.hasShortForwardBranchOpt())
8985 return SDValue();
8986
8987 unsigned SelOpNo = 0;
8988 SDValue Sel = BO->getOperand(Num: 0);
8989 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
8990 SelOpNo = 1;
8991 Sel = BO->getOperand(Num: 1);
8992 }
8993
8994 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
8995 return SDValue();
8996
8997 unsigned ConstSelOpNo = 1;
8998 unsigned OtherSelOpNo = 2;
8999 if (!isa<ConstantSDNode>(Val: Sel->getOperand(Num: ConstSelOpNo))) {
9000 ConstSelOpNo = 2;
9001 OtherSelOpNo = 1;
9002 }
9003 SDValue ConstSelOp = Sel->getOperand(Num: ConstSelOpNo);
9004 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(Val&: ConstSelOp);
9005 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
9006 return SDValue();
9007
9008 SDValue ConstBinOp = BO->getOperand(Num: SelOpNo ^ 1);
9009 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(Val&: ConstBinOp);
9010 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
9011 return SDValue();
9012
9013 SDLoc DL(Sel);
9014 EVT VT = BO->getValueType(ResNo: 0);
9015
9016 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
9017 if (SelOpNo == 1)
9018 std::swap(a&: NewConstOps[0], b&: NewConstOps[1]);
9019
9020 SDValue NewConstOp =
9021 DAG.FoldConstantArithmetic(Opcode: BO->getOpcode(), DL, VT, Ops: NewConstOps);
9022 if (!NewConstOp)
9023 return SDValue();
9024
9025 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
9026 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
9027 return SDValue();
9028
9029 SDValue OtherSelOp = Sel->getOperand(Num: OtherSelOpNo);
9030 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
9031 if (SelOpNo == 1)
9032 std::swap(a&: NewNonConstOps[0], b&: NewNonConstOps[1]);
9033 SDValue NewNonConstOp = DAG.getNode(Opcode: BO->getOpcode(), DL, VT, Ops: NewNonConstOps);
9034
9035 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
9036 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
9037 return DAG.getSelect(DL, VT, Cond: Sel.getOperand(i: 0), LHS: NewT, RHS: NewF);
9038}
9039
9040SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
9041 SDValue CondV = Op.getOperand(i: 0);
9042 SDValue TrueV = Op.getOperand(i: 1);
9043 SDValue FalseV = Op.getOperand(i: 2);
9044 SDLoc DL(Op);
9045 MVT VT = Op.getSimpleValueType();
9046 MVT XLenVT = Subtarget.getXLenVT();
9047
9048 // Lower vector SELECTs to VSELECTs by splatting the condition.
9049 if (VT.isVector()) {
9050 MVT SplatCondVT = VT.changeVectorElementType(EltVT: MVT::i1);
9051 SDValue CondSplat = DAG.getSplat(VT: SplatCondVT, DL, Op: CondV);
9052 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT, N1: CondSplat, N2: TrueV, N3: FalseV);
9053 }
9054
9055 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
9056 // nodes to implement the SELECT. Performing the lowering here allows for
9057 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
9058 // sequence or RISCVISD::SELECT_CC node (branch-based select).
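  // (Recall czero.eqz rd, rs1, rs2 yields 0 when rs2 == 0 and rs1 otherwise,
  //  while czero.nez yields 0 when rs2 != 0 and rs1 otherwise.)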
9059 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
9060 VT.isScalarInteger()) {
9061 // (select c, t, 0) -> (czero_eqz t, c)
9062 if (isNullConstant(V: FalseV))
9063 return DAG.getNode(Opcode: RISCVISD::CZERO_EQZ, DL, VT, N1: TrueV, N2: CondV);
9064 // (select c, 0, f) -> (czero_nez f, c)
9065 if (isNullConstant(V: TrueV))
9066 return DAG.getNode(Opcode: RISCVISD::CZERO_NEZ, DL, VT, N1: FalseV, N2: CondV);
9067
9068 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
9069 if (TrueV.getOpcode() == ISD::AND &&
9070 (TrueV.getOperand(i: 0) == FalseV || TrueV.getOperand(i: 1) == FalseV))
9071 return DAG.getNode(
9072 Opcode: ISD::OR, DL, VT, N1: TrueV,
9073 N2: DAG.getNode(Opcode: RISCVISD::CZERO_NEZ, DL, VT, N1: FalseV, N2: CondV));
9074 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
9075 if (FalseV.getOpcode() == ISD::AND &&
9076 (FalseV.getOperand(i: 0) == TrueV || FalseV.getOperand(i: 1) == TrueV))
9077 return DAG.getNode(
9078 Opcode: ISD::OR, DL, VT, N1: FalseV,
9079 N2: DAG.getNode(Opcode: RISCVISD::CZERO_EQZ, DL, VT, N1: TrueV, N2: CondV));
9080
9081 // Try some other optimizations before falling back to generic lowering.
9082 if (SDValue V = combineSelectToBinOp(N: Op.getNode(), DAG, Subtarget))
9083 return V;
9084
9085 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
9086 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
9087 if (isa<ConstantSDNode>(Val: TrueV) && isa<ConstantSDNode>(Val: FalseV)) {
9088 const APInt &TrueVal = TrueV->getAsAPIntVal();
9089 const APInt &FalseVal = FalseV->getAsAPIntVal();
9090
9091 // Prefer these over Zicond to avoid materializing an immediate:
9092 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
9093 // (select (x > -1), z, y) -> x >> (XLEN - 1) & (y - z) + z
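      // The arithmetic shift by XLEN-1 broadcasts the sign bit into a 0 or
      // all-ones mask, so the AND/ADD pick y or z without a branch.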
9094 if (CondV.getOpcode() == ISD::SETCC &&
9095 CondV.getOperand(i: 0).getValueType() == VT && CondV.hasOneUse()) {
9096 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: CondV.getOperand(i: 2))->get();
9097 if ((CCVal == ISD::SETLT && isNullConstant(V: CondV.getOperand(i: 1))) ||
9098 (CCVal == ISD::SETGT && isAllOnesConstant(V: CondV.getOperand(i: 1)))) {
9099 int64_t TrueImm = TrueVal.getSExtValue();
9100 int64_t FalseImm = FalseVal.getSExtValue();
9101 if (CCVal == ISD::SETGT)
9102 std::swap(a&: TrueImm, b&: FalseImm);
9103 if (isInt<12>(x: TrueImm) && isInt<12>(x: FalseImm) &&
9104 isInt<12>(x: TrueImm - FalseImm)) {
9105 SDValue SRA =
9106 DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: CondV.getOperand(i: 0),
9107 N2: DAG.getConstant(Val: Subtarget.getXLen() - 1, DL, VT));
9108 SDValue AND =
9109 DAG.getNode(Opcode: ISD::AND, DL, VT, N1: SRA,
9110 N2: DAG.getSignedConstant(Val: TrueImm - FalseImm, DL, VT));
9111 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: AND,
9112 N2: DAG.getSignedConstant(Val: FalseImm, DL, VT));
9113 }
9114 }
9115 }
9116
9117 const int TrueValCost = RISCVMatInt::getIntMatCost(
9118 Val: TrueVal, Size: Subtarget.getXLen(), STI: Subtarget, /*CompressionCost=*/true);
9119 const int FalseValCost = RISCVMatInt::getIntMatCost(
9120 Val: FalseVal, Size: Subtarget.getXLen(), STI: Subtarget, /*CompressionCost=*/true);
9121 bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
9122 SDValue LHSVal = DAG.getConstant(
9123 Val: IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
9124 SDValue RHSVal =
9125 DAG.getConstant(Val: IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT);
9126 SDValue CMOV =
9127 DAG.getNode(Opcode: IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
9128 DL, VT, N1: LHSVal, N2: CondV);
9129 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: CMOV, N2: RHSVal);
9130 }
9131
9132 // (select c, c1, t) -> (add (czero_nez t - c1, c), c1)
9133 // (select c, t, c1) -> (add (czero_eqz t - c1, c), c1)
9134 if (isa<ConstantSDNode>(Val: TrueV) != isa<ConstantSDNode>(Val: FalseV)) {
9135 bool IsCZERO_NEZ = isa<ConstantSDNode>(Val: TrueV);
9136 SDValue ConstVal = IsCZERO_NEZ ? TrueV : FalseV;
9137 SDValue RegV = IsCZERO_NEZ ? FalseV : TrueV;
9138 int64_t RawConstVal = cast<ConstantSDNode>(Val&: ConstVal)->getSExtValue();
9139 // Fall back to XORI if Const == -0x800
9140 if (RawConstVal == -0x800) {
9141 SDValue XorOp = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: RegV, N2: ConstVal);
9142 SDValue CMOV =
9143 DAG.getNode(Opcode: IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
9144 DL, VT, N1: XorOp, N2: CondV);
9145 return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: CMOV, N2: ConstVal);
9146 }
9147 // Efficient only if the constant and its negation fit into `ADDI`.
9148 // Prefer Add/Sub over Xor since it can be compressed for small immediates.
9149 if (isInt<12>(x: RawConstVal)) {
9150 SDValue SubOp = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: RegV, N2: ConstVal);
9151 SDValue CMOV =
9152 DAG.getNode(Opcode: IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
9153 DL, VT, N1: SubOp, N2: CondV);
9154 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: CMOV, N2: ConstVal);
9155 }
9156 }
9157
9158 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
9159 // Unless we have the short forward branch optimization.
9160 if (!Subtarget.hasConditionalMoveFusion())
9161 return DAG.getNode(
9162 Opcode: ISD::OR, DL, VT,
9163 N1: DAG.getNode(Opcode: RISCVISD::CZERO_EQZ, DL, VT, N1: TrueV, N2: CondV),
9164 N2: DAG.getNode(Opcode: RISCVISD::CZERO_NEZ, DL, VT, N1: FalseV, N2: CondV));
9165 }
9166
9167 if (SDValue V = combineSelectToBinOp(N: Op.getNode(), DAG, Subtarget))
9168 return V;
9169
9170 if (Op.hasOneUse()) {
9171 unsigned UseOpc = Op->user_begin()->getOpcode();
9172 if (isBinOp(Opcode: UseOpc) && DAG.isSafeToSpeculativelyExecute(Opcode: UseOpc)) {
9173 SDNode *BinOp = *Op->user_begin();
9174 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(BO: *Op->user_begin(),
9175 DAG, Subtarget)) {
9176 DAG.ReplaceAllUsesWith(From: BinOp, To: &NewSel);
9177 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
9178 // may return a constant node and cause a crash in lowerSELECT.
9179 if (NewSel.getOpcode() == ISD::SELECT)
9180 return lowerSELECT(Op: NewSel, DAG);
9181 return NewSel;
9182 }
9183 }
9184 }
9185
9186 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
9187 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
9188 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(Val&: TrueV);
9189 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(Val&: FalseV);
9190 if (FPTV && FPFV) {
9191 if (FPTV->isExactlyValue(V: 1.0) && FPFV->isExactlyValue(V: 0.0))
9192 return DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT, Operand: CondV);
9193 if (FPTV->isExactlyValue(V: 0.0) && FPFV->isExactlyValue(V: 1.0)) {
9194 SDValue XOR = DAG.getNode(Opcode: ISD::XOR, DL, VT: XLenVT, N1: CondV,
9195 N2: DAG.getConstant(Val: 1, DL, VT: XLenVT));
9196 return DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT, Operand: XOR);
9197 }
9198 }
9199
9200 // If the condition is not an integer SETCC which operates on XLenVT, we need
9201 // to emit a RISCVISD::SELECT_CC comparing the condition to zero, i.e.:
9202 // (select condv, truev, falsev)
9203 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
9204 if (CondV.getOpcode() != ISD::SETCC ||
9205 CondV.getOperand(i: 0).getSimpleValueType() != XLenVT) {
9206 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: XLenVT);
9207 SDValue SetNE = DAG.getCondCode(Cond: ISD::SETNE);
9208
9209 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
9210
9211 return DAG.getNode(Opcode: RISCVISD::SELECT_CC, DL, VT, Ops);
9212 }
9213
9214 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
9215 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
9216 // advantage of the integer compare+branch instructions. i.e.:
9217 // (select (setcc lhs, rhs, cc), truev, falsev)
9218 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
9219 SDValue LHS = CondV.getOperand(i: 0);
9220 SDValue RHS = CondV.getOperand(i: 1);
9221 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: CondV.getOperand(i: 2))->get();
9222
9223 // Special case for a select of 2 constants that have a difference of 1.
9224 // Normally this is done by DAGCombine, but if the select is introduced by
9225 // type legalization or op legalization, we miss it. Restricting to SETLT
9226 // case for now because that is what signed saturating add/sub need.
9227 // FIXME: We don't need the condition to be SETLT or even a SETCC,
9228 // but we would probably want to swap the true/false values if the condition
9229 // is SETGE/SETLE to avoid an XORI.
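  // For example, (select (setlt a, b), 5, 4) -> (add cond, 4) and
  // (select (setlt a, b), 4, 5) -> (sub 5, cond), where cond is 0 or 1.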
9230 if (isa<ConstantSDNode>(Val: TrueV) && isa<ConstantSDNode>(Val: FalseV) &&
9231 CCVal == ISD::SETLT) {
9232 const APInt &TrueVal = TrueV->getAsAPIntVal();
9233 const APInt &FalseVal = FalseV->getAsAPIntVal();
9234 if (TrueVal - 1 == FalseVal)
9235 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: CondV, N2: FalseV);
9236 if (TrueVal + 1 == FalseVal)
9237 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: FalseV, N2: CondV);
9238 }
9239
9240 translateSetCCForBranch(DL, LHS, RHS, CC&: CCVal, DAG, Subtarget);
9241 // 1 < x ? x : 1 -> 0 < x ? x : 1
9242 if (isOneConstant(V: LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
9243 RHS == TrueV && LHS == FalseV) {
9244 LHS = DAG.getConstant(Val: 0, DL, VT);
9245 // 0 <u x is the same as x != 0.
9246 if (CCVal == ISD::SETULT) {
9247 std::swap(a&: LHS, b&: RHS);
9248 CCVal = ISD::SETNE;
9249 }
9250 }
9251
9252 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
9253 if (isAllOnesConstant(V: RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
9254 RHS == FalseV) {
9255 RHS = DAG.getConstant(Val: 0, DL, VT);
9256 }
9257
9258 SDValue TargetCC = DAG.getCondCode(Cond: CCVal);
9259
9260 if (isa<ConstantSDNode>(Val: TrueV) && !isa<ConstantSDNode>(Val: FalseV)) {
9261 // (select (setcc lhs, rhs, CC), constant, falsev)
9262 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
9263 std::swap(a&: TrueV, b&: FalseV);
9264 TargetCC = DAG.getCondCode(Cond: ISD::getSetCCInverse(Operation: CCVal, Type: LHS.getValueType()));
9265 }
9266
9267 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
9268 return DAG.getNode(Opcode: RISCVISD::SELECT_CC, DL, VT, Ops);
9269}
9270
9271SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
9272 SDValue CondV = Op.getOperand(i: 1);
9273 SDLoc DL(Op);
9274 MVT XLenVT = Subtarget.getXLenVT();
9275
9276 if (CondV.getOpcode() == ISD::SETCC &&
9277 CondV.getOperand(i: 0).getValueType() == XLenVT) {
9278 SDValue LHS = CondV.getOperand(i: 0);
9279 SDValue RHS = CondV.getOperand(i: 1);
9280 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: CondV.getOperand(i: 2))->get();
9281
9282 translateSetCCForBranch(DL, LHS, RHS, CC&: CCVal, DAG, Subtarget);
9283
9284 SDValue TargetCC = DAG.getCondCode(Cond: CCVal);
9285 return DAG.getNode(Opcode: RISCVISD::BR_CC, DL, VT: Op.getValueType(), N1: Op.getOperand(i: 0),
9286 N2: LHS, N3: RHS, N4: TargetCC, N5: Op.getOperand(i: 2));
9287 }
9288
9289 return DAG.getNode(Opcode: RISCVISD::BR_CC, DL, VT: Op.getValueType(), N1: Op.getOperand(i: 0),
9290 N2: CondV, N3: DAG.getConstant(Val: 0, DL, VT: XLenVT),
9291 N4: DAG.getCondCode(Cond: ISD::SETNE), N5: Op.getOperand(i: 2));
9292}
9293
9294SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
9295 MachineFunction &MF = DAG.getMachineFunction();
9296 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
9297
9298 SDLoc DL(Op);
9299 SDValue FI = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(),
9300 VT: getPointerTy(DL: MF.getDataLayout()));
9301
9302 // vastart just stores the address of the VarArgsFrameIndex slot into the
9303 // memory location argument.
9304 const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue();
9305 return DAG.getStore(Chain: Op.getOperand(i: 0), dl: DL, Val: FI, Ptr: Op.getOperand(i: 1),
9306 PtrInfo: MachinePointerInfo(SV));
9307}
9308
9309SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
9310 SelectionDAG &DAG) const {
9311 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
9312 MachineFunction &MF = DAG.getMachineFunction();
9313 MachineFrameInfo &MFI = MF.getFrameInfo();
9314 MFI.setFrameAddressIsTaken(true);
9315 Register FrameReg = RI.getFrameRegister(MF);
9316 int XLenInBytes = Subtarget.getXLen() / 8;
9317
9318 EVT VT = Op.getValueType();
9319 SDLoc DL(Op);
9320 SDValue FrameAddr = DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: DL, Reg: FrameReg, VT);
9321 unsigned Depth = Op.getConstantOperandVal(i: 0);
9322 while (Depth--) {
9323 int Offset = -(XLenInBytes * 2);
9324 SDValue Ptr = DAG.getNode(
9325 Opcode: ISD::ADD, DL, VT, N1: FrameAddr,
9326 N2: DAG.getSignedConstant(Val: Offset, DL, VT: getPointerTy(DL: DAG.getDataLayout())));
9327 FrameAddr =
9328 DAG.getLoad(VT, dl: DL, Chain: DAG.getEntryNode(), Ptr, PtrInfo: MachinePointerInfo());
9329 }
9330 return FrameAddr;
9331}
9332
9333SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
9334 SelectionDAG &DAG) const {
9335 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
9336 MachineFunction &MF = DAG.getMachineFunction();
9337 MachineFrameInfo &MFI = MF.getFrameInfo();
9338 MFI.setReturnAddressIsTaken(true);
9339 MVT XLenVT = Subtarget.getXLenVT();
9340 int XLenInBytes = Subtarget.getXLen() / 8;
9341
9342 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
9343 return SDValue();
9344
9345 EVT VT = Op.getValueType();
9346 SDLoc DL(Op);
9347 unsigned Depth = Op.getConstantOperandVal(i: 0);
9348 if (Depth) {
9349 int Off = -XLenInBytes;
9350 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
9351 SDValue Offset = DAG.getSignedConstant(Val: Off, DL, VT);
9352 return DAG.getLoad(VT, dl: DL, Chain: DAG.getEntryNode(),
9353 Ptr: DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: FrameAddr, N2: Offset),
9354 PtrInfo: MachinePointerInfo());
9355 }
9356
9357 // Return the value of the return address register, marking it an implicit
9358 // live-in.
9359 Register Reg = MF.addLiveIn(PReg: RI.getRARegister(), RC: getRegClassFor(VT: XLenVT));
9360 return DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: DL, Reg, VT: XLenVT);
9361}
9362
9363SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
9364 SelectionDAG &DAG) const {
9365 SDLoc DL(Op);
9366 SDValue Lo = Op.getOperand(i: 0);
9367 SDValue Hi = Op.getOperand(i: 1);
9368 SDValue Shamt = Op.getOperand(i: 2);
9369 EVT VT = Lo.getValueType();
9370
9371 // if Shamt-XLEN < 0: // Shamt < XLEN
9372 // Lo = Lo << Shamt
9373 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
9374 // else:
9375 // Lo = 0
9376 // Hi = Lo << (Shamt-XLEN)
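// Illustrative check of the formulas above (assuming XLEN=32): for Shamt=40,
// Shamt-XLEN=8 >= 0, so Lo=0 and Hi=Lo<<8; for Shamt=4, Lo=Lo<<4 and
// Hi=(Hi<<4)|(Lo>>u28), where Lo>>u28 is computed as (Lo>>u1)>>u(31-4) so the
// shift amount stays below XLEN even when Shamt is 0.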
9377
9378 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
9379 SDValue One = DAG.getConstant(Val: 1, DL, VT);
9380 SDValue MinusXLen = DAG.getSignedConstant(Val: -(int)Subtarget.getXLen(), DL, VT);
9381 SDValue XLenMinus1 = DAG.getConstant(Val: Subtarget.getXLen() - 1, DL, VT);
9382 SDValue ShamtMinusXLen = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shamt, N2: MinusXLen);
9383 SDValue XLenMinus1Shamt = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: XLenMinus1, N2: Shamt);
9384
9385 SDValue LoTrue = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Lo, N2: Shamt);
9386 SDValue ShiftRight1Lo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo, N2: One);
9387 SDValue ShiftRightLo =
9388 DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: ShiftRight1Lo, N2: XLenMinus1Shamt);
9389 SDValue ShiftLeftHi = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi, N2: Shamt);
9390 SDValue HiTrue = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShiftLeftHi, N2: ShiftRightLo);
9391 SDValue HiFalse = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Lo, N2: ShamtMinusXLen);
9392
9393 SDValue CC = DAG.getSetCC(DL, VT, LHS: ShamtMinusXLen, RHS: Zero, Cond: ISD::SETLT);
9394
9395 Lo = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: LoTrue, N3: Zero);
9396 Hi = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: HiTrue, N3: HiFalse);
9397
9398 SDValue Parts[2] = {Lo, Hi};
9399 return DAG.getMergeValues(Ops: Parts, dl: DL);
9400}
9401
9402SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
9403 bool IsSRA) const {
9404 SDLoc DL(Op);
9405 SDValue Lo = Op.getOperand(i: 0);
9406 SDValue Hi = Op.getOperand(i: 1);
9407 SDValue Shamt = Op.getOperand(i: 2);
9408 EVT VT = Lo.getValueType();
9409
9410 // SRA expansion:
9411 // if Shamt-XLEN < 0: // Shamt < XLEN
9412 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
9413 // Hi = Hi >>s Shamt
9414 // else:
9415 // Lo = Hi >>s (Shamt-XLEN);
9416 // Hi = Hi >>s (XLEN-1)
9417 //
9418 // SRL expansion:
9419 // if Shamt-XLEN < 0: // Shamt < XLEN
9420 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
9421 // Hi = Hi >>u Shamt
9422 // else:
9423 // Lo = Hi >>u (Shamt-XLEN);
9424 // Hi = 0;
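// Illustrative check of the SRL case (assuming XLEN=32, Shamt=4):
// Lo = (Lo >>u 4) | ((Hi << 1) << 27) == (Lo >>u 4) | (Hi << 28) and
// Hi = Hi >>u 4; the (Hi << 1) << (XLEN-1 - Shamt) form keeps the shift
// amount below XLEN even when Shamt is 0.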
9425
9426 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
9427
9428 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
9429 SDValue One = DAG.getConstant(Val: 1, DL, VT);
9430 SDValue MinusXLen = DAG.getSignedConstant(Val: -(int)Subtarget.getXLen(), DL, VT);
9431 SDValue XLenMinus1 = DAG.getConstant(Val: Subtarget.getXLen() - 1, DL, VT);
9432 SDValue ShamtMinusXLen = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shamt, N2: MinusXLen);
9433 SDValue XLenMinus1Shamt = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: XLenMinus1, N2: Shamt);
9434
9435 SDValue ShiftRightLo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo, N2: Shamt);
9436 SDValue ShiftLeftHi1 = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi, N2: One);
9437 SDValue ShiftLeftHi =
9438 DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: ShiftLeftHi1, N2: XLenMinus1Shamt);
9439 SDValue LoTrue = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShiftRightLo, N2: ShiftLeftHi);
9440 SDValue HiTrue = DAG.getNode(Opcode: ShiftRightOp, DL, VT, N1: Hi, N2: Shamt);
9441 SDValue LoFalse = DAG.getNode(Opcode: ShiftRightOp, DL, VT, N1: Hi, N2: ShamtMinusXLen);
9442 SDValue HiFalse =
9443 IsSRA ? DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: Hi, N2: XLenMinus1) : Zero;
9444
9445 SDValue CC = DAG.getSetCC(DL, VT, LHS: ShamtMinusXLen, RHS: Zero, Cond: ISD::SETLT);
9446
9447 Lo = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: LoTrue, N3: LoFalse);
9448 Hi = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: HiTrue, N3: HiFalse);
9449
9450 SDValue Parts[2] = {Lo, Hi};
9451 return DAG.getMergeValues(Ops: Parts, dl: DL);
9452}
9453
9454// Lower splats of i1 types to SETCC. For each mask vector type, we have a
9455// legal equivalently-sized i8 type, so we can use that as a go-between.
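// For a non-constant splat, the sequence built below is, roughly:
//   t = and SplatVal, 1
//   v = splat t into a vXi8 vector
//   mask = setcc v, 0, ne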
9456SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
9457 SelectionDAG &DAG) const {
9458 SDLoc DL(Op);
9459 MVT VT = Op.getSimpleValueType();
9460 SDValue SplatVal = Op.getOperand(i: 0);
9461 // All-zeros or all-ones splats are handled specially.
9462 if (ISD::isConstantSplatVectorAllOnes(N: Op.getNode())) {
9463 SDValue VL = getDefaultScalableVLOps(VecVT: VT, DL, DAG, Subtarget).second;
9464 return DAG.getNode(Opcode: RISCVISD::VMSET_VL, DL, VT, Operand: VL);
9465 }
9466 if (ISD::isConstantSplatVectorAllZeros(N: Op.getNode())) {
9467 SDValue VL = getDefaultScalableVLOps(VecVT: VT, DL, DAG, Subtarget).second;
9468 return DAG.getNode(Opcode: RISCVISD::VMCLR_VL, DL, VT, Operand: VL);
9469 }
9470 MVT InterVT = VT.changeVectorElementType(EltVT: MVT::i8);
9471 SplatVal = DAG.getNode(Opcode: ISD::AND, DL, VT: SplatVal.getValueType(), N1: SplatVal,
9472 N2: DAG.getConstant(Val: 1, DL, VT: SplatVal.getValueType()));
9473 SDValue LHS = DAG.getSplatVector(VT: InterVT, DL, Op: SplatVal);
9474 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: InterVT);
9475 return DAG.getSetCC(DL, VT, LHS, RHS: Zero, Cond: ISD::SETNE);
9476}
9477
9478// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
9479// illegal (currently only vXi64 RV32).
9480// FIXME: We could also catch non-constant sign-extended i32 values and lower
9481// them to VMV_V_X_VL.
9482SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
9483 SelectionDAG &DAG) const {
9484 SDLoc DL(Op);
9485 MVT VecVT = Op.getSimpleValueType();
9486 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
9487 "Unexpected SPLAT_VECTOR_PARTS lowering");
9488
9489 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
9490 SDValue Lo = Op.getOperand(i: 0);
9491 SDValue Hi = Op.getOperand(i: 1);
9492
9493 MVT ContainerVT = VecVT;
9494 if (VecVT.isFixedLengthVector())
9495 ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
9496
9497 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
9498
9499 SDValue Res =
9500 splatPartsI64WithVL(DL, VT: ContainerVT, Passthru: SDValue(), Lo, Hi, VL, DAG);
9501
9502 if (VecVT.isFixedLengthVector())
9503 Res = convertFromScalableVector(VT: VecVT, V: Res, DAG, Subtarget);
9504
9505 return Res;
9506}
9507
9508// Custom-lower extensions from mask vectors by using a vselect either with 1
9509// for zero/any-extension or -1 for sign-extension:
9510// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
9511// Note that any-extension is lowered identically to zero-extension.
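// E.g. sign-extending an nxv2i1 mask to nxv2i32 becomes, roughly,
// (vselect mask, splat(-1), splat(0)), which typically selects to a zero
// splat followed by a vmerge of -1 under the mask.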
9512SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
9513 int64_t ExtTrueVal) const {
9514 SDLoc DL(Op);
9515 MVT VecVT = Op.getSimpleValueType();
9516 SDValue Src = Op.getOperand(i: 0);
9517 // Only custom-lower extensions from mask types
9518 assert(Src.getValueType().isVector() &&
9519 Src.getValueType().getVectorElementType() == MVT::i1);
9520
9521 if (VecVT.isScalableVector()) {
9522 SDValue SplatZero = DAG.getConstant(Val: 0, DL, VT: VecVT);
9523 SDValue SplatTrueVal = DAG.getSignedConstant(Val: ExtTrueVal, DL, VT: VecVT);
9524 if (Src.getOpcode() == ISD::XOR &&
9525 ISD::isConstantSplatVectorAllOnes(N: Src.getOperand(i: 1).getNode()))
9526 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: VecVT, N1: Src.getOperand(i: 0), N2: SplatZero,
9527 N3: SplatTrueVal);
9528 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: VecVT, N1: Src, N2: SplatTrueVal, N3: SplatZero);
9529 }
9530
9531 MVT ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
9532 MVT I1ContainerVT =
9533 MVT::getVectorVT(VT: MVT::i1, EC: ContainerVT.getVectorElementCount());
9534
9535 SDValue CC = convertToScalableVector(VT: I1ContainerVT, V: Src, DAG, Subtarget);
9536
9537 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
9538
9539 MVT XLenVT = Subtarget.getXLenVT();
9540 SDValue SplatZero = DAG.getConstant(Val: 0, DL, VT: XLenVT);
9541 SDValue SplatTrueVal = DAG.getSignedConstant(Val: ExtTrueVal, DL, VT: XLenVT);
9542
9543 if (Src.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
9544 SDValue Xor = Src.getOperand(i: 0);
9545 if (Xor.getOpcode() == RISCVISD::VMXOR_VL) {
9546 SDValue ScalableOnes = Xor.getOperand(i: 1);
9547 if (ScalableOnes.getOpcode() == ISD::INSERT_SUBVECTOR &&
9548 ScalableOnes.getOperand(i: 0).isUndef() &&
9549 ISD::isConstantSplatVectorAllOnes(
9550 N: ScalableOnes.getOperand(i: 1).getNode())) {
9551 CC = Xor.getOperand(i: 0);
9552 std::swap(a&: SplatZero, b&: SplatTrueVal);
9553 }
9554 }
9555 }
9556
9557 SplatZero = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT,
9558 N1: DAG.getUNDEF(VT: ContainerVT), N2: SplatZero, N3: VL);
9559 SplatTrueVal = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT,
9560 N1: DAG.getUNDEF(VT: ContainerVT), N2: SplatTrueVal, N3: VL);
9561 SDValue Select =
9562 DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: ContainerVT, N1: CC, N2: SplatTrueVal,
9563 N3: SplatZero, N4: DAG.getUNDEF(VT: ContainerVT), N5: VL);
9564
9565 return convertFromScalableVector(VT: VecVT, V: Select, DAG, Subtarget);
9566}
9567
9568SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
9569 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
9570 MVT ExtVT = Op.getSimpleValueType();
9571 // Only custom-lower extensions from fixed-length vector types.
9572 if (!ExtVT.isFixedLengthVector())
9573 return Op;
9574 MVT VT = Op.getOperand(i: 0).getSimpleValueType();
9575 // Grab the canonical container type for the extended type. Infer the smaller
9576 // type from that to ensure the same number of vector elements, as we know
9577 // the LMUL will be sufficient to hold the smaller type.
9578 MVT ContainerExtVT = getContainerForFixedLengthVector(VT: ExtVT);
9579 // Build the narrower source container type manually, reusing the extended
9580 // container's element count so source and dest have the same number of
9580 // vector elements.
9581 MVT ContainerVT = MVT::getVectorVT(VT: VT.getVectorElementType(),
9582 EC: ContainerExtVT.getVectorElementCount());
9583
9584 SDValue Op1 =
9585 convertToScalableVector(VT: ContainerVT, V: Op.getOperand(i: 0), DAG, Subtarget);
9586
9587 SDLoc DL(Op);
9588 auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
9589
9590 SDValue Ext = DAG.getNode(Opcode: ExtendOpc, DL, VT: ContainerExtVT, N1: Op1, N2: Mask, N3: VL);
9591
9592 return convertFromScalableVector(VT: ExtVT, V: Ext, DAG, Subtarget);
9593}
9594
9595// Custom-lower truncations from vectors to mask vectors by using a mask and a
9596// setcc operation:
9597// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
9598SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
9599 SelectionDAG &DAG) const {
9600 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
9601 SDLoc DL(Op);
9602 EVT MaskVT = Op.getValueType();
9603 // Only expect to custom-lower truncations to mask types
9604 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
9605 "Unexpected type for vector mask lowering");
9606 SDValue Src = Op.getOperand(i: 0);
9607 MVT VecVT = Src.getSimpleValueType();
9608 SDValue Mask, VL;
9609 if (IsVPTrunc) {
9610 Mask = Op.getOperand(i: 1);
9611 VL = Op.getOperand(i: 2);
9612 }
9613 // If this is a fixed vector, we need to convert it to a scalable vector.
9614 MVT ContainerVT = VecVT;
9615
9616 if (VecVT.isFixedLengthVector()) {
9617 ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
9618 Src = convertToScalableVector(VT: ContainerVT, V: Src, DAG, Subtarget);
9619 if (IsVPTrunc) {
9620 MVT MaskContainerVT =
9621 getContainerForFixedLengthVector(VT: Mask.getSimpleValueType());
9622 Mask = convertToScalableVector(VT: MaskContainerVT, V: Mask, DAG, Subtarget);
9623 }
9624 }
9625
9626 if (!IsVPTrunc) {
9627 std::tie(args&: Mask, args&: VL) =
9628 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9629 }
9630
9631 SDValue SplatOne = DAG.getConstant(Val: 1, DL, VT: Subtarget.getXLenVT());
9632 SDValue SplatZero = DAG.getConstant(Val: 0, DL, VT: Subtarget.getXLenVT());
9633
9634 SplatOne = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT,
9635 N1: DAG.getUNDEF(VT: ContainerVT), N2: SplatOne, N3: VL);
9636 SplatZero = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT,
9637 N1: DAG.getUNDEF(VT: ContainerVT), N2: SplatZero, N3: VL);
9638
9639 MVT MaskContainerVT = ContainerVT.changeVectorElementType(EltVT: MVT::i1);
9640 SDValue Trunc = DAG.getNode(Opcode: RISCVISD::AND_VL, DL, VT: ContainerVT, N1: Src, N2: SplatOne,
9641 N3: DAG.getUNDEF(VT: ContainerVT), N4: Mask, N5: VL);
9642 Trunc = DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: MaskContainerVT,
9643 Ops: {Trunc, SplatZero, DAG.getCondCode(Cond: ISD::SETNE),
9644 DAG.getUNDEF(VT: MaskContainerVT), Mask, VL});
9645 if (MaskVT.isFixedLengthVector())
9646 Trunc = convertFromScalableVector(VT: MaskVT, V: Trunc, DAG, Subtarget);
9647 return Trunc;
9648}
9649
9650SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
9651 SelectionDAG &DAG) const {
9652 unsigned Opc = Op.getOpcode();
9653 bool IsVPTrunc = Opc == ISD::VP_TRUNCATE;
9654 SDLoc DL(Op);
9655
9656 MVT VT = Op.getSimpleValueType();
9657 // Only custom-lower vector truncates
9658 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
9659
9660 // Truncates to mask types are handled differently
9661 if (VT.getVectorElementType() == MVT::i1)
9662 return lowerVectorMaskTruncLike(Op, DAG);
9663
9664 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
9665 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
9666 // truncate by one power of two at a time.
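// For example, an i64->i8 element truncate becomes a chain of three such
// nodes (i64->i32->i16->i8), each of which typically selects to a narrowing
// shift (vnsrl) by zero.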
9667 MVT DstEltVT = VT.getVectorElementType();
9668
9669 SDValue Src = Op.getOperand(i: 0);
9670 MVT SrcVT = Src.getSimpleValueType();
9671 MVT SrcEltVT = SrcVT.getVectorElementType();
9672
9673 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
9674 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
9675 "Unexpected vector truncate lowering");
9676
9677 MVT ContainerVT = SrcVT;
9678 SDValue Mask, VL;
9679 if (IsVPTrunc) {
9680 Mask = Op.getOperand(i: 1);
9681 VL = Op.getOperand(i: 2);
9682 }
9683 if (SrcVT.isFixedLengthVector()) {
9684 ContainerVT = getContainerForFixedLengthVector(VT: SrcVT);
9685 Src = convertToScalableVector(VT: ContainerVT, V: Src, DAG, Subtarget);
9686 if (IsVPTrunc) {
9687 MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT);
9688 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
9689 }
9690 }
9691
9692 SDValue Result = Src;
9693 if (!IsVPTrunc) {
9694 std::tie(args&: Mask, args&: VL) =
9695 getDefaultVLOps(VecVT: SrcVT, ContainerVT, DL, DAG, Subtarget);
9696 }
9697
9698 unsigned NewOpc;
9699 if (Opc == ISD::TRUNCATE_SSAT_S)
9700 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
9701 else if (Opc == ISD::TRUNCATE_USAT_U)
9702 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
9703 else
9704 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL;
9705
9706 do {
9707 SrcEltVT = MVT::getIntegerVT(BitWidth: SrcEltVT.getSizeInBits() / 2);
9708 MVT ResultVT = ContainerVT.changeVectorElementType(EltVT: SrcEltVT);
9709 Result = DAG.getNode(Opcode: NewOpc, DL, VT: ResultVT, N1: Result, N2: Mask, N3: VL);
9710 } while (SrcEltVT != DstEltVT);
9711
9712 if (SrcVT.isFixedLengthVector())
9713 Result = convertFromScalableVector(VT, V: Result, DAG, Subtarget);
9714
9715 return Result;
9716}
9717
9718SDValue
9719RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
9720 SelectionDAG &DAG) const {
9721 SDLoc DL(Op);
9722 SDValue Chain = Op.getOperand(i: 0);
9723 SDValue Src = Op.getOperand(i: 1);
9724 MVT VT = Op.getSimpleValueType();
9725 MVT SrcVT = Src.getSimpleValueType();
9726 MVT ContainerVT = VT;
9727 if (VT.isFixedLengthVector()) {
9728 MVT SrcContainerVT = getContainerForFixedLengthVector(VT: SrcVT);
9729 ContainerVT =
9730 SrcContainerVT.changeVectorElementType(EltVT: VT.getVectorElementType());
9731 Src = convertToScalableVector(VT: SrcContainerVT, V: Src, DAG, Subtarget);
9732 }
9733
9734 auto [Mask, VL] = getDefaultVLOps(VecVT: SrcVT, ContainerVT, DL, DAG, Subtarget);
9735
9736 // RVV can only widen/truncate fp values to types double/half the size of the source.
9737 if ((VT.getVectorElementType() == MVT::f64 &&
9738 (SrcVT.getVectorElementType() == MVT::f16 ||
9739 SrcVT.getVectorElementType() == MVT::bf16)) ||
9740 ((VT.getVectorElementType() == MVT::f16 ||
9741 VT.getVectorElementType() == MVT::bf16) &&
9742 SrcVT.getVectorElementType() == MVT::f64)) {
9743 // For double rounding, the intermediate rounding should be round-to-odd.
9744 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
9745 ? RISCVISD::STRICT_FP_EXTEND_VL
9746 : RISCVISD::STRICT_VFNCVT_ROD_VL;
9747 MVT InterVT = ContainerVT.changeVectorElementType(EltVT: MVT::f32);
9748 Src = DAG.getNode(Opcode: InterConvOpc, DL, VTList: DAG.getVTList(VT1: InterVT, VT2: MVT::Other),
9749 N1: Chain, N2: Src, N3: Mask, N4: VL);
9750 Chain = Src.getValue(R: 1);
9751 }
9752
9753 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
9754 ? RISCVISD::STRICT_FP_EXTEND_VL
9755 : RISCVISD::STRICT_FP_ROUND_VL;
9756 SDValue Res = DAG.getNode(Opcode: ConvOpc, DL, VTList: DAG.getVTList(VT1: ContainerVT, VT2: MVT::Other),
9757 N1: Chain, N2: Src, N3: Mask, N4: VL);
9758 if (VT.isFixedLengthVector()) {
9759 // StrictFP operations have two result values. Their lowered result should
9760 // have the same number of results.
9761 SDValue SubVec = convertFromScalableVector(VT, V: Res, DAG, Subtarget);
9762 Res = DAG.getMergeValues(Ops: {SubVec, Res.getValue(R: 1)}, dl: DL);
9763 }
9764 return Res;
9765}
9766
9767SDValue
9768RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
9769 SelectionDAG &DAG) const {
9770 bool IsVP =
9771 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
9772 bool IsExtend =
9773 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
9774 // RVV can only truncate fp values to types half the size of the source. We
9775 // custom-lower f64->f16 rounds via RVV's round-to-odd float
9776 // conversion instruction.
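// Sketch of that two-step narrowing (assuming the usual selection): first
// vfncvt.rod.f.f.w narrows f64->f32 with round-to-odd, then a second
// narrowing convert produces the f16/bf16 result; rounding to odd in the
// intermediate step avoids double-rounding errors.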
9777 SDLoc DL(Op);
9778 MVT VT = Op.getSimpleValueType();
9779
9780 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
9781
9782 SDValue Src = Op.getOperand(i: 0);
9783 MVT SrcVT = Src.getSimpleValueType();
9784
9785 bool IsDirectExtend =
9786 IsExtend && (VT.getVectorElementType() != MVT::f64 ||
9787 (SrcVT.getVectorElementType() != MVT::f16 &&
9788 SrcVT.getVectorElementType() != MVT::bf16));
9789 bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
9790 VT.getVectorElementType() != MVT::bf16) ||
9791 SrcVT.getVectorElementType() != MVT::f64);
9792
9793 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
9794
9795 // We have regular SD node patterns for direct non-VL extends.
9796 if (VT.isScalableVector() && IsDirectConv && !IsVP)
9797 return Op;
9798
9799 // Prepare any fixed-length vector operands.
9800 MVT ContainerVT = VT;
9801 SDValue Mask, VL;
9802 if (IsVP) {
9803 Mask = Op.getOperand(i: 1);
9804 VL = Op.getOperand(i: 2);
9805 }
9806 if (VT.isFixedLengthVector()) {
9807 MVT SrcContainerVT = getContainerForFixedLengthVector(VT: SrcVT);
9808 ContainerVT =
9809 SrcContainerVT.changeVectorElementType(EltVT: VT.getVectorElementType());
9810 Src = convertToScalableVector(VT: SrcContainerVT, V: Src, DAG, Subtarget);
9811 if (IsVP) {
9812 MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT);
9813 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
9814 }
9815 }
9816
9817 if (!IsVP)
9818 std::tie(args&: Mask, args&: VL) =
9819 getDefaultVLOps(VecVT: SrcVT, ContainerVT, DL, DAG, Subtarget);
9820
9821 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
9822
9823 if (IsDirectConv) {
9824 Src = DAG.getNode(Opcode: ConvOpc, DL, VT: ContainerVT, N1: Src, N2: Mask, N3: VL);
9825 if (VT.isFixedLengthVector())
9826 Src = convertFromScalableVector(VT, V: Src, DAG, Subtarget);
9827 return Src;
9828 }
9829
9830 unsigned InterConvOpc =
9831 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
9832
9833 MVT InterVT = ContainerVT.changeVectorElementType(EltVT: MVT::f32);
9834 SDValue IntermediateConv =
9835 DAG.getNode(Opcode: InterConvOpc, DL, VT: InterVT, N1: Src, N2: Mask, N3: VL);
9836 SDValue Result =
9837 DAG.getNode(Opcode: ConvOpc, DL, VT: ContainerVT, N1: IntermediateConv, N2: Mask, N3: VL);
9838 if (VT.isFixedLengthVector())
9839 return convertFromScalableVector(VT, V: Result, DAG, Subtarget);
9840 return Result;
9841}
9842
9843// Given a scalable vector type and an index into it, returns the type for the
9844// smallest subvector that the index fits in. This can be used to reduce LMUL
9845// for operations like vslidedown.
9846//
9847// E.g. with Zvl128b, index 3 in an nxv4i32 fits within the first nxv2i32.
9848static std::optional<MVT>
9849getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
9850 const RISCVSubtarget &Subtarget) {
9851 assert(VecVT.isScalableVector());
9852 const unsigned EltSize = VecVT.getScalarSizeInBits();
9853 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
9854 const unsigned MinVLMAX = VectorBitsMin / EltSize;
9855 MVT SmallerVT;
9856 if (MaxIdx < MinVLMAX)
9857 SmallerVT = RISCVTargetLowering::getM1VT(VT: VecVT);
9858 else if (MaxIdx < MinVLMAX * 2)
9859 SmallerVT =
9860 RISCVTargetLowering::getM1VT(VT: VecVT).getDoubleNumVectorElementsVT();
9861 else if (MaxIdx < MinVLMAX * 4)
9862 SmallerVT = RISCVTargetLowering::getM1VT(VT: VecVT)
9863 .getDoubleNumVectorElementsVT()
9864 .getDoubleNumVectorElementsVT();
9865 if (!SmallerVT.isValid() || !VecVT.bitsGT(VT: SmallerVT))
9866 return std::nullopt;
9867 return SmallerVT;
9868}
9869
9870static bool isValidVisniInsertExtractIndex(SDValue Idx) {
9871 auto *IdxC = dyn_cast<ConstantSDNode>(Val&: Idx);
9872 if (!IdxC || isNullConstant(V: Idx))
9873 return false;
9874 return isUInt<5>(x: IdxC->getZExtValue());
9875}
9876
9877// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
9878// first position of a vector, and that vector is slid up to the insert index.
9879// By limiting the active vector length to index+1 and merging with the
9880// original vector (with an undisturbed tail policy for elements >= VL), we
9881// achieve the desired result of leaving all elements untouched except the one
9882// at VL-1, which is replaced with the desired value.
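// A sketch of the typical generated code for an integer element (details
// vary with element type, index, and policy):
//   vsetivli zero, idx+1, e32, m1, tu, ma
//   vmv.s.x v9, a0            // value into element 0 of a temporary
//   vslideup.vi v8, v9, idx   // merge it into position idx of v8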
9883SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
9884 SelectionDAG &DAG) const {
9885 SDLoc DL(Op);
9886 MVT VecVT = Op.getSimpleValueType();
9887 MVT XLenVT = Subtarget.getXLenVT();
9888 SDValue Vec = Op.getOperand(i: 0);
9889 SDValue Val = Op.getOperand(i: 1);
9890 MVT ValVT = Val.getSimpleValueType();
9891 SDValue Idx = Op.getOperand(i: 2);
9892
9893 if (VecVT.getVectorElementType() == MVT::i1) {
9894 // FIXME: For now we just promote to an i8 vector and insert into that,
9895 // but this is probably not optimal.
9896 MVT WideVT = MVT::getVectorVT(VT: MVT::i8, EC: VecVT.getVectorElementCount());
9897 Vec = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: WideVT, Operand: Vec);
9898 Vec = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: WideVT, N1: Vec, N2: Val, N3: Idx);
9899 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: VecVT, Operand: Vec);
9900 }
9901
9902 if ((ValVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
9903 ValVT == MVT::bf16) {
9904 // If we don't have vfmv.s.f for f16/bf16, use fmv.x.h first.
9905 MVT IntVT = VecVT.changeTypeToInteger();
9906 SDValue IntInsert = DAG.getNode(
9907 Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: IntVT, N1: DAG.getBitcast(VT: IntVT, V: Vec),
9908 N2: DAG.getNode(Opcode: RISCVISD::FMV_X_ANYEXTH, DL, VT: XLenVT, Operand: Val), N3: Idx);
9909 return DAG.getBitcast(VT: VecVT, V: IntInsert);
9910 }
9911
9912 MVT ContainerVT = VecVT;
9913 // If the operand is a fixed-length vector, convert to a scalable one.
9914 if (VecVT.isFixedLengthVector()) {
9915 ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
9916 Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget);
9917 }
9918
9919 // If we know the index we're going to insert at, we can shrink Vec so that
9920 // we're performing the scalar inserts and slideup on a smaller LMUL.
9921 SDValue OrigVec = Vec;
9922 std::optional<unsigned> AlignedIdx;
9923 if (auto *IdxC = dyn_cast<ConstantSDNode>(Val&: Idx)) {
9924 const unsigned OrigIdx = IdxC->getZExtValue();
9925 // Do we know an upper bound on LMUL?
9926 if (auto ShrunkVT = getSmallestVTForIndex(VecVT: ContainerVT, MaxIdx: OrigIdx,
9927 DL, DAG, Subtarget)) {
9928 ContainerVT = *ShrunkVT;
9929 AlignedIdx = 0;
9930 }
9931
9932 // If we're compiling for an exact VLEN value, we can always perform
9933 // the insert in m1 as we can determine the register corresponding to
9934 // the index in the register group.
9935 const MVT M1VT = RISCVTargetLowering::getM1VT(VT: ContainerVT);
9936 if (auto VLEN = Subtarget.getRealVLen(); VLEN && ContainerVT.bitsGT(VT: M1VT)) {
9937 EVT ElemVT = VecVT.getVectorElementType();
9938 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
9939 unsigned RemIdx = OrigIdx % ElemsPerVReg;
9940 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
9941 AlignedIdx = SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
9942 Idx = DAG.getVectorIdxConstant(Val: RemIdx, DL);
9943 ContainerVT = M1VT;
9944 }
9945
9946 if (AlignedIdx)
9947 Vec = DAG.getExtractSubvector(DL, VT: ContainerVT, Vec, Idx: *AlignedIdx);
9948 }
9949
9950 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
9951 // Even i64-element vectors on RV32 can be lowered without scalar
9952 // legalization if the most-significant 32 bits of the value are not affected
9953 // by the sign-extension of the lower 32 bits.
9954 // TODO: We could also catch sign extensions of a 32-bit value.
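// E.g. inserting the i64 constant 7 (or -1) on RV32 can take the 32-bit
// path, since sign-extending the low 32 bits reproduces the full value,
// whereas 0x100000000 cannot and is split below.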
9955 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
9956 const auto *CVal = cast<ConstantSDNode>(Val);
9957 if (isInt<32>(x: CVal->getSExtValue())) {
9958 IsLegalInsert = true;
9959 Val = DAG.getSignedConstant(Val: CVal->getSExtValue(), DL, VT: MVT::i32);
9960 }
9961 }
9962
9963 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9964
9965 SDValue ValInVec;
9966
9967 if (IsLegalInsert) {
9968 unsigned Opc =
9969 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
9970 if (isNullConstant(V: Idx)) {
9971 if (!VecVT.isFloatingPoint())
9972 Val = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: Val);
9973 Vec = DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: Vec, N2: Val, N3: VL);
9974
9975 if (AlignedIdx)
9976 Vec = DAG.getInsertSubvector(DL, Vec: OrigVec, SubVec: Vec, Idx: *AlignedIdx);
9977 if (!VecVT.isFixedLengthVector())
9978 return Vec;
9979 return convertFromScalableVector(VT: VecVT, V: Vec, DAG, Subtarget);
9980 }
9981
9982 // Use ri.vinsert.v.x if available.
9983 if (Subtarget.hasVendorXRivosVisni() && VecVT.isInteger() &&
9984 isValidVisniInsertExtractIndex(Idx)) {
9985 // Tail policy applies to elements past VLMAX (by assumption Idx < VLMAX)
9986 SDValue PolicyOp =
9987 DAG.getTargetConstant(Val: RISCVVType::TAIL_AGNOSTIC, DL, VT: XLenVT);
9988 Vec = DAG.getNode(Opcode: RISCVISD::RI_VINSERT_VL, DL, VT: ContainerVT, N1: Vec, N2: Val, N3: Idx,
9989 N4: VL, N5: PolicyOp);
9990 if (AlignedIdx)
9991 Vec = DAG.getInsertSubvector(DL, Vec: OrigVec, SubVec: Vec, Idx: *AlignedIdx);
9992 if (!VecVT.isFixedLengthVector())
9993 return Vec;
9994 return convertFromScalableVector(VT: VecVT, V: Vec, DAG, Subtarget);
9995 }
9996
9997 ValInVec = lowerScalarInsert(Scalar: Val, VL, VT: ContainerVT, DL, DAG, Subtarget);
9998 } else {
9999 // On RV32, i64-element vectors must be specially handled to place the
10000 // value at element 0, by using two vslide1down instructions in sequence on
10001 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
10002 // this.
10003 SDValue ValLo, ValHi;
10004 std::tie(args&: ValLo, args&: ValHi) = DAG.SplitScalar(N: Val, DL, LoVT: MVT::i32, HiVT: MVT::i32);
10005 MVT I32ContainerVT =
10006 MVT::getVectorVT(VT: MVT::i32, EC: ContainerVT.getVectorElementCount() * 2);
10007 SDValue I32Mask =
10008 getDefaultScalableVLOps(VecVT: I32ContainerVT, DL, DAG, Subtarget).first;
10009 // Limit the active VL to two.
10010 SDValue InsertI64VL = DAG.getConstant(Val: 2, DL, VT: XLenVT);
10011 // If the Idx is 0 we can insert directly into the vector.
10012 if (isNullConstant(V: Idx)) {
10013 // First slide in the lo value, then the hi value above it. We use slide1down
10014 // to avoid the register group overlap constraint of vslide1up.
10015 ValInVec = DAG.getNode(Opcode: RISCVISD::VSLIDE1DOWN_VL, DL, VT: I32ContainerVT,
10016 N1: Vec, N2: Vec, N3: ValLo, N4: I32Mask, N5: InsertI64VL);
10017 // If the source vector is undef don't pass along the tail elements from
10018 // the previous slide1down.
10019 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
10020 ValInVec = DAG.getNode(Opcode: RISCVISD::VSLIDE1DOWN_VL, DL, VT: I32ContainerVT,
10021 N1: Tail, N2: ValInVec, N3: ValHi, N4: I32Mask, N5: InsertI64VL);
10022 // Bitcast back to the right container type.
10023 ValInVec = DAG.getBitcast(VT: ContainerVT, V: ValInVec);
10024
10025 if (AlignedIdx)
10026 ValInVec = DAG.getInsertSubvector(DL, Vec: OrigVec, SubVec: ValInVec, Idx: *AlignedIdx);
10027 if (!VecVT.isFixedLengthVector())
10028 return ValInVec;
10029 return convertFromScalableVector(VT: VecVT, V: ValInVec, DAG, Subtarget);
10030 }
10031
10032 // First slide in the lo value, then the hi value above it. We use slide1down
10033 // to avoid the register group overlap constraint of vslide1up.
10034 ValInVec = DAG.getNode(Opcode: RISCVISD::VSLIDE1DOWN_VL, DL, VT: I32ContainerVT,
10035 N1: DAG.getUNDEF(VT: I32ContainerVT),
10036 N2: DAG.getUNDEF(VT: I32ContainerVT), N3: ValLo,
10037 N4: I32Mask, N5: InsertI64VL);
10038 ValInVec = DAG.getNode(Opcode: RISCVISD::VSLIDE1DOWN_VL, DL, VT: I32ContainerVT,
10039 N1: DAG.getUNDEF(VT: I32ContainerVT), N2: ValInVec, N3: ValHi,
10040 N4: I32Mask, N5: InsertI64VL);
10041 // Bitcast back to the right container type.
10042 ValInVec = DAG.getBitcast(VT: ContainerVT, V: ValInVec);
10043 }
10044
10045 // Now that the value is in a vector, slide it into position.
10046 SDValue InsertVL =
10047 DAG.getNode(Opcode: ISD::ADD, DL, VT: XLenVT, N1: Idx, N2: DAG.getConstant(Val: 1, DL, VT: XLenVT));
10048
10049 // Use tail agnostic policy if Idx is the last index of Vec.
10050 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
10051 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Val: Idx) &&
10052 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
10053 Policy = RISCVVType::TAIL_AGNOSTIC;
10054 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, VT: ContainerVT, Passthru: Vec, Op: ValInVec,
10055 Offset: Idx, Mask, VL: InsertVL, Policy);
10056
10057 if (AlignedIdx)
10058 Slideup = DAG.getInsertSubvector(DL, Vec: OrigVec, SubVec: Slideup, Idx: *AlignedIdx);
10059 if (!VecVT.isFixedLengthVector())
10060 return Slideup;
10061 return convertFromScalableVector(VT: VecVT, V: Slideup, DAG, Subtarget);
10062}
10063
10064// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
10065// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
10066// types this is done using VMV_X_S to allow us to glean information about the
10067// sign bits of the result.
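// A sketch of the typical generated code for a non-zero constant index
// (the slidedown is skipped for index 0):
//   vsetivli zero, 1, e32, m1, ta, ma
//   vslidedown.vi v8, v8, idx
//   vmv.x.s a0, v8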
10068SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
10069 SelectionDAG &DAG) const {
10070 SDLoc DL(Op);
10071 SDValue Idx = Op.getOperand(i: 1);
10072 SDValue Vec = Op.getOperand(i: 0);
10073 EVT EltVT = Op.getValueType();
10074 MVT VecVT = Vec.getSimpleValueType();
10075 MVT XLenVT = Subtarget.getXLenVT();
10076
10077 if (VecVT.getVectorElementType() == MVT::i1) {
10078 // Use vfirst.m to extract the first bit.
10079 if (isNullConstant(V: Idx)) {
10080 MVT ContainerVT = VecVT;
10081 if (VecVT.isFixedLengthVector()) {
10082 ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
10083 Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget);
10084 }
10085 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10086 SDValue Vfirst =
10087 DAG.getNode(Opcode: RISCVISD::VFIRST_VL, DL, VT: XLenVT, N1: Vec, N2: Mask, N3: VL);
10088 SDValue Res = DAG.getSetCC(DL, VT: XLenVT, LHS: Vfirst,
10089 RHS: DAG.getConstant(Val: 0, DL, VT: XLenVT), Cond: ISD::SETEQ);
10090 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: EltVT, Operand: Res);
10091 }
10092 if (VecVT.isFixedLengthVector()) {
10093 unsigned NumElts = VecVT.getVectorNumElements();
10094 if (NumElts >= 8) {
10095 MVT WideEltVT;
10096 unsigned WidenVecLen;
10097 SDValue ExtractElementIdx;
10098 SDValue ExtractBitIdx;
10099 unsigned MaxEEW = Subtarget.getELen();
10100 MVT LargestEltVT = MVT::getIntegerVT(
10101 BitWidth: std::min(a: MaxEEW, b: unsigned(XLenVT.getSizeInBits())));
10102 if (NumElts <= LargestEltVT.getSizeInBits()) {
10103 assert(isPowerOf2_32(NumElts) &&
10104 "the number of elements should be power of 2");
10105 WideEltVT = MVT::getIntegerVT(BitWidth: NumElts);
10106 WidenVecLen = 1;
10107 ExtractElementIdx = DAG.getConstant(Val: 0, DL, VT: XLenVT);
10108 ExtractBitIdx = Idx;
10109 } else {
10110 WideEltVT = LargestEltVT;
10111 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
10112 // extract element index = index / element width
10113 ExtractElementIdx = DAG.getNode(
10114 Opcode: ISD::SRL, DL, VT: XLenVT, N1: Idx,
10115 N2: DAG.getConstant(Val: Log2_64(Value: WideEltVT.getSizeInBits()), DL, VT: XLenVT));
10116 // mask bit index = index % element width
10117 ExtractBitIdx = DAG.getNode(
10118 Opcode: ISD::AND, DL, VT: XLenVT, N1: Idx,
10119 N2: DAG.getConstant(Val: WideEltVT.getSizeInBits() - 1, DL, VT: XLenVT));
10120 }
10121 MVT WideVT = MVT::getVectorVT(VT: WideEltVT, NumElements: WidenVecLen);
10122 Vec = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: WideVT, Operand: Vec);
10123 SDValue ExtractElt = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: XLenVT,
10124 N1: Vec, N2: ExtractElementIdx);
10125 // Extract the bit from GPR.
10126 SDValue ShiftRight =
10127 DAG.getNode(Opcode: ISD::SRL, DL, VT: XLenVT, N1: ExtractElt, N2: ExtractBitIdx);
10128 SDValue Res = DAG.getNode(Opcode: ISD::AND, DL, VT: XLenVT, N1: ShiftRight,
10129 N2: DAG.getConstant(Val: 1, DL, VT: XLenVT));
10130 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: EltVT, Operand: Res);
10131 }
10132 }
10133 // Otherwise, promote to an i8 vector and extract from that.
10134 MVT WideVT = MVT::getVectorVT(VT: MVT::i8, EC: VecVT.getVectorElementCount());
10135 Vec = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: WideVT, Operand: Vec);
10136 return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: EltVT, N1: Vec, N2: Idx);
10137 }
10138
10139 if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
10140 EltVT == MVT::bf16) {
10141 // If we don't have vfmv.f.s for f16/bf16, extract to a GPR and then use fmv.h.x.
10142 MVT IntVT = VecVT.changeTypeToInteger();
10143 SDValue IntVec = DAG.getBitcast(VT: IntVT, V: Vec);
10144 SDValue IntExtract =
10145 DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: XLenVT, N1: IntVec, N2: Idx);
10146 return DAG.getNode(Opcode: RISCVISD::FMV_H_X, DL, VT: EltVT, Operand: IntExtract);
10147 }
10148
10149 // If this is a fixed vector, we need to convert it to a scalable vector.
10150 MVT ContainerVT = VecVT;
10151 if (VecVT.isFixedLengthVector()) {
10152 ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
10153 Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget);
10154 }
10155
10156 // If we're compiling for an exact VLEN value and we have a known
10157 // constant index, we can always perform the extract in m1 (or
10158 // smaller) as we can determine the register corresponding to
10159 // the index in the register group.
10160 const auto VLen = Subtarget.getRealVLen();
10161 if (auto *IdxC = dyn_cast<ConstantSDNode>(Val&: Idx);
10162 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
10163 MVT M1VT = RISCVTargetLowering::getM1VT(VT: ContainerVT);
10164 unsigned OrigIdx = IdxC->getZExtValue();
10165 EVT ElemVT = VecVT.getVectorElementType();
10166 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
10167 unsigned RemIdx = OrigIdx % ElemsPerVReg;
10168 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
10169 unsigned ExtractIdx =
10170 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
10171 Vec = DAG.getExtractSubvector(DL, VT: M1VT, Vec, Idx: ExtractIdx);
10172 Idx = DAG.getVectorIdxConstant(Val: RemIdx, DL);
10173 ContainerVT = M1VT;
10174 }
10175
10176 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
10177 // contains our index.
10178 std::optional<uint64_t> MaxIdx;
10179 if (VecVT.isFixedLengthVector())
10180 MaxIdx = VecVT.getVectorNumElements() - 1;
10181 if (auto *IdxC = dyn_cast<ConstantSDNode>(Val&: Idx))
10182 MaxIdx = IdxC->getZExtValue();
10183 if (MaxIdx) {
10184 if (auto SmallerVT =
10185 getSmallestVTForIndex(VecVT: ContainerVT, MaxIdx: *MaxIdx, DL, DAG, Subtarget)) {
10186 ContainerVT = *SmallerVT;
10187 Vec = DAG.getExtractSubvector(DL, VT: ContainerVT, Vec, Idx: 0);
10188 }
10189 }
10190
10191 // Use ri.vextract.x.v if available.
10192 // TODO: Avoid index 0 and just use the vmv.x.s
10193 if (Subtarget.hasVendorXRivosVisni() && EltVT.isInteger() &&
10194 isValidVisniInsertExtractIndex(Idx)) {
10195 SDValue Elt = DAG.getNode(Opcode: RISCVISD::RI_VEXTRACT, DL, VT: XLenVT, N1: Vec, N2: Idx);
10196 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: EltVT, Operand: Elt);
10197 }
10198
10199 // If after narrowing, the required slide is still greater than LMUL2,
10200 // fallback to generic expansion and go through the stack. This is done
10201 // for a subtle reason: extracting *all* elements out of a vector is
10202 // widely expected to be linear in vector size, but because vslidedown
10203 // is linear in LMUL, performing N extracts using vslidedown becomes
10204 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
10205 // seems to have the same problem (the store is linear in LMUL), but the
10206 // generic expansion *memoizes* the store, and thus for many extracts of
10207 // the same vector we end up with one store and a bunch of loads.
10208 // TODO: We don't have the same code for insert_vector_elt because we
10209 // have BUILD_VECTOR and handle the degenerate case there. Should we
10210 // consider adding an inverse BUILD_VECTOR node?
10211 MVT LMUL2VT =
10212 RISCVTargetLowering::getM1VT(VT: ContainerVT).getDoubleNumVectorElementsVT();
10213 if (ContainerVT.bitsGT(VT: LMUL2VT) && VecVT.isFixedLengthVector())
10214 return SDValue();
10215
10216 // If the index is 0, the vector is already in the right position.
10217 if (!isNullConstant(V: Idx)) {
10218 // Use a VL of 1 to avoid processing more elements than we need.
10219 auto [Mask, VL] = getDefaultVLOps(NumElts: 1, ContainerVT, DL, DAG, Subtarget);
10220 Vec = getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT,
10221 Passthru: DAG.getUNDEF(VT: ContainerVT), Op: Vec, Offset: Idx, Mask, VL);
10222 }
10223
10224 if (!EltVT.isInteger()) {
10225 // Floating-point extracts are handled in TableGen.
10226 return DAG.getExtractVectorElt(DL, VT: EltVT, Vec, Idx: 0);
10227 }
10228
10229 SDValue Elt0 = DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL, VT: XLenVT, Operand: Vec);
10230 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: EltVT, Operand: Elt0);
10231}
10232
10233// Some RVV intrinsics may claim that they want an integer operand to be
10234// promoted or expanded.
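// E.g. a scalar operand narrower than XLEN is promoted (sign- or
// any-extended) to XLenVT, while on RV32 an i64 scalar operand has to be
// expanded, either by truncating a value already sign-extended from 32 bits
// or by splatting the lo/hi halves (both handled in the body below).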
10235static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
10236 const RISCVSubtarget &Subtarget) {
10237 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
10238 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
10239 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
10240 "Unexpected opcode");
10241
10242 if (!Subtarget.hasVInstructions())
10243 return SDValue();
10244
10245 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
10246 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
10247 unsigned IntNo = Op.getConstantOperandVal(i: HasChain ? 1 : 0);
10248
10249 SDLoc DL(Op);
10250
10251 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
10252 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntrinsicID: IntNo);
10253 if (!II || !II->hasScalarOperand())
10254 return SDValue();
10255
10256 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
10257 assert(SplatOp < Op.getNumOperands());
10258
10259 SmallVector<SDValue, 8> Operands(Op->ops());
10260 SDValue &ScalarOp = Operands[SplatOp];
10261 MVT OpVT = ScalarOp.getSimpleValueType();
10262 MVT XLenVT = Subtarget.getXLenVT();
10263
10264 // If this isn't a scalar, or its type is XLenVT we're done.
10265 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
10266 return SDValue();
10267
10268 // Simplest case is that the operand needs to be promoted to XLenVT.
10269 if (OpVT.bitsLT(VT: XLenVT)) {
10270 // If the operand is a constant, sign extend to increase our chances
10271 // of being able to use a .vi instruction. ANY_EXTEND would become a
10272 // a zero extend and the simm5 check in isel would fail.
10273 // FIXME: Should we ignore the upper bits in isel instead?
10274 unsigned ExtOpc =
10275 isa<ConstantSDNode>(Val: ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
10276 ScalarOp = DAG.getNode(Opcode: ExtOpc, DL, VT: XLenVT, Operand: ScalarOp);
10277 return DAG.getNode(Opcode: Op->getOpcode(), DL, VTList: Op->getVTList(), Ops: Operands);
10278 }
10279
10280 // Use the previous operand to get the vXi64 VT. The result might be a mask
10281 // VT for compares. Using the previous operand assumes that the previous
10282 // operand will never have a smaller element size than a scalar operand and
10283 // that a widening operation never uses SEW=64.
10284 // NOTE: If this fails the below assert, we can probably just find the
10285 // element count from any operand or result and use it to construct the VT.
10286 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
10287 MVT VT = Op.getOperand(i: SplatOp - 1).getSimpleValueType();
10288
10289 // The more complex case is when the scalar is larger than XLenVT.
10290 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
10291 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
10292
10293 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
10294 // instruction to sign-extend since SEW>XLEN.
10295 if (DAG.ComputeNumSignBits(Op: ScalarOp) > 32) {
10296 ScalarOp = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: ScalarOp);
10297 return DAG.getNode(Opcode: Op->getOpcode(), DL, VTList: Op->getVTList(), Ops: Operands);
10298 }
10299
10300 switch (IntNo) {
10301 case Intrinsic::riscv_vslide1up:
10302 case Intrinsic::riscv_vslide1down:
10303 case Intrinsic::riscv_vslide1up_mask:
10304 case Intrinsic::riscv_vslide1down_mask: {
10305 // We need to special case these when the scalar is larger than XLen.
10306 unsigned NumOps = Op.getNumOperands();
10307 bool IsMasked = NumOps == 7;
10308
10309 // Convert the vector source to the equivalent nxvXi32 vector.
10310 MVT I32VT = MVT::getVectorVT(VT: MVT::i32, EC: VT.getVectorElementCount() * 2);
10311 SDValue Vec = DAG.getBitcast(VT: I32VT, V: Operands[2]);
10312 SDValue ScalarLo, ScalarHi;
10313 std::tie(args&: ScalarLo, args&: ScalarHi) =
10314 DAG.SplitScalar(N: ScalarOp, DL, LoVT: MVT::i32, HiVT: MVT::i32);
10315
10316 // Double the VL since we halved SEW.
10317 SDValue AVL = getVLOperand(Op);
10318 SDValue I32VL;
10319
10320 // Optimize for constant AVL
10321 if (isa<ConstantSDNode>(Val: AVL)) {
10322 const auto [MinVLMAX, MaxVLMAX] =
10323 RISCVTargetLowering::computeVLMAXBounds(VecVT: VT, Subtarget);
10324
10325 uint64_t AVLInt = AVL->getAsZExtVal();
10326 if (AVLInt <= MinVLMAX) {
10327 I32VL = DAG.getConstant(Val: 2 * AVLInt, DL, VT: XLenVT);
10328 } else if (AVLInt >= 2 * MaxVLMAX) {
10329 // Just set vl to VLMAX in this situation
10330 I32VL = DAG.getRegister(Reg: RISCV::X0, VT: XLenVT);
10331 } else {
10332 // For an AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working VL
10333 // depends on the hardware implementation, so let the vsetvli-based
10334 // code below handle it.
10335 }
10336 }
10337 if (!I32VL) {
10338 RISCVVType::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
10339 SDValue LMUL = DAG.getConstant(Val: Lmul, DL, VT: XLenVT);
10340 unsigned Sew = RISCVVType::encodeSEW(SEW: VT.getScalarSizeInBits());
10341 SDValue SEW = DAG.getConstant(Val: Sew, DL, VT: XLenVT);
10342 SDValue SETVL =
10343 DAG.getTargetConstant(Val: Intrinsic::riscv_vsetvli, DL, VT: MVT::i32);
10344 // Use a vsetvli instruction to query the VL actually granted for this AVL,
10345 // which depends on the hardware implementation.
10346 SDValue VL = DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: XLenVT, N1: SETVL, N2: AVL,
10347 N3: SEW, N4: LMUL);
10348 I32VL =
10349 DAG.getNode(Opcode: ISD::SHL, DL, VT: XLenVT, N1: VL, N2: DAG.getConstant(Val: 1, DL, VT: XLenVT));
10350 }
10351
10352 SDValue I32Mask = getAllOnesMask(VecVT: I32VT, VL: I32VL, DL, DAG);
10353
10354 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
10355 // instructions.
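// E.g. (a sketch) a 64-bit vslide1down becomes two SEW=32 vslide1downs,
// low half first and then high half (the order is reversed for vslide1up),
// so the two i32 halves form each i64 element in the right order.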
10356 SDValue Passthru;
10357 if (IsMasked)
10358 Passthru = DAG.getUNDEF(VT: I32VT);
10359 else
10360 Passthru = DAG.getBitcast(VT: I32VT, V: Operands[1]);
10361
10362 if (IntNo == Intrinsic::riscv_vslide1up ||
10363 IntNo == Intrinsic::riscv_vslide1up_mask) {
10364 Vec = DAG.getNode(Opcode: RISCVISD::VSLIDE1UP_VL, DL, VT: I32VT, N1: Passthru, N2: Vec,
10365 N3: ScalarHi, N4: I32Mask, N5: I32VL);
10366 Vec = DAG.getNode(Opcode: RISCVISD::VSLIDE1UP_VL, DL, VT: I32VT, N1: Passthru, N2: Vec,
10367 N3: ScalarLo, N4: I32Mask, N5: I32VL);
10368 } else {
10369 Vec = DAG.getNode(Opcode: RISCVISD::VSLIDE1DOWN_VL, DL, VT: I32VT, N1: Passthru, N2: Vec,
10370 N3: ScalarLo, N4: I32Mask, N5: I32VL);
10371 Vec = DAG.getNode(Opcode: RISCVISD::VSLIDE1DOWN_VL, DL, VT: I32VT, N1: Passthru, N2: Vec,
10372 N3: ScalarHi, N4: I32Mask, N5: I32VL);
10373 }
10374
10375 // Convert back to nxvXi64.
10376 Vec = DAG.getBitcast(VT, V: Vec);
10377
10378 if (!IsMasked)
10379 return Vec;
10380 // Apply mask after the operation.
10381 SDValue Mask = Operands[NumOps - 3];
10382 SDValue MaskedOff = Operands[1];
10383 // Assume Policy operand is the last operand.
10384 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
10385 // We don't need to select maskedoff if it's undef.
10386 if (MaskedOff.isUndef())
10387 return Vec;
10388 // TAMU: tail agnostic, mask undisturbed.
10389 if (Policy == RISCVVType::TAIL_AGNOSTIC)
10390 return DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT, N1: Mask, N2: Vec, N3: MaskedOff,
10391 N4: DAG.getUNDEF(VT), N5: AVL);
10392 // TUMA or TUMU: Currently we always emit the tumu policy, regardless of tuma.
10393 // This is fine because vmerge does not care about the mask policy.
10394 return DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT, N1: Mask, N2: Vec, N3: MaskedOff,
10395 N4: MaskedOff, N5: AVL);
10396 }
10397 }
10398
10399 // We need to convert the scalar to a splat vector.
10400 SDValue VL = getVLOperand(Op);
10401 assert(VL.getValueType() == XLenVT);
10402 ScalarOp = splatSplitI64WithVL(DL, VT, Passthru: SDValue(), Scalar: ScalarOp, VL, DAG);
10403 return DAG.getNode(Opcode: Op->getOpcode(), DL, VTList: Op->getVTList(), Ops: Operands);
10404}
10405
10406// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
10407// scalable vector llvm.get.vector.length for now.
10408//
10409// We need to convert from a scalable VF to a vsetvli with VLMax equal to
10410// (vscale * VF). The vscale and VF are independent of element width. We use
10411// SEW=8 for the vsetvli because it is the only element width that supports all
10412// fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
10413// (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
10414// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
10415// SEW and LMUL are better for the surrounding vector instructions.
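// Worked example (illustrative): with RVVBitsPerBlock=64 and VF=4, LMul1VF
// is 8, so the chosen LMUL is mf2 and the vsetvli uses e8, mf2, giving
// VLMAX = VLEN/16 = vscale * 4 as required.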
10416static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
10417 const RISCVSubtarget &Subtarget) {
10418 MVT XLenVT = Subtarget.getXLenVT();
10419
10420 // The smallest LMUL is only valid for the smallest element width.
10421 const unsigned ElementWidth = 8;
10422
10423 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
10424 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
10425 // We don't support VF==1 with ELEN==32.
10426 [[maybe_unused]] unsigned MinVF =
10427 RISCV::RVVBitsPerBlock / Subtarget.getELen();
10428
10429 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(Num: 2);
10430 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
10431 "Unexpected VF");
10432
10433 bool Fractional = VF < LMul1VF;
10434 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
10435 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMUL: LMulVal, Fractional);
10436 unsigned VSEW = RISCVVType::encodeSEW(SEW: ElementWidth);
10437
10438 SDLoc DL(N);
10439
10440 SDValue LMul = DAG.getTargetConstant(Val: VLMUL, DL, VT: XLenVT);
10441 SDValue Sew = DAG.getTargetConstant(Val: VSEW, DL, VT: XLenVT);
10442
10443 SDValue AVL = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: XLenVT, Operand: N->getOperand(Num: 1));
10444
10445 SDValue ID = DAG.getTargetConstant(Val: Intrinsic::riscv_vsetvli, DL, VT: XLenVT);
10446 SDValue Res =
10447 DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: XLenVT, N1: ID, N2: AVL, N3: Sew, N4: LMul);
10448 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: Res);
10449}
10450
10451static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
10452 const RISCVSubtarget &Subtarget) {
10453 SDValue Op0 = N->getOperand(Num: 1);
10454 MVT OpVT = Op0.getSimpleValueType();
10455 MVT ContainerVT = OpVT;
10456 if (OpVT.isFixedLengthVector()) {
10457 ContainerVT = getContainerForFixedLengthVector(DAG, VT: OpVT, Subtarget);
10458 Op0 = convertToScalableVector(VT: ContainerVT, V: Op0, DAG, Subtarget);
10459 }
10460 MVT XLenVT = Subtarget.getXLenVT();
10461 SDLoc DL(N);
10462 auto [Mask, VL] = getDefaultVLOps(VecVT: OpVT, ContainerVT, DL, DAG, Subtarget);
10463 SDValue Res = DAG.getNode(Opcode: RISCVISD::VFIRST_VL, DL, VT: XLenVT, N1: Op0, N2: Mask, N3: VL);
10464 if (isOneConstant(V: N->getOperand(Num: 2)))
10465 return Res;
10466
10467 // Convert -1 to VL.
10468 SDValue Setcc =
10469 DAG.getSetCC(DL, VT: XLenVT, LHS: Res, RHS: DAG.getConstant(Val: 0, DL, VT: XLenVT), Cond: ISD::SETLT);
10470 VL = DAG.getElementCount(DL, VT: XLenVT, EC: OpVT.getVectorElementCount());
10471 return DAG.getSelect(DL, VT: XLenVT, Cond: Setcc, LHS: VL, RHS: Res);
10472}
10473
10474static inline void promoteVCIXScalar(const SDValue &Op,
10475 SmallVectorImpl<SDValue> &Operands,
10476 SelectionDAG &DAG) {
10477 const RISCVSubtarget &Subtarget =
10478 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
10479
10480 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
10481 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
10482 unsigned IntNo = Op.getConstantOperandVal(i: HasChain ? 1 : 0);
10483 SDLoc DL(Op);
10484
10485 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
10486 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntrinsicID: IntNo);
10487 if (!II || !II->hasScalarOperand())
10488 return;
10489
10490 unsigned SplatOp = II->ScalarOperand + 1;
10491 assert(SplatOp < Op.getNumOperands());
10492
10493 SDValue &ScalarOp = Operands[SplatOp];
10494 MVT OpVT = ScalarOp.getSimpleValueType();
10495 MVT XLenVT = Subtarget.getXLenVT();
10496
10497 // The code below is partially copied from lowerVectorIntrinsicScalars.
10498 // If this isn't a scalar, or its type is XLenVT we're done.
10499 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
10500 return;
10501
10502 // Manually emit the promotion for the scalar operand.
10503 if (OpVT.bitsLT(VT: XLenVT)) {
10504 unsigned ExtOpc =
10505 isa<ConstantSDNode>(Val: ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
10506 ScalarOp = DAG.getNode(Opcode: ExtOpc, DL, VT: XLenVT, Operand: ScalarOp);
10507 }
10508}
10509
10510static void processVCIXOperands(SDValue &OrigOp,
10511 SmallVectorImpl<SDValue> &Operands,
10512 SelectionDAG &DAG) {
10513 promoteVCIXScalar(Op: OrigOp, Operands, DAG);
10514 const RISCVSubtarget &Subtarget =
10515 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
10516 for (SDValue &V : Operands) {
10517 EVT ValType = V.getValueType();
10518 if (ValType.isVector() && ValType.isFloatingPoint()) {
10519 MVT InterimIVT =
10520 MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: ValType.getScalarSizeInBits()),
10521 EC: ValType.getVectorElementCount());
10522 V = DAG.getBitcast(VT: InterimIVT, V);
10523 }
10524 if (ValType.isFixedLengthVector()) {
10525 MVT OpContainerVT = getContainerForFixedLengthVector(
10526 DAG, VT: V.getSimpleValueType(), Subtarget);
10527 V = convertToScalableVector(VT: OpContainerVT, V, DAG, Subtarget);
10528 }
10529 }
10530}
10531
10532// LMUL * VLEN should be greater than or equal to EGS * SEW
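// E.g. with EGS=4 and an nxv4i32 operand (SEW=32, LMUL=2), the element group
// needs 4*32=128 bits, so the check holds whenever 2*RealMinVLen >= 128,
// i.e. for Zvl64b and above.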
10533static inline bool isValidEGW(int EGS, EVT VT,
10534 const RISCVSubtarget &Subtarget) {
10535 return (Subtarget.getRealMinVLen() *
10536 VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
10537 EGS * VT.getScalarSizeInBits();
10538}
10539
10540SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10541 SelectionDAG &DAG) const {
10542 unsigned IntNo = Op.getConstantOperandVal(i: 0);
10543 SDLoc DL(Op);
10544 MVT XLenVT = Subtarget.getXLenVT();
10545
10546 switch (IntNo) {
10547 default:
10548 break; // Don't custom lower most intrinsics.
10549 case Intrinsic::riscv_tuple_insert: {
10550 SDValue Vec = Op.getOperand(i: 1);
10551 SDValue SubVec = Op.getOperand(i: 2);
10552 SDValue Index = Op.getOperand(i: 3);
10553
10554 return DAG.getNode(Opcode: RISCVISD::TUPLE_INSERT, DL, VT: Op.getValueType(), N1: Vec,
10555 N2: SubVec, N3: Index);
10556 }
10557 case Intrinsic::riscv_tuple_extract: {
10558 SDValue Vec = Op.getOperand(i: 1);
10559 SDValue Index = Op.getOperand(i: 2);
10560
10561 return DAG.getNode(Opcode: RISCVISD::TUPLE_EXTRACT, DL, VT: Op.getValueType(), N1: Vec,
10562 N2: Index);
10563 }
10564 case Intrinsic::thread_pointer: {
10565 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
10566 return DAG.getRegister(Reg: RISCV::X4, VT: PtrVT);
10567 }
10568 case Intrinsic::riscv_orc_b:
10569 case Intrinsic::riscv_brev8:
10570 case Intrinsic::riscv_sha256sig0:
10571 case Intrinsic::riscv_sha256sig1:
10572 case Intrinsic::riscv_sha256sum0:
10573 case Intrinsic::riscv_sha256sum1:
10574 case Intrinsic::riscv_sm3p0:
10575 case Intrinsic::riscv_sm3p1: {
10576 unsigned Opc;
10577 switch (IntNo) {
10578 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
10579 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
10580 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
10581 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
10582 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
10583 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
10584 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
10585 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
10586 }
10587
10588 return DAG.getNode(Opcode: Opc, DL, VT: XLenVT, Operand: Op.getOperand(i: 1));
10589 }
10590 case Intrinsic::riscv_sm4ks:
10591 case Intrinsic::riscv_sm4ed: {
10592 unsigned Opc =
10593 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
10594
10595 return DAG.getNode(Opcode: Opc, DL, VT: XLenVT, N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2),
10596 N3: Op.getOperand(i: 3));
10597 }
10598 case Intrinsic::riscv_zip:
10599 case Intrinsic::riscv_unzip: {
10600 unsigned Opc =
10601 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
10602 return DAG.getNode(Opcode: Opc, DL, VT: XLenVT, Operand: Op.getOperand(i: 1));
10603 }
10604 case Intrinsic::riscv_mopr:
10605 return DAG.getNode(Opcode: RISCVISD::MOPR, DL, VT: XLenVT, N1: Op.getOperand(i: 1),
10606 N2: Op.getOperand(i: 2));
10607
10608 case Intrinsic::riscv_moprr: {
10609 return DAG.getNode(Opcode: RISCVISD::MOPRR, DL, VT: XLenVT, N1: Op.getOperand(i: 1),
10610 N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3));
10611 }
10612 case Intrinsic::riscv_clmul:
10613 return DAG.getNode(Opcode: RISCVISD::CLMUL, DL, VT: XLenVT, N1: Op.getOperand(i: 1),
10614 N2: Op.getOperand(i: 2));
10615 case Intrinsic::riscv_clmulh:
10616 case Intrinsic::riscv_clmulr: {
10617 unsigned Opc =
10618 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
10619 return DAG.getNode(Opcode: Opc, DL, VT: XLenVT, N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2));
10620 }
10621 case Intrinsic::experimental_get_vector_length:
10622 return lowerGetVectorLength(N: Op.getNode(), DAG, Subtarget);
10623 case Intrinsic::experimental_cttz_elts:
10624 return lowerCttzElts(N: Op.getNode(), DAG, Subtarget);
10625 case Intrinsic::riscv_vmv_x_s: {
10626 SDValue Res = DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL, VT: XLenVT, Operand: Op.getOperand(i: 1));
10627 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: Op.getValueType(), Operand: Res);
10628 }
10629 case Intrinsic::riscv_vfmv_f_s:
10630 return DAG.getExtractVectorElt(DL, VT: Op.getValueType(), Vec: Op.getOperand(i: 1), Idx: 0);
10631 case Intrinsic::riscv_vmv_v_x:
10632 return lowerScalarSplat(Passthru: Op.getOperand(i: 1), Scalar: Op.getOperand(i: 2),
10633 VL: Op.getOperand(i: 3), VT: Op.getSimpleValueType(), DL, DAG,
10634 Subtarget);
10635 case Intrinsic::riscv_vfmv_v_f:
10636 return DAG.getNode(Opcode: RISCVISD::VFMV_V_F_VL, DL, VT: Op.getValueType(),
10637 N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3));
10638 case Intrinsic::riscv_vmv_s_x: {
10639 SDValue Scalar = Op.getOperand(i: 2);
10640
10641 if (Scalar.getValueType().bitsLE(VT: XLenVT)) {
10642 Scalar = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: Scalar);
10643 return DAG.getNode(Opcode: RISCVISD::VMV_S_X_VL, DL, VT: Op.getValueType(),
10644 N1: Op.getOperand(i: 1), N2: Scalar, N3: Op.getOperand(i: 3));
10645 }
10646
10647 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
10648
10649 // This is an i64 value that lives in two scalar registers. We have to
10650 // insert this in a convoluted way. First we build a vXi64 splat containing
10651 // the two values that we assemble using some bit math. Next we'll use
10652 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
10653 // to merge element 0 from our splat into the source vector.
10654 // FIXME: This is probably not the best way to do this, but it is
10655 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
10656 // point.
10657 // sw lo, (a0)
10658 // sw hi, 4(a0)
10659 // vlse vX, (a0)
10660 //
10661 // vid.v vVid
10662 // vmseq.vx mMask, vVid, 0
10663 // vmerge.vvm vDest, vSrc, vVal, mMask
10664 MVT VT = Op.getSimpleValueType();
10665 SDValue Vec = Op.getOperand(i: 1);
10666 SDValue VL = getVLOperand(Op);
10667
10668 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, Passthru: SDValue(), Scalar, VL, DAG);
10669 if (Op.getOperand(i: 1).isUndef())
10670 return SplattedVal;
10671 SDValue SplattedIdx =
10672 DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: DAG.getUNDEF(VT),
10673 N2: DAG.getConstant(Val: 0, DL, VT: MVT::i32), N3: VL);
10674
10675 MVT MaskVT = getMaskTypeFor(VecVT: VT);
10676 SDValue Mask = getAllOnesMask(VecVT: VT, VL, DL, DAG);
10677 SDValue VID = DAG.getNode(Opcode: RISCVISD::VID_VL, DL, VT, N1: Mask, N2: VL);
10678 SDValue SelectCond =
10679 DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: MaskVT,
10680 Ops: {VID, SplattedIdx, DAG.getCondCode(Cond: ISD::SETEQ),
10681 DAG.getUNDEF(VT: MaskVT), Mask, VL});
10682 return DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT, N1: SelectCond, N2: SplattedVal,
10683 N3: Vec, N4: DAG.getUNDEF(VT), N5: VL);
10684 }
10685 case Intrinsic::riscv_vfmv_s_f:
10686 return DAG.getNode(Opcode: RISCVISD::VFMV_S_F_VL, DL, VT: Op.getSimpleValueType(),
10687 N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3));
10688 // EGS * EEW >= 128 bits
10689 case Intrinsic::riscv_vaesdf_vv:
10690 case Intrinsic::riscv_vaesdf_vs:
10691 case Intrinsic::riscv_vaesdm_vv:
10692 case Intrinsic::riscv_vaesdm_vs:
10693 case Intrinsic::riscv_vaesef_vv:
10694 case Intrinsic::riscv_vaesef_vs:
10695 case Intrinsic::riscv_vaesem_vv:
10696 case Intrinsic::riscv_vaesem_vs:
10697 case Intrinsic::riscv_vaeskf1:
10698 case Intrinsic::riscv_vaeskf2:
10699 case Intrinsic::riscv_vaesz_vs:
10700 case Intrinsic::riscv_vsm4k:
10701 case Intrinsic::riscv_vsm4r_vv:
10702 case Intrinsic::riscv_vsm4r_vs: {
10703 if (!isValidEGW(EGS: 4, VT: Op.getSimpleValueType(), Subtarget) ||
10704 !isValidEGW(EGS: 4, VT: Op->getOperand(Num: 1).getSimpleValueType(), Subtarget) ||
10705 !isValidEGW(EGS: 4, VT: Op->getOperand(Num: 2).getSimpleValueType(), Subtarget))
10706 report_fatal_error(reason: "EGW should be greater than or equal to 4 * SEW.");
10707 return Op;
10708 }
10709 // EGS * EEW >= 256 bits
10710 case Intrinsic::riscv_vsm3c:
10711 case Intrinsic::riscv_vsm3me: {
10712 if (!isValidEGW(EGS: 8, VT: Op.getSimpleValueType(), Subtarget) ||
10713 !isValidEGW(EGS: 8, VT: Op->getOperand(Num: 1).getSimpleValueType(), Subtarget))
10714 report_fatal_error(reason: "EGW should be greater than or equal to 8 * SEW.");
10715 return Op;
10716 }
10717 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
10718 case Intrinsic::riscv_vsha2ch:
10719 case Intrinsic::riscv_vsha2cl:
10720 case Intrinsic::riscv_vsha2ms: {
10721 if (Op->getSimpleValueType(ResNo: 0).getScalarSizeInBits() == 64 &&
10722 !Subtarget.hasStdExtZvknhb())
10723 report_fatal_error(reason: "SEW=64 needs Zvknhb to be enabled.");
10724 if (!isValidEGW(EGS: 4, VT: Op.getSimpleValueType(), Subtarget) ||
10725 !isValidEGW(EGS: 4, VT: Op->getOperand(Num: 1).getSimpleValueType(), Subtarget) ||
10726 !isValidEGW(EGS: 4, VT: Op->getOperand(Num: 2).getSimpleValueType(), Subtarget))
10727 report_fatal_error(reason: "EGW should be greater than or equal to 4 * SEW.");
10728 return Op;
10729 }
10730 case Intrinsic::riscv_sf_vc_v_x:
10731 case Intrinsic::riscv_sf_vc_v_i:
10732 case Intrinsic::riscv_sf_vc_v_xv:
10733 case Intrinsic::riscv_sf_vc_v_iv:
10734 case Intrinsic::riscv_sf_vc_v_vv:
10735 case Intrinsic::riscv_sf_vc_v_fv:
10736 case Intrinsic::riscv_sf_vc_v_xvv:
10737 case Intrinsic::riscv_sf_vc_v_ivv:
10738 case Intrinsic::riscv_sf_vc_v_vvv:
10739 case Intrinsic::riscv_sf_vc_v_fvv:
10740 case Intrinsic::riscv_sf_vc_v_xvw:
10741 case Intrinsic::riscv_sf_vc_v_ivw:
10742 case Intrinsic::riscv_sf_vc_v_vvw:
10743 case Intrinsic::riscv_sf_vc_v_fvw: {
10744 MVT VT = Op.getSimpleValueType();
10745
10746 SmallVector<SDValue> Operands{Op->op_values()};
10747 processVCIXOperands(OrigOp&: Op, Operands, DAG);
10748
10749 MVT RetVT = VT;
10750 if (VT.isFixedLengthVector())
10751 RetVT = getContainerForFixedLengthVector(VT);
10752 else if (VT.isFloatingPoint())
10753 RetVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: VT.getScalarSizeInBits()),
10754 EC: VT.getVectorElementCount());
10755
10756 SDValue NewNode = DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: RetVT, Ops: Operands);
10757
10758 if (VT.isFixedLengthVector())
10759 NewNode = convertFromScalableVector(VT, V: NewNode, DAG, Subtarget);
10760 else if (VT.isFloatingPoint())
10761 NewNode = DAG.getBitcast(VT, V: NewNode);
10762
10763 if (Op == NewNode)
10764 break;
10765
10766 return NewNode;
10767 }
10768 }
10769
10770 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
10771}
10772
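// Lower a SiFive VCIX intrinsic that carries a chain: drop the intrinsic-id
// operand, legalize fixed-length and floating-point vector operands, emit the
// VCIX node with a {vector, chain} result, and convert the vector result back
// to the original type.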
10773static inline SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
10774 unsigned Type) {
10775 SDLoc DL(Op);
10776 SmallVector<SDValue> Operands{Op->op_values()};
10777 Operands.erase(CI: Operands.begin() + 1);
10778
10779 const RISCVSubtarget &Subtarget =
10780 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
10781 MVT VT = Op.getSimpleValueType();
10782 MVT RetVT = VT;
10783 MVT FloatVT = VT;
10784
10785 if (VT.isFloatingPoint()) {
10786 RetVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: VT.getScalarSizeInBits()),
10787 EC: VT.getVectorElementCount());
10788 FloatVT = RetVT;
10789 }
10790 if (VT.isFixedLengthVector())
10791 RetVT = getContainerForFixedLengthVector(TLI: DAG.getTargetLoweringInfo(), VT: RetVT,
10792 Subtarget);
10793
10794 processVCIXOperands(OrigOp&: Op, Operands, DAG);
10795
10796 SDVTList VTs = DAG.getVTList(VTs: {RetVT, MVT::Other});
10797 SDValue NewNode = DAG.getNode(Opcode: Type, DL, VTList: VTs, Ops: Operands);
10798 SDValue Chain = NewNode.getValue(R: 1);
10799
10800 if (VT.isFixedLengthVector())
10801 NewNode = convertFromScalableVector(VT: FloatVT, V: NewNode, DAG, Subtarget);
10802 if (VT.isFloatingPoint())
10803 NewNode = DAG.getBitcast(VT, V: NewNode);
10804
10805 NewNode = DAG.getMergeValues(Ops: {NewNode, Chain}, dl: DL);
10806
10807 return NewNode;
10808}
10809
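// As above, but for VCIX intrinsics that produce no value; only the operands
// need to be legalized.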
10810static inline SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
10811 unsigned Type) {
10812 SmallVector<SDValue> Operands{Op->op_values()};
10813 Operands.erase(CI: Operands.begin() + 1);
10814 processVCIXOperands(OrigOp&: Op, Operands, DAG);
10815
10816 return DAG.getNode(Opcode: Type, DL: SDLoc(Op), VT: Op.getValueType(), Ops: Operands);
10817}
10818
10819SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
10820 SelectionDAG &DAG) const {
10821 unsigned IntNo = Op.getConstantOperandVal(i: 1);
10822 switch (IntNo) {
10823 default:
10824 break;
10825 case Intrinsic::riscv_seg2_load_mask:
10826 case Intrinsic::riscv_seg3_load_mask:
10827 case Intrinsic::riscv_seg4_load_mask:
10828 case Intrinsic::riscv_seg5_load_mask:
10829 case Intrinsic::riscv_seg6_load_mask:
10830 case Intrinsic::riscv_seg7_load_mask:
10831 case Intrinsic::riscv_seg8_load_mask: {
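// Lower the fixed-length segment load to a masked vlsegN on the scalable
// container type: the load produces a RISC-V vector tuple with NF fields,
// each of which is unpacked with TUPLE_EXTRACT and converted back to a
// fixed-length vector.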
10832 SDLoc DL(Op);
10833 static const Intrinsic::ID VlsegInts[7] = {
10834 Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
10835 Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
10836 Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
10837 Intrinsic::riscv_vlseg8_mask};
10838 unsigned NF = Op->getNumValues() - 1;
10839 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
10840 MVT XLenVT = Subtarget.getXLenVT();
10841 MVT VT = Op->getSimpleValueType(ResNo: 0);
10842 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10843 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
10844 ContainerVT.getScalarSizeInBits();
10845 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NFields: NF);
10846
10847 // Operands: (chain, int_id, pointer, mask, vl)
10848 SDValue VL = Op.getOperand(i: Op.getNumOperands() - 1);
10849 SDValue Mask = Op.getOperand(i: 3);
10850 MVT MaskVT = Mask.getSimpleValueType();
10851 MVT MaskContainerVT =
10852 ::getContainerForFixedLengthVector(DAG, VT: MaskVT, Subtarget);
10853 Mask = convertToScalableVector(VT: MaskContainerVT, V: Mask, DAG, Subtarget);
10854
10855 SDValue IntID = DAG.getTargetConstant(Val: VlsegInts[NF - 2], DL, VT: XLenVT);
10856 auto *Load = cast<MemIntrinsicSDNode>(Val&: Op);
10857
10858 SDVTList VTs = DAG.getVTList(VTs: {VecTupTy, MVT::Other});
10859 SDValue Ops[] = {
10860 Load->getChain(),
10861 IntID,
10862 DAG.getUNDEF(VT: VecTupTy),
10863 Op.getOperand(i: 2),
10864 Mask,
10865 VL,
10866 DAG.getTargetConstant(
10867 Val: RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC, DL, VT: XLenVT),
10868 DAG.getTargetConstant(Val: Log2_64(Value: VT.getScalarSizeInBits()), DL, VT: XLenVT)};
10869 SDValue Result =
10870 DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, Ops,
10871 MemVT: Load->getMemoryVT(), MMO: Load->getMemOperand());
10872 SmallVector<SDValue, 9> Results;
10873 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) {
10874 SDValue SubVec =
10875 DAG.getNode(Opcode: RISCVISD::TUPLE_EXTRACT, DL, VT: ContainerVT,
10876 N1: Result.getValue(R: 0), N2: DAG.getVectorIdxConstant(Val: RetIdx, DL));
10877 Results.push_back(Elt: convertFromScalableVector(VT, V: SubVec, DAG, Subtarget));
10878 }
10879 Results.push_back(Elt: Result.getValue(R: 1));
10880 return DAG.getMergeValues(Ops: Results, dl: DL);
10881 }
10882 case Intrinsic::riscv_sf_vc_v_x_se:
10883 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_X_SE);
10884 case Intrinsic::riscv_sf_vc_v_i_se:
10885 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_I_SE);
10886 case Intrinsic::riscv_sf_vc_v_xv_se:
10887 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_XV_SE);
10888 case Intrinsic::riscv_sf_vc_v_iv_se:
10889 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_IV_SE);
10890 case Intrinsic::riscv_sf_vc_v_vv_se:
10891 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_VV_SE);
10892 case Intrinsic::riscv_sf_vc_v_fv_se:
10893 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_FV_SE);
10894 case Intrinsic::riscv_sf_vc_v_xvv_se:
10895 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_XVV_SE);
10896 case Intrinsic::riscv_sf_vc_v_ivv_se:
10897 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_IVV_SE);
10898 case Intrinsic::riscv_sf_vc_v_vvv_se:
10899 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_VVV_SE);
10900 case Intrinsic::riscv_sf_vc_v_fvv_se:
10901 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_FVV_SE);
10902 case Intrinsic::riscv_sf_vc_v_xvw_se:
10903 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_XVW_SE);
10904 case Intrinsic::riscv_sf_vc_v_ivw_se:
10905 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_IVW_SE);
10906 case Intrinsic::riscv_sf_vc_v_vvw_se:
10907 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_VVW_SE);
10908 case Intrinsic::riscv_sf_vc_v_fvw_se:
10909 return getVCIXISDNodeWCHAIN(Op, DAG, Type: RISCVISD::SF_VC_V_FVW_SE);
10910 }
10911
10912 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
10913}
10914
10915SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
10916 SelectionDAG &DAG) const {
10917 unsigned IntNo = Op.getConstantOperandVal(i: 1);
10918 switch (IntNo) {
10919 default:
10920 break;
10921 case Intrinsic::riscv_seg2_store_mask:
10922 case Intrinsic::riscv_seg3_store_mask:
10923 case Intrinsic::riscv_seg4_store_mask:
10924 case Intrinsic::riscv_seg5_store_mask:
10925 case Intrinsic::riscv_seg6_store_mask:
10926 case Intrinsic::riscv_seg7_store_mask:
10927 case Intrinsic::riscv_seg8_store_mask: {
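// Mirror of the segment-load lowering above: pack the NF fixed-length
// operands into a scalable vector tuple with TUPLE_INSERT and emit a masked
// vssegN store.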
10928 SDLoc DL(Op);
10929 static const Intrinsic::ID VssegInts[] = {
10930 Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
10931 Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
10932 Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
10933 Intrinsic::riscv_vsseg8_mask};
10934
10935 // Operands: (chain, int_id, vec*, ptr, mask, vl)
10936 unsigned NF = Op->getNumOperands() - 5;
10937 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
10938 MVT XLenVT = Subtarget.getXLenVT();
10939 MVT VT = Op->getOperand(Num: 2).getSimpleValueType();
10940 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10941 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
10942 ContainerVT.getScalarSizeInBits();
10943 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NFields: NF);
10944
10945 SDValue VL = Op.getOperand(i: Op.getNumOperands() - 1);
10946 SDValue Mask = Op.getOperand(i: Op.getNumOperands() - 2);
10947 MVT MaskVT = Mask.getSimpleValueType();
10948 MVT MaskContainerVT =
10949 ::getContainerForFixedLengthVector(DAG, VT: MaskVT, Subtarget);
10950 Mask = convertToScalableVector(VT: MaskContainerVT, V: Mask, DAG, Subtarget);
10951
10952 SDValue IntID = DAG.getTargetConstant(Val: VssegInts[NF - 2], DL, VT: XLenVT);
10953 SDValue Ptr = Op->getOperand(Num: NF + 2);
10954
10955 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Val&: Op);
10956
10957 SDValue StoredVal = DAG.getUNDEF(VT: VecTupTy);
10958 for (unsigned i = 0; i < NF; i++)
10959 StoredVal = DAG.getNode(
10960 Opcode: RISCVISD::TUPLE_INSERT, DL, VT: VecTupTy, N1: StoredVal,
10961 N2: convertToScalableVector(
10962 VT: ContainerVT, V: FixedIntrinsic->getOperand(Num: 2 + i), DAG, Subtarget),
10963 N3: DAG.getVectorIdxConstant(Val: i, DL));
10964
10965 SDValue Ops[] = {
10966 FixedIntrinsic->getChain(),
10967 IntID,
10968 StoredVal,
10969 Ptr,
10970 Mask,
10971 VL,
10972 DAG.getTargetConstant(Val: Log2_64(Value: VT.getScalarSizeInBits()), DL, VT: XLenVT)};
10973
10974 return DAG.getMemIntrinsicNode(
10975 Opcode: ISD::INTRINSIC_VOID, dl: DL, VTList: DAG.getVTList(VT: MVT::Other), Ops,
10976 MemVT: FixedIntrinsic->getMemoryVT(), MMO: FixedIntrinsic->getMemOperand());
10977 }
10978 case Intrinsic::riscv_sf_vc_xv_se:
10979 return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_XV_SE);
10980 case Intrinsic::riscv_sf_vc_iv_se:
10981 return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_IV_SE);
10982 case Intrinsic::riscv_sf_vc_vv_se:
10983 return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_VV_SE);
10984 case Intrinsic::riscv_sf_vc_fv_se:
10985 return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_FV_SE);
10986 case Intrinsic::riscv_sf_vc_xvv_se:
10987 return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_XVV_SE);
10988 case Intrinsic::riscv_sf_vc_ivv_se:
10989 return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_IVV_SE);
10990 case Intrinsic::riscv_sf_vc_vvv_se:
10991 return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_VVV_SE);
10992 case Intrinsic::riscv_sf_vc_fvv_se:
10993 return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_FVV_SE);
10994 case Intrinsic::riscv_sf_vc_xvw_se:
10995 return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_XVW_SE);
10996 case Intrinsic::riscv_sf_vc_ivw_se:
10997 return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_IVW_SE);
10998 case Intrinsic::riscv_sf_vc_vvw_se:
10999 return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_VVW_SE);
11000 case Intrinsic::riscv_sf_vc_fvw_se:
11001 return getVCIXISDNodeVOID(Op, DAG, Type: RISCVISD::SF_VC_FVW_SE);
11002 }
11003
11004 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
11005}
11006
11007static unsigned getRVVReductionOp(unsigned ISDOpcode) {
11008 switch (ISDOpcode) {
11009 default:
11010 llvm_unreachable("Unhandled reduction");
11011 case ISD::VP_REDUCE_ADD:
11012 case ISD::VECREDUCE_ADD:
11013 return RISCVISD::VECREDUCE_ADD_VL;
11014 case ISD::VP_REDUCE_UMAX:
11015 case ISD::VECREDUCE_UMAX:
11016 return RISCVISD::VECREDUCE_UMAX_VL;
11017 case ISD::VP_REDUCE_SMAX:
11018 case ISD::VECREDUCE_SMAX:
11019 return RISCVISD::VECREDUCE_SMAX_VL;
11020 case ISD::VP_REDUCE_UMIN:
11021 case ISD::VECREDUCE_UMIN:
11022 return RISCVISD::VECREDUCE_UMIN_VL;
11023 case ISD::VP_REDUCE_SMIN:
11024 case ISD::VECREDUCE_SMIN:
11025 return RISCVISD::VECREDUCE_SMIN_VL;
11026 case ISD::VP_REDUCE_AND:
11027 case ISD::VECREDUCE_AND:
11028 return RISCVISD::VECREDUCE_AND_VL;
11029 case ISD::VP_REDUCE_OR:
11030 case ISD::VECREDUCE_OR:
11031 return RISCVISD::VECREDUCE_OR_VL;
11032 case ISD::VP_REDUCE_XOR:
11033 case ISD::VECREDUCE_XOR:
11034 return RISCVISD::VECREDUCE_XOR_VL;
11035 case ISD::VP_REDUCE_FADD:
11036 return RISCVISD::VECREDUCE_FADD_VL;
11037 case ISD::VP_REDUCE_SEQ_FADD:
11038 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
11039 case ISD::VP_REDUCE_FMAX:
11040 case ISD::VP_REDUCE_FMAXIMUM:
11041 return RISCVISD::VECREDUCE_FMAX_VL;
11042 case ISD::VP_REDUCE_FMIN:
11043 case ISD::VP_REDUCE_FMINIMUM:
11044 return RISCVISD::VECREDUCE_FMIN_VL;
11045 }
11047}
11048
11049SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
11050 SelectionDAG &DAG,
11051 bool IsVP) const {
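// Mask (i1) reductions are lowered through vcpop.m: AND becomes
// vcpop(~x) == 0, OR becomes vcpop(x) != 0, and XOR becomes
// (vcpop(x) & 1) != 0.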
11052 SDLoc DL(Op);
11053 SDValue Vec = Op.getOperand(i: IsVP ? 1 : 0);
11054 MVT VecVT = Vec.getSimpleValueType();
11055 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
11056 Op.getOpcode() == ISD::VECREDUCE_OR ||
11057 Op.getOpcode() == ISD::VECREDUCE_XOR ||
11058 Op.getOpcode() == ISD::VP_REDUCE_AND ||
11059 Op.getOpcode() == ISD::VP_REDUCE_OR ||
11060 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
11061 "Unexpected reduction lowering");
11062
11063 MVT XLenVT = Subtarget.getXLenVT();
11064
11065 MVT ContainerVT = VecVT;
11066 if (VecVT.isFixedLengthVector()) {
11067 ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
11068 Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget);
11069 }
11070
11071 SDValue Mask, VL;
11072 if (IsVP) {
11073 Mask = Op.getOperand(i: 2);
11074 VL = Op.getOperand(i: 3);
11075 } else {
11076 std::tie(args&: Mask, args&: VL) =
11077 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11078 }
11079
11080 ISD::CondCode CC;
11081 switch (Op.getOpcode()) {
11082 default:
11083 llvm_unreachable("Unhandled reduction");
11084 case ISD::VECREDUCE_AND:
11085 case ISD::VP_REDUCE_AND: {
11086 // vcpop ~x == 0
11087 SDValue TrueMask = DAG.getNode(Opcode: RISCVISD::VMSET_VL, DL, VT: ContainerVT, Operand: VL);
11088 if (IsVP || VecVT.isFixedLengthVector())
11089 Vec = DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Vec, N2: TrueMask, N3: VL);
11090 else
11091 Vec = DAG.getNode(Opcode: ISD::XOR, DL, VT: ContainerVT, N1: Vec, N2: TrueMask);
11092 Vec = DAG.getNode(Opcode: RISCVISD::VCPOP_VL, DL, VT: XLenVT, N1: Vec, N2: Mask, N3: VL);
11093 CC = ISD::SETEQ;
11094 break;
11095 }
11096 case ISD::VECREDUCE_OR:
11097 case ISD::VP_REDUCE_OR:
11098 // vcpop x != 0
11099 Vec = DAG.getNode(Opcode: RISCVISD::VCPOP_VL, DL, VT: XLenVT, N1: Vec, N2: Mask, N3: VL);
11100 CC = ISD::SETNE;
11101 break;
11102 case ISD::VECREDUCE_XOR:
11103 case ISD::VP_REDUCE_XOR: {
11104 // ((vcpop x) & 1) != 0
11105 SDValue One = DAG.getConstant(Val: 1, DL, VT: XLenVT);
11106 Vec = DAG.getNode(Opcode: RISCVISD::VCPOP_VL, DL, VT: XLenVT, N1: Vec, N2: Mask, N3: VL);
11107 Vec = DAG.getNode(Opcode: ISD::AND, DL, VT: XLenVT, N1: Vec, N2: One);
11108 CC = ISD::SETNE;
11109 break;
11110 }
11111 }
11112
11113 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: XLenVT);
11114 SDValue SetCC = DAG.getSetCC(DL, VT: XLenVT, LHS: Vec, RHS: Zero, Cond: CC);
11115 SetCC = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: Op.getValueType(), Operand: SetCC);
11116
11117 if (!IsVP)
11118 return SetCC;
11119
11120 // Now include the start value in the operation.
11121 // Note that we must return the start value when no elements are operated
11122 // upon. The vcpop instructions we've emitted in each case above will return
11123 // 0 for an inactive vector, and so we've already received the neutral value:
11124 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
11125 // can simply include the start value.
11126 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(VecReduceOpcode: Op.getOpcode());
11127 return DAG.getNode(Opcode: BaseOpc, DL, VT: Op.getValueType(), N1: SetCC, N2: Op.getOperand(i: 0));
11128}
11129
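// Returns true if the AVL is known to be non-zero: either the X0 sentinel
// register (which requests VLMAX) or a constant that is at least 1.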
11130static bool isNonZeroAVL(SDValue AVL) {
11131 auto *RegisterAVL = dyn_cast<RegisterSDNode>(Val&: AVL);
11132 auto *ImmAVL = dyn_cast<ConstantSDNode>(Val&: AVL);
11133 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
11134 (ImmAVL && ImmAVL->getZExtValue() >= 1);
11135}
11136
11137/// Helper to lower a reduction sequence of the form:
11138/// scalar = reduce_op vec, scalar_start
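/// The start value is inserted into element 0 of an LMUL<=1 vector, widened
/// to M1 if necessary, and used as the scalar operand of the RVV reduction;
/// element 0 of the M1 result is then extracted as the final scalar.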
11139static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
11140 SDValue StartValue, SDValue Vec, SDValue Mask,
11141 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
11142 const RISCVSubtarget &Subtarget) {
11143 const MVT VecVT = Vec.getSimpleValueType();
11144 const MVT M1VT = RISCVTargetLowering::getM1VT(VT: VecVT);
11145 const MVT XLenVT = Subtarget.getXLenVT();
11146 const bool NonZeroAVL = isNonZeroAVL(AVL: VL);
11147
11148 // The reduction needs an LMUL1 input; do the splat at either LMUL1
11149 // or the original VT if fractional.
11150 auto InnerVT = VecVT.bitsLE(VT: M1VT) ? VecVT : M1VT;
11151 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
11152 // prove it is non-zero. For the AVL=0 case, we need the scalar to
11153 // be the result of the reduction operation.
11154 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(Val: 1, DL, VT: XLenVT);
11155 SDValue InitialValue =
11156 lowerScalarInsert(Scalar: StartValue, VL: InnerVL, VT: InnerVT, DL, DAG, Subtarget);
11157 if (M1VT != InnerVT)
11158 InitialValue =
11159 DAG.getInsertSubvector(DL, Vec: DAG.getUNDEF(VT: M1VT), SubVec: InitialValue, Idx: 0);
11160 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(VT: M1VT) : InitialValue;
11161 SDValue Policy = DAG.getTargetConstant(Val: RISCVVType::TAIL_AGNOSTIC, DL, VT: XLenVT);
11162 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
11163 SDValue Reduction = DAG.getNode(Opcode: RVVOpcode, DL, VT: M1VT, Ops);
11164 return DAG.getExtractVectorElt(DL, VT: ResVT, Vec: Reduction, Idx: 0);
11165}
11166
11167SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
11168 SelectionDAG &DAG) const {
11169 SDLoc DL(Op);
11170 SDValue Vec = Op.getOperand(i: 0);
11171 EVT VecEVT = Vec.getValueType();
11172
11173 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(VecReduceOpcode: Op.getOpcode());
11174
11175 // Due to ordering in legalize types we may have a vector type that needs to
11176 // be split. Do that manually so we can get down to a legal type.
11177 while (getTypeAction(Context&: *DAG.getContext(), VT: VecEVT) ==
11178 TargetLowering::TypeSplitVector) {
11179 auto [Lo, Hi] = DAG.SplitVector(N: Vec, DL);
11180 VecEVT = Lo.getValueType();
11181 Vec = DAG.getNode(Opcode: BaseOpc, DL, VT: VecEVT, N1: Lo, N2: Hi);
11182 }
11183
11184 // TODO: The type may need to be widened rather than split. Or widened before
11185 // it can be split.
11186 if (!isTypeLegal(VT: VecEVT))
11187 return SDValue();
11188
11189 MVT VecVT = VecEVT.getSimpleVT();
11190 MVT VecEltVT = VecVT.getVectorElementType();
11191 unsigned RVVOpcode = getRVVReductionOp(ISDOpcode: Op.getOpcode());
11192
11193 MVT ContainerVT = VecVT;
11194 if (VecVT.isFixedLengthVector()) {
11195 ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
11196 Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget);
11197 }
11198
11199 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11200
11201 SDValue StartV = DAG.getNeutralElement(Opcode: BaseOpc, DL, VT: VecEltVT, Flags: SDNodeFlags());
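// These ops are idempotent, so seeding the reduction with element 0 of the
// vector gives the same result as the neutral element and avoids having to
// materialize the neutral-element constant.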
11202 switch (BaseOpc) {
11203 case ISD::AND:
11204 case ISD::OR:
11205 case ISD::UMAX:
11206 case ISD::UMIN:
11207 case ISD::SMAX:
11208 case ISD::SMIN:
11209 StartV = DAG.getExtractVectorElt(DL, VT: VecEltVT, Vec, Idx: 0);
11210 }
11211 return lowerReductionSeq(RVVOpcode, ResVT: Op.getSimpleValueType(), StartValue: StartV, Vec,
11212 Mask, VL, DL, DAG, Subtarget);
11213}
11214
11215// Given a reduction op, this function returns the matching reduction opcode,
11216// the vector SDValue and the scalar SDValue required to lower this to a
11217// RISCVISD node.
11218static std::tuple<unsigned, SDValue, SDValue>
11219getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT,
11220 const RISCVSubtarget &Subtarget) {
11221 SDLoc DL(Op);
11222 auto Flags = Op->getFlags();
11223 unsigned Opcode = Op.getOpcode();
11224 switch (Opcode) {
11225 default:
11226 llvm_unreachable("Unhandled reduction");
11227 case ISD::VECREDUCE_FADD: {
11228 // Use positive zero if we can. It is cheaper to materialize.
11229 SDValue Zero =
11230 DAG.getConstantFP(Val: Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, VT: EltVT);
11231 return std::make_tuple(args: RISCVISD::VECREDUCE_FADD_VL, args: Op.getOperand(i: 0), args&: Zero);
11232 }
11233 case ISD::VECREDUCE_SEQ_FADD:
11234 return std::make_tuple(args: RISCVISD::VECREDUCE_SEQ_FADD_VL, args: Op.getOperand(i: 1),
11235 args: Op.getOperand(i: 0));
11236 case ISD::VECREDUCE_FMINIMUM:
11237 case ISD::VECREDUCE_FMAXIMUM:
11238 case ISD::VECREDUCE_FMIN:
11239 case ISD::VECREDUCE_FMAX: {
11240 SDValue Front = DAG.getExtractVectorElt(DL, VT: EltVT, Vec: Op.getOperand(i: 0), Idx: 0);
11241 unsigned RVVOpc =
11242 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
11243 ? RISCVISD::VECREDUCE_FMIN_VL
11244 : RISCVISD::VECREDUCE_FMAX_VL;
11245 return std::make_tuple(args&: RVVOpc, args: Op.getOperand(i: 0), args&: Front);
11246 }
11247 }
11248}
11249
11250SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
11251 SelectionDAG &DAG) const {
11252 SDLoc DL(Op);
11253 MVT VecEltVT = Op.getSimpleValueType();
11254
11255 unsigned RVVOpcode;
11256 SDValue VectorVal, ScalarVal;
11257 std::tie(args&: RVVOpcode, args&: VectorVal, args&: ScalarVal) =
11258 getRVVFPReductionOpAndOperands(Op, DAG, EltVT: VecEltVT, Subtarget);
11259 MVT VecVT = VectorVal.getSimpleValueType();
11260
11261 MVT ContainerVT = VecVT;
11262 if (VecVT.isFixedLengthVector()) {
11263 ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
11264 VectorVal = convertToScalableVector(VT: ContainerVT, V: VectorVal, DAG, Subtarget);
11265 }
11266
11267 MVT ResVT = Op.getSimpleValueType();
11268 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11269 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, StartValue: ScalarVal, Vec: VectorVal, Mask,
11270 VL, DL, DAG, Subtarget);
11271 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
11272 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
11273 return Res;
11274
11275 if (Op->getFlags().hasNoNaNs())
11276 return Res;
11277
11278 // Force the output to NaN if any element is NaN.
11279 SDValue IsNan =
11280 DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: Mask.getValueType(),
11281 Ops: {VectorVal, VectorVal, DAG.getCondCode(Cond: ISD::SETNE),
11282 DAG.getUNDEF(VT: Mask.getValueType()), Mask, VL});
11283 MVT XLenVT = Subtarget.getXLenVT();
11284 SDValue CPop = DAG.getNode(Opcode: RISCVISD::VCPOP_VL, DL, VT: XLenVT, N1: IsNan, N2: Mask, N3: VL);
11285 SDValue NoNaNs = DAG.getSetCC(DL, VT: XLenVT, LHS: CPop,
11286 RHS: DAG.getConstant(Val: 0, DL, VT: XLenVT), Cond: ISD::SETEQ);
11287 return DAG.getSelect(
11288 DL, VT: ResVT, Cond: NoNaNs, LHS: Res,
11289 RHS: DAG.getConstantFP(Val: APFloat::getNaN(Sem: ResVT.getFltSemantics()), DL, VT: ResVT));
11290}
11291
11292SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
11293 SelectionDAG &DAG) const {
11294 SDLoc DL(Op);
11295 unsigned Opc = Op.getOpcode();
11296 SDValue Start = Op.getOperand(i: 0);
11297 SDValue Vec = Op.getOperand(i: 1);
11298 EVT VecEVT = Vec.getValueType();
11299 MVT XLenVT = Subtarget.getXLenVT();
11300
11301 // TODO: The type may need to be widened rather than split. Or widened before
11302 // it can be split.
11303 if (!isTypeLegal(VT: VecEVT))
11304 return SDValue();
11305
11306 MVT VecVT = VecEVT.getSimpleVT();
11307 unsigned RVVOpcode = getRVVReductionOp(ISDOpcode: Opc);
11308
11309 if (VecVT.isFixedLengthVector()) {
11310 auto ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
11311 Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget);
11312 }
11313
11314 SDValue VL = Op.getOperand(i: 3);
11315 SDValue Mask = Op.getOperand(i: 2);
11316 SDValue Res =
11317 lowerReductionSeq(RVVOpcode, ResVT: Op.getSimpleValueType(), StartValue: Op.getOperand(i: 0),
11318 Vec, Mask, VL, DL, DAG, Subtarget);
11319 if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
11320 Op->getFlags().hasNoNaNs())
11321 return Res;
11322
11323 // Propagate NaNs.
11324 MVT PredVT = getMaskTypeFor(VecVT: Vec.getSimpleValueType());
11325 // Check if any of the elements in Vec is NaN.
11326 SDValue IsNaN = DAG.getNode(
11327 Opcode: RISCVISD::SETCC_VL, DL, VT: PredVT,
11328 Ops: {Vec, Vec, DAG.getCondCode(Cond: ISD::SETNE), DAG.getUNDEF(VT: PredVT), Mask, VL});
11329 SDValue VCPop = DAG.getNode(Opcode: RISCVISD::VCPOP_VL, DL, VT: XLenVT, N1: IsNaN, N2: Mask, N3: VL);
11330 // Check if the start value is NaN.
11331 SDValue StartIsNaN = DAG.getSetCC(DL, VT: XLenVT, LHS: Start, RHS: Start, Cond: ISD::SETUO);
11332 VCPop = DAG.getNode(Opcode: ISD::OR, DL, VT: XLenVT, N1: VCPop, N2: StartIsNaN);
11333 SDValue NoNaNs = DAG.getSetCC(DL, VT: XLenVT, LHS: VCPop,
11334 RHS: DAG.getConstant(Val: 0, DL, VT: XLenVT), Cond: ISD::SETEQ);
11335 MVT ResVT = Res.getSimpleValueType();
11336 return DAG.getSelect(
11337 DL, VT: ResVT, Cond: NoNaNs, LHS: Res,
11338 RHS: DAG.getConstantFP(Val: APFloat::getNaN(Sem: ResVT.getFltSemantics()), DL, VT: ResVT));
11339}
11340
11341SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
11342 SelectionDAG &DAG) const {
11343 SDValue Vec = Op.getOperand(i: 0);
11344 SDValue SubVec = Op.getOperand(i: 1);
11345 MVT VecVT = Vec.getSimpleValueType();
11346 MVT SubVecVT = SubVec.getSimpleValueType();
11347
11348 SDLoc DL(Op);
11349 MVT XLenVT = Subtarget.getXLenVT();
11350 unsigned OrigIdx = Op.getConstantOperandVal(i: 2);
11351 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
11352
11353 if (OrigIdx == 0 && Vec.isUndef())
11354 return Op;
11355
11356 // We don't have the ability to slide mask vectors up indexed by their i1
11357 // elements; the smallest we can do is i8. Often we are able to bitcast to
11358 // equivalent i8 vectors. Note that when inserting a fixed-length vector
11359 // into a scalable one, we might not necessarily have enough scalable
11360 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
11361 if (SubVecVT.getVectorElementType() == MVT::i1) {
11362 if (VecVT.getVectorMinNumElements() >= 8 &&
11363 SubVecVT.getVectorMinNumElements() >= 8) {
11364 assert(OrigIdx % 8 == 0 && "Invalid index");
11365 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
11366 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
11367 "Unexpected mask vector lowering");
11368 OrigIdx /= 8;
11369 SubVecVT =
11370 MVT::getVectorVT(VT: MVT::i8, NumElements: SubVecVT.getVectorMinNumElements() / 8,
11371 IsScalable: SubVecVT.isScalableVector());
11372 VecVT = MVT::getVectorVT(VT: MVT::i8, NumElements: VecVT.getVectorMinNumElements() / 8,
11373 IsScalable: VecVT.isScalableVector());
11374 Vec = DAG.getBitcast(VT: VecVT, V: Vec);
11375 SubVec = DAG.getBitcast(VT: SubVecVT, V: SubVec);
11376 } else {
11377 // We can't slide this mask vector up indexed by its i1 elements.
11378 // This poses a problem when we wish to insert a scalable vector which
11379 // can't be re-expressed as a larger type. Just choose the slow path and
11380 // extend to a larger type, then truncate back down.
11381 MVT ExtVecVT = VecVT.changeVectorElementType(EltVT: MVT::i8);
11382 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(EltVT: MVT::i8);
11383 Vec = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: ExtVecVT, Operand: Vec);
11384 SubVec = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: ExtSubVecVT, Operand: SubVec);
11385 Vec = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: ExtVecVT, N1: Vec, N2: SubVec,
11386 N3: Op.getOperand(i: 2));
11387 SDValue SplatZero = DAG.getConstant(Val: 0, DL, VT: ExtVecVT);
11388 return DAG.getSetCC(DL, VT: VecVT, LHS: Vec, RHS: SplatZero, Cond: ISD::SETNE);
11389 }
11390 }
11391
11392 // If the subvector is a fixed-length type and we don't know VLEN exactly,
11393 // we cannot use subregister manipulation to simplify the codegen; we don't
11394 // know which register of an LMUL group contains the specific subvector
11395 // as we only know the minimum register size. Therefore we must slide the
11396 // vector group up the full amount.
11397 const auto VLen = Subtarget.getRealVLen();
11398 if (SubVecVT.isFixedLengthVector() && !VLen) {
11399 MVT ContainerVT = VecVT;
11400 if (VecVT.isFixedLengthVector()) {
11401 ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
11402 Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget);
11403 }
11404
11405 SubVec = DAG.getInsertSubvector(DL, Vec: DAG.getUNDEF(VT: ContainerVT), SubVec, Idx: 0);
11406
11407 SDValue Mask =
11408 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
11409 // Set the vector length to only the number of elements we care about. Note
11410 // that for slideup this includes the offset.
11411 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
11412 SDValue VL = DAG.getConstant(Val: EndIndex, DL, VT: XLenVT);
11413
11414 // Use tail agnostic policy if we're inserting over Vec's tail.
11415 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
11416 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
11417 Policy = RISCVVType::TAIL_AGNOSTIC;
11418
11419 // If we're inserting into the lowest elements, use a tail undisturbed
11420 // vmv.v.v.
11421 if (OrigIdx == 0) {
11422 SubVec =
11423 DAG.getNode(Opcode: RISCVISD::VMV_V_V_VL, DL, VT: ContainerVT, N1: Vec, N2: SubVec, N3: VL);
11424 } else {
11425 SDValue SlideupAmt = DAG.getConstant(Val: OrigIdx, DL, VT: XLenVT);
11426 SubVec = getVSlideup(DAG, Subtarget, DL, VT: ContainerVT, Passthru: Vec, Op: SubVec,
11427 Offset: SlideupAmt, Mask, VL, Policy);
11428 }
11429
11430 if (VecVT.isFixedLengthVector())
11431 SubVec = convertFromScalableVector(VT: VecVT, V: SubVec, DAG, Subtarget);
11432 return DAG.getBitcast(VT: Op.getValueType(), V: SubVec);
11433 }
11434
11435 MVT ContainerVecVT = VecVT;
11436 if (VecVT.isFixedLengthVector()) {
11437 ContainerVecVT = getContainerForFixedLengthVector(VT: VecVT);
11438 Vec = convertToScalableVector(VT: ContainerVecVT, V: Vec, DAG, Subtarget);
11439 }
11440
11441 MVT ContainerSubVecVT = SubVecVT;
11442 if (SubVecVT.isFixedLengthVector()) {
11443 ContainerSubVecVT = getContainerForFixedLengthVector(VT: SubVecVT);
11444 SubVec = convertToScalableVector(VT: ContainerSubVecVT, V: SubVec, DAG, Subtarget);
11445 }
11446
11447 unsigned SubRegIdx;
11448 ElementCount RemIdx;
11449 // insert_subvector scales the index by vscale if the subvector is scalable,
11450 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
11451 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
11452 if (SubVecVT.isFixedLengthVector()) {
11453 assert(VLen);
11454 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
11455 auto Decompose =
11456 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
11457 VecVT: ContainerVecVT, SubVecVT: ContainerSubVecVT, InsertExtractIdx: OrigIdx / Vscale, TRI);
11458 SubRegIdx = Decompose.first;
11459 RemIdx = ElementCount::getFixed(MinVal: (Decompose.second * Vscale) +
11460 (OrigIdx % Vscale));
11461 } else {
11462 auto Decompose =
11463 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
11464 VecVT: ContainerVecVT, SubVecVT: ContainerSubVecVT, InsertExtractIdx: OrigIdx, TRI);
11465 SubRegIdx = Decompose.first;
11466 RemIdx = ElementCount::getScalable(MinVal: Decompose.second);
11467 }
11468
11469 TypeSize VecRegSize = TypeSize::getScalable(MinimumSize: RISCV::RVVBitsPerBlock);
11470 assert(isPowerOf2_64(
11471 Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
11472 bool ExactlyVecRegSized =
11473 Subtarget.expandVScale(X: SubVecVT.getSizeInBits())
11474 .isKnownMultipleOf(RHS: Subtarget.expandVScale(X: VecRegSize));
11475
11476 // 1. If the Idx has been completely eliminated and this subvector's size is
11477 // a vector register or a multiple thereof, or the surrounding elements are
11478 // undef, then this is a subvector insert which naturally aligns to a vector
11479 // register. These can easily be handled using subregister manipulation.
11480 // 2. If the subvector isn't an exact multiple of a valid register group size,
11481 // then the insertion must preserve the undisturbed elements of the register.
11482 // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
11483 // vector type (which resolves to a subregister copy), performing a VSLIDEUP
11484 // to place the subvector within the vector register, and an INSERT_SUBVECTOR
11485 // of that LMUL=1 type back into the larger vector (resolving to another
11486 // subregister operation). See below for how our VSLIDEUP works. We go via a
11487 // LMUL=1 type to avoid allocating a large register group to hold our
11488 // subvector.
11489 if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
11490 if (SubVecVT.isFixedLengthVector()) {
11491 // We may get NoSubRegister if inserting at index 0 and the subvec
11492 // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
11493 if (SubRegIdx == RISCV::NoSubRegister) {
11494 assert(OrigIdx == 0);
11495 return Op;
11496 }
11497
11498 // Use an insert_subvector that will resolve to an insert subreg.
11499 assert(VLen);
11500 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
11501 SDValue Insert =
11502 DAG.getInsertSubvector(DL, Vec, SubVec, Idx: OrigIdx / Vscale);
11503 if (VecVT.isFixedLengthVector())
11504 Insert = convertFromScalableVector(VT: VecVT, V: Insert, DAG, Subtarget);
11505 return Insert;
11506 }
11507 return Op;
11508 }
11509
11510 // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
11511 // OFFSET<=i<VL set to the "subvector" and VL<=i<VLMAX set to the tail policy
11512 // (in our case undisturbed). This means we can set up a subvector insertion
11513 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
11514 // size of the subvector.
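// For example, inserting a 2-element subvector at index 4 uses OFFSET=4 and
// VL=6, leaving elements 0..3 and everything from index 6 upwards undisturbed.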
11515 MVT InterSubVT = ContainerVecVT;
11516 SDValue AlignedExtract = Vec;
11517 unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
11518 if (SubVecVT.isFixedLengthVector()) {
11519 assert(VLen);
11520 AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
11521 }
11522 if (ContainerVecVT.bitsGT(VT: RISCVTargetLowering::getM1VT(VT: ContainerVecVT))) {
11523 InterSubVT = RISCVTargetLowering::getM1VT(VT: ContainerVecVT);
11524 // Extract a subvector equal to the nearest full vector register type. This
11525 // should resolve to a EXTRACT_SUBREG instruction.
11526 AlignedExtract = DAG.getExtractSubvector(DL, VT: InterSubVT, Vec, Idx: AlignedIdx);
11527 }
11528
11529 SubVec = DAG.getInsertSubvector(DL, Vec: DAG.getUNDEF(VT: InterSubVT), SubVec, Idx: 0);
11530
11531 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT: ContainerVecVT, DL, DAG, Subtarget);
11532
11533 ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
11534 VL = DAG.getElementCount(DL, VT: XLenVT, EC: SubVecVT.getVectorElementCount());
11535
11536 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
11537 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
11538 if (Subtarget.expandVScale(X: EndIndex) ==
11539 Subtarget.expandVScale(X: InterSubVT.getVectorElementCount()))
11540 Policy = RISCVVType::TAIL_AGNOSTIC;
11541
11542 // If we're inserting into the lowest elements, use a tail undisturbed
11543 // vmv.v.v.
11544 if (RemIdx.isZero()) {
11545 SubVec = DAG.getNode(Opcode: RISCVISD::VMV_V_V_VL, DL, VT: InterSubVT, N1: AlignedExtract,
11546 N2: SubVec, N3: VL);
11547 } else {
11548 SDValue SlideupAmt = DAG.getElementCount(DL, VT: XLenVT, EC: RemIdx);
11549
11550 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
11551 VL = DAG.getNode(Opcode: ISD::ADD, DL, VT: XLenVT, N1: SlideupAmt, N2: VL);
11552
11553 SubVec = getVSlideup(DAG, Subtarget, DL, VT: InterSubVT, Passthru: AlignedExtract, Op: SubVec,
11554 Offset: SlideupAmt, Mask, VL, Policy);
11555 }
11556
11557 // If required, insert this subvector back into the correct vector register.
11558 // This should resolve to an INSERT_SUBREG instruction.
11559 if (ContainerVecVT.bitsGT(VT: InterSubVT))
11560 SubVec = DAG.getInsertSubvector(DL, Vec, SubVec, Idx: AlignedIdx);
11561
11562 if (VecVT.isFixedLengthVector())
11563 SubVec = convertFromScalableVector(VT: VecVT, V: SubVec, DAG, Subtarget);
11564
11565 // We might have bitcast from a mask type: cast back to the original type if
11566 // required.
11567 return DAG.getBitcast(VT: Op.getSimpleValueType(), V: SubVec);
11568}
11569
11570SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
11571 SelectionDAG &DAG) const {
11572 SDValue Vec = Op.getOperand(i: 0);
11573 MVT SubVecVT = Op.getSimpleValueType();
11574 MVT VecVT = Vec.getSimpleValueType();
11575
11576 SDLoc DL(Op);
11577 MVT XLenVT = Subtarget.getXLenVT();
11578 unsigned OrigIdx = Op.getConstantOperandVal(i: 1);
11579 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
11580
11581 // With an index of 0 this is a cast-like subvector, which can be performed
11582 // with subregister operations.
11583 if (OrigIdx == 0)
11584 return Op;
11585
11586 // We don't have the ability to slide mask vectors down indexed by their i1
11587 // elements; the smallest we can do is i8. Often we are able to bitcast to
11588 // equivalent i8 vectors. Note that when extracting a fixed-length vector
11589 // from a scalable one, we might not necessarily have enough scalable
11590 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
11591 if (SubVecVT.getVectorElementType() == MVT::i1) {
11592 if (VecVT.getVectorMinNumElements() >= 8 &&
11593 SubVecVT.getVectorMinNumElements() >= 8) {
11594 assert(OrigIdx % 8 == 0 && "Invalid index");
11595 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
11596 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
11597 "Unexpected mask vector lowering");
11598 OrigIdx /= 8;
11599 SubVecVT =
11600 MVT::getVectorVT(VT: MVT::i8, NumElements: SubVecVT.getVectorMinNumElements() / 8,
11601 IsScalable: SubVecVT.isScalableVector());
11602 VecVT = MVT::getVectorVT(VT: MVT::i8, NumElements: VecVT.getVectorMinNumElements() / 8,
11603 IsScalable: VecVT.isScalableVector());
11604 Vec = DAG.getBitcast(VT: VecVT, V: Vec);
11605 } else {
11606 // We can't slide this mask vector down indexed by its i1 elements.
11607 // This poses a problem when we wish to extract a scalable vector which
11608 // can't be re-expressed as a larger type. Just choose the slow path and
11609 // extend to a larger type, then truncate back down.
11610 // TODO: We could probably improve this when extracting certain fixed
11611 // from fixed, where we can extract as i8 and shift the correct element
11612 // right to reach the desired subvector?
11613 MVT ExtVecVT = VecVT.changeVectorElementType(EltVT: MVT::i8);
11614 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(EltVT: MVT::i8);
11615 Vec = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: ExtVecVT, Operand: Vec);
11616 Vec = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: ExtSubVecVT, N1: Vec,
11617 N2: Op.getOperand(i: 1));
11618 SDValue SplatZero = DAG.getConstant(Val: 0, DL, VT: ExtSubVecVT);
11619 return DAG.getSetCC(DL, VT: SubVecVT, LHS: Vec, RHS: SplatZero, Cond: ISD::SETNE);
11620 }
11621 }
11622
11623 const auto VLen = Subtarget.getRealVLen();
11624
11625 // If the subvector is a fixed-length type and we don't know VLEN exactly,
11626 // we cannot use subregister manipulation to simplify the codegen; we don't
11627 // know which register of an LMUL group contains the specific subvector
11628 // as we only know the minimum register size. Therefore we must slide the
11629 // vector group down the full amount.
11630 if (SubVecVT.isFixedLengthVector() && !VLen) {
11631 MVT ContainerVT = VecVT;
11632 if (VecVT.isFixedLengthVector()) {
11633 ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
11634 Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget);
11635 }
11636
11637 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
11638 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
11639 if (auto ShrunkVT =
11640 getSmallestVTForIndex(VecVT: ContainerVT, MaxIdx: LastIdx, DL, DAG, Subtarget)) {
11641 ContainerVT = *ShrunkVT;
11642 Vec = DAG.getExtractSubvector(DL, VT: ContainerVT, Vec, Idx: 0);
11643 }
11644
11645 SDValue Mask =
11646 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
11647 // Set the vector length to only the number of elements we care about. This
11648 // avoids sliding down elements we're going to discard straight away.
11649 SDValue VL = DAG.getConstant(Val: SubVecVT.getVectorNumElements(), DL, VT: XLenVT);
11650 SDValue SlidedownAmt = DAG.getConstant(Val: OrigIdx, DL, VT: XLenVT);
11651 SDValue Slidedown =
11652 getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT,
11653 Passthru: DAG.getUNDEF(VT: ContainerVT), Op: Vec, Offset: SlidedownAmt, Mask, VL);
11654 // Now we can use a cast-like subvector extract to get the result.
11655 Slidedown = DAG.getExtractSubvector(DL, VT: SubVecVT, Vec: Slidedown, Idx: 0);
11656 return DAG.getBitcast(VT: Op.getValueType(), V: Slidedown);
11657 }
11658
11659 if (VecVT.isFixedLengthVector()) {
11660 VecVT = getContainerForFixedLengthVector(VT: VecVT);
11661 Vec = convertToScalableVector(VT: VecVT, V: Vec, DAG, Subtarget);
11662 }
11663
11664 MVT ContainerSubVecVT = SubVecVT;
11665 if (SubVecVT.isFixedLengthVector())
11666 ContainerSubVecVT = getContainerForFixedLengthVector(VT: SubVecVT);
11667
11668 unsigned SubRegIdx;
11669 ElementCount RemIdx;
11670 // extract_subvector scales the index by vscale if the subvector is scalable,
11671 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
11672 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
11673 if (SubVecVT.isFixedLengthVector()) {
11674 assert(VLen);
11675 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
11676 auto Decompose =
11677 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
11678 VecVT, SubVecVT: ContainerSubVecVT, InsertExtractIdx: OrigIdx / Vscale, TRI);
11679 SubRegIdx = Decompose.first;
11680 RemIdx = ElementCount::getFixed(MinVal: (Decompose.second * Vscale) +
11681 (OrigIdx % Vscale));
11682 } else {
11683 auto Decompose =
11684 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
11685 VecVT, SubVecVT: ContainerSubVecVT, InsertExtractIdx: OrigIdx, TRI);
11686 SubRegIdx = Decompose.first;
11687 RemIdx = ElementCount::getScalable(MinVal: Decompose.second);
11688 }
11689
11690 // If the Idx has been completely eliminated then this is a subvector extract
11691 // which naturally aligns to a vector register. These can easily be handled
11692 // using subregister manipulation. We use an extract_subvector that will
11693 // resolve to an extract subreg.
11694 if (RemIdx.isZero()) {
11695 if (SubVecVT.isFixedLengthVector()) {
11696 assert(VLen);
11697 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
11698 Vec =
11699 DAG.getExtractSubvector(DL, VT: ContainerSubVecVT, Vec, Idx: OrigIdx / Vscale);
11700 return convertFromScalableVector(VT: SubVecVT, V: Vec, DAG, Subtarget);
11701 }
11702 return Op;
11703 }
11704
11705 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
11706 // was > M1 then the index would need to be a multiple of VLMAX, and so would
11707 // divide exactly.
11708 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
11709 getLMUL(ContainerSubVecVT) == RISCVVType::LMUL_1);
11710
11711 // If the vector type is an LMUL-group type, extract a subvector equal to the
11712 // nearest full vector register type.
11713 MVT InterSubVT = VecVT;
11714 if (VecVT.bitsGT(VT: RISCVTargetLowering::getM1VT(VT: VecVT))) {
11715 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
11716 // we should have successfully decomposed the extract into a subregister.
11717 // We use an extract_subvector that will resolve to a subreg extract.
11718 assert(SubRegIdx != RISCV::NoSubRegister);
11719 (void)SubRegIdx;
11720 unsigned Idx = OrigIdx - RemIdx.getKnownMinValue();
11721 if (SubVecVT.isFixedLengthVector()) {
11722 assert(VLen);
11723 Idx /= *VLen / RISCV::RVVBitsPerBlock;
11724 }
11725 InterSubVT = RISCVTargetLowering::getM1VT(VT: VecVT);
11726 Vec = DAG.getExtractSubvector(DL, VT: InterSubVT, Vec, Idx);
11727 }
11728
11729 // Slide this vector register down by the desired number of elements in order
11730 // to place the desired subvector starting at element 0.
11731 SDValue SlidedownAmt = DAG.getElementCount(DL, VT: XLenVT, EC: RemIdx);
11732 auto [Mask, VL] = getDefaultScalableVLOps(VecVT: InterSubVT, DL, DAG, Subtarget);
11733 if (SubVecVT.isFixedLengthVector())
11734 VL = DAG.getConstant(Val: SubVecVT.getVectorNumElements(), DL, VT: XLenVT);
11735 SDValue Slidedown =
11736 getVSlidedown(DAG, Subtarget, DL, VT: InterSubVT, Passthru: DAG.getUNDEF(VT: InterSubVT),
11737 Op: Vec, Offset: SlidedownAmt, Mask, VL);
11738
11739 // Now the vector is in the right position, extract our final subvector. This
11740 // should resolve to a COPY.
11741 Slidedown = DAG.getExtractSubvector(DL, VT: SubVecVT, Vec: Slidedown, Idx: 0);
11742
11743 // We might have bitcast from a mask type: cast back to the original type if
11744 // required.
11745 return DAG.getBitcast(VT: Op.getSimpleValueType(), V: Slidedown);
11746}
11747
11748// Widen a vector's operands to i8, then truncate its results back to the
11749// original type, typically i1. All operand and result types must be the same.
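// For example, an i1 VECTOR_DEINTERLEAVE is performed on zero-extended i8
// operands and each i8 result is compared != 0 to recover the i1 vectors.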
11750static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
11751 SelectionDAG &DAG) {
11752 MVT VT = N.getSimpleValueType();
11753 MVT WideVT = VT.changeVectorElementType(EltVT: MVT::i8);
11754 SmallVector<SDValue, 4> WideOps;
11755 for (SDValue Op : N->ops()) {
11756 assert(Op.getSimpleValueType() == VT &&
11757 "Operands and result must be same type");
11758 WideOps.push_back(Elt: DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: WideVT, Operand: Op));
11759 }
11760
11761 unsigned NumVals = N->getNumValues();
11762
11763 SDVTList VTs = DAG.getVTList(VTs: SmallVector<EVT, 4>(
11764 NumVals, N.getValueType().changeVectorElementType(EltVT: MVT::i8)));
11765 SDValue WideN = DAG.getNode(Opcode: N.getOpcode(), DL, VTList: VTs, Ops: WideOps);
11766 SmallVector<SDValue, 4> TruncVals;
11767 for (unsigned I = 0; I < NumVals; I++) {
11768 TruncVals.push_back(
11769 Elt: DAG.getSetCC(DL, VT: N->getSimpleValueType(ResNo: I), LHS: WideN.getValue(R: I),
11770 RHS: DAG.getConstant(Val: 0, DL, VT: WideVT), Cond: ISD::SETNE));
11771 }
11772
11773 if (TruncVals.size() > 1)
11774 return DAG.getMergeValues(Ops: TruncVals, dl: DL);
11775 return TruncVals.front();
11776}
11777
11778SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
11779 SelectionDAG &DAG) const {
11780 SDLoc DL(Op);
11781 MVT VecVT = Op.getSimpleValueType();
11782
11783 const unsigned Factor = Op->getNumValues();
11784 assert(Factor <= 8);
11785
11786 // 1-bit element vectors need to be widened to e8 first.
11787 if (VecVT.getVectorElementType() == MVT::i1)
11788 return widenVectorOpsToi8(N: Op, DL, DAG);
11789
11790 // Convert to scalable vectors first.
11791 if (VecVT.isFixedLengthVector()) {
11792 MVT ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
11793 SmallVector<SDValue, 8> Ops(Factor);
11794 for (unsigned i = 0U; i < Factor; ++i)
11795 Ops[i] = convertToScalableVector(VT: ContainerVT, V: Op.getOperand(i), DAG,
11796 Subtarget);
11797
11798 SmallVector<EVT, 8> VTs(Factor, ContainerVT);
11799 SDValue NewDeinterleave =
11800 DAG.getNode(Opcode: ISD::VECTOR_DEINTERLEAVE, DL, ResultTys: VTs, Ops);
11801
11802 SmallVector<SDValue, 8> Res(Factor);
11803 for (unsigned i = 0U; i < Factor; ++i)
11804 Res[i] = convertFromScalableVector(VT: VecVT, V: NewDeinterleave.getValue(R: i),
11805 DAG, Subtarget);
11806 return DAG.getMergeValues(Ops: Res, dl: DL);
11807 }
11808
11809 // If concatenating would exceed LMUL=8, we need to split.
11810 if ((VecVT.getSizeInBits().getKnownMinValue() * Factor) >
11811 (8 * RISCV::RVVBitsPerBlock)) {
11812 SmallVector<SDValue, 8> Ops(Factor * 2);
11813 for (unsigned i = 0; i != Factor; ++i) {
11814 auto [OpLo, OpHi] = DAG.SplitVectorOperand(N: Op.getNode(), OpNo: i);
11815 Ops[i * 2] = OpLo;
11816 Ops[i * 2 + 1] = OpHi;
11817 }
11818
11819 SmallVector<EVT, 8> VTs(Factor, Ops[0].getValueType());
11820
11821 SDValue Lo = DAG.getNode(Opcode: ISD::VECTOR_DEINTERLEAVE, DL, ResultTys: VTs,
11822 Ops: ArrayRef(Ops).slice(N: 0, M: Factor));
11823 SDValue Hi = DAG.getNode(Opcode: ISD::VECTOR_DEINTERLEAVE, DL, ResultTys: VTs,
11824 Ops: ArrayRef(Ops).slice(N: Factor, M: Factor));
11825
11826 SmallVector<SDValue, 8> Res(Factor);
11827 for (unsigned i = 0; i != Factor; ++i)
11828 Res[i] = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: VecVT, N1: Lo.getValue(R: i),
11829 N2: Hi.getValue(R: i));
11830
11831 return DAG.getMergeValues(Ops: Res, dl: DL);
11832 }
11833
11834 if (Subtarget.hasVendorXRivosVizip() && Factor == 2) {
11835 MVT VT = Op->getSimpleValueType(ResNo: 0);
11836 SDValue V1 = Op->getOperand(Num: 0);
11837 SDValue V2 = Op->getOperand(Num: 1);
11838
11839 // For fractional LMUL, check if we can use a higher LMUL
11840 // instruction to avoid a vslidedown.
11841 if (SDValue Src = foldConcatVector(V1, V2);
11842 Src && RISCVTargetLowering::getM1VT(VT).bitsGT(VT)) {
11843 EVT NewVT = VT.getDoubleNumVectorElementsVT();
11844 Src = DAG.getExtractSubvector(DL, VT: NewVT, Vec: Src, Idx: 0);
11845 // Freeze the source so we can increase its use count.
11846 Src = DAG.getFreeze(V: Src);
11847 SDValue Even = lowerVZIP(Opc: RISCVISD::RI_VUNZIP2A_VL, Op0: Src,
11848 Op1: DAG.getUNDEF(VT: NewVT), DL, DAG, Subtarget);
11849 SDValue Odd = lowerVZIP(Opc: RISCVISD::RI_VUNZIP2B_VL, Op0: Src,
11850 Op1: DAG.getUNDEF(VT: NewVT), DL, DAG, Subtarget);
11851 Even = DAG.getExtractSubvector(DL, VT, Vec: Even, Idx: 0);
11852 Odd = DAG.getExtractSubvector(DL, VT, Vec: Odd, Idx: 0);
11853 return DAG.getMergeValues(Ops: {Even, Odd}, dl: DL);
11854 }
11855
11856 // Freeze the sources so we can increase their use count.
11857 V1 = DAG.getFreeze(V: V1);
11858 V2 = DAG.getFreeze(V: V2);
11859 SDValue Even =
11860 lowerVZIP(Opc: RISCVISD::RI_VUNZIP2A_VL, Op0: V1, Op1: V2, DL, DAG, Subtarget);
11861 SDValue Odd =
11862 lowerVZIP(Opc: RISCVISD::RI_VUNZIP2B_VL, Op0: V1, Op1: V2, DL, DAG, Subtarget);
11863 return DAG.getMergeValues(Ops: {Even, Odd}, dl: DL);
11864 }
11865
11866 SmallVector<SDValue, 8> Ops(Op->op_values());
11867
11868 // Concatenate the vectors into one vector to deinterleave
11869 MVT ConcatVT =
11870 MVT::getVectorVT(VT: VecVT.getVectorElementType(),
11871 EC: VecVT.getVectorElementCount().multiplyCoefficientBy(
11872 RHS: PowerOf2Ceil(A: Factor)));
11873 if (Ops.size() < PowerOf2Ceil(A: Factor))
11874 Ops.append(NumInputs: PowerOf2Ceil(A: Factor) - Factor, Elt: DAG.getUNDEF(VT: VecVT));
11875 SDValue Concat = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: ConcatVT, Ops);
11876
11877 if (Factor == 2) {
11878 // We can deinterleave through vnsrl.wi if the element type is smaller than
11879 // ELEN
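// As an illustrative sketch (register names and the vsetvli setup are
// assumptions, not the exact emitted sequence), for SEW=8 and Factor=2 the
// concatenated vector can be viewed as e16 pairs and split with two narrowing
// shifts:
//   vnsrl.wi vEven, vConcat, 0    ; low byte of each pair  -> even elements
//   vnsrl.wi vOdd,  vConcat, 8    ; high byte of each pair -> odd elements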
11880 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
11881 SDValue Even = getDeinterleaveShiftAndTrunc(DL, VT: VecVT, Src: Concat, Factor: 2, Index: 0, DAG);
11882 SDValue Odd = getDeinterleaveShiftAndTrunc(DL, VT: VecVT, Src: Concat, Factor: 2, Index: 1, DAG);
11883 return DAG.getMergeValues(Ops: {Even, Odd}, dl: DL);
11884 }
11885
11886 // For the even/odd masks, use the vmv.v.x of an i8 constant to fill the
11887 // largest possible mask vector, then extract the required subvector. Doing this
11888 // (instead of a vid, vmsne sequence) reduces LMUL, and allows the mask
11889 // creation to be rematerialized during register allocation to reduce
11890 // register pressure if needed.
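// Illustratively, splatting the i8 value 0b01010101 and bitcasting it to a
// mask type yields, reading the LSB of each byte first:
//   1 0 1 0 1 0 1 0 ...   <- selects the even elements of the concatenation
// while 0b10101010 yields:
//   0 1 0 1 0 1 0 1 ...   <- selects the odd elements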
11891
11892 MVT MaskVT = ConcatVT.changeVectorElementType(EltVT: MVT::i1);
11893
11894 SDValue EvenSplat = DAG.getConstant(Val: 0b01010101, DL, VT: MVT::nxv8i8);
11895 EvenSplat = DAG.getBitcast(VT: MVT::nxv64i1, V: EvenSplat);
11896 SDValue EvenMask = DAG.getExtractSubvector(DL, VT: MaskVT, Vec: EvenSplat, Idx: 0);
11897
11898 SDValue OddSplat = DAG.getConstant(Val: 0b10101010, DL, VT: MVT::nxv8i8);
11899 OddSplat = DAG.getBitcast(VT: MVT::nxv64i1, V: OddSplat);
11900 SDValue OddMask = DAG.getExtractSubvector(DL, VT: MaskVT, Vec: OddSplat, Idx: 0);
11901
11902 // vcompress the even and odd elements into two separate vectors
11903 SDValue EvenWide = DAG.getNode(Opcode: ISD::VECTOR_COMPRESS, DL, VT: ConcatVT, N1: Concat,
11904 N2: EvenMask, N3: DAG.getUNDEF(VT: ConcatVT));
11905 SDValue OddWide = DAG.getNode(Opcode: ISD::VECTOR_COMPRESS, DL, VT: ConcatVT, N1: Concat,
11906 N2: OddMask, N3: DAG.getUNDEF(VT: ConcatVT));
11907
11908 // Extract the result half of the gather for even and odd
11909 SDValue Even = DAG.getExtractSubvector(DL, VT: VecVT, Vec: EvenWide, Idx: 0);
11910 SDValue Odd = DAG.getExtractSubvector(DL, VT: VecVT, Vec: OddWide, Idx: 0);
11911
11912 return DAG.getMergeValues(Ops: {Even, Odd}, dl: DL);
11913 }
11914
11915 // Store with a unit-stride store and load it back with a segmented load.
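// A rough sketch for Factor=3 and SEW=32 (register names, the stack slot, and
// the vsetvli are illustrative assumptions):
//   vse32.v     vConcat, (slot)   ; unit-stride store of the concatenation
//   vlseg3e32.v v8, (slot)        ; reload as 3 deinterleaved fields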
11916 MVT XLenVT = Subtarget.getXLenVT();
11917 SDValue VL = getDefaultScalableVLOps(VecVT: ConcatVT, DL, DAG, Subtarget).second;
11918 SDValue Passthru = DAG.getUNDEF(VT: ConcatVT);
11919
11920 // Allocate a stack slot.
11921 Align Alignment = DAG.getReducedAlign(VT: VecVT, /*UseABI=*/false);
11922 SDValue StackPtr =
11923 DAG.CreateStackTemporary(Bytes: ConcatVT.getStoreSize(), Alignment);
11924 auto &MF = DAG.getMachineFunction();
11925 auto FrameIndex = cast<FrameIndexSDNode>(Val: StackPtr.getNode())->getIndex();
11926 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FI: FrameIndex);
11927
11928 SDValue StoreOps[] = {DAG.getEntryNode(),
11929 DAG.getTargetConstant(Val: Intrinsic::riscv_vse, DL, VT: XLenVT),
11930 Concat, StackPtr, VL};
11931
11932 SDValue Chain = DAG.getMemIntrinsicNode(
11933 Opcode: ISD::INTRINSIC_VOID, dl: DL, VTList: DAG.getVTList(VT: MVT::Other), Ops: StoreOps,
11934 MemVT: ConcatVT.getVectorElementType(), PtrInfo, Alignment,
11935 Flags: MachineMemOperand::MOStore, Size: LocationSize::beforeOrAfterPointer());
11936
11937 static const Intrinsic::ID VlsegIntrinsicsIds[] = {
11938 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3, Intrinsic::riscv_vlseg4,
11939 Intrinsic::riscv_vlseg5, Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
11940 Intrinsic::riscv_vlseg8};
11941
11942 SDValue LoadOps[] = {
11943 Chain,
11944 DAG.getTargetConstant(Val: VlsegIntrinsicsIds[Factor - 2], DL, VT: XLenVT),
11945 Passthru,
11946 StackPtr,
11947 VL,
11948 DAG.getTargetConstant(Val: Log2_64(Value: VecVT.getScalarSizeInBits()), DL, VT: XLenVT)};
11949
11950 unsigned Sz =
11951 Factor * VecVT.getVectorMinNumElements() * VecVT.getScalarSizeInBits();
11952 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NFields: Factor);
11953
11954 SDValue Load = DAG.getMemIntrinsicNode(
11955 Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: DAG.getVTList(VTs: {VecTupTy, MVT::Other}),
11956 Ops: LoadOps, MemVT: ConcatVT.getVectorElementType(), PtrInfo, Alignment,
11957 Flags: MachineMemOperand::MOLoad, Size: LocationSize::beforeOrAfterPointer());
11958
11959 SmallVector<SDValue, 8> Res(Factor);
11960
11961 for (unsigned i = 0U; i < Factor; ++i)
11962 Res[i] = DAG.getNode(Opcode: RISCVISD::TUPLE_EXTRACT, DL, VT: VecVT, N1: Load,
11963 N2: DAG.getVectorIdxConstant(Val: i, DL));
11964
11965 return DAG.getMergeValues(Ops: Res, dl: DL);
11966}
11967
11968SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
11969 SelectionDAG &DAG) const {
11970 SDLoc DL(Op);
11971 MVT VecVT = Op.getSimpleValueType();
11972
11973 const unsigned Factor = Op.getNumOperands();
11974 assert(Factor <= 8);
11975
11976 // i1 vectors need to be widened to i8
11977 if (VecVT.getVectorElementType() == MVT::i1)
11978 return widenVectorOpsToi8(N: Op, DL, DAG);
11979
11980 // Convert to scalable vectors first.
11981 if (VecVT.isFixedLengthVector()) {
11982 MVT ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
11983 SmallVector<SDValue, 8> Ops(Factor);
11984 for (unsigned i = 0U; i < Factor; ++i)
11985 Ops[i] = convertToScalableVector(VT: ContainerVT, V: Op.getOperand(i), DAG,
11986 Subtarget);
11987
11988 SmallVector<EVT, 8> VTs(Factor, ContainerVT);
11989 SDValue NewInterleave = DAG.getNode(Opcode: ISD::VECTOR_INTERLEAVE, DL, ResultTys: VTs, Ops);
11990
11991 SmallVector<SDValue, 8> Res(Factor);
11992 for (unsigned i = 0U; i < Factor; ++i)
11993 Res[i] = convertFromScalableVector(VT: VecVT, V: NewInterleave.getValue(R: i), DAG,
11994 Subtarget);
11995 return DAG.getMergeValues(Ops: Res, dl: DL);
11996 }
11997
11998 MVT XLenVT = Subtarget.getXLenVT();
11999 SDValue VL = DAG.getRegister(Reg: RISCV::X0, VT: XLenVT);
12000
12001 // If the VT is larger than LMUL=8, we need to split and reassemble.
12002 if ((VecVT.getSizeInBits().getKnownMinValue() * Factor) >
12003 (8 * RISCV::RVVBitsPerBlock)) {
12004 SmallVector<SDValue, 8> Ops(Factor * 2);
12005 for (unsigned i = 0; i != Factor; ++i) {
12006 auto [OpLo, OpHi] = DAG.SplitVectorOperand(N: Op.getNode(), OpNo: i);
12007 Ops[i] = OpLo;
12008 Ops[i + Factor] = OpHi;
12009 }
12010
12011 SmallVector<EVT, 8> VTs(Factor, Ops[0].getValueType());
12012
12013 SDValue Res[] = {DAG.getNode(Opcode: ISD::VECTOR_INTERLEAVE, DL, ResultTys: VTs,
12014 Ops: ArrayRef(Ops).take_front(N: Factor)),
12015 DAG.getNode(Opcode: ISD::VECTOR_INTERLEAVE, DL, ResultTys: VTs,
12016 Ops: ArrayRef(Ops).drop_front(N: Factor))};
12017
12018 SmallVector<SDValue, 8> Concats(Factor);
12019 for (unsigned i = 0; i != Factor; ++i) {
12020 unsigned IdxLo = 2 * i;
12021 unsigned IdxHi = 2 * i + 1;
12022 Concats[i] = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: VecVT,
12023 N1: Res[IdxLo / Factor].getValue(R: IdxLo % Factor),
12024 N2: Res[IdxHi / Factor].getValue(R: IdxHi % Factor));
12025 }
12026
12027 return DAG.getMergeValues(Ops: Concats, dl: DL);
12028 }
12029
12030 SDValue Interleaved;
12031
12032 // Spill to the stack using a segment store for simplicity.
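// A rough sketch for Factor=3 and SEW=32 (register names, the stack slot, and
// the vsetvli are illustrative assumptions):
//   vsseg3e32.v v8, (slot)              ; write v8,v9,v10 interleaved
//   vle32.v     v11, (slot)             ; contiguous reloads of the stream
//   vle32.v     v12, (slot + 1*slice)   ; slice = LMUL * VLENB bytes
//   vle32.v     v13, (slot + 2*slice)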
12033 if (Factor != 2) {
12034 EVT MemVT =
12035 EVT::getVectorVT(Context&: *DAG.getContext(), VT: VecVT.getVectorElementType(),
12036 EC: VecVT.getVectorElementCount() * Factor);
12037
12038 // Allocate a stack slot.
12039 Align Alignment = DAG.getReducedAlign(VT: VecVT, /*UseABI=*/false);
12040 SDValue StackPtr =
12041 DAG.CreateStackTemporary(Bytes: MemVT.getStoreSize(), Alignment);
12042 EVT PtrVT = StackPtr.getValueType();
12043 auto &MF = DAG.getMachineFunction();
12044 auto FrameIndex = cast<FrameIndexSDNode>(Val: StackPtr.getNode())->getIndex();
12045 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FI: FrameIndex);
12046
12047 static const Intrinsic::ID IntrIds[] = {
12048 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
12049 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
12050 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
12051 Intrinsic::riscv_vsseg8,
12052 };
12053
12054 unsigned Sz =
12055 Factor * VecVT.getVectorMinNumElements() * VecVT.getScalarSizeInBits();
12056 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NFields: Factor);
12057
12058 SDValue StoredVal = DAG.getUNDEF(VT: VecTupTy);
12059 for (unsigned i = 0; i < Factor; i++)
12060 StoredVal = DAG.getNode(Opcode: RISCVISD::TUPLE_INSERT, DL, VT: VecTupTy, N1: StoredVal,
12061 N2: Op.getOperand(i), N3: DAG.getConstant(Val: i, DL, VT: XLenVT));
12062
12063 SDValue Ops[] = {DAG.getEntryNode(),
12064 DAG.getTargetConstant(Val: IntrIds[Factor - 2], DL, VT: XLenVT),
12065 StoredVal,
12066 StackPtr,
12067 VL,
12068 DAG.getTargetConstant(Val: Log2_64(Value: VecVT.getScalarSizeInBits()),
12069 DL, VT: XLenVT)};
12070
12071 SDValue Chain = DAG.getMemIntrinsicNode(
12072 Opcode: ISD::INTRINSIC_VOID, dl: DL, VTList: DAG.getVTList(VT: MVT::Other), Ops,
12073 MemVT: VecVT.getVectorElementType(), PtrInfo, Alignment,
12074 Flags: MachineMemOperand::MOStore, Size: LocationSize::beforeOrAfterPointer());
12075
12076 SmallVector<SDValue, 8> Loads(Factor);
12077
12078 SDValue Increment =
12079 DAG.getVScale(DL, VT: PtrVT,
12080 MulImm: APInt(PtrVT.getFixedSizeInBits(),
12081 VecVT.getStoreSize().getKnownMinValue()));
12082 for (unsigned i = 0; i != Factor; ++i) {
12083 if (i != 0)
12084 StackPtr = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr, N2: Increment);
12085
12086 Loads[i] = DAG.getLoad(VT: VecVT, dl: DL, Chain, Ptr: StackPtr, PtrInfo);
12087 }
12088
12089 return DAG.getMergeValues(Ops: Loads, dl: DL);
12090 }
12091
12092 // Use ri.vzip2{a,b} if available
12093 // TODO: Figure out the best lowering for the spread variants
12094 if (Subtarget.hasVendorXRivosVizip() && !Op.getOperand(i: 0).isUndef() &&
12095 !Op.getOperand(i: 1).isUndef()) {
12096 // Freeze the sources so we can increase their use count.
12097 SDValue V1 = DAG.getFreeze(V: Op->getOperand(Num: 0));
12098 SDValue V2 = DAG.getFreeze(V: Op->getOperand(Num: 1));
12099 SDValue Lo = lowerVZIP(Opc: RISCVISD::RI_VZIP2A_VL, Op0: V1, Op1: V2, DL, DAG, Subtarget);
12100 SDValue Hi = lowerVZIP(Opc: RISCVISD::RI_VZIP2B_VL, Op0: V1, Op1: V2, DL, DAG, Subtarget);
12101 return DAG.getMergeValues(Ops: {Lo, Hi}, dl: DL);
12102 }
12103
12104 // If the element type is smaller than ELEN, then we can interleave with
12105 // vwaddu.vv and vwmaccu.vx
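// Conceptually, interleave(Even, Odd) viewed at 2*SEW is
//   zext(Even) + 2^SEW * zext(Odd)
// which can be computed with vwaddu.vv plus a vwmaccu.vx by the scalar
// (2^SEW - 1):
//   Even + Odd + (2^SEW - 1) * Odd = Even + 2^SEW * Odd.
// E.g. for SEW=8, Even=3, Odd=5: 3 + 5 + 255*5 = 1283 = 0x0503, i.e. the byte
// pair (3, 5) in element order.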
12106 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
12107 Interleaved = getWideningInterleave(EvenV: Op.getOperand(i: 0), OddV: Op.getOperand(i: 1), DL,
12108 DAG, Subtarget);
12109 } else {
12110 // Otherwise, fall back to using vrgatherei16.vv
12111 MVT ConcatVT =
12112 MVT::getVectorVT(VT: VecVT.getVectorElementType(),
12113 EC: VecVT.getVectorElementCount().multiplyCoefficientBy(RHS: 2));
12114 SDValue Concat = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: ConcatVT,
12115 N1: Op.getOperand(i: 0), N2: Op.getOperand(i: 1));
12116
12117 MVT IdxVT = ConcatVT.changeVectorElementType(EltVT: MVT::i16);
12118
12119 // 0 1 2 3 4 5 6 7 ...
12120 SDValue StepVec = DAG.getStepVector(DL, ResVT: IdxVT);
12121
12122 // 1 1 1 1 1 1 1 1 ...
12123 SDValue Ones = DAG.getSplatVector(VT: IdxVT, DL, Op: DAG.getConstant(Val: 1, DL, VT: XLenVT));
12124
12125 // 1 0 1 0 1 0 1 0 ...
12126 SDValue OddMask = DAG.getNode(Opcode: ISD::AND, DL, VT: IdxVT, N1: StepVec, N2: Ones);
12127 OddMask = DAG.getSetCC(
12128 DL, VT: IdxVT.changeVectorElementType(EltVT: MVT::i1), LHS: OddMask,
12129 RHS: DAG.getSplatVector(VT: IdxVT, DL, Op: DAG.getConstant(Val: 0, DL, VT: XLenVT)),
12130 Cond: ISD::CondCode::SETNE);
12131
12132 SDValue VLMax = DAG.getSplatVector(VT: IdxVT, DL, Op: computeVLMax(VecVT, DL, DAG));
12133
12134 // Build up the index vector for interleaving the concatenated vector
12135 // 0 0 1 1 2 2 3 3 ...
12136 SDValue Idx = DAG.getNode(Opcode: ISD::SRL, DL, VT: IdxVT, N1: StepVec, N2: Ones);
12137 // 0 n 1 n+1 2 n+2 3 n+3 ...
12138 Idx =
12139 DAG.getNode(Opcode: RISCVISD::ADD_VL, DL, VT: IdxVT, N1: Idx, N2: VLMax, N3: Idx, N4: OddMask, N5: VL);
12140
12141 // Then perform the interleave
12142 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
12143 SDValue TrueMask = getAllOnesMask(VecVT: IdxVT, VL, DL, DAG);
12144 Interleaved = DAG.getNode(Opcode: RISCVISD::VRGATHEREI16_VV_VL, DL, VT: ConcatVT,
12145 N1: Concat, N2: Idx, N3: DAG.getUNDEF(VT: ConcatVT), N4: TrueMask, N5: VL);
12146 }
12147
12148 // Extract the two halves from the interleaved result
12149 SDValue Lo = DAG.getExtractSubvector(DL, VT: VecVT, Vec: Interleaved, Idx: 0);
12150 SDValue Hi = DAG.getExtractSubvector(DL, VT: VecVT, Vec: Interleaved,
12151 Idx: VecVT.getVectorMinNumElements());
12152
12153 return DAG.getMergeValues(Ops: {Lo, Hi}, dl: DL);
12154}
12155
12156// Lower step_vector to the vid instruction. Any non-unit step value must
12157// be accounted for by manual expansion.
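// Rough sketches (vsetvli and register names are illustrative assumptions):
//   step 1: vid.v   v8               ; 0, 1, 2, 3, ...
//   step 4: vid.v   v8
//           vsll.vi v8, v8, 2        ; 0, 4, 8, 12, ...
//   step 3: vid.v   v8
//           vmul.vx v8, v8, a0       ; with a0 = 3 -> 0, 3, 6, 9, ...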
12158SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
12159 SelectionDAG &DAG) const {
12160 SDLoc DL(Op);
12161 MVT VT = Op.getSimpleValueType();
12162 assert(VT.isScalableVector() && "Expected scalable vector");
12163 MVT XLenVT = Subtarget.getXLenVT();
12164 auto [Mask, VL] = getDefaultScalableVLOps(VecVT: VT, DL, DAG, Subtarget);
12165 SDValue StepVec = DAG.getNode(Opcode: RISCVISD::VID_VL, DL, VT, N1: Mask, N2: VL);
12166 uint64_t StepValImm = Op.getConstantOperandVal(i: 0);
12167 if (StepValImm != 1) {
12168 if (isPowerOf2_64(Value: StepValImm)) {
12169 SDValue StepVal =
12170 DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: DAG.getUNDEF(VT),
12171 N2: DAG.getConstant(Val: Log2_64(Value: StepValImm), DL, VT: XLenVT), N3: VL);
12172 StepVec = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: StepVec, N2: StepVal);
12173 } else {
12174 SDValue StepVal = lowerScalarSplat(
12175 Passthru: SDValue(), Scalar: DAG.getConstant(Val: StepValImm, DL, VT: VT.getVectorElementType()),
12176 VL, VT, DL, DAG, Subtarget);
12177 StepVec = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: StepVec, N2: StepVal);
12178 }
12179 }
12180 return StepVec;
12181}
12182
12183// Implement vector_reverse using vrgather.vv with indices determined by
12184// subtracting the id of each element from (VLMAX-1). This will convert
12185// the indices like so:
12186// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
12187// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
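// For example, with VLMAX=8 the emitted sequence is roughly (register names
// are illustrative):
//   vid.v       v10              ; 0 1 2 3 4 5 6 7
//   vrsub.vx    v10, v10, a0     ; a0 = VLMAX-1 -> 7 6 5 4 3 2 1 0
//   vrgather.vv v9, v8, v10      ; v9 = reverse of v8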
12188SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
12189 SelectionDAG &DAG) const {
12190 SDLoc DL(Op);
12191 MVT VecVT = Op.getSimpleValueType();
12192 if (VecVT.getVectorElementType() == MVT::i1) {
12193 MVT WidenVT = MVT::getVectorVT(VT: MVT::i8, EC: VecVT.getVectorElementCount());
12194 SDValue Op1 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: WidenVT, Operand: Op.getOperand(i: 0));
12195 SDValue Op2 = DAG.getNode(Opcode: ISD::VECTOR_REVERSE, DL, VT: WidenVT, Operand: Op1);
12196 return DAG.getSetCC(DL, VT: VecVT, LHS: Op2,
12197 RHS: DAG.getConstant(Val: 0, DL, VT: Op2.getValueType()), Cond: ISD::SETNE);
12198 }
12199
12200 MVT ContainerVT = VecVT;
12201 SDValue Vec = Op.getOperand(i: 0);
12202 if (VecVT.isFixedLengthVector()) {
12203 ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
12204 Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget);
12205 }
12206
12207 MVT XLenVT = Subtarget.getXLenVT();
12208 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
12209
12210 // On some uarchs vrgather.vv will read from every input register for each
12211 // output register, regardless of the indices. However, to reverse a vector,
12212 // each output register only needs to read from one input register. So
12213 // decompose it into LMUL * M1 vrgather.vvs to get O(LMUL) performance
12214 // instead of O(LMUL^2).
12215 //
12216 // vsetvli a1, zero, e64, m4, ta, ma
12217 // vrgatherei16.vv v12, v8, v16
12218 // ->
12219 // vsetvli a1, zero, e64, m1, ta, ma
12220 // vrgather.vv v15, v8, v16
12221 // vrgather.vv v14, v9, v16
12222 // vrgather.vv v13, v10, v16
12223 // vrgather.vv v12, v11, v16
12224 if (ContainerVT.bitsGT(VT: RISCVTargetLowering::getM1VT(VT: ContainerVT)) &&
12225 ContainerVT.getVectorElementCount().isKnownMultipleOf(RHS: 2)) {
12226 auto [Lo, Hi] = DAG.SplitVector(N: Vec, DL);
12227 Lo = DAG.getNode(Opcode: ISD::VECTOR_REVERSE, DL, VT: Lo.getSimpleValueType(), Operand: Lo);
12228 Hi = DAG.getNode(Opcode: ISD::VECTOR_REVERSE, DL, VT: Hi.getSimpleValueType(), Operand: Hi);
12229 SDValue Concat = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: ContainerVT, N1: Hi, N2: Lo);
12230
12231 // Fixed length vectors might not fit exactly into their container, and so
12232 // leave a gap in the front of the vector after being reversed. Slide this
12233 // away.
12234 //
12235 // x x x x 3 2 1 0 <- v4i16 @ vlen=128
12236 // 0 1 2 3 x x x x <- reverse
12237 // x x x x 0 1 2 3 <- vslidedown.vx
12238 if (VecVT.isFixedLengthVector()) {
12239 SDValue Offset = DAG.getNode(
12240 Opcode: ISD::SUB, DL, VT: XLenVT,
12241 N1: DAG.getElementCount(DL, VT: XLenVT, EC: ContainerVT.getVectorElementCount()),
12242 N2: DAG.getElementCount(DL, VT: XLenVT, EC: VecVT.getVectorElementCount()));
12243 Concat =
12244 getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT,
12245 Passthru: DAG.getUNDEF(VT: ContainerVT), Op: Concat, Offset, Mask, VL);
12246 Concat = convertFromScalableVector(VT: VecVT, V: Concat, DAG, Subtarget);
12247 }
12248 return Concat;
12249 }
12250
12251 unsigned EltSize = ContainerVT.getScalarSizeInBits();
12252 unsigned MinSize = ContainerVT.getSizeInBits().getKnownMinValue();
12253 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
12254 unsigned MaxVLMAX =
12255 VecVT.isFixedLengthVector()
12256 ? VecVT.getVectorNumElements()
12257 : RISCVTargetLowering::computeVLMAX(VectorBits: VectorBitsMax, EltSize, MinSize);
12258
12259 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
12260 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
12261
12262 // If this is SEW=8 and VLMAX is potentially more than 256, we need
12263 // to use vrgatherei16.vv.
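// For example, at VLEN=2048 an LMUL=8 SEW=8 vector has
// VLMAX = 8 * 2048 / 8 = 2048, so an 8-bit index element cannot address every
// source element.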
12264 if (MaxVLMAX > 256 && EltSize == 8) {
12265 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
12266 // Reverse each half, then reassemble them in reverse order.
12267 // NOTE: It's also possible that, after splitting, VLMAX no longer
12268 // requires vrgatherei16.vv.
12269 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
12270 auto [Lo, Hi] = DAG.SplitVectorOperand(N: Op.getNode(), OpNo: 0);
12271 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT: VecVT);
12272 Lo = DAG.getNode(Opcode: ISD::VECTOR_REVERSE, DL, VT: LoVT, Operand: Lo);
12273 Hi = DAG.getNode(Opcode: ISD::VECTOR_REVERSE, DL, VT: HiVT, Operand: Hi);
12274 // Reassemble the low and high pieces reversed.
12275 // FIXME: This is a CONCAT_VECTORS.
12276 SDValue Res = DAG.getInsertSubvector(DL, Vec: DAG.getUNDEF(VT: VecVT), SubVec: Hi, Idx: 0);
12277 return DAG.getInsertSubvector(DL, Vec: Res, SubVec: Lo,
12278 Idx: LoVT.getVectorMinNumElements());
12279 }
12280
12281 // Just promote the int type to i16 which will double the LMUL.
12282 IntVT = MVT::getVectorVT(VT: MVT::i16, EC: ContainerVT.getVectorElementCount());
12283 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
12284 }
12285
12286 // At LMUL > 1, do the index computation in 16 bits to reduce register
12287 // pressure.
12288 if (IntVT.getScalarType().bitsGT(VT: MVT::i16) &&
12289 IntVT.bitsGT(VT: RISCVTargetLowering::getM1VT(VT: IntVT))) {
12290 assert(isUInt<16>(MaxVLMAX - 1)); // Largest VLMAX is 65536 @ zvl65536b
12291 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
12292 IntVT = IntVT.changeVectorElementType(EltVT: MVT::i16);
12293 }
12294
12295 // Calculate VLMAX-1 for the desired SEW.
12296 SDValue VLMinus1 = DAG.getNode(
12297 Opcode: ISD::SUB, DL, VT: XLenVT,
12298 N1: DAG.getElementCount(DL, VT: XLenVT, EC: VecVT.getVectorElementCount()),
12299 N2: DAG.getConstant(Val: 1, DL, VT: XLenVT));
12300
12301 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
12302 bool IsRV32E64 =
12303 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
12304 SDValue SplatVL;
12305 if (!IsRV32E64)
12306 SplatVL = DAG.getSplatVector(VT: IntVT, DL, Op: VLMinus1);
12307 else
12308 SplatVL = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: IntVT, N1: DAG.getUNDEF(VT: IntVT),
12309 N2: VLMinus1, N3: DAG.getRegister(Reg: RISCV::X0, VT: XLenVT));
12310
12311 SDValue VID = DAG.getNode(Opcode: RISCVISD::VID_VL, DL, VT: IntVT, N1: Mask, N2: VL);
12312 SDValue Indices = DAG.getNode(Opcode: RISCVISD::SUB_VL, DL, VT: IntVT, N1: SplatVL, N2: VID,
12313 N3: DAG.getUNDEF(VT: IntVT), N4: Mask, N5: VL);
12314
12315 SDValue Gather = DAG.getNode(Opcode: GatherOpc, DL, VT: ContainerVT, N1: Vec, N2: Indices,
12316 N3: DAG.getUNDEF(VT: ContainerVT), N4: Mask, N5: VL);
12317 if (VecVT.isFixedLengthVector())
12318 Gather = convertFromScalableVector(VT: VecVT, V: Gather, DAG, Subtarget);
12319 return Gather;
12320}
12321
12322SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
12323 SelectionDAG &DAG) const {
12324 SDLoc DL(Op);
12325 SDValue V1 = Op.getOperand(i: 0);
12326 SDValue V2 = Op.getOperand(i: 1);
12327 MVT XLenVT = Subtarget.getXLenVT();
12328 MVT VecVT = Op.getSimpleValueType();
12329
12330 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
12331
12332 int64_t ImmValue = cast<ConstantSDNode>(Val: Op.getOperand(i: 2))->getSExtValue();
12333 SDValue DownOffset, UpOffset;
12334 if (ImmValue >= 0) {
12335 // The operand is a TargetConstant; we need to rebuild it as a regular
12336 // constant.
12337 DownOffset = DAG.getConstant(Val: ImmValue, DL, VT: XLenVT);
12338 UpOffset = DAG.getNode(Opcode: ISD::SUB, DL, VT: XLenVT, N1: VLMax, N2: DownOffset);
12339 } else {
12340 // The operand is a TargetConstant; we need to rebuild it as a regular
12341 // constant rather than negating the original operand.
12342 UpOffset = DAG.getConstant(Val: -ImmValue, DL, VT: XLenVT);
12343 DownOffset = DAG.getNode(Opcode: ISD::SUB, DL, VT: XLenVT, N1: VLMax, N2: UpOffset);
12344 }
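// Illustrative example for vector_splice(V1, V2, 2) with VLMAX=8:
//   V1                            = a0 a1 a2 a3 a4 a5 a6 a7
//   V2                            = b0 b1 b2 b3 b4 b5 b6 b7
//   vslidedown(V1, 2)             = a2 a3 a4 a5 a6 a7  x  x
//   vslideup(prev, V2, VLMAX-2)   = a2 a3 a4 a5 a6 a7 b0 b1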
12345
12346 SDValue TrueMask = getAllOnesMask(VecVT, VL: VLMax, DL, DAG);
12347
12348 SDValue SlideDown =
12349 getVSlidedown(DAG, Subtarget, DL, VT: VecVT, Passthru: DAG.getUNDEF(VT: VecVT), Op: V1,
12350 Offset: DownOffset, Mask: TrueMask, VL: UpOffset);
12351 return getVSlideup(DAG, Subtarget, DL, VT: VecVT, Passthru: SlideDown, Op: V2, Offset: UpOffset,
12352 Mask: TrueMask, VL: DAG.getRegister(Reg: RISCV::X0, VT: XLenVT),
12353 Policy: RISCVVType::TAIL_AGNOSTIC);
12354}
12355
12356SDValue
12357RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
12358 SelectionDAG &DAG) const {
12359 SDLoc DL(Op);
12360 auto *Load = cast<LoadSDNode>(Val&: Op);
12361
12362 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
12363 Load->getMemoryVT(),
12364 *Load->getMemOperand()) &&
12365 "Expecting a correctly-aligned load");
12366
12367 MVT VT = Op.getSimpleValueType();
12368 MVT XLenVT = Subtarget.getXLenVT();
12369 MVT ContainerVT = getContainerForFixedLengthVector(VT);
12370
12371 // If we know the exact VLEN and our fixed length vector completely fills
12372 // the container, use a whole register load instead.
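// For example (assuming the exact VLEN is known to be 128), a v8i16 load
// exactly fills its container (nxv4i16 holds 8 x i16 at VLEN=128), so a plain
// scalable-typed load is emitted instead of a VL-constrained vle intrinsic.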
12373 const auto [MinVLMAX, MaxVLMAX] =
12374 RISCVTargetLowering::computeVLMAXBounds(VecVT: ContainerVT, Subtarget);
12375 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
12376 RISCVTargetLowering::getM1VT(VT: ContainerVT).bitsLE(VT: ContainerVT)) {
12377 MachineMemOperand *MMO = Load->getMemOperand();
12378 SDValue NewLoad =
12379 DAG.getLoad(VT: ContainerVT, dl: DL, Chain: Load->getChain(), Ptr: Load->getBasePtr(),
12380 PtrInfo: MMO->getPointerInfo(), Alignment: MMO->getBaseAlign(), MMOFlags: MMO->getFlags(),
12381 AAInfo: MMO->getAAInfo(), Ranges: MMO->getRanges());
12382 SDValue Result = convertFromScalableVector(VT, V: NewLoad, DAG, Subtarget);
12383 return DAG.getMergeValues(Ops: {Result, NewLoad.getValue(R: 1)}, dl: DL);
12384 }
12385
12386 SDValue VL = DAG.getConstant(Val: VT.getVectorNumElements(), DL, VT: XLenVT);
12387
12388 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
12389 SDValue IntID = DAG.getTargetConstant(
12390 Val: IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, VT: XLenVT);
12391 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
12392 if (!IsMaskOp)
12393 Ops.push_back(Elt: DAG.getUNDEF(VT: ContainerVT));
12394 Ops.push_back(Elt: Load->getBasePtr());
12395 Ops.push_back(Elt: VL);
12396 SDVTList VTs = DAG.getVTList(VTs: {ContainerVT, MVT::Other});
12397 SDValue NewLoad =
12398 DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, Ops,
12399 MemVT: Load->getMemoryVT(), MMO: Load->getMemOperand());
12400
12401 SDValue Result = convertFromScalableVector(VT, V: NewLoad, DAG, Subtarget);
12402 return DAG.getMergeValues(Ops: {Result, NewLoad.getValue(R: 1)}, dl: DL);
12403}
12404
12405SDValue
12406RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
12407 SelectionDAG &DAG) const {
12408 SDLoc DL(Op);
12409 auto *Store = cast<StoreSDNode>(Val&: Op);
12410
12411 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
12412 Store->getMemoryVT(),
12413 *Store->getMemOperand()) &&
12414 "Expecting a correctly-aligned store");
12415
12416 SDValue StoreVal = Store->getValue();
12417 MVT VT = StoreVal.getSimpleValueType();
12418 MVT XLenVT = Subtarget.getXLenVT();
12419
12420 // If the size is less than a byte, we need to pad with zeros to make a byte.
12421 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
12422 VT = MVT::v8i1;
12423 StoreVal =
12424 DAG.getInsertSubvector(DL, Vec: DAG.getConstant(Val: 0, DL, VT), SubVec: StoreVal, Idx: 0);
12425 }
12426
12427 MVT ContainerVT = getContainerForFixedLengthVector(VT);
12428
12429 SDValue NewValue =
12430 convertToScalableVector(VT: ContainerVT, V: StoreVal, DAG, Subtarget);
12431
12432 // If we know the exact VLEN and our fixed length vector completely fills
12433 // the container, use a whole register store instead.
12434 const auto [MinVLMAX, MaxVLMAX] =
12435 RISCVTargetLowering::computeVLMAXBounds(VecVT: ContainerVT, Subtarget);
12436 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
12437 RISCVTargetLowering::getM1VT(VT: ContainerVT).bitsLE(VT: ContainerVT)) {
12438 MachineMemOperand *MMO = Store->getMemOperand();
12439 return DAG.getStore(Chain: Store->getChain(), dl: DL, Val: NewValue, Ptr: Store->getBasePtr(),
12440 PtrInfo: MMO->getPointerInfo(), Alignment: MMO->getBaseAlign(),
12441 MMOFlags: MMO->getFlags(), AAInfo: MMO->getAAInfo());
12442 }
12443
12444 SDValue VL = DAG.getConstant(Val: VT.getVectorNumElements(), DL, VT: XLenVT);
12445
12446 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
12447 SDValue IntID = DAG.getTargetConstant(
12448 Val: IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, VT: XLenVT);
12449 return DAG.getMemIntrinsicNode(
12450 Opcode: ISD::INTRINSIC_VOID, dl: DL, VTList: DAG.getVTList(VT: MVT::Other),
12451 Ops: {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
12452 MemVT: Store->getMemoryVT(), MMO: Store->getMemOperand());
12453}
12454
12455SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
12456 SelectionDAG &DAG) const {
12457 SDLoc DL(Op);
12458 MVT VT = Op.getSimpleValueType();
12459
12460 const auto *MemSD = cast<MemSDNode>(Val&: Op);
12461 EVT MemVT = MemSD->getMemoryVT();
12462 MachineMemOperand *MMO = MemSD->getMemOperand();
12463 SDValue Chain = MemSD->getChain();
12464 SDValue BasePtr = MemSD->getBasePtr();
12465
12466 SDValue Mask, PassThru, VL;
12467 bool IsExpandingLoad = false;
12468 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Val&: Op)) {
12469 Mask = VPLoad->getMask();
12470 PassThru = DAG.getUNDEF(VT);
12471 VL = VPLoad->getVectorLength();
12472 } else {
12473 const auto *MLoad = cast<MaskedLoadSDNode>(Val&: Op);
12474 Mask = MLoad->getMask();
12475 PassThru = MLoad->getPassThru();
12476 IsExpandingLoad = MLoad->isExpandingLoad();
12477 }
12478
12479 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(N: Mask.getNode());
12480
12481 MVT XLenVT = Subtarget.getXLenVT();
12482
12483 MVT ContainerVT = VT;
12484 if (VT.isFixedLengthVector()) {
12485 ContainerVT = getContainerForFixedLengthVector(VT);
12486 PassThru = convertToScalableVector(VT: ContainerVT, V: PassThru, DAG, Subtarget);
12487 if (!IsUnmasked) {
12488 MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT);
12489 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
12490 }
12491 }
12492
12493 if (!VL)
12494 VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second;
12495
12496 SDValue ExpandingVL;
12497 if (!IsUnmasked && IsExpandingLoad) {
12498 ExpandingVL = VL;
12499 VL =
12500 DAG.getNode(Opcode: RISCVISD::VCPOP_VL, DL, VT: XLenVT, N1: Mask,
12501 N2: getAllOnesMask(VecVT: Mask.getSimpleValueType(), VL, DL, DAG), N3: VL);
12502 }
12503
12504 unsigned IntID = IsUnmasked || IsExpandingLoad ? Intrinsic::riscv_vle
12505 : Intrinsic::riscv_vle_mask;
12506 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(Val: IntID, DL, VT: XLenVT)};
12507 if (IntID == Intrinsic::riscv_vle)
12508 Ops.push_back(Elt: DAG.getUNDEF(VT: ContainerVT));
12509 else
12510 Ops.push_back(Elt: PassThru);
12511 Ops.push_back(Elt: BasePtr);
12512 if (IntID == Intrinsic::riscv_vle_mask)
12513 Ops.push_back(Elt: Mask);
12514 Ops.push_back(Elt: VL);
12515 if (IntID == Intrinsic::riscv_vle_mask)
12516 Ops.push_back(Elt: DAG.getTargetConstant(Val: RISCVVType::TAIL_AGNOSTIC, DL, VT: XLenVT));
12517
12518 SDVTList VTs = DAG.getVTList(VTs: {ContainerVT, MVT::Other});
12519
12520 SDValue Result =
12521 DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, Ops, MemVT, MMO);
12522 Chain = Result.getValue(R: 1);
12523 if (ExpandingVL) {
12524 MVT IndexVT = ContainerVT;
12525 if (ContainerVT.isFloatingPoint())
12526 IndexVT = ContainerVT.changeVectorElementTypeToInteger();
12527
12528 MVT IndexEltVT = IndexVT.getVectorElementType();
12529 bool UseVRGATHEREI16 = false;
12530 // If the index vector is an i8 vector and the element count exceeds 256, we
12531 // should change the element type of the index vector to i16 to avoid
12532 // overflow.
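// E.g. an expanding load of 512 elements can produce viota values up to 511,
// which do not fit in an i8 index element.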
12533 if (IndexEltVT == MVT::i8 && VT.getVectorNumElements() > 256) {
12534 // FIXME: We need to do vector splitting manually for LMUL=8 cases.
12535 assert(getLMUL(IndexVT) != RISCVVType::LMUL_8);
12536 IndexVT = IndexVT.changeVectorElementType(EltVT: MVT::i16);
12537 UseVRGATHEREI16 = true;
12538 }
12539
12540 SDValue Iota =
12541 DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: IndexVT,
12542 N1: DAG.getConstant(Val: Intrinsic::riscv_viota, DL, VT: XLenVT),
12543 N2: DAG.getUNDEF(VT: IndexVT), N3: Mask, N4: ExpandingVL);
12544 Result =
12545 DAG.getNode(Opcode: UseVRGATHEREI16 ? RISCVISD::VRGATHEREI16_VV_VL
12546 : RISCVISD::VRGATHER_VV_VL,
12547 DL, VT: ContainerVT, N1: Result, N2: Iota, N3: PassThru, N4: Mask, N5: ExpandingVL);
12548 }
12549
12550 if (VT.isFixedLengthVector())
12551 Result = convertFromScalableVector(VT, V: Result, DAG, Subtarget);
12552
12553 return DAG.getMergeValues(Ops: {Result, Chain}, dl: DL);
12554}
12555
12556SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
12557 SelectionDAG &DAG) const {
12558 SDLoc DL(Op);
12559
12560 const auto *MemSD = cast<MemSDNode>(Val&: Op);
12561 EVT MemVT = MemSD->getMemoryVT();
12562 MachineMemOperand *MMO = MemSD->getMemOperand();
12563 SDValue Chain = MemSD->getChain();
12564 SDValue BasePtr = MemSD->getBasePtr();
12565 SDValue Val, Mask, VL;
12566
12567 bool IsCompressingStore = false;
12568 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Val&: Op)) {
12569 Val = VPStore->getValue();
12570 Mask = VPStore->getMask();
12571 VL = VPStore->getVectorLength();
12572 } else {
12573 const auto *MStore = cast<MaskedStoreSDNode>(Val&: Op);
12574 Val = MStore->getValue();
12575 Mask = MStore->getMask();
12576 IsCompressingStore = MStore->isCompressingStore();
12577 }
12578
12579 bool IsUnmasked =
12580 ISD::isConstantSplatVectorAllOnes(N: Mask.getNode()) || IsCompressingStore;
12581
12582 MVT VT = Val.getSimpleValueType();
12583 MVT XLenVT = Subtarget.getXLenVT();
12584
12585 MVT ContainerVT = VT;
12586 if (VT.isFixedLengthVector()) {
12587 ContainerVT = getContainerForFixedLengthVector(VT);
12588
12589 Val = convertToScalableVector(VT: ContainerVT, V: Val, DAG, Subtarget);
12590 if (!IsUnmasked || IsCompressingStore) {
12591 MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT);
12592 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
12593 }
12594 }
12595
12596 if (!VL)
12597 VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second;
12598
12599 if (IsCompressingStore) {
12600 Val = DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: ContainerVT,
12601 N1: DAG.getConstant(Val: Intrinsic::riscv_vcompress, DL, VT: XLenVT),
12602 N2: DAG.getUNDEF(VT: ContainerVT), N3: Val, N4: Mask, N5: VL);
12603 VL =
12604 DAG.getNode(Opcode: RISCVISD::VCPOP_VL, DL, VT: XLenVT, N1: Mask,
12605 N2: getAllOnesMask(VecVT: Mask.getSimpleValueType(), VL, DL, DAG), N3: VL);
12606 }
12607
12608 unsigned IntID =
12609 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
12610 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(Val: IntID, DL, VT: XLenVT)};
12611 Ops.push_back(Elt: Val);
12612 Ops.push_back(Elt: BasePtr);
12613 if (!IsUnmasked)
12614 Ops.push_back(Elt: Mask);
12615 Ops.push_back(Elt: VL);
12616
12617 return DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_VOID, dl: DL,
12618 VTList: DAG.getVTList(VT: MVT::Other), Ops, MemVT, MMO);
12619}
12620
12621SDValue RISCVTargetLowering::lowerVectorCompress(SDValue Op,
12622 SelectionDAG &DAG) const {
12623 SDLoc DL(Op);
12624 SDValue Val = Op.getOperand(i: 0);
12625 SDValue Mask = Op.getOperand(i: 1);
12626 SDValue Passthru = Op.getOperand(i: 2);
12627
12628 MVT VT = Val.getSimpleValueType();
12629 MVT XLenVT = Subtarget.getXLenVT();
12630 MVT ContainerVT = VT;
12631 if (VT.isFixedLengthVector()) {
12632 ContainerVT = getContainerForFixedLengthVector(VT);
12633 MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT);
12634 Val = convertToScalableVector(VT: ContainerVT, V: Val, DAG, Subtarget);
12635 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
12636 Passthru = convertToScalableVector(VT: ContainerVT, V: Passthru, DAG, Subtarget);
12637 }
12638
12639 SDValue VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second;
12640 SDValue Res =
12641 DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: ContainerVT,
12642 N1: DAG.getConstant(Val: Intrinsic::riscv_vcompress, DL, VT: XLenVT),
12643 N2: Passthru, N3: Val, N4: Mask, N5: VL);
12644
12645 if (VT.isFixedLengthVector())
12646 Res = convertFromScalableVector(VT, V: Res, DAG, Subtarget);
12647
12648 return Res;
12649}
12650
12651SDValue
12652RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
12653 SelectionDAG &DAG) const {
12654 MVT InVT = Op.getOperand(i: 0).getSimpleValueType();
12655 MVT ContainerVT = getContainerForFixedLengthVector(VT: InVT);
12656
12657 MVT VT = Op.getSimpleValueType();
12658
12659 SDValue Op1 =
12660 convertToScalableVector(VT: ContainerVT, V: Op.getOperand(i: 0), DAG, Subtarget);
12661 SDValue Op2 =
12662 convertToScalableVector(VT: ContainerVT, V: Op.getOperand(i: 1), DAG, Subtarget);
12663
12664 SDLoc DL(Op);
12665 auto [Mask, VL] = getDefaultVLOps(NumElts: VT.getVectorNumElements(), ContainerVT, DL,
12666 DAG, Subtarget);
12667 MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT);
12668
12669 SDValue Cmp =
12670 DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: MaskVT,
12671 Ops: {Op1, Op2, Op.getOperand(i: 2), DAG.getUNDEF(VT: MaskVT), Mask, VL});
12672
12673 return convertFromScalableVector(VT, V: Cmp, DAG, Subtarget);
12674}
12675
12676SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
12677 SelectionDAG &DAG) const {
12678 unsigned Opc = Op.getOpcode();
12679 SDLoc DL(Op);
12680 SDValue Chain = Op.getOperand(i: 0);
12681 SDValue Op1 = Op.getOperand(i: 1);
12682 SDValue Op2 = Op.getOperand(i: 2);
12683 SDValue CC = Op.getOperand(i: 3);
12684 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val&: CC)->get();
12685 MVT VT = Op.getSimpleValueType();
12686 MVT InVT = Op1.getSimpleValueType();
12687
12688 // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with OEQ/UNE
12689 // condition codes.
12690 if (Opc == ISD::STRICT_FSETCCS) {
12691 // Expand strict_fsetccs(x, y, oeq) to (and strict_fsetccs(x, y, oge),
12692 // strict_fsetccs(x, y, ole)); the OGE half uses OLE with swapped operands.
12693 SDVTList VTList = Op->getVTList();
12694 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
12695 SDValue OLECCVal = DAG.getCondCode(Cond: ISD::SETOLE);
12696 SDValue Tmp1 = DAG.getNode(Opcode: ISD::STRICT_FSETCCS, DL, VTList, N1: Chain, N2: Op1,
12697 N3: Op2, N4: OLECCVal);
12698 SDValue Tmp2 = DAG.getNode(Opcode: ISD::STRICT_FSETCCS, DL, VTList, N1: Chain, N2: Op2,
12699 N3: Op1, N4: OLECCVal);
12700 SDValue OutChain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other,
12701 N1: Tmp1.getValue(R: 1), N2: Tmp2.getValue(R: 1));
12702 // Tmp1 and Tmp2 might be the same node.
12703 if (Tmp1 != Tmp2)
12704 Tmp1 = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Tmp1, N2: Tmp2);
12705 return DAG.getMergeValues(Ops: {Tmp1, OutChain}, dl: DL);
12706 }
12707
12708 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
12709 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
12710 SDValue OEQCCVal = DAG.getCondCode(Cond: ISD::SETOEQ);
12711 SDValue OEQ = DAG.getNode(Opcode: ISD::STRICT_FSETCCS, DL, VTList, N1: Chain, N2: Op1,
12712 N3: Op2, N4: OEQCCVal);
12713 SDValue Res = DAG.getNOT(DL, Val: OEQ, VT);
12714 return DAG.getMergeValues(Ops: {Res, OEQ.getValue(R: 1)}, dl: DL);
12715 }
12716 }
12717
12718 MVT ContainerInVT = InVT;
12719 if (InVT.isFixedLengthVector()) {
12720 ContainerInVT = getContainerForFixedLengthVector(VT: InVT);
12721 Op1 = convertToScalableVector(VT: ContainerInVT, V: Op1, DAG, Subtarget);
12722 Op2 = convertToScalableVector(VT: ContainerInVT, V: Op2, DAG, Subtarget);
12723 }
12724 MVT MaskVT = getMaskTypeFor(VecVT: ContainerInVT);
12725
12726 auto [Mask, VL] = getDefaultVLOps(VecVT: InVT, ContainerVT: ContainerInVT, DL, DAG, Subtarget);
12727
12728 SDValue Res;
12729 if (Opc == ISD::STRICT_FSETCC &&
12730 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
12731 CCVal == ISD::SETOLE)) {
12732 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that
12733 // is only active when both input elements are ordered.
12734 SDValue True = getAllOnesMask(VecVT: ContainerInVT, VL, DL, DAG);
12735 SDValue OrderMask1 = DAG.getNode(
12736 Opcode: RISCVISD::STRICT_FSETCC_VL, DL, VTList: DAG.getVTList(VT1: MaskVT, VT2: MVT::Other),
12737 Ops: {Chain, Op1, Op1, DAG.getCondCode(Cond: ISD::SETOEQ), DAG.getUNDEF(VT: MaskVT),
12738 True, VL});
12739 SDValue OrderMask2 = DAG.getNode(
12740 Opcode: RISCVISD::STRICT_FSETCC_VL, DL, VTList: DAG.getVTList(VT1: MaskVT, VT2: MVT::Other),
12741 Ops: {Chain, Op2, Op2, DAG.getCondCode(Cond: ISD::SETOEQ), DAG.getUNDEF(VT: MaskVT),
12742 True, VL});
12743 Mask =
12744 DAG.getNode(Opcode: RISCVISD::VMAND_VL, DL, VT: MaskVT, N1: OrderMask1, N2: OrderMask2, N3: VL);
12745 // Use Mask as the passthru operand to let the result be 0 if either of the
12746 // inputs is unordered.
12747 Res = DAG.getNode(Opcode: RISCVISD::STRICT_FSETCCS_VL, DL,
12748 VTList: DAG.getVTList(VT1: MaskVT, VT2: MVT::Other),
12749 Ops: {Chain, Op1, Op2, CC, Mask, Mask, VL});
12750 } else {
12751 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
12752 : RISCVISD::STRICT_FSETCCS_VL;
12753 Res = DAG.getNode(Opcode: RVVOpc, DL, VTList: DAG.getVTList(VT1: MaskVT, VT2: MVT::Other),
12754 Ops: {Chain, Op1, Op2, CC, DAG.getUNDEF(VT: MaskVT), Mask, VL});
12755 }
12756
12757 if (VT.isFixedLengthVector()) {
12758 SDValue SubVec = convertFromScalableVector(VT, V: Res, DAG, Subtarget);
12759 return DAG.getMergeValues(Ops: {SubVec, Res.getValue(R: 1)}, dl: DL);
12760 }
12761 return Res;
12762}
12763
12764// Lower vector ABS to smax(X, sub(0, X)).
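// E.g. X = -5 gives smax(-5, 0 - (-5)) = 5, while X = INT_MIN wraps back to
// INT_MIN, matching the documented ISD::ABS behaviour for the minimum value.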
12765SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
12766 SDLoc DL(Op);
12767 MVT VT = Op.getSimpleValueType();
12768 SDValue X = Op.getOperand(i: 0);
12769
12770 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
12771 "Unexpected type for ISD::ABS");
12772
12773 MVT ContainerVT = VT;
12774 if (VT.isFixedLengthVector()) {
12775 ContainerVT = getContainerForFixedLengthVector(VT);
12776 X = convertToScalableVector(VT: ContainerVT, V: X, DAG, Subtarget);
12777 }
12778
12779 SDValue Mask, VL;
12780 if (Op->getOpcode() == ISD::VP_ABS) {
12781 Mask = Op->getOperand(Num: 1);
12782 if (VT.isFixedLengthVector())
12783 Mask = convertToScalableVector(VT: getMaskTypeFor(VecVT: ContainerVT), V: Mask, DAG,
12784 Subtarget);
12785 VL = Op->getOperand(Num: 2);
12786 } else
12787 std::tie(args&: Mask, args&: VL) = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
12788
12789 SDValue SplatZero = DAG.getNode(
12790 Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT, N1: DAG.getUNDEF(VT: ContainerVT),
12791 N2: DAG.getConstant(Val: 0, DL, VT: Subtarget.getXLenVT()), N3: VL);
12792 SDValue NegX = DAG.getNode(Opcode: RISCVISD::SUB_VL, DL, VT: ContainerVT, N1: SplatZero, N2: X,
12793 N3: DAG.getUNDEF(VT: ContainerVT), N4: Mask, N5: VL);
12794 SDValue Max = DAG.getNode(Opcode: RISCVISD::SMAX_VL, DL, VT: ContainerVT, N1: X, N2: NegX,
12795 N3: DAG.getUNDEF(VT: ContainerVT), N4: Mask, N5: VL);
12796
12797 if (VT.isFixedLengthVector())
12798 Max = convertFromScalableVector(VT, V: Max, DAG, Subtarget);
12799 return Max;
12800}
12801
12802SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
12803 SDValue Op, SelectionDAG &DAG) const {
12804 SDLoc DL(Op);
12805 MVT VT = Op.getSimpleValueType();
12806 SDValue Mag = Op.getOperand(i: 0);
12807 SDValue Sign = Op.getOperand(i: 1);
12808 assert(Mag.getValueType() == Sign.getValueType() &&
12809 "Can only handle COPYSIGN with matching types.");
12810
12811 MVT ContainerVT = getContainerForFixedLengthVector(VT);
12812 Mag = convertToScalableVector(VT: ContainerVT, V: Mag, DAG, Subtarget);
12813 Sign = convertToScalableVector(VT: ContainerVT, V: Sign, DAG, Subtarget);
12814
12815 auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
12816
12817 SDValue CopySign = DAG.getNode(Opcode: RISCVISD::FCOPYSIGN_VL, DL, VT: ContainerVT, N1: Mag,
12818 N2: Sign, N3: DAG.getUNDEF(VT: ContainerVT), N4: Mask, N5: VL);
12819
12820 return convertFromScalableVector(VT, V: CopySign, DAG, Subtarget);
12821}
12822
12823SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
12824 SDValue Op, SelectionDAG &DAG) const {
12825 MVT VT = Op.getSimpleValueType();
12826 MVT ContainerVT = getContainerForFixedLengthVector(VT);
12827
12828 MVT I1ContainerVT =
12829 MVT::getVectorVT(VT: MVT::i1, EC: ContainerVT.getVectorElementCount());
12830
12831 SDValue CC =
12832 convertToScalableVector(VT: I1ContainerVT, V: Op.getOperand(i: 0), DAG, Subtarget);
12833 SDValue Op1 =
12834 convertToScalableVector(VT: ContainerVT, V: Op.getOperand(i: 1), DAG, Subtarget);
12835 SDValue Op2 =
12836 convertToScalableVector(VT: ContainerVT, V: Op.getOperand(i: 2), DAG, Subtarget);
12837
12838 SDLoc DL(Op);
12839 SDValue VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second;
12840
12841 SDValue Select = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: ContainerVT, N1: CC, N2: Op1,
12842 N3: Op2, N4: DAG.getUNDEF(VT: ContainerVT), N5: VL);
12843
12844 return convertFromScalableVector(VT, V: Select, DAG, Subtarget);
12845}
12846
12847SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
12848 SelectionDAG &DAG) const {
12849 const auto &TSInfo =
12850 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
12851
12852 unsigned NewOpc = getRISCVVLOp(Op);
12853 bool HasPassthruOp = TSInfo.hasPassthruOp(Opcode: NewOpc);
12854 bool HasMask = TSInfo.hasMaskOp(Opcode: NewOpc);
12855
12856 MVT VT = Op.getSimpleValueType();
12857 MVT ContainerVT = getContainerForFixedLengthVector(VT);
12858
12859 // Create list of operands by converting existing ones to scalable types.
12860 SmallVector<SDValue, 6> Ops;
12861 for (const SDValue &V : Op->op_values()) {
12862 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
12863
12864 // Pass through non-vector operands.
12865 if (!V.getValueType().isVector()) {
12866 Ops.push_back(Elt: V);
12867 continue;
12868 }
12869
12870 // "cast" fixed length vector to a scalable vector.
12871 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
12872 "Only fixed length vectors are supported!");
12873 Ops.push_back(Elt: convertToScalableVector(VT: ContainerVT, V, DAG, Subtarget));
12874 }
12875
12876 SDLoc DL(Op);
12877 auto [Mask, VL] = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget);
12878 if (HasPassthruOp)
12879 Ops.push_back(Elt: DAG.getUNDEF(VT: ContainerVT));
12880 if (HasMask)
12881 Ops.push_back(Elt: Mask);
12882 Ops.push_back(Elt: VL);
12883
12884 // StrictFP operations have two result values. Their lowered result should
12885 // have the same number of results.
12886 if (Op->isStrictFPOpcode()) {
12887 SDValue ScalableRes =
12888 DAG.getNode(Opcode: NewOpc, DL, VTList: DAG.getVTList(VT1: ContainerVT, VT2: MVT::Other), Ops,
12889 Flags: Op->getFlags());
12890 SDValue SubVec = convertFromScalableVector(VT, V: ScalableRes, DAG, Subtarget);
12891 return DAG.getMergeValues(Ops: {SubVec, ScalableRes.getValue(R: 1)}, dl: DL);
12892 }
12893
12894 SDValue ScalableRes =
12895 DAG.getNode(Opcode: NewOpc, DL, VT: ContainerVT, Ops, Flags: Op->getFlags());
12896 return convertFromScalableVector(VT, V: ScalableRes, DAG, Subtarget);
12897}
12898
12899// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
12900// * Operands of each node are assumed to be in the same order.
12901// * The EVL operand is promoted from i32 to i64 on RV64.
12902// * Fixed-length vectors are converted to their scalable-vector container
12903// types.
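// For example (assuming a 128-bit minimum VLEN, so v4i32 maps to nxv2i32),
//   vp.add(<4 x i32> %a, <4 x i32> %b, <4 x i1> %m, i32 %evl)
// becomes roughly
//   RISCVISD::ADD_VL(nxv2i32 %a', nxv2i32 %b', undef passthru, nxv2i1 %m',
//                    zext %evl to i64)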
12904SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
12905 const auto &TSInfo =
12906 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
12907
12908 unsigned RISCVISDOpc = getRISCVVLOp(Op);
12909 bool HasPassthruOp = TSInfo.hasPassthruOp(Opcode: RISCVISDOpc);
12910
12911 SDLoc DL(Op);
12912 MVT VT = Op.getSimpleValueType();
12913 SmallVector<SDValue, 4> Ops;
12914
12915 MVT ContainerVT = VT;
12916 if (VT.isFixedLengthVector())
12917 ContainerVT = getContainerForFixedLengthVector(VT);
12918
12919 for (const auto &OpIdx : enumerate(First: Op->ops())) {
12920 SDValue V = OpIdx.value();
12921 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
12922 // Add a dummy passthru value before the mask, or, if there isn't a mask,
12923 // before the EVL.
12924 if (HasPassthruOp) {
12925 auto MaskIdx = ISD::getVPMaskIdx(Opcode: Op.getOpcode());
12926 if (MaskIdx) {
12927 if (*MaskIdx == OpIdx.index())
12928 Ops.push_back(Elt: DAG.getUNDEF(VT: ContainerVT));
12929 } else if (ISD::getVPExplicitVectorLengthIdx(Opcode: Op.getOpcode()) ==
12930 OpIdx.index()) {
12931 if (Op.getOpcode() == ISD::VP_MERGE) {
12932 // For VP_MERGE, copy the false operand instead of an undef value.
12933 Ops.push_back(Elt: Ops.back());
12934 } else {
12935 assert(Op.getOpcode() == ISD::VP_SELECT);
12936 // For VP_SELECT, add an undef value.
12937 Ops.push_back(Elt: DAG.getUNDEF(VT: ContainerVT));
12938 }
12939 }
12940 }
12941 // VFCVT_RM_X_F_VL requires a rounding mode to be injected before the VL.
12942 if (RISCVISDOpc == RISCVISD::VFCVT_RM_X_F_VL &&
12943 ISD::getVPExplicitVectorLengthIdx(Opcode: Op.getOpcode()) == OpIdx.index())
12944 Ops.push_back(Elt: DAG.getTargetConstant(Val: RISCVFPRndMode::DYN, DL,
12945 VT: Subtarget.getXLenVT()));
12946 // Pass through operands which aren't fixed-length vectors.
12947 if (!V.getValueType().isFixedLengthVector()) {
12948 Ops.push_back(Elt: V);
12949 continue;
12950 }
12951 // "cast" fixed length vector to a scalable vector.
12952 MVT OpVT = V.getSimpleValueType();
12953 MVT ContainerVT = getContainerForFixedLengthVector(VT: OpVT);
12954 assert(useRVVForFixedLengthVectorVT(OpVT) &&
12955 "Only fixed length vectors are supported!");
12956 Ops.push_back(Elt: convertToScalableVector(VT: ContainerVT, V, DAG, Subtarget));
12957 }
12958
12959 if (!VT.isFixedLengthVector())
12960 return DAG.getNode(Opcode: RISCVISDOpc, DL, VT, Ops, Flags: Op->getFlags());
12961
12962 SDValue VPOp = DAG.getNode(Opcode: RISCVISDOpc, DL, VT: ContainerVT, Ops, Flags: Op->getFlags());
12963
12964 return convertFromScalableVector(VT, V: VPOp, DAG, Subtarget);
12965}
12966
12967SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
12968 SelectionDAG &DAG) const {
12969 SDLoc DL(Op);
12970 MVT VT = Op.getSimpleValueType();
12971
12972 SDValue Src = Op.getOperand(i: 0);
12973 // NOTE: Mask is dropped.
12974 SDValue VL = Op.getOperand(i: 2);
12975
12976 MVT ContainerVT = VT;
12977 if (VT.isFixedLengthVector()) {
12978 ContainerVT = getContainerForFixedLengthVector(VT);
12979 MVT SrcVT = MVT::getVectorVT(VT: MVT::i1, EC: ContainerVT.getVectorElementCount());
12980 Src = convertToScalableVector(VT: SrcVT, V: Src, DAG, Subtarget);
12981 }
12982
12983 MVT XLenVT = Subtarget.getXLenVT();
12984 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: XLenVT);
12985 SDValue ZeroSplat = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT,
12986 N1: DAG.getUNDEF(VT: ContainerVT), N2: Zero, N3: VL);
12987
12988 SDValue SplatValue = DAG.getSignedConstant(
12989 Val: Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, VT: XLenVT);
12990 SDValue Splat = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT,
12991 N1: DAG.getUNDEF(VT: ContainerVT), N2: SplatValue, N3: VL);
12992
12993 SDValue Result = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: ContainerVT, N1: Src, N2: Splat,
12994 N3: ZeroSplat, N4: DAG.getUNDEF(VT: ContainerVT), N5: VL);
12995 if (!VT.isFixedLengthVector())
12996 return Result;
12997 return convertFromScalableVector(VT, V: Result, DAG, Subtarget);
12998}
12999
13000SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
13001 SelectionDAG &DAG) const {
13002 SDLoc DL(Op);
13003 MVT VT = Op.getSimpleValueType();
13004
13005 SDValue Op1 = Op.getOperand(i: 0);
13006 SDValue Op2 = Op.getOperand(i: 1);
13007 ISD::CondCode Condition = cast<CondCodeSDNode>(Val: Op.getOperand(i: 2))->get();
13008 // NOTE: Mask is dropped.
13009 SDValue VL = Op.getOperand(i: 4);
13010
13011 MVT ContainerVT = VT;
13012 if (VT.isFixedLengthVector()) {
13013 ContainerVT = getContainerForFixedLengthVector(VT);
13014 Op1 = convertToScalableVector(VT: ContainerVT, V: Op1, DAG, Subtarget);
13015 Op2 = convertToScalableVector(VT: ContainerVT, V: Op2, DAG, Subtarget);
13016 }
13017
13018 SDValue Result;
13019 SDValue AllOneMask = DAG.getNode(Opcode: RISCVISD::VMSET_VL, DL, VT: ContainerVT, Operand: VL);
13020
13021 switch (Condition) {
13022 default:
13023 break;
13024 // X != Y --> (X^Y)
13025 case ISD::SETNE:
13026 Result = DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Op1, N2: Op2, N3: VL);
13027 break;
13028 // X == Y --> ~(X^Y)
13029 case ISD::SETEQ: {
13030 SDValue Temp =
13031 DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Op1, N2: Op2, N3: VL);
13032 Result =
13033 DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Temp, N2: AllOneMask, N3: VL);
13034 break;
13035 }
13036 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
13037 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
13038 case ISD::SETGT:
13039 case ISD::SETULT: {
13040 SDValue Temp =
13041 DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Op1, N2: AllOneMask, N3: VL);
13042 Result = DAG.getNode(Opcode: RISCVISD::VMAND_VL, DL, VT: ContainerVT, N1: Temp, N2: Op2, N3: VL);
13043 break;
13044 }
13045 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
13046 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
13047 case ISD::SETLT:
13048 case ISD::SETUGT: {
13049 SDValue Temp =
13050 DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Op2, N2: AllOneMask, N3: VL);
13051 Result = DAG.getNode(Opcode: RISCVISD::VMAND_VL, DL, VT: ContainerVT, N1: Op1, N2: Temp, N3: VL);
13052 break;
13053 }
13054 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
13055 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
13056 case ISD::SETGE:
13057 case ISD::SETULE: {
13058 SDValue Temp =
13059 DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Op1, N2: AllOneMask, N3: VL);
13060 Result = DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Temp, N2: Op2, N3: VL);
13061 break;
13062 }
13063 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
13064 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
13065 case ISD::SETLE:
13066 case ISD::SETUGE: {
13067 SDValue Temp =
13068 DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Op2, N2: AllOneMask, N3: VL);
13069 Result = DAG.getNode(Opcode: RISCVISD::VMXOR_VL, DL, VT: ContainerVT, N1: Temp, N2: Op1, N3: VL);
13070 break;
13071 }
13072 }
13073
13074 if (!VT.isFixedLengthVector())
13075 return Result;
13076 return convertFromScalableVector(VT, V: Result, DAG, Subtarget);
13077}
13078
13079// Lower Floating-Point/Integer Type-Convert VP SDNodes
13080SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
13081 SelectionDAG &DAG) const {
13082 SDLoc DL(Op);
13083
13084 SDValue Src = Op.getOperand(i: 0);
13085 SDValue Mask = Op.getOperand(i: 1);
13086 SDValue VL = Op.getOperand(i: 2);
13087 unsigned RISCVISDOpc = getRISCVVLOp(Op);
13088
13089 MVT DstVT = Op.getSimpleValueType();
13090 MVT SrcVT = Src.getSimpleValueType();
13091 if (DstVT.isFixedLengthVector()) {
13092 DstVT = getContainerForFixedLengthVector(VT: DstVT);
13093 SrcVT = getContainerForFixedLengthVector(VT: SrcVT);
13094 Src = convertToScalableVector(VT: SrcVT, V: Src, DAG, Subtarget);
13095 MVT MaskVT = getMaskTypeFor(VecVT: DstVT);
13096 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
13097 }
13098
13099 unsigned DstEltSize = DstVT.getScalarSizeInBits();
13100 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
13101
13102 SDValue Result;
13103 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
13104 if (SrcVT.isInteger()) {
13105 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
13106
13107 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
13108 ? RISCVISD::VSEXT_VL
13109 : RISCVISD::VZEXT_VL;
13110
13111 // Do we need to do any pre-widening before converting?
13112 if (SrcEltSize == 1) {
13113 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
13114 MVT XLenVT = Subtarget.getXLenVT();
13115 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: XLenVT);
13116 SDValue ZeroSplat = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: IntVT,
13117 N1: DAG.getUNDEF(VT: IntVT), N2: Zero, N3: VL);
13118 SDValue One = DAG.getSignedConstant(
13119 Val: RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, VT: XLenVT);
13120 SDValue OneSplat = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: IntVT,
13121 N1: DAG.getUNDEF(VT: IntVT), N2: One, N3: VL);
13122 Src = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: IntVT, N1: Src, N2: OneSplat,
13123 N3: ZeroSplat, N4: DAG.getUNDEF(VT: IntVT), N5: VL);
13124 } else if (DstEltSize > (2 * SrcEltSize)) {
13125 // Widen before converting.
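// E.g. for i8 -> f64 (DstEltSize = 64 > 2 * 8), first sign- or zero-extend
// the i8 source to i32 (half of the destination width), then a single
// widening convert produces f64.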
13126 MVT IntVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: DstEltSize / 2),
13127 EC: DstVT.getVectorElementCount());
13128 Src = DAG.getNode(Opcode: RISCVISDExtOpc, DL, VT: IntVT, N1: Src, N2: Mask, N3: VL);
13129 }
13130
13131 Result = DAG.getNode(Opcode: RISCVISDOpc, DL, VT: DstVT, N1: Src, N2: Mask, N3: VL);
13132 } else {
13133 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
13134 "Wrong input/output vector types");
13135
13136 // Convert f16 to f32 then convert f32 to i64.
13137 if (DstEltSize > (2 * SrcEltSize)) {
13138 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
13139 MVT InterimFVT =
13140 MVT::getVectorVT(VT: MVT::f32, EC: DstVT.getVectorElementCount());
13141 Src =
13142 DAG.getNode(Opcode: RISCVISD::FP_EXTEND_VL, DL, VT: InterimFVT, N1: Src, N2: Mask, N3: VL);
13143 }
13144
13145 Result = DAG.getNode(Opcode: RISCVISDOpc, DL, VT: DstVT, N1: Src, N2: Mask, N3: VL);
13146 }
13147 } else { // Narrowing + Conversion
13148 if (SrcVT.isInteger()) {
13149 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
13150 // First do a narrowing conversion to an FP type half the size, then round
13151 // the result to a smaller FP type if needed.
13152
13153 MVT InterimFVT = DstVT;
13154 if (SrcEltSize > (2 * DstEltSize)) {
13155 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
13156 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
13157 InterimFVT = MVT::getVectorVT(VT: MVT::f32, EC: DstVT.getVectorElementCount());
13158 }
13159
13160 Result = DAG.getNode(Opcode: RISCVISDOpc, DL, VT: InterimFVT, N1: Src, N2: Mask, N3: VL);
13161
13162 if (InterimFVT != DstVT) {
13163 Src = Result;
13164 Result = DAG.getNode(Opcode: RISCVISD::FP_ROUND_VL, DL, VT: DstVT, N1: Src, N2: Mask, N3: VL);
13165 }
13166 } else {
13167 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
13168 "Wrong input/output vector types");
13169 // First do a narrowing conversion to an integer half the size, then
13170 // truncate if needed.
13171
13172 if (DstEltSize == 1) {
13173 // First convert to the same size integer, then convert to mask using
13174 // setcc.
13175 assert(SrcEltSize >= 16 && "Unexpected FP type!");
13176 MVT InterimIVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: SrcEltSize),
13177 EC: DstVT.getVectorElementCount());
13178 Result = DAG.getNode(Opcode: RISCVISDOpc, DL, VT: InterimIVT, N1: Src, N2: Mask, N3: VL);
13179
13180 // Compare the integer result to 0. The integer should be 0 or 1/-1;
13181 // otherwise the conversion was undefined.
13182 MVT XLenVT = Subtarget.getXLenVT();
13183 SDValue SplatZero = DAG.getConstant(Val: 0, DL, VT: XLenVT);
13184 SplatZero = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: InterimIVT,
13185 N1: DAG.getUNDEF(VT: InterimIVT), N2: SplatZero, N3: VL);
13186 Result = DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: DstVT,
13187 Ops: {Result, SplatZero, DAG.getCondCode(Cond: ISD::SETNE),
13188 DAG.getUNDEF(VT: DstVT), Mask, VL});
13189 } else {
13190 MVT InterimIVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: SrcEltSize / 2),
13191 EC: DstVT.getVectorElementCount());
13192
13193 Result = DAG.getNode(Opcode: RISCVISDOpc, DL, VT: InterimIVT, N1: Src, N2: Mask, N3: VL);
13194
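 // Each TRUNCATE_VECTOR_VL below halves the element width, so e.g. an f64
 // source narrowing to i8 is first converted to i32 and then truncated
 // through i16 down to i8.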
13195 while (InterimIVT != DstVT) {
13196 SrcEltSize /= 2;
13197 Src = Result;
13198 InterimIVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: SrcEltSize / 2),
13199 EC: DstVT.getVectorElementCount());
13200 Result = DAG.getNode(Opcode: RISCVISD::TRUNCATE_VECTOR_VL, DL, VT: InterimIVT,
13201 N1: Src, N2: Mask, N3: VL);
13202 }
13203 }
13204 }
13205 }
13206
13207 MVT VT = Op.getSimpleValueType();
13208 if (!VT.isFixedLengthVector())
13209 return Result;
13210 return convertFromScalableVector(VT, V: Result, DAG, Subtarget);
13211}
13212
13213SDValue RISCVTargetLowering::lowerVPMergeMask(SDValue Op,
13214 SelectionDAG &DAG) const {
13215 SDLoc DL(Op);
13216 MVT VT = Op.getSimpleValueType();
13217 MVT XLenVT = Subtarget.getXLenVT();
13218
13219 SDValue Mask = Op.getOperand(i: 0);
13220 SDValue TrueVal = Op.getOperand(i: 1);
13221 SDValue FalseVal = Op.getOperand(i: 2);
13222 SDValue VL = Op.getOperand(i: 3);
13223
13224 // Use default legalization if a vector of EVL type would be legal.
13225 EVT EVLVecVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: VL.getValueType(),
13226 EC: VT.getVectorElementCount());
13227 if (isTypeLegal(VT: EVLVecVT))
13228 return SDValue();
13229
13230 MVT ContainerVT = VT;
13231 if (VT.isFixedLengthVector()) {
13232 ContainerVT = getContainerForFixedLengthVector(VT);
13233 Mask = convertToScalableVector(VT: ContainerVT, V: Mask, DAG, Subtarget);
13234 TrueVal = convertToScalableVector(VT: ContainerVT, V: TrueVal, DAG, Subtarget);
13235 FalseVal = convertToScalableVector(VT: ContainerVT, V: FalseVal, DAG, Subtarget);
13236 }
13237
13238 // Promote to a vector of i8.
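 // The merge itself is done on i8 0/1 values; the result is converted back
 // to a mask with the compare-against-zero below.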
13239 MVT PromotedVT = ContainerVT.changeVectorElementType(EltVT: MVT::i8);
13240
13241 // Promote TrueVal and FalseVal using VLMax.
13242 // FIXME: Is there a better way to do this?
13243 SDValue VLMax = DAG.getRegister(Reg: RISCV::X0, VT: XLenVT);
13244 SDValue SplatOne = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: PromotedVT,
13245 N1: DAG.getUNDEF(VT: PromotedVT),
13246 N2: DAG.getConstant(Val: 1, DL, VT: XLenVT), N3: VLMax);
13247 SDValue SplatZero = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: PromotedVT,
13248 N1: DAG.getUNDEF(VT: PromotedVT),
13249 N2: DAG.getConstant(Val: 0, DL, VT: XLenVT), N3: VLMax);
13250 TrueVal = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: PromotedVT, N1: TrueVal, N2: SplatOne,
13251 N3: SplatZero, N4: DAG.getUNDEF(VT: PromotedVT), N5: VL);
13252 // Any element past VL uses FalseVal, so use VLMax
13253 FalseVal = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: PromotedVT, N1: FalseVal,
13254 N2: SplatOne, N3: SplatZero, N4: DAG.getUNDEF(VT: PromotedVT), N5: VLMax);
13255
13256 // VP_MERGE the two promoted values.
13257 SDValue VPMerge = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: PromotedVT, N1: Mask,
13258 N2: TrueVal, N3: FalseVal, N4: FalseVal, N5: VL);
13259
13260 // Convert back to mask.
13261 SDValue TrueMask = DAG.getNode(Opcode: RISCVISD::VMSET_VL, DL, VT: ContainerVT, Operand: VL);
13262 SDValue Result = DAG.getNode(
13263 Opcode: RISCVISD::SETCC_VL, DL, VT: ContainerVT,
13264 Ops: {VPMerge, DAG.getConstant(Val: 0, DL, VT: PromotedVT), DAG.getCondCode(Cond: ISD::SETNE),
13265 DAG.getUNDEF(VT: getMaskTypeFor(VecVT: ContainerVT)), TrueMask, VLMax});
13266
13267 if (VT.isFixedLengthVector())
13268 Result = convertFromScalableVector(VT, V: Result, DAG, Subtarget);
13269 return Result;
13270}
13271
13272SDValue
13273RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
13274 SelectionDAG &DAG) const {
13275 using namespace SDPatternMatch;
13276
13277 SDLoc DL(Op);
13278
13279 SDValue Op1 = Op.getOperand(i: 0);
13280 SDValue Op2 = Op.getOperand(i: 1);
13281 SDValue Offset = Op.getOperand(i: 2);
13282 SDValue Mask = Op.getOperand(i: 3);
13283 SDValue EVL1 = Op.getOperand(i: 4);
13284 SDValue EVL2 = Op.getOperand(i: 5);
13285
13286 const MVT XLenVT = Subtarget.getXLenVT();
13287 MVT VT = Op.getSimpleValueType();
13288 MVT ContainerVT = VT;
13289 if (VT.isFixedLengthVector()) {
13290 ContainerVT = getContainerForFixedLengthVector(VT);
13291 Op1 = convertToScalableVector(VT: ContainerVT, V: Op1, DAG, Subtarget);
13292 Op2 = convertToScalableVector(VT: ContainerVT, V: Op2, DAG, Subtarget);
13293 MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT);
13294 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
13295 }
13296
13297 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
13298 if (IsMaskVector) {
13299 ContainerVT = ContainerVT.changeVectorElementType(EltVT: MVT::i8);
13300
13301 // Expand input operands
13302 SDValue SplatOneOp1 = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT,
13303 N1: DAG.getUNDEF(VT: ContainerVT),
13304 N2: DAG.getConstant(Val: 1, DL, VT: XLenVT), N3: EVL1);
13305 SDValue SplatZeroOp1 = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT,
13306 N1: DAG.getUNDEF(VT: ContainerVT),
13307 N2: DAG.getConstant(Val: 0, DL, VT: XLenVT), N3: EVL1);
13308 Op1 = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: ContainerVT, N1: Op1, N2: SplatOneOp1,
13309 N3: SplatZeroOp1, N4: DAG.getUNDEF(VT: ContainerVT), N5: EVL1);
13310
13311 SDValue SplatOneOp2 = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT,
13312 N1: DAG.getUNDEF(VT: ContainerVT),
13313 N2: DAG.getConstant(Val: 1, DL, VT: XLenVT), N3: EVL2);
13314 SDValue SplatZeroOp2 = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT,
13315 N1: DAG.getUNDEF(VT: ContainerVT),
13316 N2: DAG.getConstant(Val: 0, DL, VT: XLenVT), N3: EVL2);
13317 Op2 = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: ContainerVT, N1: Op2, N2: SplatOneOp2,
13318 N3: SplatZeroOp2, N4: DAG.getUNDEF(VT: ContainerVT), N5: EVL2);
13319 }
13320
13321 auto getVectorFirstEle = [](SDValue Vec) {
13322 SDValue FirstEle;
13323 if (sd_match(N: Vec, P: m_InsertElt(Vec: m_Value(), Val: m_Value(N&: FirstEle), Idx: m_Zero())))
13324 return FirstEle;
13325
13326 if (Vec.getOpcode() == ISD::SPLAT_VECTOR ||
13327 Vec.getOpcode() == ISD::BUILD_VECTOR)
13328 return Vec.getOperand(i: 0);
13329
13330 return SDValue();
13331 };
13332
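 // Special case: with a zero offset and EVL1 == 1, the splice takes a single
 // leading element from Op1 and prepends it to Op2, which is exactly a
 // vslide1up of Op2 by that scalar.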
13333 if (!IsMaskVector && isNullConstant(V: Offset) && isOneConstant(V: EVL1))
13334 if (auto FirstEle = getVectorFirstEle(Op->getOperand(Num: 0))) {
13335 MVT EltVT = ContainerVT.getVectorElementType();
13336 SDValue Result;
13337 if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
13338 EltVT == MVT::bf16) {
13339 EltVT = EltVT.changeTypeToInteger();
13340 ContainerVT = ContainerVT.changeVectorElementType(EltVT);
13341 Op2 = DAG.getBitcast(VT: ContainerVT, V: Op2);
13342 FirstEle =
13343 DAG.getAnyExtOrTrunc(Op: DAG.getBitcast(VT: EltVT, V: FirstEle), DL, VT: XLenVT);
13344 }
13345 Result = DAG.getNode(Opcode: EltVT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL
13346 : RISCVISD::VSLIDE1UP_VL,
13347 DL, VT: ContainerVT, N1: DAG.getUNDEF(VT: ContainerVT), N2: Op2,
13348 N3: FirstEle, N4: Mask, N5: EVL2);
13349 Result = DAG.getBitcast(
13350 VT: ContainerVT.changeVectorElementType(EltVT: VT.getVectorElementType()),
13351 V: Result);
13352 return VT.isFixedLengthVector()
13353 ? convertFromScalableVector(VT, V: Result, DAG, Subtarget)
13354 : Result;
13355 }
13356
13357 int64_t ImmValue = cast<ConstantSDNode>(Val&: Offset)->getSExtValue();
13358 SDValue DownOffset, UpOffset;
13359 if (ImmValue >= 0) {
13360 // The operand is a TargetConstant; we need to rebuild it as a regular
13361 // constant.
13362 DownOffset = DAG.getConstant(Val: ImmValue, DL, VT: XLenVT);
13363 UpOffset = DAG.getNode(Opcode: ISD::SUB, DL, VT: XLenVT, N1: EVL1, N2: DownOffset);
13364 } else {
13365 // The operand is a TargetConstant; we need to rebuild it as a regular
13366 // constant rather than negating the original operand.
13367 UpOffset = DAG.getConstant(Val: -ImmValue, DL, VT: XLenVT);
13368 DownOffset = DAG.getNode(Opcode: ISD::SUB, DL, VT: XLenVT, N1: EVL1, N2: UpOffset);
13369 }
13370
13371 if (ImmValue != 0)
13372 Op1 = getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT,
13373 Passthru: DAG.getUNDEF(VT: ContainerVT), Op: Op1, Offset: DownOffset, Mask,
13374 VL: UpOffset);
13375 SDValue Result = getVSlideup(DAG, Subtarget, DL, VT: ContainerVT, Passthru: Op1, Op: Op2,
13376 Offset: UpOffset, Mask, VL: EVL2, Policy: RISCVVType::TAIL_AGNOSTIC);
13377
13378 if (IsMaskVector) {
13379 // Truncate Result back to a mask vector (Result has same EVL as Op2)
13380 Result = DAG.getNode(
13381 Opcode: RISCVISD::SETCC_VL, DL, VT: ContainerVT.changeVectorElementType(EltVT: MVT::i1),
13382 Ops: {Result, DAG.getConstant(Val: 0, DL, VT: ContainerVT),
13383 DAG.getCondCode(Cond: ISD::SETNE), DAG.getUNDEF(VT: getMaskTypeFor(VecVT: ContainerVT)),
13384 Mask, EVL2});
13385 }
13386
13387 if (!VT.isFixedLengthVector())
13388 return Result;
13389 return convertFromScalableVector(VT, V: Result, DAG, Subtarget);
13390}
13391
13392SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
13393 SelectionDAG &DAG) const {
13394 SDLoc DL(Op);
13395 SDValue Val = Op.getOperand(i: 0);
13396 SDValue Mask = Op.getOperand(i: 1);
13397 SDValue VL = Op.getOperand(i: 2);
13398 MVT VT = Op.getSimpleValueType();
13399
13400 MVT ContainerVT = VT;
13401 if (VT.isFixedLengthVector()) {
13402 ContainerVT = getContainerForFixedLengthVector(VT);
13403 MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT);
13404 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
13405 }
13406
13407 SDValue Result;
13408 if (VT.getScalarType() == MVT::i1) {
13409 if (auto *C = dyn_cast<ConstantSDNode>(Val)) {
13410 Result =
13411 DAG.getNode(Opcode: C->isZero() ? RISCVISD::VMCLR_VL : RISCVISD::VMSET_VL, DL,
13412 VT: ContainerVT, Operand: VL);
13413 } else {
13414 MVT WidenVT = ContainerVT.changeVectorElementType(EltVT: MVT::i8);
13415 SDValue LHS =
13416 DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: WidenVT, N1: DAG.getUNDEF(VT: WidenVT),
13417 N2: DAG.getZExtOrTrunc(Op: Val, DL, VT: Subtarget.getXLenVT()), N3: VL);
13418 SDValue RHS = DAG.getConstant(Val: 0, DL, VT: WidenVT);
13419 Result = DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: ContainerVT,
13420 Ops: {LHS, RHS, DAG.getCondCode(Cond: ISD::SETNE),
13421 DAG.getUNDEF(VT: ContainerVT), Mask, VL});
13422 }
13423 } else {
13424 Result =
13425 lowerScalarSplat(Passthru: SDValue(), Scalar: Val, VL, VT: ContainerVT, DL, DAG, Subtarget);
13426 }
13427
13428 if (!VT.isFixedLengthVector())
13429 return Result;
13430 return convertFromScalableVector(VT, V: Result, DAG, Subtarget);
13431}
13432
13433SDValue
13434RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
13435 SelectionDAG &DAG) const {
13436 SDLoc DL(Op);
13437 MVT VT = Op.getSimpleValueType();
13438 MVT XLenVT = Subtarget.getXLenVT();
13439
13440 SDValue Op1 = Op.getOperand(i: 0);
13441 SDValue Mask = Op.getOperand(i: 1);
13442 SDValue EVL = Op.getOperand(i: 2);
13443
13444 MVT ContainerVT = VT;
13445 if (VT.isFixedLengthVector()) {
13446 ContainerVT = getContainerForFixedLengthVector(VT);
13447 Op1 = convertToScalableVector(VT: ContainerVT, V: Op1, DAG, Subtarget);
13448 MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT);
13449 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
13450 }
13451
13452 MVT GatherVT = ContainerVT;
13453 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
13454 // Check if we are working with mask vectors
13455 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
13456 if (IsMaskVector) {
13457 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(EltVT: MVT::i8);
13458
13459 // Expand input operand
13460 SDValue SplatOne = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: IndicesVT,
13461 N1: DAG.getUNDEF(VT: IndicesVT),
13462 N2: DAG.getConstant(Val: 1, DL, VT: XLenVT), N3: EVL);
13463 SDValue SplatZero = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: IndicesVT,
13464 N1: DAG.getUNDEF(VT: IndicesVT),
13465 N2: DAG.getConstant(Val: 0, DL, VT: XLenVT), N3: EVL);
13466 Op1 = DAG.getNode(Opcode: RISCVISD::VMERGE_VL, DL, VT: IndicesVT, N1: Op1, N2: SplatOne,
13467 N3: SplatZero, N4: DAG.getUNDEF(VT: IndicesVT), N5: EVL);
13468 }
13469
13470 unsigned EltSize = GatherVT.getScalarSizeInBits();
13471 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
13472 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
13473 unsigned MaxVLMAX =
13474 RISCVTargetLowering::computeVLMAX(VectorBits: VectorBitsMax, EltSize, MinSize);
13475
13476 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
13477 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
13478 // to use vrgatherei16.vv.
13479 // TODO: It's also possible to use vrgatherei16.vv for other types to
13480 // decrease register width for the index calculation.
13481 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
13482 if (MaxVLMAX > 256 && EltSize == 8) {
13483 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
13484 // Split the vector in half and reverse each half using a full register
13485 // reverse.
13486 // Swap the halves and concatenate them.
13487 // Slide the concatenated result by (VLMax - VL).
13488 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
13489 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT: GatherVT);
13490 auto [Lo, Hi] = DAG.SplitVector(N: Op1, DL);
13491
13492 SDValue LoRev = DAG.getNode(Opcode: ISD::VECTOR_REVERSE, DL, VT: LoVT, Operand: Lo);
13493 SDValue HiRev = DAG.getNode(Opcode: ISD::VECTOR_REVERSE, DL, VT: HiVT, Operand: Hi);
13494
13495 // Reassemble the low and high pieces reversed.
13496 // NOTE: this Result is unmasked (because we do not need masks for
13497 // shuffles). If in the future this has to change, we can use a SELECT_VL
13498 // between Result and UNDEF using the mask originally passed to VP_REVERSE
13499 SDValue Result =
13500 DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: GatherVT, N1: HiRev, N2: LoRev);
13501
13502 // Slide off any elements from past EVL that were reversed into the low
13503 // elements.
13504 unsigned MinElts = GatherVT.getVectorMinNumElements();
13505 SDValue VLMax =
13506 DAG.getVScale(DL, VT: XLenVT, MulImm: APInt(XLenVT.getSizeInBits(), MinElts));
13507 SDValue Diff = DAG.getNode(Opcode: ISD::SUB, DL, VT: XLenVT, N1: VLMax, N2: EVL);
13508
13509 Result = getVSlidedown(DAG, Subtarget, DL, VT: GatherVT,
13510 Passthru: DAG.getUNDEF(VT: GatherVT), Op: Result, Offset: Diff, Mask, VL: EVL);
13511
13512 if (IsMaskVector) {
13513 // Truncate Result back to a mask vector
13514 Result =
13515 DAG.getNode(Opcode: RISCVISD::SETCC_VL, DL, VT: ContainerVT,
13516 Ops: {Result, DAG.getConstant(Val: 0, DL, VT: GatherVT),
13517 DAG.getCondCode(Cond: ISD::SETNE),
13518 DAG.getUNDEF(VT: getMaskTypeFor(VecVT: ContainerVT)), Mask, EVL});
13519 }
13520
13521 if (!VT.isFixedLengthVector())
13522 return Result;
13523 return convertFromScalableVector(VT, V: Result, DAG, Subtarget);
13524 }
13525
13526 // Just promote the int type to i16 which will double the LMUL.
13527 IndicesVT = MVT::getVectorVT(VT: MVT::i16, EC: IndicesVT.getVectorElementCount());
13528 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
13529 }
13530
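 // General case: build the reversal indices idx[i] = (EVL - 1) - i using vid
 // and a reversed subtract, then vrgather the source through them.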
13531 SDValue VID = DAG.getNode(Opcode: RISCVISD::VID_VL, DL, VT: IndicesVT, N1: Mask, N2: EVL);
13532 SDValue VecLen =
13533 DAG.getNode(Opcode: ISD::SUB, DL, VT: XLenVT, N1: EVL, N2: DAG.getConstant(Val: 1, DL, VT: XLenVT));
13534 SDValue VecLenSplat = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: IndicesVT,
13535 N1: DAG.getUNDEF(VT: IndicesVT), N2: VecLen, N3: EVL);
13536 SDValue VRSUB = DAG.getNode(Opcode: RISCVISD::SUB_VL, DL, VT: IndicesVT, N1: VecLenSplat, N2: VID,
13537 N3: DAG.getUNDEF(VT: IndicesVT), N4: Mask, N5: EVL);
13538 SDValue Result = DAG.getNode(Opcode: GatherOpc, DL, VT: GatherVT, N1: Op1, N2: VRSUB,
13539 N3: DAG.getUNDEF(VT: GatherVT), N4: Mask, N5: EVL);
13540
13541 if (IsMaskVector) {
13542 // Truncate Result back to a mask vector
13543 Result = DAG.getNode(
13544 Opcode: RISCVISD::SETCC_VL, DL, VT: ContainerVT,
13545 Ops: {Result, DAG.getConstant(Val: 0, DL, VT: GatherVT), DAG.getCondCode(Cond: ISD::SETNE),
13546 DAG.getUNDEF(VT: getMaskTypeFor(VecVT: ContainerVT)), Mask, EVL});
13547 }
13548
13549 if (!VT.isFixedLengthVector())
13550 return Result;
13551 return convertFromScalableVector(VT, V: Result, DAG, Subtarget);
13552}
13553
13554SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
13555 SelectionDAG &DAG) const {
13556 MVT VT = Op.getSimpleValueType();
13557 if (VT.getVectorElementType() != MVT::i1)
13558 return lowerVPOp(Op, DAG);
13559
13560 // It is safe to drop the mask parameter, as masked-off elements are undef.
13561 SDValue Op1 = Op->getOperand(Num: 0);
13562 SDValue Op2 = Op->getOperand(Num: 1);
13563 SDValue VL = Op->getOperand(Num: 3);
13564
13565 MVT ContainerVT = VT;
13566 const bool IsFixed = VT.isFixedLengthVector();
13567 if (IsFixed) {
13568 ContainerVT = getContainerForFixedLengthVector(VT);
13569 Op1 = convertToScalableVector(VT: ContainerVT, V: Op1, DAG, Subtarget);
13570 Op2 = convertToScalableVector(VT: ContainerVT, V: Op2, DAG, Subtarget);
13571 }
13572
13573 SDLoc DL(Op);
13574 SDValue Val = DAG.getNode(Opcode: getRISCVVLOp(Op), DL, VT: ContainerVT, N1: Op1, N2: Op2, N3: VL);
13575 if (!IsFixed)
13576 return Val;
13577 return convertFromScalableVector(VT, V: Val, DAG, Subtarget);
13578}
13579
13580SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
13581 SelectionDAG &DAG) const {
13582 SDLoc DL(Op);
13583 MVT XLenVT = Subtarget.getXLenVT();
13584 MVT VT = Op.getSimpleValueType();
13585 MVT ContainerVT = VT;
13586 if (VT.isFixedLengthVector())
13587 ContainerVT = getContainerForFixedLengthVector(VT);
13588
13589 SDVTList VTs = DAG.getVTList(VTs: {ContainerVT, MVT::Other});
13590
13591 auto *VPNode = cast<VPStridedLoadSDNode>(Val&: Op);
13592 // Check if the mask is known to be all ones
13593 SDValue Mask = VPNode->getMask();
13594 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(N: Mask.getNode());
13595
13596 SDValue IntID = DAG.getTargetConstant(Val: IsUnmasked ? Intrinsic::riscv_vlse
13597 : Intrinsic::riscv_vlse_mask,
13598 DL, VT: XLenVT);
13599 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
13600 DAG.getUNDEF(VT: ContainerVT), VPNode->getBasePtr(),
13601 VPNode->getStride()};
13602 if (!IsUnmasked) {
13603 if (VT.isFixedLengthVector()) {
13604 MVT MaskVT = ContainerVT.changeVectorElementType(EltVT: MVT::i1);
13605 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
13606 }
13607 Ops.push_back(Elt: Mask);
13608 }
13609 Ops.push_back(Elt: VPNode->getVectorLength());
13610 if (!IsUnmasked) {
13611 SDValue Policy =
13612 DAG.getTargetConstant(Val: RISCVVType::TAIL_AGNOSTIC, DL, VT: XLenVT);
13613 Ops.push_back(Elt: Policy);
13614 }
13615
13616 SDValue Result =
13617 DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, Ops,
13618 MemVT: VPNode->getMemoryVT(), MMO: VPNode->getMemOperand());
13619 SDValue Chain = Result.getValue(R: 1);
13620
13621 if (VT.isFixedLengthVector())
13622 Result = convertFromScalableVector(VT, V: Result, DAG, Subtarget);
13623
13624 return DAG.getMergeValues(Ops: {Result, Chain}, dl: DL);
13625}
13626
13627SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
13628 SelectionDAG &DAG) const {
13629 SDLoc DL(Op);
13630 MVT XLenVT = Subtarget.getXLenVT();
13631
13632 auto *VPNode = cast<VPStridedStoreSDNode>(Val&: Op);
13633 SDValue StoreVal = VPNode->getValue();
13634 MVT VT = StoreVal.getSimpleValueType();
13635 MVT ContainerVT = VT;
13636 if (VT.isFixedLengthVector()) {
13637 ContainerVT = getContainerForFixedLengthVector(VT);
13638 StoreVal = convertToScalableVector(VT: ContainerVT, V: StoreVal, DAG, Subtarget);
13639 }
13640
13641 // Check if the mask is known to be all ones
13642 SDValue Mask = VPNode->getMask();
13643 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(N: Mask.getNode());
13644
13645 SDValue IntID = DAG.getTargetConstant(Val: IsUnmasked ? Intrinsic::riscv_vsse
13646 : Intrinsic::riscv_vsse_mask,
13647 DL, VT: XLenVT);
13648 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
13649 VPNode->getBasePtr(), VPNode->getStride()};
13650 if (!IsUnmasked) {
13651 if (VT.isFixedLengthVector()) {
13652 MVT MaskVT = ContainerVT.changeVectorElementType(EltVT: MVT::i1);
13653 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
13654 }
13655 Ops.push_back(Elt: Mask);
13656 }
13657 Ops.push_back(Elt: VPNode->getVectorLength());
13658
13659 return DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_VOID, dl: DL, VTList: VPNode->getVTList(),
13660 Ops, MemVT: VPNode->getMemoryVT(),
13661 MMO: VPNode->getMemOperand());
13662}
13663
13664// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
13665// matched to an RVV indexed load. The RVV indexed load instructions only
13666// support the "unsigned unscaled" addressing mode; indices are implicitly
13667// zero-extended or truncated to XLEN and are treated as byte offsets. Any
13668// signed or scaled indexing is extended to the XLEN value type and scaled
13669// accordingly.
13670SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
13671 SelectionDAG &DAG) const {
13672 SDLoc DL(Op);
13673 MVT VT = Op.getSimpleValueType();
13674
13675 const auto *MemSD = cast<MemSDNode>(Val: Op.getNode());
13676 EVT MemVT = MemSD->getMemoryVT();
13677 MachineMemOperand *MMO = MemSD->getMemOperand();
13678 SDValue Chain = MemSD->getChain();
13679 SDValue BasePtr = MemSD->getBasePtr();
13680
13681 [[maybe_unused]] ISD::LoadExtType LoadExtType;
13682 SDValue Index, Mask, PassThru, VL;
13683
13684 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Val: Op.getNode())) {
13685 Index = VPGN->getIndex();
13686 Mask = VPGN->getMask();
13687 PassThru = DAG.getUNDEF(VT);
13688 VL = VPGN->getVectorLength();
13689 // VP doesn't support extending loads.
13690 LoadExtType = ISD::NON_EXTLOAD;
13691 } else {
13692 // Else it must be a MGATHER.
13693 auto *MGN = cast<MaskedGatherSDNode>(Val: Op.getNode());
13694 Index = MGN->getIndex();
13695 Mask = MGN->getMask();
13696 PassThru = MGN->getPassThru();
13697 LoadExtType = MGN->getExtensionType();
13698 }
13699
13700 MVT IndexVT = Index.getSimpleValueType();
13701 MVT XLenVT = Subtarget.getXLenVT();
13702
13703 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
13704 "Unexpected VTs!");
13705 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
13706 // Targets have to explicitly opt in to extending vector loads.
13707 assert(LoadExtType == ISD::NON_EXTLOAD &&
13708 "Unexpected extending MGATHER/VP_GATHER");
13709
13710 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
13711 // the selection of the masked intrinsics doesn't do this for us.
13712 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(N: Mask.getNode());
13713
13714 MVT ContainerVT = VT;
13715 if (VT.isFixedLengthVector()) {
13716 ContainerVT = getContainerForFixedLengthVector(VT);
13717 IndexVT = MVT::getVectorVT(VT: IndexVT.getVectorElementType(),
13718 EC: ContainerVT.getVectorElementCount());
13719
13720 Index = convertToScalableVector(VT: IndexVT, V: Index, DAG, Subtarget);
13721
13722 if (!IsUnmasked) {
13723 MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT);
13724 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
13725 PassThru = convertToScalableVector(VT: ContainerVT, V: PassThru, DAG, Subtarget);
13726 }
13727 }
13728
13729 if (!VL)
13730 VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second;
13731
13732 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(VT: XLenVT)) {
13733 IndexVT = IndexVT.changeVectorElementType(EltVT: XLenVT);
13734 Index = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: IndexVT, Operand: Index);
13735 }
13736
13737 unsigned IntID =
13738 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
13739 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(Val: IntID, DL, VT: XLenVT)};
13740 if (IsUnmasked)
13741 Ops.push_back(Elt: DAG.getUNDEF(VT: ContainerVT));
13742 else
13743 Ops.push_back(Elt: PassThru);
13744 Ops.push_back(Elt: BasePtr);
13745 Ops.push_back(Elt: Index);
13746 if (!IsUnmasked)
13747 Ops.push_back(Elt: Mask);
13748 Ops.push_back(Elt: VL);
13749 if (!IsUnmasked)
13750 Ops.push_back(Elt: DAG.getTargetConstant(Val: RISCVVType::TAIL_AGNOSTIC, DL, VT: XLenVT));
13751
13752 SDVTList VTs = DAG.getVTList(VTs: {ContainerVT, MVT::Other});
13753 SDValue Result =
13754 DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl: DL, VTList: VTs, Ops, MemVT, MMO);
13755 Chain = Result.getValue(R: 1);
13756
13757 if (VT.isFixedLengthVector())
13758 Result = convertFromScalableVector(VT, V: Result, DAG, Subtarget);
13759
13760 return DAG.getMergeValues(Ops: {Result, Chain}, dl: DL);
13761}
13762
13763// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
13764// matched to an RVV indexed store. The RVV indexed store instructions only
13765// support the "unsigned unscaled" addressing mode; indices are implicitly
13766// zero-extended or truncated to XLEN and are treated as byte offsets. Any
13767// signed or scaled indexing is extended to the XLEN value type and scaled
13768// accordingly.
13769SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
13770 SelectionDAG &DAG) const {
13771 SDLoc DL(Op);
13772 const auto *MemSD = cast<MemSDNode>(Val: Op.getNode());
13773 EVT MemVT = MemSD->getMemoryVT();
13774 MachineMemOperand *MMO = MemSD->getMemOperand();
13775 SDValue Chain = MemSD->getChain();
13776 SDValue BasePtr = MemSD->getBasePtr();
13777
13778 [[maybe_unused]] bool IsTruncatingStore = false;
13779 SDValue Index, Mask, Val, VL;
13780
13781 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Val: Op.getNode())) {
13782 Index = VPSN->getIndex();
13783 Mask = VPSN->getMask();
13784 Val = VPSN->getValue();
13785 VL = VPSN->getVectorLength();
13786 // VP doesn't support truncating stores.
13787 IsTruncatingStore = false;
13788 } else {
13789 // Else it must be a MSCATTER.
13790 auto *MSN = cast<MaskedScatterSDNode>(Val: Op.getNode());
13791 Index = MSN->getIndex();
13792 Mask = MSN->getMask();
13793 Val = MSN->getValue();
13794 IsTruncatingStore = MSN->isTruncatingStore();
13795 }
13796
13797 MVT VT = Val.getSimpleValueType();
13798 MVT IndexVT = Index.getSimpleValueType();
13799 MVT XLenVT = Subtarget.getXLenVT();
13800
13801 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
13802 "Unexpected VTs!");
13803 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
13804 // Targets have to explicitly opt in to extending vector loads and
13805 // truncating vector stores.
13806 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
13807
13808 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
13809 // the selection of the masked intrinsics doesn't do this for us.
13810 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(N: Mask.getNode());
13811
13812 MVT ContainerVT = VT;
13813 if (VT.isFixedLengthVector()) {
13814 ContainerVT = getContainerForFixedLengthVector(VT);
13815 IndexVT = MVT::getVectorVT(VT: IndexVT.getVectorElementType(),
13816 EC: ContainerVT.getVectorElementCount());
13817
13818 Index = convertToScalableVector(VT: IndexVT, V: Index, DAG, Subtarget);
13819 Val = convertToScalableVector(VT: ContainerVT, V: Val, DAG, Subtarget);
13820
13821 if (!IsUnmasked) {
13822 MVT MaskVT = getMaskTypeFor(VecVT: ContainerVT);
13823 Mask = convertToScalableVector(VT: MaskVT, V: Mask, DAG, Subtarget);
13824 }
13825 }
13826
13827 if (!VL)
13828 VL = getDefaultVLOps(VecVT: VT, ContainerVT, DL, DAG, Subtarget).second;
13829
13830 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(VT: XLenVT)) {
13831 IndexVT = IndexVT.changeVectorElementType(EltVT: XLenVT);
13832 Index = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: IndexVT, Operand: Index);
13833 }
13834
13835 unsigned IntID =
13836 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
13837 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(Val: IntID, DL, VT: XLenVT)};
13838 Ops.push_back(Elt: Val);
13839 Ops.push_back(Elt: BasePtr);
13840 Ops.push_back(Elt: Index);
13841 if (!IsUnmasked)
13842 Ops.push_back(Elt: Mask);
13843 Ops.push_back(Elt: VL);
13844
13845 return DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_VOID, dl: DL,
13846 VTList: DAG.getVTList(VT: MVT::Other), Ops, MemVT, MMO);
13847}
13848
13849SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
13850 SelectionDAG &DAG) const {
13851 const MVT XLenVT = Subtarget.getXLenVT();
13852 SDLoc DL(Op);
13853 SDValue Chain = Op->getOperand(Num: 0);
13854 SDValue SysRegNo = DAG.getTargetConstant(Val: RISCVSysReg::frm, DL, VT: XLenVT);
13855 SDVTList VTs = DAG.getVTList(VT1: XLenVT, VT2: MVT::Other);
13856 SDValue RM = DAG.getNode(Opcode: RISCVISD::READ_CSR, DL, VTList: VTs, N1: Chain, N2: SysRegNo);
13857
13858 // The encoding used for the rounding mode in RISC-V differs from the one
13859 // used by FLT_ROUNDS. To convert between them, the RISC-V rounding mode is
13860 // used as an index into a table consisting of a sequence of 4-bit fields,
13861 // each holding the corresponding FLT_ROUNDS mode.
13862 static const int Table =
13863 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
13864 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
13865 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
13866 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
13867 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
13868
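 // For example, if frm holds RTZ (1), the shift amount below is 1 << 2 == 4,
 // and (Table >> 4) & 7 extracts the nibble filled with
 // int(RoundingMode::TowardZero), the FLT_ROUNDS value for round-toward-zero.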
13869 SDValue Shift =
13870 DAG.getNode(Opcode: ISD::SHL, DL, VT: XLenVT, N1: RM, N2: DAG.getConstant(Val: 2, DL, VT: XLenVT));
13871 SDValue Shifted = DAG.getNode(Opcode: ISD::SRL, DL, VT: XLenVT,
13872 N1: DAG.getConstant(Val: Table, DL, VT: XLenVT), N2: Shift);
13873 SDValue Masked = DAG.getNode(Opcode: ISD::AND, DL, VT: XLenVT, N1: Shifted,
13874 N2: DAG.getConstant(Val: 7, DL, VT: XLenVT));
13875
13876 return DAG.getMergeValues(Ops: {Masked, Chain}, dl: DL);
13877}
13878
13879SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
13880 SelectionDAG &DAG) const {
13881 const MVT XLenVT = Subtarget.getXLenVT();
13882 SDLoc DL(Op);
13883 SDValue Chain = Op->getOperand(Num: 0);
13884 SDValue RMValue = Op->getOperand(Num: 1);
13885 SDValue SysRegNo = DAG.getTargetConstant(Val: RISCVSysReg::frm, DL, VT: XLenVT);
13886
13887 // The encoding used for the rounding mode in RISC-V differs from the one
13888 // used by FLT_ROUNDS. To convert between them, the C rounding mode is used
13889 // as an index into a table consisting of a sequence of 4-bit fields, each
13890 // holding the corresponding RISC-V mode.
13891 static const unsigned Table =
13892 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
13893 (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
13894 (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
13895 (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
13896 (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
13897
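 // For example, an incoming FLT_ROUNDS value of RoundingMode::TowardZero
 // selects the 4-bit field filled with RISCVFPRndMode::RTZ, which is then
 // written to frm below.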
13898 RMValue = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: XLenVT, Operand: RMValue);
13899
13900 SDValue Shift = DAG.getNode(Opcode: ISD::SHL, DL, VT: XLenVT, N1: RMValue,
13901 N2: DAG.getConstant(Val: 2, DL, VT: XLenVT));
13902 SDValue Shifted = DAG.getNode(Opcode: ISD::SRL, DL, VT: XLenVT,
13903 N1: DAG.getConstant(Val: Table, DL, VT: XLenVT), N2: Shift);
13904 RMValue = DAG.getNode(Opcode: ISD::AND, DL, VT: XLenVT, N1: Shifted,
13905 N2: DAG.getConstant(Val: 0x7, DL, VT: XLenVT));
13906 return DAG.getNode(Opcode: RISCVISD::WRITE_CSR, DL, VT: MVT::Other, N1: Chain, N2: SysRegNo,
13907 N3: RMValue);
13908}
13909
13910SDValue RISCVTargetLowering::lowerGET_FPENV(SDValue Op,
13911 SelectionDAG &DAG) const {
13912 const MVT XLenVT = Subtarget.getXLenVT();
13913 SDLoc DL(Op);
13914 SDValue Chain = Op->getOperand(Num: 0);
13915 SDValue SysRegNo = DAG.getTargetConstant(Val: RISCVSysReg::fcsr, DL, VT: XLenVT);
13916 SDVTList VTs = DAG.getVTList(VT1: XLenVT, VT2: MVT::Other);
13917 return DAG.getNode(Opcode: RISCVISD::READ_CSR, DL, VTList: VTs, N1: Chain, N2: SysRegNo);
13918}
13919
13920SDValue RISCVTargetLowering::lowerSET_FPENV(SDValue Op,
13921 SelectionDAG &DAG) const {
13922 const MVT XLenVT = Subtarget.getXLenVT();
13923 SDLoc DL(Op);
13924 SDValue Chain = Op->getOperand(Num: 0);
13925 SDValue EnvValue = Op->getOperand(Num: 1);
13926 SDValue SysRegNo = DAG.getTargetConstant(Val: RISCVSysReg::fcsr, DL, VT: XLenVT);
13927
13928 EnvValue = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: XLenVT, Operand: EnvValue);
13929 return DAG.getNode(Opcode: RISCVISD::WRITE_CSR, DL, VT: MVT::Other, N1: Chain, N2: SysRegNo,
13930 N3: EnvValue);
13931}
13932
13933SDValue RISCVTargetLowering::lowerRESET_FPENV(SDValue Op,
13934 SelectionDAG &DAG) const {
13935 const MVT XLenVT = Subtarget.getXLenVT();
13936 SDLoc DL(Op);
13937 SDValue Chain = Op->getOperand(Num: 0);
13938 SDValue EnvValue = DAG.getRegister(Reg: RISCV::X0, VT: XLenVT);
13939 SDValue SysRegNo = DAG.getTargetConstant(Val: RISCVSysReg::fcsr, DL, VT: XLenVT);
13940
13941 return DAG.getNode(Opcode: RISCVISD::WRITE_CSR, DL, VT: MVT::Other, N1: Chain, N2: SysRegNo,
13942 N3: EnvValue);
13943}
13944
13945SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
13946 SelectionDAG &DAG) const {
13947 MachineFunction &MF = DAG.getMachineFunction();
13948
13949 bool isRISCV64 = Subtarget.is64Bit();
13950 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
13951
13952 int FI = MF.getFrameInfo().CreateFixedObject(Size: isRISCV64 ? 8 : 4, SPOffset: 0, IsImmutable: false);
13953 return DAG.getFrameIndex(FI, VT: PtrVT);
13954}
13955
13956// Returns the opcode of the target-specific SDNode that implements the 32-bit
13957// form of the given Opcode.
13958static unsigned getRISCVWOpcode(unsigned Opcode) {
13959 switch (Opcode) {
13960 default:
13961 llvm_unreachable("Unexpected opcode");
13962 case ISD::SHL:
13963 return RISCVISD::SLLW;
13964 case ISD::SRA:
13965 return RISCVISD::SRAW;
13966 case ISD::SRL:
13967 return RISCVISD::SRLW;
13968 case ISD::SDIV:
13969 return RISCVISD::DIVW;
13970 case ISD::UDIV:
13971 return RISCVISD::DIVUW;
13972 case ISD::UREM:
13973 return RISCVISD::REMUW;
13974 case ISD::ROTL:
13975 return RISCVISD::ROLW;
13976 case ISD::ROTR:
13977 return RISCVISD::RORW;
13978 }
13979}
13980
13981// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
13982// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
13983// otherwise be promoted to i64, making it difficult to select the
13984// SLLW/DIVUW/.../*W instructions later on, because the fact that the operation
13985// was originally of type i8/i16/i32 is lost.
13986static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
13987 unsigned ExtOpc = ISD::ANY_EXTEND) {
13988 SDLoc DL(N);
13989 unsigned WOpcode = getRISCVWOpcode(Opcode: N->getOpcode());
13990 SDValue NewOp0 = DAG.getNode(Opcode: ExtOpc, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0));
13991 SDValue NewOp1 = DAG.getNode(Opcode: ExtOpc, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1));
13992 SDValue NewRes = DAG.getNode(Opcode: WOpcode, DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1);
13993 // ReplaceNodeResults requires we maintain the same type for the return value.
13994 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: NewRes);
13995}
13996
13997// Converts the given 32-bit operation to an i64 operation with sign-extension
13998// semantics in order to reduce the number of sign-extension instructions.
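// For example, an i32 add on RV64 becomes
//   (trunc (sext_inreg (add (any_ext a), (any_ext b)), i32)),
// which can later be selected as a single ADDW.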
13999static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
14000 SDLoc DL(N);
14001 SDValue NewOp0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0));
14002 SDValue NewOp1 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1));
14003 SDValue NewWOp = DAG.getNode(Opcode: N->getOpcode(), DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1);
14004 SDValue NewRes = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: MVT::i64, N1: NewWOp,
14005 N2: DAG.getValueType(MVT::i32));
14006 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: NewRes);
14007}
14008
14009void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
14010 SmallVectorImpl<SDValue> &Results,
14011 SelectionDAG &DAG) const {
14012 SDLoc DL(N);
14013 switch (N->getOpcode()) {
14014 default:
14015 llvm_unreachable("Don't know how to custom type legalize this operation!");
14016 case ISD::STRICT_FP_TO_SINT:
14017 case ISD::STRICT_FP_TO_UINT:
14018 case ISD::FP_TO_SINT:
14019 case ISD::FP_TO_UINT: {
14020 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14021 "Unexpected custom legalisation");
14022 bool IsStrict = N->isStrictFPOpcode();
14023 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
14024 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
14025 SDValue Op0 = IsStrict ? N->getOperand(Num: 1) : N->getOperand(Num: 0);
14026 if (getTypeAction(Context&: *DAG.getContext(), VT: Op0.getValueType()) !=
14027 TargetLowering::TypeSoftenFloat) {
14028 if (!isTypeLegal(VT: Op0.getValueType()))
14029 return;
14030 if (IsStrict) {
14031 SDValue Chain = N->getOperand(Num: 0);
14032 // In the absence of Zfh, promote f16 to f32, then convert.
14033 if (Op0.getValueType() == MVT::f16 &&
14034 !Subtarget.hasStdExtZfhOrZhinx()) {
14035 Op0 = DAG.getNode(Opcode: ISD::STRICT_FP_EXTEND, DL, ResultTys: {MVT::f32, MVT::Other},
14036 Ops: {Chain, Op0});
14037 Chain = Op0.getValue(R: 1);
14038 }
14039 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
14040 : RISCVISD::STRICT_FCVT_WU_RV64;
14041 SDVTList VTs = DAG.getVTList(VT1: MVT::i64, VT2: MVT::Other);
14042 SDValue Res = DAG.getNode(
14043 Opcode: Opc, DL, VTList: VTs, N1: Chain, N2: Op0,
14044 N3: DAG.getTargetConstant(Val: RISCVFPRndMode::RTZ, DL, VT: MVT::i64));
14045 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Res));
14046 Results.push_back(Elt: Res.getValue(R: 1));
14047 return;
14048 }
14049 // For bf16, or f16 in the absence of Zfh, promote [b]f16 to f32 and then
14050 // convert.
14051 if ((Op0.getValueType() == MVT::f16 &&
14052 !Subtarget.hasStdExtZfhOrZhinx()) ||
14053 Op0.getValueType() == MVT::bf16)
14054 Op0 = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: MVT::f32, Operand: Op0);
14055
14056 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
14057 SDValue Res =
14058 DAG.getNode(Opcode: Opc, DL, VT: MVT::i64, N1: Op0,
14059 N2: DAG.getTargetConstant(Val: RISCVFPRndMode::RTZ, DL, VT: MVT::i64));
14060 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Res));
14061 return;
14062 }
14063 // If the FP type needs to be softened, emit a library call using the 'si'
14064 // version. If we left it to default legalization we'd end up with 'di'. If
14065 // the FP type doesn't need to be softened just let generic type
14066 // legalization promote the result type.
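 // (For example, an fp128 -> i32 conversion would call the 32-bit routine,
 // e.g. __fixtfsi, rather than a 64-bit one followed by a truncate.)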
14067 RTLIB::Libcall LC;
14068 if (IsSigned)
14069 LC = RTLIB::getFPTOSINT(OpVT: Op0.getValueType(), RetVT: N->getValueType(ResNo: 0));
14070 else
14071 LC = RTLIB::getFPTOUINT(OpVT: Op0.getValueType(), RetVT: N->getValueType(ResNo: 0));
14072 MakeLibCallOptions CallOptions;
14073 EVT OpVT = Op0.getValueType();
14074 CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT: N->getValueType(ResNo: 0), Value: true);
14075 SDValue Chain = IsStrict ? N->getOperand(Num: 0) : SDValue();
14076 SDValue Result;
14077 std::tie(args&: Result, args&: Chain) =
14078 makeLibCall(DAG, LC, RetVT: N->getValueType(ResNo: 0), Ops: Op0, CallOptions, dl: DL, Chain);
14079 Results.push_back(Elt: Result);
14080 if (IsStrict)
14081 Results.push_back(Elt: Chain);
14082 break;
14083 }
14084 case ISD::LROUND: {
14085 SDValue Op0 = N->getOperand(Num: 0);
14086 EVT Op0VT = Op0.getValueType();
14087 if (getTypeAction(Context&: *DAG.getContext(), VT: Op0.getValueType()) !=
14088 TargetLowering::TypeSoftenFloat) {
14089 if (!isTypeLegal(VT: Op0VT))
14090 return;
14091
14092 // In the absence of Zfh, promote f16 to f32, then convert.
14093 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
14094 Op0 = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: MVT::f32, Operand: Op0);
14095
14096 SDValue Res =
14097 DAG.getNode(Opcode: RISCVISD::FCVT_W_RV64, DL, VT: MVT::i64, N1: Op0,
14098 N2: DAG.getTargetConstant(Val: RISCVFPRndMode::RMM, DL, VT: MVT::i64));
14099 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Res));
14100 return;
14101 }
14102 // If the FP type needs to be softened, emit a library call to lround. We'll
14103 // need to truncate the result. We assume any value that doesn't fit in i32
14104 // is allowed to return an unspecified value.
14105 RTLIB::Libcall LC =
14106 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
14107 MakeLibCallOptions CallOptions;
14108 EVT OpVT = Op0.getValueType();
14109 CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT: MVT::i64, Value: true);
14110 SDValue Result = makeLibCall(DAG, LC, RetVT: MVT::i64, Ops: Op0, CallOptions, dl: DL).first;
14111 Result = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Result);
14112 Results.push_back(Elt: Result);
14113 break;
14114 }
14115 case ISD::READCYCLECOUNTER:
14116 case ISD::READSTEADYCOUNTER: {
14117 assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
14118 "has custom type legalization on riscv32");
14119
14120 SDValue LoCounter, HiCounter;
14121 MVT XLenVT = Subtarget.getXLenVT();
14122 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
14123 LoCounter = DAG.getTargetConstant(Val: RISCVSysReg::cycle, DL, VT: XLenVT);
14124 HiCounter = DAG.getTargetConstant(Val: RISCVSysReg::cycleh, DL, VT: XLenVT);
14125 } else {
14126 LoCounter = DAG.getTargetConstant(Val: RISCVSysReg::time, DL, VT: XLenVT);
14127 HiCounter = DAG.getTargetConstant(Val: RISCVSysReg::timeh, DL, VT: XLenVT);
14128 }
14129 SDVTList VTs = DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32, VT3: MVT::Other);
14130 SDValue RCW = DAG.getNode(Opcode: RISCVISD::READ_COUNTER_WIDE, DL, VTList: VTs,
14131 N1: N->getOperand(Num: 0), N2: LoCounter, N3: HiCounter);
14132
14133 Results.push_back(
14134 Elt: DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: MVT::i64, N1: RCW, N2: RCW.getValue(R: 1)));
14135 Results.push_back(Elt: RCW.getValue(R: 2));
14136 break;
14137 }
14138 case ISD::LOAD: {
14139 if (!ISD::isNON_EXTLoad(N))
14140 return;
14141
14142 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
14143 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
14144 LoadSDNode *Ld = cast<LoadSDNode>(Val: N);
14145
14146 if (N->getValueType(ResNo: 0) == MVT::i64) {
14147 assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
14148 "Unexpected custom legalisation");
14149
14150 if (!Subtarget.enableUnalignedScalarMem() && Ld->getAlign() < 8)
14151 return;
14152
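 // With Zilsd, an i64 load on RV32 is emitted as a single paired load: the
 // LD_RV32 node returns the two 32-bit halves, which are repacked into an
 // i64 below.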
14153 SDLoc DL(N);
14154 SDValue Result = DAG.getMemIntrinsicNode(
14155 Opcode: RISCVISD::LD_RV32, dl: DL,
14156 VTList: DAG.getVTList(VTs: {MVT::i32, MVT::i32, MVT::Other}),
14157 Ops: {Ld->getChain(), Ld->getBasePtr()}, MemVT: MVT::i64, MMO: Ld->getMemOperand());
14158 SDValue Lo = Result.getValue(R: 0);
14159 SDValue Hi = Result.getValue(R: 1);
14160 SDValue Pair = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: MVT::i64, N1: Lo, N2: Hi);
14161 Results.append(IL: {Pair, Result.getValue(R: 2)});
14162 return;
14163 }
14164
14165 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14166 "Unexpected custom legalisation");
14167
14168 SDLoc dl(N);
14169 SDValue Res = DAG.getExtLoad(ExtType: ISD::SEXTLOAD, dl, VT: MVT::i64, Chain: Ld->getChain(),
14170 Ptr: Ld->getBasePtr(), MemVT: Ld->getMemoryVT(),
14171 MMO: Ld->getMemOperand());
14172 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i32, Operand: Res));
14173 Results.push_back(Elt: Res.getValue(R: 1));
14174 return;
14175 }
14176 case ISD::MUL: {
14177 unsigned Size = N->getSimpleValueType(ResNo: 0).getSizeInBits();
14178 unsigned XLen = Subtarget.getXLen();
14179 // This multiply needs to be expanded; try to use MULHSU+MUL if possible.
14180 if (Size > XLen) {
14181 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
14182 SDValue LHS = N->getOperand(Num: 0);
14183 SDValue RHS = N->getOperand(Num: 1);
14184 APInt HighMask = APInt::getHighBitsSet(numBits: Size, hiBitsSet: XLen);
14185
14186 bool LHSIsU = DAG.MaskedValueIsZero(Op: LHS, Mask: HighMask);
14187 bool RHSIsU = DAG.MaskedValueIsZero(Op: RHS, Mask: HighMask);
14188 // We need exactly one side to be unsigned.
14189 if (LHSIsU == RHSIsU)
14190 return;
14191
14192 auto MakeMULPair = [&](SDValue S, SDValue U) {
14193 MVT XLenVT = Subtarget.getXLenVT();
14194 S = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: XLenVT, Operand: S);
14195 U = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: XLenVT, Operand: U);
14196 SDValue Lo = DAG.getNode(Opcode: ISD::MUL, DL, VT: XLenVT, N1: S, N2: U);
14197 SDValue Hi = DAG.getNode(Opcode: RISCVISD::MULHSU, DL, VT: XLenVT, N1: S, N2: U);
14198 return DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: N->getValueType(ResNo: 0), N1: Lo, N2: Hi);
14199 };
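 // The low XLEN bits of the product come from a plain MUL; the high XLEN
 // bits come from MULHSU, which treats its first operand (S) as signed and
 // its second (U) as unsigned.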
14200
14201 bool LHSIsS = DAG.ComputeNumSignBits(Op: LHS) > XLen;
14202 bool RHSIsS = DAG.ComputeNumSignBits(Op: RHS) > XLen;
14203
14204 // The other operand should be signed, but still prefer MULH when
14205 // possible.
14206 if (RHSIsU && LHSIsS && !RHSIsS)
14207 Results.push_back(Elt: MakeMULPair(LHS, RHS));
14208 else if (LHSIsU && RHSIsS && !LHSIsS)
14209 Results.push_back(Elt: MakeMULPair(RHS, LHS));
14210
14211 return;
14212 }
14213 [[fallthrough]];
14214 }
14215 case ISD::ADD:
14216 case ISD::SUB:
14217 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14218 "Unexpected custom legalisation");
14219 Results.push_back(Elt: customLegalizeToWOpWithSExt(N, DAG));
14220 break;
14221 case ISD::SHL:
14222 case ISD::SRA:
14223 case ISD::SRL:
14224 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14225 "Unexpected custom legalisation");
14226 if (N->getOperand(Num: 1).getOpcode() != ISD::Constant) {
14227 // If we can use a BSET instruction, allow default promotion to apply.
14228 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
14229 isOneConstant(V: N->getOperand(Num: 0)))
14230 break;
14231 Results.push_back(Elt: customLegalizeToWOp(N, DAG));
14232 break;
14233 }
14234
14235 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
14236 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
14237 // shift amount.
14238 if (N->getOpcode() == ISD::SHL) {
14239 SDLoc DL(N);
14240 SDValue NewOp0 =
14241 DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0));
14242 SDValue NewOp1 =
14243 DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1));
14244 SDValue NewWOp = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1);
14245 SDValue NewRes = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: MVT::i64, N1: NewWOp,
14246 N2: DAG.getValueType(MVT::i32));
14247 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: NewRes));
14248 }
14249
14250 break;
14251 case ISD::ROTL:
14252 case ISD::ROTR:
14253 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14254 "Unexpected custom legalisation");
14255 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
14256 Subtarget.hasVendorXTHeadBb()) &&
14257 "Unexpected custom legalization");
14258 if (!isa<ConstantSDNode>(Val: N->getOperand(Num: 1)) &&
14259 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
14260 return;
14261 Results.push_back(Elt: customLegalizeToWOp(N, DAG));
14262 break;
14263 case ISD::CTTZ:
14264 case ISD::CTTZ_ZERO_UNDEF:
14265 case ISD::CTLZ:
14266 case ISD::CTLZ_ZERO_UNDEF: {
14267 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14268 "Unexpected custom legalisation");
14269
14270 SDValue NewOp0 =
14271 DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0));
14272 bool IsCTZ =
14273 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
14274 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
14275 SDValue Res = DAG.getNode(Opcode: Opc, DL, VT: MVT::i64, Operand: NewOp0);
14276 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Res));
14277 return;
14278 }
14279 case ISD::SDIV:
14280 case ISD::UDIV:
14281 case ISD::UREM: {
14282 MVT VT = N->getSimpleValueType(ResNo: 0);
14283 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
14284 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
14285 "Unexpected custom legalisation");
14286 // Don't promote division/remainder by a constant, since we should expand
14287 // those to a multiply by a magic constant.
14288 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
14289 if (N->getOperand(Num: 1).getOpcode() == ISD::Constant &&
14290 !isIntDivCheap(VT: N->getValueType(ResNo: 0), Attr))
14291 return;
14292
14293 // If the input is i32, use ANY_EXTEND since the W instructions don't read
14294 // the upper 32 bits. For other types we need to sign or zero extend
14295 // based on the opcode.
14296 unsigned ExtOpc = ISD::ANY_EXTEND;
14297 if (VT != MVT::i32)
14298 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
14299 : ISD::ZERO_EXTEND;
14300
14301 Results.push_back(Elt: customLegalizeToWOp(N, DAG, ExtOpc));
14302 break;
14303 }
14304 case ISD::SADDO: {
14305 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14306 "Unexpected custom legalisation");
14307
14308 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
14309 // use the default legalization.
14310 if (!isa<ConstantSDNode>(Val: N->getOperand(Num: 1)))
14311 return;
14312
14313 SDValue LHS = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0));
14314 SDValue RHS = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1));
14315 SDValue Res = DAG.getNode(Opcode: ISD::ADD, DL, VT: MVT::i64, N1: LHS, N2: RHS);
14316 Res = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: MVT::i64, N1: Res,
14317 N2: DAG.getValueType(MVT::i32));
14318
14319 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: MVT::i64);
14320
14321 // For an addition, the result should be less than one of the operands (LHS)
14322 // if and only if the other operand (RHS) is negative; otherwise there will
14323 // be overflow.
14324 // For a subtraction, the result should be less than one of the operands
14325 // (LHS) if and only if the other operand (RHS) is (non-zero) positive;
14326 // otherwise there will be overflow.
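 // For example, adding 1 to INT32_MAX produces a result that sign-extends to
 // INT32_MIN: Res < LHS is true while RHS < 0 is false, so the XOR below
 // reports overflow.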
14327 EVT OType = N->getValueType(ResNo: 1);
14328 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, VT: OType, LHS: Res, RHS: LHS, Cond: ISD::SETLT);
14329 SDValue ConditionRHS = DAG.getSetCC(DL, VT: OType, LHS: RHS, RHS: Zero, Cond: ISD::SETLT);
14330
14331 SDValue Overflow =
14332 DAG.getNode(Opcode: ISD::XOR, DL, VT: OType, N1: ConditionRHS, N2: ResultLowerThanLHS);
14333 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Res));
14334 Results.push_back(Elt: Overflow);
14335 return;
14336 }
14337 case ISD::UADDO:
14338 case ISD::USUBO: {
14339 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14340 "Unexpected custom legalisation");
14341 bool IsAdd = N->getOpcode() == ISD::UADDO;
14342 // Create an ADDW or SUBW.
14343 SDValue LHS = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0));
14344 SDValue RHS = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1));
14345 SDValue Res =
14346 DAG.getNode(Opcode: IsAdd ? ISD::ADD : ISD::SUB, DL, VT: MVT::i64, N1: LHS, N2: RHS);
14347 Res = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: MVT::i64, N1: Res,
14348 N2: DAG.getValueType(MVT::i32));
14349
14350 SDValue Overflow;
14351 if (IsAdd && isOneConstant(V: RHS)) {
14352 // Special case: uaddo X, 1 overflowed if the addition result is 0.
14353 // The general case (X + C) < C is not necessarily beneficial. Although we
14354 // reduce the live range of X, we may introduce the materialization of
14355 // constant C, especially when the setcc result is used by a branch. We have
14356 // no compare-with-constant-and-branch instructions.
14357 Overflow = DAG.getSetCC(DL, VT: N->getValueType(ResNo: 1), LHS: Res,
14358 RHS: DAG.getConstant(Val: 0, DL, VT: MVT::i64), Cond: ISD::SETEQ);
14359 } else if (IsAdd && isAllOnesConstant(V: RHS)) {
14360 // Special case uaddo X, -1 overflowed if X != 0.
14361 Overflow = DAG.getSetCC(DL, VT: N->getValueType(ResNo: 1), LHS: N->getOperand(Num: 0),
14362 RHS: DAG.getConstant(Val: 0, DL, VT: MVT::i32), Cond: ISD::SETNE);
14363 } else {
14364 // Sign extend the LHS and perform an unsigned compare with the ADDW
14365 // result. Since the inputs are sign extended from i32, this is equivalent
14366 // to comparing the lower 32 bits.
14367 LHS = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0));
14368 Overflow = DAG.getSetCC(DL, VT: N->getValueType(ResNo: 1), LHS: Res, RHS: LHS,
14369 Cond: IsAdd ? ISD::SETULT : ISD::SETUGT);
14370 }
14371
14372 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Res));
14373 Results.push_back(Elt: Overflow);
14374 return;
14375 }
14376 case ISD::UADDSAT:
14377 case ISD::USUBSAT: {
14378 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14379 !Subtarget.hasStdExtZbb() && "Unexpected custom legalisation");
14380 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
14381 // promotion for UADDO/USUBO.
14382 Results.push_back(Elt: expandAddSubSat(Node: N, DAG));
14383 return;
14384 }
14385 case ISD::SADDSAT:
14386 case ISD::SSUBSAT: {
14387 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14388 "Unexpected custom legalisation");
14389 Results.push_back(Elt: expandAddSubSat(Node: N, DAG));
14390 return;
14391 }
14392 case ISD::ABS: {
14393 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14394 "Unexpected custom legalisation");
14395
14396 if (Subtarget.hasStdExtZbb()) {
14397 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
14398 // This allows us to remember that the result is sign extended. Expanding
14399 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
14400 SDValue Src = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: MVT::i64,
14401 Operand: N->getOperand(Num: 0));
14402 SDValue Abs = DAG.getNode(Opcode: RISCVISD::ABSW, DL, VT: MVT::i64, Operand: Src);
14403 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Abs));
14404 return;
14405 }
14406
14407 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
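// For example, with X = -5: Y = sra(X, 31) = -1, xor(-5, -1) = 4, and
// 4 - (-1) = 5 = |X|. For non-negative X, Y is 0 and the result is X itself.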
14408 SDValue Src = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0));
14409
14410 // Freeze the source so we can increase its use count.
14411 Src = DAG.getFreeze(V: Src);
14412
14413 // Copy sign bit to all bits using the sraiw pattern.
14414 SDValue SignFill = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: MVT::i64, N1: Src,
14415 N2: DAG.getValueType(MVT::i32));
14416 SignFill = DAG.getNode(Opcode: ISD::SRA, DL, VT: MVT::i64, N1: SignFill,
14417 N2: DAG.getConstant(Val: 31, DL, VT: MVT::i64));
14418
14419 SDValue NewRes = DAG.getNode(Opcode: ISD::XOR, DL, VT: MVT::i64, N1: Src, N2: SignFill);
14420 NewRes = DAG.getNode(Opcode: ISD::SUB, DL, VT: MVT::i64, N1: NewRes, N2: SignFill);
14421
14422 // NOTE: The result is only required to be anyextended, but sext is
14423 // consistent with type legalization of sub.
14424 NewRes = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: MVT::i64, N1: NewRes,
14425 N2: DAG.getValueType(MVT::i32));
14426 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: NewRes));
14427 return;
14428 }
14429 case ISD::BITCAST: {
14430 EVT VT = N->getValueType(ResNo: 0);
14431 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
14432 SDValue Op0 = N->getOperand(Num: 0);
14433 EVT Op0VT = Op0.getValueType();
14434 MVT XLenVT = Subtarget.getXLenVT();
14435 if (VT == MVT::i16 &&
14436 ((Op0VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
14437 (Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
14438 SDValue FPConv = DAG.getNode(Opcode: RISCVISD::FMV_X_ANYEXTH, DL, VT: XLenVT, Operand: Op0);
14439 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i16, Operand: FPConv));
14440 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
14441 Subtarget.hasStdExtFOrZfinx()) {
14442 SDValue FPConv =
14443 DAG.getNode(Opcode: RISCVISD::FMV_X_ANYEXTW_RV64, DL, VT: MVT::i64, Operand: Op0);
14444 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: FPConv));
14445 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && !Subtarget.is64Bit() &&
14446 Subtarget.hasStdExtDOrZdinx()) {
14447 SDValue NewReg = DAG.getNode(Opcode: RISCVISD::SplitF64, DL,
14448 VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32), N: Op0);
14449 SDValue RetReg = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: MVT::i64,
14450 N1: NewReg.getValue(R: 0), N2: NewReg.getValue(R: 1));
14451 Results.push_back(Elt: RetReg);
14452 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
14453 isTypeLegal(VT: Op0VT)) {
14454 // Custom-legalize bitcasts from fixed-length vector types to illegal
14455 // scalar types in order to improve codegen. Bitcast the vector to a
14456 // one-element vector type whose element type is the same as the result
14457 // type, and extract the first element.
14458 EVT BVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT, NumElements: 1);
14459 if (isTypeLegal(VT: BVT)) {
14460 SDValue BVec = DAG.getBitcast(VT: BVT, V: Op0);
14461 Results.push_back(Elt: DAG.getExtractVectorElt(DL, VT, Vec: BVec, Idx: 0));
14462 }
14463 }
14464 break;
14465 }
14466 case ISD::BITREVERSE: {
14467 assert(N->getValueType(0) == MVT::i8 && Subtarget.hasStdExtZbkb() &&
14468 "Unexpected custom legalisation");
14469 MVT XLenVT = Subtarget.getXLenVT();
14470 SDValue NewOp = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: N->getOperand(Num: 0));
14471 SDValue NewRes = DAG.getNode(Opcode: RISCVISD::BREV8, DL, VT: XLenVT, Operand: NewOp);
14472 // ReplaceNodeResults requires we maintain the same type for the return
14473 // value.
14474 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i8, Operand: NewRes));
14475 break;
14476 }
14477 case RISCVISD::BREV8:
14478 case RISCVISD::ORC_B: {
14479 MVT VT = N->getSimpleValueType(ResNo: 0);
14480 MVT XLenVT = Subtarget.getXLenVT();
14481 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
14482 "Unexpected custom legalisation");
14483 assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) ||
14484 (N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) &&
14485 "Unexpected extension");
14486 SDValue NewOp = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: XLenVT, Operand: N->getOperand(Num: 0));
14487 SDValue NewRes = DAG.getNode(Opcode: N->getOpcode(), DL, VT: XLenVT, Operand: NewOp);
14488 // ReplaceNodeResults requires we maintain the same type for the return
14489 // value.
14490 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: NewRes));
14491 break;
14492 }
14493 case ISD::EXTRACT_VECTOR_ELT: {
14494 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
14495 // type is illegal (currently only vXi64 RV32).
14496 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
14497 // transferred to the destination register. We issue two of these from the
14498 // upper- and lower- halves of the SEW-bit vector element, slid down to the
14499 // first element.
14500 SDValue Vec = N->getOperand(Num: 0);
14501 SDValue Idx = N->getOperand(Num: 1);
14502
14503 // The vector type hasn't been legalized yet so we can't issue target
14504 // specific nodes if it needs legalization.
14505 // FIXME: We would manually legalize if it's important.
14506 if (!isTypeLegal(VT: Vec.getValueType()))
14507 return;
14508
14509 MVT VecVT = Vec.getSimpleValueType();
14510
14511 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
14512 VecVT.getVectorElementType() == MVT::i64 &&
14513 "Unexpected EXTRACT_VECTOR_ELT legalization");
14514
14515 // If this is a fixed vector, we need to convert it to a scalable vector.
14516 MVT ContainerVT = VecVT;
14517 if (VecVT.isFixedLengthVector()) {
14518 ContainerVT = getContainerForFixedLengthVector(VT: VecVT);
14519 Vec = convertToScalableVector(VT: ContainerVT, V: Vec, DAG, Subtarget);
14520 }
14521
14522 MVT XLenVT = Subtarget.getXLenVT();
14523
14524 // Use a VL of 1 to avoid processing more elements than we need.
14525 auto [Mask, VL] = getDefaultVLOps(NumElts: 1, ContainerVT, DL, DAG, Subtarget);
14526
14527 // Unless the index is known to be 0, we must slide the vector down to get
14528 // the desired element into index 0.
14529 if (!isNullConstant(V: Idx)) {
14530 Vec = getVSlidedown(DAG, Subtarget, DL, VT: ContainerVT,
14531 Passthru: DAG.getUNDEF(VT: ContainerVT), Op: Vec, Offset: Idx, Mask, VL);
14532 }
14533
14534 // Extract the lower XLEN bits of the correct vector element.
14535 SDValue EltLo = DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL, VT: XLenVT, Operand: Vec);
14536
14537 // To extract the upper XLEN bits of the vector element, shift the first
14538 // element right by 32 bits and re-extract the lower XLEN bits.
14539 SDValue ThirtyTwoV = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: ContainerVT,
14540 N1: DAG.getUNDEF(VT: ContainerVT),
14541 N2: DAG.getConstant(Val: 32, DL, VT: XLenVT), N3: VL);
14542 SDValue LShr32 =
14543 DAG.getNode(Opcode: RISCVISD::SRL_VL, DL, VT: ContainerVT, N1: Vec, N2: ThirtyTwoV,
14544 N3: DAG.getUNDEF(VT: ContainerVT), N4: Mask, N5: VL);
14545
14546 SDValue EltHi = DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL, VT: XLenVT, Operand: LShr32);
14547
14548 Results.push_back(Elt: DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: MVT::i64, N1: EltLo, N2: EltHi));
14549 break;
14550 }
14551 case ISD::INTRINSIC_WO_CHAIN: {
14552 unsigned IntNo = N->getConstantOperandVal(Num: 0);
14553 switch (IntNo) {
14554 default:
14555 llvm_unreachable(
14556 "Don't know how to custom type legalize this intrinsic!");
14557 case Intrinsic::experimental_get_vector_length: {
14558 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
14559 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Res));
14560 return;
14561 }
14562 case Intrinsic::experimental_cttz_elts: {
14563 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
14564 Results.push_back(
14565 Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: Res));
14566 return;
14567 }
14568 case Intrinsic::riscv_orc_b:
14569 case Intrinsic::riscv_brev8:
14570 case Intrinsic::riscv_sha256sig0:
14571 case Intrinsic::riscv_sha256sig1:
14572 case Intrinsic::riscv_sha256sum0:
14573 case Intrinsic::riscv_sha256sum1:
14574 case Intrinsic::riscv_sm3p0:
14575 case Intrinsic::riscv_sm3p1: {
14576 if (!Subtarget.is64Bit() || N->getValueType(ResNo: 0) != MVT::i32)
14577 return;
14578 unsigned Opc;
14579 switch (IntNo) {
14580 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
14581 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
14582 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
14583 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
14584 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
14585 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
14586 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
14587 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
14588 }
14589
14590 SDValue NewOp =
14591 DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1));
14592 SDValue Res = DAG.getNode(Opcode: Opc, DL, VT: MVT::i64, Operand: NewOp);
14593 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Res));
14594 return;
14595 }
14596 case Intrinsic::riscv_sm4ks:
14597 case Intrinsic::riscv_sm4ed: {
14598 unsigned Opc =
14599 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
14600 SDValue NewOp0 =
14601 DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1));
14602 SDValue NewOp1 =
14603 DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 2));
14604 SDValue Res =
14605 DAG.getNode(Opcode: Opc, DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1, N3: N->getOperand(Num: 3));
14606 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Res));
14607 return;
14608 }
14609 case Intrinsic::riscv_mopr: {
14610 if (!Subtarget.is64Bit() || N->getValueType(ResNo: 0) != MVT::i32)
14611 return;
14612 SDValue NewOp =
14613 DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1));
14614 SDValue Res = DAG.getNode(
14615 Opcode: RISCVISD::MOPR, DL, VT: MVT::i64, N1: NewOp,
14616 N2: DAG.getTargetConstant(Val: N->getConstantOperandVal(Num: 2), DL, VT: MVT::i64));
14617 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Res));
14618 return;
14619 }
14620 case Intrinsic::riscv_moprr: {
14621 if (!Subtarget.is64Bit() || N->getValueType(ResNo: 0) != MVT::i32)
14622 return;
14623 SDValue NewOp0 =
14624 DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1));
14625 SDValue NewOp1 =
14626 DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 2));
14627 SDValue Res = DAG.getNode(
14628 Opcode: RISCVISD::MOPRR, DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1,
14629 N3: DAG.getTargetConstant(Val: N->getConstantOperandVal(Num: 3), DL, VT: MVT::i64));
14630 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Res));
14631 return;
14632 }
14633 case Intrinsic::riscv_clmul: {
14634 if (!Subtarget.is64Bit() || N->getValueType(ResNo: 0) != MVT::i32)
14635 return;
14636
14637 SDValue NewOp0 =
14638 DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1));
14639 SDValue NewOp1 =
14640 DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 2));
14641 SDValue Res = DAG.getNode(Opcode: RISCVISD::CLMUL, DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1);
14642 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Res));
14643 return;
14644 }
14645 case Intrinsic::riscv_clmulh:
14646 case Intrinsic::riscv_clmulr: {
14647 if (!Subtarget.is64Bit() || N->getValueType(ResNo: 0) != MVT::i32)
14648 return;
14649
14650 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
14651 // to the full 128-bit clmul result of multiplying two xlen values.
14652 // Perform clmulr or clmulh on the shifted values. Finally, extract the
14653 // upper 32 bits.
14654 //
14655 // The alternative is to mask the inputs to 32 bits and use clmul, but
14656 // that requires two shifts to mask each input without zext.w.
14657 // FIXME: If the inputs are known zero extended or could be freely
14658 // zero extended, the mask form would be better.
14659 SDValue NewOp0 =
14660 DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1));
14661 SDValue NewOp1 =
14662 DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 2));
14663 NewOp0 = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i64, N1: NewOp0,
14664 N2: DAG.getConstant(Val: 32, DL, VT: MVT::i64));
14665 NewOp1 = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i64, N1: NewOp1,
14666 N2: DAG.getConstant(Val: 32, DL, VT: MVT::i64));
14667 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
14668 : RISCVISD::CLMULR;
14669 SDValue Res = DAG.getNode(Opcode: Opc, DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1);
14670 Res = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, N1: Res,
14671 N2: DAG.getConstant(Val: 32, DL, VT: MVT::i64));
14672 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Res));
14673 return;
14674 }
14675 case Intrinsic::riscv_vmv_x_s: {
14676 EVT VT = N->getValueType(ResNo: 0);
14677 MVT XLenVT = Subtarget.getXLenVT();
14678 if (VT.bitsLT(VT: XLenVT)) {
14679 // Simple case: just extract using vmv.x.s and truncate.
14680 SDValue Extract = DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL,
14681 VT: Subtarget.getXLenVT(), Operand: N->getOperand(Num: 1));
14682 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Extract));
14683 return;
14684 }
14685
14686 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
14687 "Unexpected custom legalization");
14688
14689 // We need to do the move in two steps.
14690 SDValue Vec = N->getOperand(Num: 1);
14691 MVT VecVT = Vec.getSimpleValueType();
14692
14693 // First extract the lower XLEN bits of the element.
14694 SDValue EltLo = DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL, VT: XLenVT, Operand: Vec);
14695
14696 // To extract the upper XLEN bits of the vector element, shift the first
14697 // element right by 32 bits and re-extract the lower XLEN bits.
14698 auto [Mask, VL] = getDefaultVLOps(NumElts: 1, ContainerVT: VecVT, DL, DAG, Subtarget);
14699
14700 SDValue ThirtyTwoV =
14701 DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: VecVT, N1: DAG.getUNDEF(VT: VecVT),
14702 N2: DAG.getConstant(Val: 32, DL, VT: XLenVT), N3: VL);
14703 SDValue LShr32 = DAG.getNode(Opcode: RISCVISD::SRL_VL, DL, VT: VecVT, N1: Vec, N2: ThirtyTwoV,
14704 N3: DAG.getUNDEF(VT: VecVT), N4: Mask, N5: VL);
14705 SDValue EltHi = DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL, VT: XLenVT, Operand: LShr32);
14706
14707 Results.push_back(
14708 Elt: DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: MVT::i64, N1: EltLo, N2: EltHi));
14709 break;
14710 }
14711 }
14712 break;
14713 }
14714 case ISD::VECREDUCE_ADD:
14715 case ISD::VECREDUCE_AND:
14716 case ISD::VECREDUCE_OR:
14717 case ISD::VECREDUCE_XOR:
14718 case ISD::VECREDUCE_SMAX:
14719 case ISD::VECREDUCE_UMAX:
14720 case ISD::VECREDUCE_SMIN:
14721 case ISD::VECREDUCE_UMIN:
14722 if (SDValue V = lowerVECREDUCE(Op: SDValue(N, 0), DAG))
14723 Results.push_back(Elt: V);
14724 break;
14725 case ISD::VP_REDUCE_ADD:
14726 case ISD::VP_REDUCE_AND:
14727 case ISD::VP_REDUCE_OR:
14728 case ISD::VP_REDUCE_XOR:
14729 case ISD::VP_REDUCE_SMAX:
14730 case ISD::VP_REDUCE_UMAX:
14731 case ISD::VP_REDUCE_SMIN:
14732 case ISD::VP_REDUCE_UMIN:
14733 if (SDValue V = lowerVPREDUCE(Op: SDValue(N, 0), DAG))
14734 Results.push_back(Elt: V);
14735 break;
14736 case ISD::GET_ROUNDING: {
14737 SDVTList VTs = DAG.getVTList(VT1: Subtarget.getXLenVT(), VT2: MVT::Other);
14738 SDValue Res = DAG.getNode(Opcode: ISD::GET_ROUNDING, DL, VTList: VTs, N: N->getOperand(Num: 0));
14739 Results.push_back(Elt: Res.getValue(R: 0));
14740 Results.push_back(Elt: Res.getValue(R: 1));
14741 break;
14742 }
14743 }
14744}
14745
14746/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
14747/// which corresponds to it.
14748static unsigned getVecReduceOpcode(unsigned Opc) {
14749 switch (Opc) {
14750 default:
14751 llvm_unreachable("Unhandled binary to transform reduction");
14752 case ISD::ADD:
14753 return ISD::VECREDUCE_ADD;
14754 case ISD::UMAX:
14755 return ISD::VECREDUCE_UMAX;
14756 case ISD::SMAX:
14757 return ISD::VECREDUCE_SMAX;
14758 case ISD::UMIN:
14759 return ISD::VECREDUCE_UMIN;
14760 case ISD::SMIN:
14761 return ISD::VECREDUCE_SMIN;
14762 case ISD::AND:
14763 return ISD::VECREDUCE_AND;
14764 case ISD::OR:
14765 return ISD::VECREDUCE_OR;
14766 case ISD::XOR:
14767 return ISD::VECREDUCE_XOR;
14768 case ISD::FADD:
14769 // Note: This is the associative form of the generic reduction opcode.
14770 return ISD::VECREDUCE_FADD;
14771 }
14772}
14773
14774/// Perform two related transforms whose purpose is to incrementally recognize
14775/// an explode_vector followed by scalar reduction as a vector reduction node.
14776/// This exists to recover from a deficiency in SLP which can't handle
14777/// forests with multiple roots sharing common nodes. In some cases, one
14778/// of the trees will be vectorized, and the other will remain (unprofitably)
14779/// scalarized.
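///
/// Concretely, the two matches below handle (shown for add; the other
/// supported operations are analogous):
///   (add (extract_vector_elt V, 0), (extract_vector_elt V, 1))
///     -> (vecreduce_add (extract_subvector V, first 2 elements))
///   (add (vecreduce_add (extract_subvector V, first N elements)),
///        (extract_vector_elt V, N))
///     -> (vecreduce_add (extract_subvector V, first N+1 elements))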
14780static SDValue
14781combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
14782 const RISCVSubtarget &Subtarget) {
14783
14784 // This transform needs to run before all integer types have been legalized
14785 // to i64 (so that the vector element type matches the add type), and while
14786 // it's still safe to introduce odd-sized vector types.
14787 if (DAG.NewNodesMustHaveLegalTypes)
14788 return SDValue();
14789
14790 // Without V, this transform isn't useful. We could form the (illegal)
14791 // operations and let them be scalarized again, but there's really no point.
14792 if (!Subtarget.hasVInstructions())
14793 return SDValue();
14794
14795 const SDLoc DL(N);
14796 const EVT VT = N->getValueType(ResNo: 0);
14797 const unsigned Opc = N->getOpcode();
14798
14799 // For FADD, we only handle the case with reassociation allowed. We
14800 // could handle strict reduction order, but at the moment, there's no
14801 // known reason to, and the complexity isn't worth it.
14802 // TODO: Handle fminnum and fmaxnum here
14803 if (!VT.isInteger() &&
14804 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
14805 return SDValue();
14806
14807 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
14808 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
14809 "Inconsistent mappings");
14810 SDValue LHS = N->getOperand(Num: 0);
14811 SDValue RHS = N->getOperand(Num: 1);
14812
14813 if (!LHS.hasOneUse() || !RHS.hasOneUse())
14814 return SDValue();
14815
14816 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
14817 std::swap(a&: LHS, b&: RHS);
14818
14819 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
14820 !isa<ConstantSDNode>(Val: RHS.getOperand(i: 1)))
14821 return SDValue();
14822
14823 uint64_t RHSIdx = cast<ConstantSDNode>(Val: RHS.getOperand(i: 1))->getLimitedValue();
14824 SDValue SrcVec = RHS.getOperand(i: 0);
14825 EVT SrcVecVT = SrcVec.getValueType();
14826 assert(SrcVecVT.getVectorElementType() == VT);
14827 if (SrcVecVT.isScalableVector())
14828 return SDValue();
14829
14830 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
14831 return SDValue();
14832
14833 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
14834 // reduce_op (extract_subvector [2 x VT] from V). This will form the
14835 // root of our reduction tree. TODO: We could extend this to any two
14836 // adjacent aligned constant indices if desired.
14837 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
14838 LHS.getOperand(i: 0) == SrcVec && isa<ConstantSDNode>(Val: LHS.getOperand(i: 1))) {
14839 uint64_t LHSIdx =
14840 cast<ConstantSDNode>(Val: LHS.getOperand(i: 1))->getLimitedValue();
14841 if (0 == std::min(a: LHSIdx, b: RHSIdx) && 1 == std::max(a: LHSIdx, b: RHSIdx)) {
14842 EVT ReduceVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT, NumElements: 2);
14843 SDValue Vec = DAG.getExtractSubvector(DL, VT: ReduceVT, Vec: SrcVec, Idx: 0);
14844 return DAG.getNode(Opcode: ReduceOpc, DL, VT, Operand: Vec, Flags: N->getFlags());
14845 }
14846 }
14847
14848 // Match (binop (reduce (extract_subvector V, 0),
14849 // (extract_vector_elt V, sizeof(SubVec))))
14850 // into a reduction of one more element from the original vector V.
14851 if (LHS.getOpcode() != ReduceOpc)
14852 return SDValue();
14853
14854 SDValue ReduceVec = LHS.getOperand(i: 0);
14855 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
14856 ReduceVec.hasOneUse() && ReduceVec.getOperand(i: 0) == RHS.getOperand(i: 0) &&
14857 isNullConstant(V: ReduceVec.getOperand(i: 1)) &&
14858 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
14859 // For illegal types (e.g. 3xi32), most will be combined again into a
14860 // wider (hopefully legal) type. If this is a terminal state, we are
14861 // relying on type legalization here to produce something reasonable
14862 // and this lowering quality could probably be improved. (TODO)
14863 EVT ReduceVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT, NumElements: RHSIdx + 1);
14864 SDValue Vec = DAG.getExtractSubvector(DL, VT: ReduceVT, Vec: SrcVec, Idx: 0);
14865 return DAG.getNode(Opcode: ReduceOpc, DL, VT, Operand: Vec,
14866 Flags: ReduceVec->getFlags() & N->getFlags());
14867 }
14868
14869 return SDValue();
14870}
14871
14872
14873// Try to fold (<bop> x, (reduction.<bop> vec, start))
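// into (reduction.<bop> vec, x), i.e. fold x into the reduction's start value,
// when the existing start value is the neutral element of <bop> (e.g. 0 for
// add).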
14874static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
14875 const RISCVSubtarget &Subtarget) {
14876 auto BinOpToRVVReduce = [](unsigned Opc) {
14877 switch (Opc) {
14878 default:
14879 llvm_unreachable("Unhandled binary to transform reduction");
14880 case ISD::ADD:
14881 return RISCVISD::VECREDUCE_ADD_VL;
14882 case ISD::UMAX:
14883 return RISCVISD::VECREDUCE_UMAX_VL;
14884 case ISD::SMAX:
14885 return RISCVISD::VECREDUCE_SMAX_VL;
14886 case ISD::UMIN:
14887 return RISCVISD::VECREDUCE_UMIN_VL;
14888 case ISD::SMIN:
14889 return RISCVISD::VECREDUCE_SMIN_VL;
14890 case ISD::AND:
14891 return RISCVISD::VECREDUCE_AND_VL;
14892 case ISD::OR:
14893 return RISCVISD::VECREDUCE_OR_VL;
14894 case ISD::XOR:
14895 return RISCVISD::VECREDUCE_XOR_VL;
14896 case ISD::FADD:
14897 return RISCVISD::VECREDUCE_FADD_VL;
14898 case ISD::FMAXNUM:
14899 return RISCVISD::VECREDUCE_FMAX_VL;
14900 case ISD::FMINNUM:
14901 return RISCVISD::VECREDUCE_FMIN_VL;
14902 }
14903 };
14904
14905 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
14906 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
14907 isNullConstant(V: V.getOperand(i: 1)) &&
14908 V.getOperand(i: 0).getOpcode() == BinOpToRVVReduce(Opc);
14909 };
14910
14911 unsigned Opc = N->getOpcode();
14912 unsigned ReduceIdx;
14913 if (IsReduction(N->getOperand(Num: 0), Opc))
14914 ReduceIdx = 0;
14915 else if (IsReduction(N->getOperand(Num: 1), Opc))
14916 ReduceIdx = 1;
14917 else
14918 return SDValue();
14919
14920 // Skip if this is an FADD that disallows reassociation; the combine needs it.
14921 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
14922 return SDValue();
14923
14924 SDValue Extract = N->getOperand(Num: ReduceIdx);
14925 SDValue Reduce = Extract.getOperand(i: 0);
14926 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
14927 return SDValue();
14928
14929 SDValue ScalarV = Reduce.getOperand(i: 2);
14930 EVT ScalarVT = ScalarV.getValueType();
14931 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
14932 ScalarV.getOperand(i: 0)->isUndef() &&
14933 isNullConstant(V: ScalarV.getOperand(i: 2)))
14934 ScalarV = ScalarV.getOperand(i: 1);
14935
14936 // Make sure that ScalarV is a splat with VL=1.
14937 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
14938 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
14939 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
14940 return SDValue();
14941
14942 if (!isNonZeroAVL(AVL: ScalarV.getOperand(i: 2)))
14943 return SDValue();
14944
14945 // Check that the scalar operand of ScalarV is the neutral element.
14946 // TODO: Deal with start values other than the neutral element.
14947 if (!isNeutralConstant(Opc: N->getOpcode(), Flags: N->getFlags(), V: ScalarV.getOperand(i: 1),
14948 OperandNo: 0))
14949 return SDValue();
14950
14951 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
14952 // FIXME: We might be able to improve this if operand 0 is undef.
14953 if (!isNonZeroAVL(AVL: Reduce.getOperand(i: 5)))
14954 return SDValue();
14955
14956 SDValue NewStart = N->getOperand(Num: 1 - ReduceIdx);
14957
14958 SDLoc DL(N);
14959 SDValue NewScalarV =
14960 lowerScalarInsert(Scalar: NewStart, VL: ScalarV.getOperand(i: 2),
14961 VT: ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
14962
14963 // If we looked through an INSERT_SUBVECTOR we need to restore it.
14964 if (ScalarVT != ScalarV.getValueType())
14965 NewScalarV =
14966 DAG.getInsertSubvector(DL, Vec: DAG.getUNDEF(VT: ScalarVT), SubVec: NewScalarV, Idx: 0);
14967
14968 SDValue Ops[] = {Reduce.getOperand(i: 0), Reduce.getOperand(i: 1),
14969 NewScalarV, Reduce.getOperand(i: 3),
14970 Reduce.getOperand(i: 4), Reduce.getOperand(i: 5)};
14971 SDValue NewReduce =
14972 DAG.getNode(Opcode: Reduce.getOpcode(), DL, VT: Reduce.getValueType(), Ops);
14973 return DAG.getNode(Opcode: Extract.getOpcode(), DL, VT: Extract.getValueType(), N1: NewReduce,
14974 N2: Extract.getOperand(i: 1));
14975}
14976
14977 // Optimize (add (shl x, c0), (shl y, c1)) ->
14978 //          (SLLI (SH*ADD x, y), min(c0, c1)), if |c1-c0| equals 1, 2, or 3.
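// For example: (add (shl x, 1), (shl y, 3)) has |c1-c0| = 2, so it becomes
// (shl (SHL_ADD y, 2, x), 1), i.e. ((y << 2) + x) << 1 == (y << 3) + (x << 1).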
14979static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
14980 const RISCVSubtarget &Subtarget) {
14981 // Perform this optimization only in the zba/xandesperf extension.
14982 if (!Subtarget.hasStdExtZba() && !Subtarget.hasVendorXAndesPerf())
14983 return SDValue();
14984
14985 // Skip for vector types and larger types.
14986 EVT VT = N->getValueType(ResNo: 0);
14987 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
14988 return SDValue();
14989
14990 // The two operand nodes must be SHL and have no other use.
14991 SDValue N0 = N->getOperand(Num: 0);
14992 SDValue N1 = N->getOperand(Num: 1);
14993 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
14994 !N0->hasOneUse() || !N1->hasOneUse())
14995 return SDValue();
14996
14997 // Check c0 and c1.
14998 auto *N0C = dyn_cast<ConstantSDNode>(Val: N0->getOperand(Num: 1));
14999 auto *N1C = dyn_cast<ConstantSDNode>(Val: N1->getOperand(Num: 1));
15000 if (!N0C || !N1C)
15001 return SDValue();
15002 int64_t C0 = N0C->getSExtValue();
15003 int64_t C1 = N1C->getSExtValue();
15004 if (C0 <= 0 || C1 <= 0)
15005 return SDValue();
15006
15007 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
15008 int64_t Bits = std::min(a: C0, b: C1);
15009 int64_t Diff = std::abs(i: C0 - C1);
15010 if (Diff != 1 && Diff != 2 && Diff != 3)
15011 return SDValue();
15012
15013 // Build nodes.
15014 SDLoc DL(N);
15015 SDValue NS = (C0 < C1) ? N0->getOperand(Num: 0) : N1->getOperand(Num: 0);
15016 SDValue NL = (C0 > C1) ? N0->getOperand(Num: 0) : N1->getOperand(Num: 0);
15017 SDValue SHADD = DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: NL,
15018 N2: DAG.getConstant(Val: Diff, DL, VT), N3: NS);
15019 return DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: SHADD, N2: DAG.getConstant(Val: Bits, DL, VT));
15020}
15021
15022 // Check if AddI is an add of a constant that is fed by a shift of 1, 2, or 3,
15023 // and if so reassociate it with Other so the shift folds into a SH*ADD.
15024static SDValue combineShlAddIAddImpl(SDNode *N, SDValue AddI, SDValue Other,
15025 SelectionDAG &DAG) {
15026 using namespace llvm::SDPatternMatch;
15027
15028 // Looking for a reg-reg add and not an addi.
15029 if (isa<ConstantSDNode>(Val: N->getOperand(Num: 1)))
15030 return SDValue();
15031
15032 // Based on testing it seems that performance degrades if the ADDI has
15033 // more than 2 uses.
15034 if (AddI->use_size() > 2)
15035 return SDValue();
15036
15037 APInt AddVal;
15038 SDValue SHLVal;
15039 if (!sd_match(N: AddI, P: m_Add(L: m_Value(N&: SHLVal), R: m_ConstInt(V&: AddVal))))
15040 return SDValue();
15041
15042 APInt VShift;
15043 if (!sd_match(N: SHLVal, P: m_OneUse(P: m_Shl(L: m_Value(), R: m_ConstInt(V&: VShift)))))
15044 return SDValue();
15045
15046 if (VShift.slt(RHS: 1) || VShift.sgt(RHS: 3))
15047 return SDValue();
15048
15049 SDLoc DL(N);
15050 EVT VT = N->getValueType(ResNo: 0);
15051 // The shift must be positive but the add can be signed.
15052 uint64_t ShlConst = VShift.getZExtValue();
15053 int64_t AddConst = AddVal.getSExtValue();
15054
15055 SDValue SHADD = DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: SHLVal->getOperand(Num: 0),
15056 N2: DAG.getConstant(Val: ShlConst, DL, VT), N3: Other);
15057 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: SHADD,
15058 N2: DAG.getSignedConstant(Val: AddConst, DL, VT));
15059}
15060
15061 // Optimize (add (add (shl x, c0), c1), y) ->
15062 //          (ADDI (SH*ADD y, x), c1), if c0 equals 1, 2, or 3.
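// For example: (add (add (shl x, 2), 100), y)
//   -> (add (SHL_ADD x, 2, y), 100), i.e. an sh2add followed by an addi.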
15063static SDValue combineShlAddIAdd(SDNode *N, SelectionDAG &DAG,
15064 const RISCVSubtarget &Subtarget) {
15065 // Perform this optimization only in the zba extension.
15066 if (!ReassocShlAddiAdd || !Subtarget.hasStdExtZba())
15067 return SDValue();
15068
15069 // Skip for vector types and larger types.
15070 EVT VT = N->getValueType(ResNo: 0);
15071 if (VT != Subtarget.getXLenVT())
15072 return SDValue();
15073
15074 SDValue AddI = N->getOperand(Num: 0);
15075 SDValue Other = N->getOperand(Num: 1);
15076 if (SDValue V = combineShlAddIAddImpl(N, AddI, Other, DAG))
15077 return V;
15078 if (SDValue V = combineShlAddIAddImpl(N, AddI: Other, Other: AddI, DAG))
15079 return V;
15080 return SDValue();
15081}
15082
15083// Combine a constant select operand into its use:
15084//
15085// (and (select cond, -1, c), x)
15086// -> (select cond, x, (and x, c)) [AllOnes=1]
15087// (or (select cond, 0, c), x)
15088// -> (select cond, x, (or x, c)) [AllOnes=0]
15089// (xor (select cond, 0, c), x)
15090// -> (select cond, x, (xor x, c)) [AllOnes=0]
15091// (add (select cond, 0, c), x)
15092// -> (select cond, x, (add x, c)) [AllOnes=0]
15093// (sub x, (select cond, 0, c))
15094// -> (select cond, x, (sub x, c)) [AllOnes=0]
15095static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
15096 SelectionDAG &DAG, bool AllOnes,
15097 const RISCVSubtarget &Subtarget) {
15098 EVT VT = N->getValueType(ResNo: 0);
15099
15100 // Skip vectors.
15101 if (VT.isVector())
15102 return SDValue();
15103
15104 if (!Subtarget.hasConditionalMoveFusion()) {
15105 // (select cond, x, (and x, c)) has custom lowering with Zicond.
15106 if ((!Subtarget.hasStdExtZicond() &&
15107 !Subtarget.hasVendorXVentanaCondOps()) ||
15108 N->getOpcode() != ISD::AND)
15109 return SDValue();
15110
15111 // Maybe harmful when the condition has multiple uses.
15112 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(i: 0).hasOneUse())
15113 return SDValue();
15114
15115 // Maybe harmful when VT is wider than XLen.
15116 if (VT.getSizeInBits() > Subtarget.getXLen())
15117 return SDValue();
15118 }
15119
15120 if ((Slct.getOpcode() != ISD::SELECT &&
15121 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
15122 !Slct.hasOneUse())
15123 return SDValue();
15124
15125 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
15126 return AllOnes ? isAllOnesConstant(V: N) : isNullConstant(V: N);
15127 };
15128
15129 bool SwapSelectOps;
15130 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
15131 SDValue TrueVal = Slct.getOperand(i: 1 + OpOffset);
15132 SDValue FalseVal = Slct.getOperand(i: 2 + OpOffset);
15133 SDValue NonConstantVal;
15134 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
15135 SwapSelectOps = false;
15136 NonConstantVal = FalseVal;
15137 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
15138 SwapSelectOps = true;
15139 NonConstantVal = TrueVal;
15140 } else
15141 return SDValue();
15142
15143 // Slct is now known to be the desired identity constant when CC is true.
15144 TrueVal = OtherOp;
15145 FalseVal = DAG.getNode(Opcode: N->getOpcode(), DL: SDLoc(N), VT, N1: OtherOp, N2: NonConstantVal);
15146 // Unless SwapSelectOps says the condition should be false.
15147 if (SwapSelectOps)
15148 std::swap(a&: TrueVal, b&: FalseVal);
15149
15150 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
15151 return DAG.getNode(Opcode: RISCVISD::SELECT_CC, DL: SDLoc(N), VT,
15152 Ops: {Slct.getOperand(i: 0), Slct.getOperand(i: 1),
15153 Slct.getOperand(i: 2), TrueVal, FalseVal});
15154
15155 return DAG.getNode(Opcode: ISD::SELECT, DL: SDLoc(N), VT,
15156 Ops: {Slct.getOperand(i: 0), TrueVal, FalseVal});
15157}
15158
15159// Attempt combineSelectAndUse on each operand of a commutative operator N.
15160static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
15161 bool AllOnes,
15162 const RISCVSubtarget &Subtarget) {
15163 SDValue N0 = N->getOperand(Num: 0);
15164 SDValue N1 = N->getOperand(Num: 1);
15165 if (SDValue Result = combineSelectAndUse(N, Slct: N0, OtherOp: N1, DAG, AllOnes, Subtarget))
15166 return Result;
15167 if (SDValue Result = combineSelectAndUse(N, Slct: N1, OtherOp: N0, DAG, AllOnes, Subtarget))
15168 return Result;
15169 return SDValue();
15170}
15171
15172// Transform (add (mul x, c0), c1) ->
15173// (add (mul (add x, c1/c0), c0), c1%c0).
15174// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
15175// that should be excluded is when c0*(c1/c0) is simm12, which will lead
15176// to an infinite loop in DAGCombine if transformed.
15177// Or transform (add (mul x, c0), c1) ->
15178// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
15179// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
15180// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
15181// lead to an infinite loop in DAGCombine if transformed.
15182// Or transform (add (mul x, c0), c1) ->
15183// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
15184// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
15185// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
15186// lead to an infinite loop in DAGCombine if transformed.
15187// Or transform (add (mul x, c0), c1) ->
15188// (mul (add x, c1/c0), c0).
15189// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
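// Worked example: (add (mul x, 100), 4099). 4099 is not simm12, but
// 4099/100 = 40 and 4099%100 = 99 both are, and 100*40 = 4000 is not simm12,
// so this becomes (add (mul (add x, 40), 100), 99).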
15190static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
15191 const RISCVSubtarget &Subtarget) {
15192 // Skip for vector types and larger types.
15193 EVT VT = N->getValueType(ResNo: 0);
15194 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
15195 return SDValue();
15196 // The first operand node must be a MUL and has no other use.
15197 SDValue N0 = N->getOperand(Num: 0);
15198 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
15199 return SDValue();
15200 // Check if c0 and c1 match above conditions.
15201 auto *N0C = dyn_cast<ConstantSDNode>(Val: N0->getOperand(Num: 1));
15202 auto *N1C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1));
15203 if (!N0C || !N1C)
15204 return SDValue();
15205 // If N0C has multiple uses it's possible one of the cases in
15206 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
15207 // in an infinite loop.
15208 if (!N0C->hasOneUse())
15209 return SDValue();
15210 int64_t C0 = N0C->getSExtValue();
15211 int64_t C1 = N1C->getSExtValue();
15212 int64_t CA, CB;
15213 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(x: C1))
15214 return SDValue();
15215 // Search for proper CA (non-zero) and CB that both are simm12.
15216 if ((C1 / C0) != 0 && isInt<12>(x: C1 / C0) && isInt<12>(x: C1 % C0) &&
15217 !isInt<12>(x: C0 * (C1 / C0))) {
15218 CA = C1 / C0;
15219 CB = C1 % C0;
15220 } else if ((C1 / C0 + 1) != 0 && isInt<12>(x: C1 / C0 + 1) &&
15221 isInt<12>(x: C1 % C0 - C0) && !isInt<12>(x: C0 * (C1 / C0 + 1))) {
15222 CA = C1 / C0 + 1;
15223 CB = C1 % C0 - C0;
15224 } else if ((C1 / C0 - 1) != 0 && isInt<12>(x: C1 / C0 - 1) &&
15225 isInt<12>(x: C1 % C0 + C0) && !isInt<12>(x: C0 * (C1 / C0 - 1))) {
15226 CA = C1 / C0 - 1;
15227 CB = C1 % C0 + C0;
15228 } else
15229 return SDValue();
15230 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
15231 SDLoc DL(N);
15232 SDValue New0 = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0->getOperand(Num: 0),
15233 N2: DAG.getSignedConstant(Val: CA, DL, VT));
15234 SDValue New1 =
15235 DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: New0, N2: DAG.getSignedConstant(Val: C0, DL, VT));
15236 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: New1, N2: DAG.getSignedConstant(Val: CB, DL, VT));
15237}
15238
15239// add (zext, zext) -> zext (add (zext, zext))
15240// sub (zext, zext) -> sext (sub (zext, zext))
15241// mul (zext, zext) -> zext (mul (zext, zext))
15242// sdiv (zext, zext) -> zext (sdiv (zext, zext))
15243// udiv (zext, zext) -> zext (udiv (zext, zext))
15244// srem (zext, zext) -> zext (srem (zext, zext))
15245// urem (zext, zext) -> zext (urem (zext, zext))
15246//
15247 // where the sum of the extend widths match, and the range of the bin op
15248 // fits inside the width of the narrower bin op. (For profitability on RVV, we
15249 // use a power of two for both the inner and outer extends.)
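//
// For example: add (zext nxv1i8 %a to nxv1i32), (zext nxv1i8 %b to nxv1i32)
//   -> zext (add (zext %a to nxv1i16), (zext %b to nxv1i16)) to nxv1i32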
15250static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {
15251
15252 EVT VT = N->getValueType(ResNo: 0);
15253 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
15254 return SDValue();
15255
15256 SDValue N0 = N->getOperand(Num: 0);
15257 SDValue N1 = N->getOperand(Num: 1);
15258 if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)
15259 return SDValue();
15260 if (!N0.hasOneUse() || !N1.hasOneUse())
15261 return SDValue();
15262
15263 SDValue Src0 = N0.getOperand(i: 0);
15264 SDValue Src1 = N1.getOperand(i: 0);
15265 EVT SrcVT = Src0.getValueType();
15266 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT: SrcVT) ||
15267 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
15268 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
15269 return SDValue();
15270
15271 LLVMContext &C = *DAG.getContext();
15272 EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(Context&: C);
15273 EVT NarrowVT = EVT::getVectorVT(Context&: C, VT: ElemVT, EC: VT.getVectorElementCount());
15274
15275 Src0 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: SDLoc(Src0), VT: NarrowVT, Operand: Src0);
15276 Src1 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: SDLoc(Src1), VT: NarrowVT, Operand: Src1);
15277
15278 // Src0 and Src1 are zero extended, so they're always positive if signed.
15279 //
15280 // sub can produce a negative from two positive operands, so it needs sign
15281 // extended. Other nodes produce a positive from two positive operands, so
15282 // zero extend instead.
15283 unsigned OuterExtend =
15284 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
15285
15286 return DAG.getNode(
15287 Opcode: OuterExtend, DL: SDLoc(N), VT,
15288 Operand: DAG.getNode(Opcode: N->getOpcode(), DL: SDLoc(N), VT: NarrowVT, N1: Src0, N2: Src1));
15289}
15290
15291 // Try to turn (add (xor bool, 1), -1) into (neg bool).
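// For bool in {0, 1}: (bool ^ 1) + (-1) = (1 - bool) - 1 = -bool, hence the
// (sub 0, bool) emitted below.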
15292static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
15293 SDValue N0 = N->getOperand(Num: 0);
15294 SDValue N1 = N->getOperand(Num: 1);
15295 EVT VT = N->getValueType(ResNo: 0);
15296 SDLoc DL(N);
15297
15298 // RHS should be -1.
15299 if (!isAllOnesConstant(V: N1))
15300 return SDValue();
15301
15302 // Look for (xor X, 1).
15303 if (N0.getOpcode() != ISD::XOR || !isOneConstant(V: N0.getOperand(i: 1)))
15304 return SDValue();
15305
15306 // First xor input should be 0 or 1.
15307 APInt Mask = APInt::getBitsSetFrom(numBits: VT.getSizeInBits(), loBit: 1);
15308 if (!DAG.MaskedValueIsZero(Op: N0.getOperand(i: 0), Mask))
15309 return SDValue();
15310
15311 // Emit a negate of the setcc.
15312 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: DAG.getConstant(Val: 0, DL, VT),
15313 N2: N0.getOperand(i: 0));
15314}
15315
15316static SDValue performADDCombine(SDNode *N,
15317 TargetLowering::DAGCombinerInfo &DCI,
15318 const RISCVSubtarget &Subtarget) {
15319 SelectionDAG &DAG = DCI.DAG;
15320 if (SDValue V = combineAddOfBooleanXor(N, DAG))
15321 return V;
15322 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
15323 return V;
15324 if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) {
15325 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
15326 return V;
15327 if (SDValue V = combineShlAddIAdd(N, DAG, Subtarget))
15328 return V;
15329 }
15330 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
15331 return V;
15332 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
15333 return V;
15334 if (SDValue V = combineBinOpOfZExt(N, DAG))
15335 return V;
15336
15337 // fold (add (select lhs, rhs, cc, 0, y), x) ->
15338 // (select lhs, rhs, cc, x, (add x, y))
15339 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
15340}
15341
15342 // Try to turn a sub with a boolean RHS and a constant LHS into an addi.
15343static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
15344 SDValue N0 = N->getOperand(Num: 0);
15345 SDValue N1 = N->getOperand(Num: 1);
15346 EVT VT = N->getValueType(ResNo: 0);
15347 SDLoc DL(N);
15348
15349 // Require a constant LHS.
15350 auto *N0C = dyn_cast<ConstantSDNode>(Val&: N0);
15351 if (!N0C)
15352 return SDValue();
15353
15354 // All our optimizations involve subtracting 1 from the immediate and forming
15355 // an ADDI. Make sure the new immediate is valid for an ADDI.
15356 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
15357 if (!ImmValMinus1.isSignedIntN(N: 12))
15358 return SDValue();
15359
15360 SDValue NewLHS;
15361 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
15362 // (sub constant, (setcc x, y, eq/neq)) ->
15363 // (add (setcc x, y, neq/eq), constant - 1)
15364 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: N1.getOperand(i: 2))->get();
15365 EVT SetCCOpVT = N1.getOperand(i: 0).getValueType();
15366 if (!isIntEqualitySetCC(Code: CCVal) || !SetCCOpVT.isInteger())
15367 return SDValue();
15368 CCVal = ISD::getSetCCInverse(Operation: CCVal, Type: SetCCOpVT);
15369 NewLHS =
15370 DAG.getSetCC(DL: SDLoc(N1), VT, LHS: N1.getOperand(i: 0), RHS: N1.getOperand(i: 1), Cond: CCVal);
15371 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(V: N1.getOperand(i: 1)) &&
15372 N1.getOperand(i: 0).getOpcode() == ISD::SETCC) {
15373 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
15374 // Since setcc returns a bool the xor is equivalent to 1-setcc.
15375 NewLHS = N1.getOperand(i: 0);
15376 } else
15377 return SDValue();
15378
15379 SDValue NewRHS = DAG.getConstant(Val: ImmValMinus1, DL, VT);
15380 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: NewLHS, N2: NewRHS);
15381}
15382
15383// Looks for (sub (shl X, 8-Y), (shr X, Y)) where the Y-th bit in each byte is
15384// potentially set. It is fine for Y to be 0, meaning that (sub (shl X, 8), X)
15385// is also valid. Replace with (orc.b X). For example, 0b0000_1000_0000_1000 is
15386// valid with Y=3, while 0b0000_1000_0000_0100 is not.
15387static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,
15388 const RISCVSubtarget &Subtarget) {
15389 if (!Subtarget.hasStdExtZbb())
15390 return SDValue();
15391
15392 EVT VT = N->getValueType(ResNo: 0);
15393
15394 if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
15395 return SDValue();
15396
15397 SDValue N0 = N->getOperand(Num: 0);
15398 SDValue N1 = N->getOperand(Num: 1);
15399
15400 if (N0->getOpcode() != ISD::SHL)
15401 return SDValue();
15402
15403 auto *ShAmtCLeft = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1));
15404 if (!ShAmtCLeft)
15405 return SDValue();
15406 unsigned ShiftedAmount = 8 - ShAmtCLeft->getZExtValue();
15407
15408 if (ShiftedAmount >= 8)
15409 return SDValue();
15410
15411 SDValue LeftShiftOperand = N0->getOperand(Num: 0);
15412 SDValue RightShiftOperand = N1;
15413
15414 if (ShiftedAmount != 0) { // Right operand must be a right shift.
15415 if (N1->getOpcode() != ISD::SRL)
15416 return SDValue();
15417 auto *ShAmtCRight = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1));
15418 if (!ShAmtCRight || ShAmtCRight->getZExtValue() != ShiftedAmount)
15419 return SDValue();
15420 RightShiftOperand = N1.getOperand(i: 0);
15421 }
15422
15423 // At least one shift should have a single use.
15424 if (!N0.hasOneUse() && (ShiftedAmount == 0 || !N1.hasOneUse()))
15425 return SDValue();
15426
15427 if (LeftShiftOperand != RightShiftOperand)
15428 return SDValue();
15429
15430 APInt Mask = APInt::getSplat(NewLen: VT.getSizeInBits(), V: APInt(8, 0x1));
15431 Mask <<= ShiftedAmount;
15432 // Check that X indeed has the right shape (only the Y-th bit can be set in
15433 // every byte).
15434 if (!DAG.MaskedValueIsZero(Op: LeftShiftOperand, Mask: ~Mask))
15435 return SDValue();
15436
15437 return DAG.getNode(Opcode: RISCVISD::ORC_B, DL: SDLoc(N), VT, Operand: LeftShiftOperand);
15438}
15439
15440static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
15441 const RISCVSubtarget &Subtarget) {
15442 if (SDValue V = combineSubOfBoolean(N, DAG))
15443 return V;
15444
15445 EVT VT = N->getValueType(ResNo: 0);
15446 SDValue N0 = N->getOperand(Num: 0);
15447 SDValue N1 = N->getOperand(Num: 1);
15448 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
15449 if (isNullConstant(V: N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
15450 isNullConstant(V: N1.getOperand(i: 1))) {
15451 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: N1.getOperand(i: 2))->get();
15452 if (CCVal == ISD::SETLT) {
15453 SDLoc DL(N);
15454 unsigned ShAmt = N0.getValueSizeInBits() - 1;
15455 return DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: N1.getOperand(i: 0),
15456 N2: DAG.getConstant(Val: ShAmt, DL, VT));
15457 }
15458 }
15459
15460 if (SDValue V = combineBinOpOfZExt(N, DAG))
15461 return V;
15462 if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
15463 return V;
15464
15465 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
15466 // (select lhs, rhs, cc, x, (sub x, y))
15467 return combineSelectAndUse(N, Slct: N1, OtherOp: N0, DAG, /*AllOnes*/ false, Subtarget);
15468}
15469
15470// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
15471// Legalizing setcc can introduce xors like this. Doing this transform reduces
15472// the number of xors and may allow the xor to fold into a branch condition.
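// That is: (and (xor X, 1), (xor Y, 1)) -> (xor (or X, Y), 1), and
//          (or (xor X, 1), (xor Y, 1)) -> (xor (and X, Y), 1).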
15473static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
15474 SDValue N0 = N->getOperand(Num: 0);
15475 SDValue N1 = N->getOperand(Num: 1);
15476 bool IsAnd = N->getOpcode() == ISD::AND;
15477
15478 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
15479 return SDValue();
15480
15481 if (!N0.hasOneUse() || !N1.hasOneUse())
15482 return SDValue();
15483
15484 SDValue N01 = N0.getOperand(i: 1);
15485 SDValue N11 = N1.getOperand(i: 1);
15486
15487 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
15488 // (xor X, -1) based on the upper bits of the other operand being 0. So if
15489 // the operation is AND, allow one of the xors to use -1.
15490 if (isOneConstant(V: N01)) {
15491 if (!isOneConstant(V: N11) && !(IsAnd && isAllOnesConstant(V: N11)))
15492 return SDValue();
15493 } else if (isOneConstant(V: N11)) {
15494 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
15495 if (!(IsAnd && isAllOnesConstant(V: N01)))
15496 return SDValue();
15497 } else
15498 return SDValue();
15499
15500 EVT VT = N->getValueType(ResNo: 0);
15501
15502 SDValue N00 = N0.getOperand(i: 0);
15503 SDValue N10 = N1.getOperand(i: 0);
15504
15505 // The LHS of the xors needs to be 0/1.
15506 APInt Mask = APInt::getBitsSetFrom(numBits: VT.getSizeInBits(), loBit: 1);
15507 if (!DAG.MaskedValueIsZero(Op: N00, Mask) || !DAG.MaskedValueIsZero(Op: N10, Mask))
15508 return SDValue();
15509
15510 // Invert the opcode and insert a new xor.
15511 SDLoc DL(N);
15512 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
15513 SDValue Logic = DAG.getNode(Opcode: Opc, DL, VT, N1: N00, N2: N10);
15514 return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Logic, N2: DAG.getConstant(Val: 1, DL, VT));
15515}
15516
15517// Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to
15518// (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed
15519 // value to an unsigned value. This will be lowered to vmax and a series of
15520 // vnclipu instructions later. This can be extended to truncated types other
15521 // than i8 by replacing 256 and 255 with the equivalent constants for the
15522 // type.
15523static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG) {
15524 EVT VT = N->getValueType(ResNo: 0);
15525 SDValue N0 = N->getOperand(Num: 0);
15526 EVT SrcVT = N0.getValueType();
15527
15528 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15529 if (!VT.isVector() || !TLI.isTypeLegal(VT) || !TLI.isTypeLegal(VT: SrcVT))
15530 return SDValue();
15531
15532 if (N0.getOpcode() != ISD::VSELECT || !N0.hasOneUse())
15533 return SDValue();
15534
15535 SDValue Cond = N0.getOperand(i: 0);
15536 SDValue True = N0.getOperand(i: 1);
15537 SDValue False = N0.getOperand(i: 2);
15538
15539 if (Cond.getOpcode() != ISD::SETCC)
15540 return SDValue();
15541
15542 // FIXME: Support the version of this pattern with the select operands
15543 // swapped.
15544 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: Cond.getOperand(i: 2))->get();
15545 if (CCVal != ISD::SETULT)
15546 return SDValue();
15547
15548 SDValue CondLHS = Cond.getOperand(i: 0);
15549 SDValue CondRHS = Cond.getOperand(i: 1);
15550
15551 if (CondLHS != True)
15552 return SDValue();
15553
15554 unsigned ScalarBits = VT.getScalarSizeInBits();
15555
15556 // FIXME: Support other constants.
15557 ConstantSDNode *CondRHSC = isConstOrConstSplat(N: CondRHS);
15558 if (!CondRHSC || CondRHSC->getAPIntValue() != (1ULL << ScalarBits))
15559 return SDValue();
15560
15561 if (False.getOpcode() != ISD::SIGN_EXTEND)
15562 return SDValue();
15563
15564 False = False.getOperand(i: 0);
15565
15566 if (False.getOpcode() != ISD::SETCC || False.getOperand(i: 0) != True)
15567 return SDValue();
15568
15569 ConstantSDNode *FalseRHSC = isConstOrConstSplat(N: False.getOperand(i: 1));
15570 if (!FalseRHSC || !FalseRHSC->isZero())
15571 return SDValue();
15572
15573 ISD::CondCode CCVal2 = cast<CondCodeSDNode>(Val: False.getOperand(i: 2))->get();
15574 if (CCVal2 != ISD::SETGT)
15575 return SDValue();
15576
15577 // Emit the signed to unsigned saturation pattern.
15578 SDLoc DL(N);
15579 SDValue Max =
15580 DAG.getNode(Opcode: ISD::SMAX, DL, VT: SrcVT, N1: True, N2: DAG.getConstant(Val: 0, DL, VT: SrcVT));
15581 SDValue Min =
15582 DAG.getNode(Opcode: ISD::SMIN, DL, VT: SrcVT, N1: Max,
15583 N2: DAG.getConstant(Val: (1ULL << ScalarBits) - 1, DL, VT: SrcVT));
15584 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Min);
15585}
15586
15587static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
15588 const RISCVSubtarget &Subtarget) {
15589 SDValue N0 = N->getOperand(Num: 0);
15590 EVT VT = N->getValueType(ResNo: 0);
15591
15592 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
15593 // extending X. This is safe since we only need the LSB after the shift and
15594 // shift amounts larger than 31 would produce poison. If we wait until
15595 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
15596 // to use a BEXT instruction.
15597 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
15598 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
15599 !isa<ConstantSDNode>(Val: N0.getOperand(i: 1)) && N0.hasOneUse()) {
15600 SDLoc DL(N0);
15601 SDValue Op0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N0.getOperand(i: 0));
15602 SDValue Op1 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, Operand: N0.getOperand(i: 1));
15603 SDValue Srl = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, N1: Op0, N2: Op1);
15604 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: SDLoc(N), VT, Operand: Srl);
15605 }
15606
15607 return combineTruncSelectToSMaxUSat(N, DAG);
15608}
15609
15610 // InstCombinerImpl::transformZExtICmp will narrow a zext of an icmp with a
15611 // truncation. But RVV can only truncate to half the bit width per instruction,
15612 // so truncating by more than 2x requires a chain of vnsrl instructions.
15613//
15614// E.g. trunc <vscale x 1 x i64> %x to <vscale x 1 x i8> will generate:
15615//
15616// vsetvli a0, zero, e32, m2, ta, ma
15617// vnsrl.wi v12, v8, 0
15618// vsetvli zero, zero, e16, m1, ta, ma
15619// vnsrl.wi v8, v12, 0
15620// vsetvli zero, zero, e8, mf2, ta, ma
15621// vnsrl.wi v8, v8, 0
15622//
15623// So reverse the combine so we generate an vmseq/vmsne again:
15624//
15625// and (lshr (trunc X), ShAmt), 1
15626// -->
15627// zext (icmp ne (and X, (1 << ShAmt)), 0)
15628//
15629// and (lshr (not (trunc X)), ShAmt), 1
15630// -->
15631// zext (icmp eq (and X, (1 << ShAmt)), 0)
15632static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG,
15633 const RISCVSubtarget &Subtarget) {
15634 using namespace SDPatternMatch;
15635 SDLoc DL(N);
15636
15637 if (!Subtarget.hasVInstructions())
15638 return SDValue();
15639
15640 EVT VT = N->getValueType(ResNo: 0);
15641 if (!VT.isVector())
15642 return SDValue();
15643
15644 APInt ShAmt;
15645 SDValue Inner;
15646 if (!sd_match(N, P: m_And(L: m_OneUse(P: m_Srl(L: m_Value(N&: Inner), R: m_ConstInt(V&: ShAmt))),
15647 R: m_One())))
15648 return SDValue();
15649
15650 SDValue X;
15651 bool IsNot;
15652 if (sd_match(N: Inner, P: m_Not(V: m_Trunc(Op: m_Value(N&: X)))))
15653 IsNot = true;
15654 else if (sd_match(N: Inner, P: m_Trunc(Op: m_Value(N&: X))))
15655 IsNot = false;
15656 else
15657 return SDValue();
15658
15659 EVT WideVT = X.getValueType();
15660 if (VT.getScalarSizeInBits() >= WideVT.getScalarSizeInBits() / 2)
15661 return SDValue();
15662
15663 SDValue Res =
15664 DAG.getNode(Opcode: ISD::AND, DL, VT: WideVT, N1: X,
15665 N2: DAG.getConstant(Val: 1ULL << ShAmt.getZExtValue(), DL, VT: WideVT));
15666 Res = DAG.getSetCC(DL,
15667 VT: EVT::getVectorVT(Context&: *DAG.getContext(), VT: MVT::i1,
15668 EC: WideVT.getVectorElementCount()),
15669 LHS: Res, RHS: DAG.getConstant(Val: 0, DL, VT: WideVT),
15670 Cond: IsNot ? ISD::SETEQ : ISD::SETNE);
15671 return DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT, Operand: Res);
15672}
15673
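// Fold (and (atomic_load X), Mask) into a zero-extending atomic load when Mask
// covers exactly the loaded bits and the load's ordering is no stronger than
// monotonic, so the AND becomes redundant.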
15674static SDValue reduceANDOfAtomicLoad(SDNode *N,
15675 TargetLowering::DAGCombinerInfo &DCI) {
15676 SelectionDAG &DAG = DCI.DAG;
15677 if (N->getOpcode() != ISD::AND)
15678 return SDValue();
15679
15680 SDValue N0 = N->getOperand(Num: 0);
15681 if (N0.getOpcode() != ISD::ATOMIC_LOAD)
15682 return SDValue();
15683 if (!N0.hasOneUse())
15684 return SDValue();
15685
15686 AtomicSDNode *ALoad = cast<AtomicSDNode>(Val: N0.getNode());
15687 if (isStrongerThanMonotonic(AO: ALoad->getSuccessOrdering()))
15688 return SDValue();
15689
15690 EVT LoadedVT = ALoad->getMemoryVT();
15691 ConstantSDNode *MaskConst = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1));
15692 if (!MaskConst)
15693 return SDValue();
15694 uint64_t Mask = MaskConst->getZExtValue();
15695 uint64_t ExpectedMask = maskTrailingOnes<uint64_t>(N: LoadedVT.getSizeInBits());
15696 if (Mask != ExpectedMask)
15697 return SDValue();
15698
15699 SDValue ZextLoad = DAG.getAtomicLoad(
15700 ExtType: ISD::ZEXTLOAD, dl: SDLoc(N), MemVT: ALoad->getMemoryVT(), VT: N->getValueType(ResNo: 0),
15701 Chain: ALoad->getChain(), Ptr: ALoad->getBasePtr(), MMO: ALoad->getMemOperand());
15702 DCI.CombineTo(N, Res: ZextLoad);
15703 DAG.ReplaceAllUsesOfValueWith(From: SDValue(N0.getNode(), 1), To: ZextLoad.getValue(R: 1));
15704 DCI.recursivelyDeleteUnusedNodes(N: N0.getNode());
15705 return SDValue(N, 0);
15706}
15707
15708// Combines two comparison operations and a logic operation into one select
15709// operation (min, max) and a logic operation. Returns the newly constructed
15710// node if the conditions for the optimization are satisfied.
15711static SDValue performANDCombine(SDNode *N,
15712 TargetLowering::DAGCombinerInfo &DCI,
15713 const RISCVSubtarget &Subtarget) {
15714 SelectionDAG &DAG = DCI.DAG;
15715
15716 SDValue N0 = N->getOperand(Num: 0);
15717 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
15718 // extending X. This is safe since we only need the LSB after the shift and
15719 // shift amounts larger than 31 would produce poison. If we wait until
15720 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
15721 // to use a BEXT instruction.
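// For example (illustrative only), with Zbs on RV64:
//   (i32 (and (srl X, Y), 1))
// is rewritten below as
//   (i32 (trunc (i64 (and (srl (any_extend X), (zero_extend Y)), 1))))
// which can then be selected as a single BEXT.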
15722 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
15723 N->getValueType(ResNo: 0) == MVT::i32 && isOneConstant(V: N->getOperand(Num: 1)) &&
15724 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(Val: N0.getOperand(i: 1)) &&
15725 N0.hasOneUse()) {
15726 SDLoc DL(N);
15727 SDValue Op0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N0.getOperand(i: 0));
15728 SDValue Op1 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, Operand: N0.getOperand(i: 1));
15729 SDValue Srl = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, N1: Op0, N2: Op1);
15730 SDValue And = DAG.getNode(Opcode: ISD::AND, DL, VT: MVT::i64, N1: Srl,
15731 N2: DAG.getConstant(Val: 1, DL, VT: MVT::i64));
15732 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: And);
15733 }
15734
15735 if (SDValue V = reverseZExtICmpCombine(N, DAG, Subtarget))
15736 return V;
15737
15738 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
15739 return V;
15740 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
15741 return V;
15742 if (SDValue V = reduceANDOfAtomicLoad(N, DCI))
15743 return V;
15744
15745 if (DCI.isAfterLegalizeDAG())
15746 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
15747 return V;
15748
15749 // fold (and (select lhs, rhs, cc, -1, y), x) ->
15750 // (select lhs, rhs, cc, x, (and x, y))
15751 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
15752}
15753
15754// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
15755// FIXME: Generalize to other binary operators with same operand.
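// For example (illustrative only):
//   (or (czero_eqz (xor A, 1), C), (czero_nez (xor B, 1), C))
// becomes
//   (xor (or (czero_eqz A, C), (czero_nez B, C)), 1)
// so the two inner xors with the shared constant are replaced by one outer xor.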
15756static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
15757 SelectionDAG &DAG) {
15758 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
15759
15760 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
15761 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
15762 !N0.hasOneUse() || !N1.hasOneUse())
15763 return SDValue();
15764
15765 // Should have the same condition.
15766 SDValue Cond = N0.getOperand(i: 1);
15767 if (Cond != N1.getOperand(i: 1))
15768 return SDValue();
15769
15770 SDValue TrueV = N0.getOperand(i: 0);
15771 SDValue FalseV = N1.getOperand(i: 0);
15772
15773 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
15774 TrueV.getOperand(i: 1) != FalseV.getOperand(i: 1) ||
15775 !isOneConstant(V: TrueV.getOperand(i: 1)) ||
15776 !TrueV.hasOneUse() || !FalseV.hasOneUse())
15777 return SDValue();
15778
15779 EVT VT = N->getValueType(ResNo: 0);
15780 SDLoc DL(N);
15781
15782 SDValue NewN0 = DAG.getNode(Opcode: RISCVISD::CZERO_EQZ, DL, VT, N1: TrueV.getOperand(i: 0),
15783 N2: Cond);
15784 SDValue NewN1 = DAG.getNode(Opcode: RISCVISD::CZERO_NEZ, DL, VT, N1: FalseV.getOperand(i: 0),
15785 N2: Cond);
15786 SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: NewN0, N2: NewN1);
15787 return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: NewOr, N2: TrueV.getOperand(i: 1));
15788}
15789
15790static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
15791 const RISCVSubtarget &Subtarget) {
15792 SelectionDAG &DAG = DCI.DAG;
15793
15794 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
15795 return V;
15796 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
15797 return V;
15798
15799 if (DCI.isAfterLegalizeDAG())
15800 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
15801 return V;
15802
15803 // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
15804 // We may be able to pull a common operation out of the true and false value.
15805 SDValue N0 = N->getOperand(Num: 0);
15806 SDValue N1 = N->getOperand(Num: 1);
15807 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
15808 return V;
15809 if (SDValue V = combineOrOfCZERO(N, N0: N1, N1: N0, DAG))
15810 return V;
15811
15812 // fold (or (select cond, 0, y), x) ->
15813 // (select cond, x, (or x, y))
15814 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
15815}
15816
15817static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
15818 const RISCVSubtarget &Subtarget) {
15819 SDValue N0 = N->getOperand(Num: 0);
15820 SDValue N1 = N->getOperand(Num: 1);
15821
15822 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
15823 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
15824 // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
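 // For example (illustrative only), (i32 (xor (shl -1, X), -1)) computes the
 // mask (1 << X) - 1, which (ADDI (BSET X0, X), -1) produces directly.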
15825 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
15826 N->getValueType(ResNo: 0) == MVT::i32 && isAllOnesConstant(V: N1) &&
15827 N0.getOpcode() == ISD::SHL && isAllOnesConstant(V: N0.getOperand(i: 0)) &&
15828 !isa<ConstantSDNode>(Val: N0.getOperand(i: 1)) && N0.hasOneUse()) {
15829 SDLoc DL(N);
15830 SDValue Op0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N0.getOperand(i: 0));
15831 SDValue Op1 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, Operand: N0.getOperand(i: 1));
15832 SDValue Shl = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i64, N1: Op0, N2: Op1);
15833 SDValue And = DAG.getNOT(DL, Val: Shl, VT: MVT::i64);
15834 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: And);
15835 }
15836
15837 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
15838 // NOTE: Assumes ROL being legal means ROLW is legal.
15839 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15840 if (N0.getOpcode() == RISCVISD::SLLW &&
15841 isAllOnesConstant(V: N1) && isOneConstant(V: N0.getOperand(i: 0)) &&
15842 TLI.isOperationLegal(Op: ISD::ROTL, VT: MVT::i64)) {
15843 SDLoc DL(N);
15844 return DAG.getNode(Opcode: RISCVISD::ROLW, DL, VT: MVT::i64,
15845 N1: DAG.getConstant(Val: ~1, DL, VT: MVT::i64), N2: N0.getOperand(i: 1));
15846 }
15847
15848 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
15849 if (N0.getOpcode() == ISD::SETCC && isOneConstant(V: N1) && N0.hasOneUse()) {
15850 auto *ConstN00 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 0));
15851 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N0.getOperand(i: 2))->get();
15852 if (ConstN00 && CC == ISD::SETLT) {
15853 EVT VT = N0.getValueType();
15854 SDLoc DL(N0);
15855 const APInt &Imm = ConstN00->getAPIntValue();
15856 if ((Imm + 1).isSignedIntN(N: 12))
15857 return DAG.getSetCC(DL, VT, LHS: N0.getOperand(i: 1),
15858 RHS: DAG.getConstant(Val: Imm + 1, DL, VT), Cond: CC);
15859 }
15860 }
15861
15862 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
15863 return V;
15864 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
15865 return V;
15866
15867 // fold (xor (select cond, 0, y), x) ->
15868 // (select cond, x, (xor x, y))
15869 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
15870}
15871
15872// Try to expand a multiply to a sequence of shifts and adds/subs,
15873// for a machine without a native mul instruction.
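// For example (illustrative only), MulAmt == 55 has the non-adjacent form
// 2^6 - 2^3 - 2^0, so the loop below builds (X << 6) - (X << 3) - X.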
15874static SDValue expandMulToNAFSequence(SDNode *N, SelectionDAG &DAG,
15875 uint64_t MulAmt) {
15876 SDLoc DL(N);
15877 EVT VT = N->getValueType(ResNo: 0);
15878 const uint64_t BitWidth = VT.getFixedSizeInBits();
15879
15880 SDValue Result = DAG.getConstant(Val: 0, DL, VT: N->getValueType(ResNo: 0));
15881 SDValue N0 = N->getOperand(Num: 0);
15882
15883 // Find the non-adjacent form (NAF) of the multiplier.
15884 for (uint64_t E = MulAmt, I = 0; E && I < BitWidth; ++I, E >>= 1) {
15885 if (E & 1) {
15886 bool IsAdd = (E & 3) == 1;
15887 E -= IsAdd ? 1 : -1;
15888 SDValue ShiftVal = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: N0,
15889 N2: DAG.getShiftAmountConstant(Val: I, VT, DL));
15890 ISD::NodeType AddSubOp = IsAdd ? ISD::ADD : ISD::SUB;
15891 Result = DAG.getNode(Opcode: AddSubOp, DL, VT, N1: Result, N2: ShiftVal);
15892 }
15893 }
15894
15895 return Result;
15896}
15897
15898// X * (2^N +/- 2^M) -> (add/sub (shl X, C1), (shl X, C2))
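// For example (illustrative only), MulAmt == 20 == 16 + 4 becomes
// (add (shl X, 4), (shl X, 2)) and MulAmt == 14 == 16 - 2 becomes
// (sub (shl X, 4), (shl X, 1)).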
15899static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG,
15900 uint64_t MulAmt) {
15901 uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
15902 ISD::NodeType Op;
15903 uint64_t ShiftAmt1;
15904 if (isPowerOf2_64(Value: MulAmt + MulAmtLowBit)) {
15905 Op = ISD::SUB;
15906 ShiftAmt1 = MulAmt + MulAmtLowBit;
15907 } else if (isPowerOf2_64(Value: MulAmt - MulAmtLowBit)) {
15908 Op = ISD::ADD;
15909 ShiftAmt1 = MulAmt - MulAmtLowBit;
15910 } else {
15911 return SDValue();
15912 }
15913 EVT VT = N->getValueType(ResNo: 0);
15914 SDLoc DL(N);
15915 SDValue Shift1 = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: N->getOperand(Num: 0),
15916 N2: DAG.getConstant(Val: Log2_64(Value: ShiftAmt1), DL, VT));
15917 SDValue Shift2 = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: N->getOperand(Num: 0),
15918 N2: DAG.getConstant(Val: Log2_64(Value: MulAmtLowBit), DL, VT));
15919 return DAG.getNode(Opcode: Op, DL, VT, N1: Shift1, N2: Shift2);
15920}
15921
15922// Try to expand a scalar multiply to a faster sequence.
15923static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
15924 TargetLowering::DAGCombinerInfo &DCI,
15925 const RISCVSubtarget &Subtarget) {
15926
15927 EVT VT = N->getValueType(ResNo: 0);
15928
15929 // LI + MUL is usually smaller than the alternative sequence.
15930 if (DAG.getMachineFunction().getFunction().hasMinSize())
15931 return SDValue();
15932
15933 if (VT != Subtarget.getXLenVT())
15934 return SDValue();
15935
15936 bool ShouldExpandMul =
15937 (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) ||
15938 !Subtarget.hasStdExtZmmul();
15939 if (!ShouldExpandMul)
15940 return SDValue();
15941
15942 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1));
15943 if (!CNode)
15944 return SDValue();
15945 uint64_t MulAmt = CNode->getZExtValue();
15946
15947 const bool HasShlAdd = Subtarget.hasStdExtZba() ||
15948 Subtarget.hasVendorXTHeadBa() ||
15949 Subtarget.hasVendorXAndesPerf();
15950
15951 // WARNING: The code below is knowingly incorrect with regards to undef semantics.
15952 // We're adding additional uses of X here, and in principle, we should be freezing
15953 // X before doing so. However, adding freeze here causes real regressions, and no
15954 // other target properly freezes X in these cases either.
15955 SDValue X = N->getOperand(Num: 0);
15956
15957 if (HasShlAdd) {
15958 for (uint64_t Divisor : {3, 5, 9}) {
15959 if (MulAmt % Divisor != 0)
15960 continue;
15961 uint64_t MulAmt2 = MulAmt / Divisor;
15962 // 3/5/9 * 2^N -> shl (shXadd X, X), N
15963 if (isPowerOf2_64(Value: MulAmt2)) {
15964 SDLoc DL(N);
15966 // Put the shift first if we can fold a zext into the
15967 // shift forming a slli.uw.
15968 if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(Val: X.getOperand(i: 1)) &&
15969 X.getConstantOperandVal(i: 1) == UINT64_C(0xffffffff)) {
15970 SDValue Shl = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X,
15971 N2: DAG.getConstant(Val: Log2_64(Value: MulAmt2), DL, VT));
15972 return DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: Shl,
15973 N2: DAG.getConstant(Val: Log2_64(Value: Divisor - 1), DL, VT),
15974 N3: Shl);
15975 }
15976 // Otherwise, put the shl second so that it can fold with the following
15977 // instructions (e.g. sext or add).
15978 SDValue Mul359 =
15979 DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: X,
15980 N2: DAG.getConstant(Val: Log2_64(Value: Divisor - 1), DL, VT), N3: X);
15981 return DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Mul359,
15982 N2: DAG.getConstant(Val: Log2_64(Value: MulAmt2), DL, VT));
15983 }
15984
15985 // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
15986 if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
15987 SDLoc DL(N);
15988 SDValue Mul359 =
15989 DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: X,
15990 N2: DAG.getConstant(Val: Log2_64(Value: Divisor - 1), DL, VT), N3: X);
15991 return DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: Mul359,
15992 N2: DAG.getConstant(Val: Log2_64(Value: MulAmt2 - 1), DL, VT),
15993 N3: Mul359);
15994 }
15995 }
15996
15997 // If this is a power of 2 + 2/4/8, we can use a shift followed by a single
15998 // shXadd. First check that this is a sum of two powers of 2 because that's
15999 // easy. Then count the trailing zeros to find the smaller power of 2.
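 // For example (illustrative only), MulAmt == 34 == 32 + 2 becomes
 // (sh1add X, (shl X, 5)), i.e. (X << 1) + (X << 5).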
16000 if (isPowerOf2_64(Value: MulAmt & (MulAmt - 1))) {
16001 unsigned ScaleShift = llvm::countr_zero(Val: MulAmt);
16002 if (ScaleShift >= 1 && ScaleShift < 4) {
16003 unsigned ShiftAmt = Log2_64(Value: (MulAmt & (MulAmt - 1)));
16004 SDLoc DL(N);
16005 SDValue Shift1 =
16006 DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: DAG.getConstant(Val: ShiftAmt, DL, VT));
16007 return DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: X,
16008 N2: DAG.getConstant(Val: ScaleShift, DL, VT), N3: Shift1);
16009 }
16010 }
16011
16012 // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
16013 // This is the two-instruction form; there are also three-instruction
16014 // variants we could implement, e.g.:
16015 // (2^(1,2,3) * 3,5,9 + 1) << C2
16016 // 2^(C1>3) * 3,5,9 +/- 1
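 // For example (illustrative only), MulAmt == 11 == 5 * 2 + 1 becomes
 // (sh1add (sh2add X, X), X), i.e. ((X * 5) << 1) + X.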
16017 for (uint64_t Divisor : {3, 5, 9}) {
16018 uint64_t C = MulAmt - 1;
16019 if (C <= Divisor)
16020 continue;
16021 unsigned TZ = llvm::countr_zero(Val: C);
16022 if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
16023 SDLoc DL(N);
16024 SDValue Mul359 =
16025 DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: X,
16026 N2: DAG.getConstant(Val: Log2_64(Value: Divisor - 1), DL, VT), N3: X);
16027 return DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: Mul359,
16028 N2: DAG.getConstant(Val: TZ, DL, VT), N3: X);
16029 }
16030 }
16031
16032 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
16033 if (MulAmt > 2 && isPowerOf2_64(Value: (MulAmt - 1) & (MulAmt - 2))) {
16034 unsigned ScaleShift = llvm::countr_zero(Val: MulAmt - 1);
16035 if (ScaleShift >= 1 && ScaleShift < 4) {
16036 unsigned ShiftAmt = Log2_64(Value: ((MulAmt - 1) & (MulAmt - 2)));
16037 SDLoc DL(N);
16038 SDValue Shift1 =
16039 DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: DAG.getConstant(Val: ShiftAmt, DL, VT));
16040 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shift1,
16041 N2: DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: X,
16042 N2: DAG.getConstant(Val: ScaleShift, DL, VT), N3: X));
16043 }
16044 }
16045
16046 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, X))
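 // For example (illustrative only), MulAmt == 29 == 32 - 3 becomes
 // (sub (shl X, 5), (sh1add X, X)).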
16047 for (uint64_t Offset : {3, 5, 9}) {
16048 if (isPowerOf2_64(Value: MulAmt + Offset)) {
16049 SDLoc DL(N);
16050 SDValue Shift1 =
16051 DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X,
16052 N2: DAG.getConstant(Val: Log2_64(Value: MulAmt + Offset), DL, VT));
16053 SDValue Mul359 =
16054 DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: X,
16055 N2: DAG.getConstant(Val: Log2_64(Value: Offset - 1), DL, VT), N3: X);
16056 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Shift1, N2: Mul359);
16057 }
16058 }
16059
16060 for (uint64_t Divisor : {3, 5, 9}) {
16061 if (MulAmt % Divisor != 0)
16062 continue;
16063 uint64_t MulAmt2 = MulAmt / Divisor;
16064 // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
16065 // of 25 which happen to be quite common.
16066 for (uint64_t Divisor2 : {3, 5, 9}) {
16067 if (MulAmt2 % Divisor2 != 0)
16068 continue;
16069 uint64_t MulAmt3 = MulAmt2 / Divisor2;
16070 if (isPowerOf2_64(Value: MulAmt3)) {
16071 SDLoc DL(N);
16072 SDValue Mul359A =
16073 DAG.getNode(Opcode: RISCVISD::SHL_ADD, DL, VT, N1: X,
16074 N2: DAG.getConstant(Val: Log2_64(Value: Divisor - 1), DL, VT), N3: X);
16075 SDValue Mul359B = DAG.getNode(
16076 Opcode: RISCVISD::SHL_ADD, DL, VT, N1: Mul359A,
16077 N2: DAG.getConstant(Val: Log2_64(Value: Divisor2 - 1), DL, VT), N3: Mul359A);
16078 return DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Mul359B,
16079 N2: DAG.getConstant(Val: Log2_64(Value: MulAmt3), DL, VT));
16080 }
16081 }
16082 }
16083 }
16084
16085 if (SDValue V = expandMulToAddOrSubOfShl(N, DAG, MulAmt))
16086 return V;
16087
16088 if (!Subtarget.hasStdExtZmmul())
16089 return expandMulToNAFSequence(N, DAG, MulAmt);
16090
16091 return SDValue();
16092}
16093
16094// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) ->
16095// (bitcast (sra (v2Xi16 (bitcast X)), 15))
16096// Same for other equivalent types with other equivalent constants.
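// For example (illustrative only), for v4i32 this views X as v8i16: each
// 16-bit half becomes 0xffff when its own sign bit is set and 0 otherwise,
// which is exactly an arithmetic shift right by 15 of the v8i16 value.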
16097static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG) {
16098 EVT VT = N->getValueType(ResNo: 0);
16099 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16100
16101 // Do this for legal vectors unless they are i1 or i8 vectors.
16102 if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16)
16103 return SDValue();
16104
16105 if (N->getOperand(Num: 0).getOpcode() != ISD::AND ||
16106 N->getOperand(Num: 0).getOperand(i: 0).getOpcode() != ISD::SRL)
16107 return SDValue();
16108
16109 SDValue And = N->getOperand(Num: 0);
16110 SDValue Srl = And.getOperand(i: 0);
16111
16112 APInt V1, V2, V3;
16113 if (!ISD::isConstantSplatVector(N: N->getOperand(Num: 1).getNode(), SplatValue&: V1) ||
16114 !ISD::isConstantSplatVector(N: And.getOperand(i: 1).getNode(), SplatValue&: V2) ||
16115 !ISD::isConstantSplatVector(N: Srl.getOperand(i: 1).getNode(), SplatValue&: V3))
16116 return SDValue();
16117
16118 unsigned HalfSize = VT.getScalarSizeInBits() / 2;
16119 if (!V1.isMask(numBits: HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
16120 V3 != (HalfSize - 1))
16121 return SDValue();
16122
16123 EVT HalfVT = EVT::getVectorVT(Context&: *DAG.getContext(),
16124 VT: EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: HalfSize),
16125 EC: VT.getVectorElementCount() * 2);
16126 SDLoc DL(N);
16127 SDValue Cast = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: HalfVT, Operand: Srl.getOperand(i: 0));
16128 SDValue Sra = DAG.getNode(Opcode: ISD::SRA, DL, VT: HalfVT, N1: Cast,
16129 N2: DAG.getConstant(Val: HalfSize - 1, DL, VT: HalfVT));
16130 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Sra);
16131}
16132
16133static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
16134 TargetLowering::DAGCombinerInfo &DCI,
16135 const RISCVSubtarget &Subtarget) {
16136 EVT VT = N->getValueType(ResNo: 0);
16137 if (!VT.isVector())
16138 return expandMul(N, DAG, DCI, Subtarget);
16139
16140 SDLoc DL(N);
16141 SDValue N0 = N->getOperand(Num: 0);
16142 SDValue N1 = N->getOperand(Num: 1);
16143 SDValue MulOper;
16144 unsigned AddSubOpc;
16145
16146 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
16147 // (mul x, (add y, 1)) -> (add x, (mul x, y))
16148 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
16149 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
16150 auto IsAddSubWith1 = [&](SDValue V) -> bool {
16151 AddSubOpc = V->getOpcode();
16152 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
16153 SDValue Opnd = V->getOperand(Num: 1);
16154 MulOper = V->getOperand(Num: 0);
16155 if (AddSubOpc == ISD::SUB)
16156 std::swap(a&: Opnd, b&: MulOper);
16157 if (isOneOrOneSplat(V: Opnd))
16158 return true;
16159 }
16160 return false;
16161 };
16162
16163 if (IsAddSubWith1(N0)) {
16164 SDValue MulVal = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1, N2: MulOper);
16165 return DAG.getNode(Opcode: AddSubOpc, DL, VT, N1, N2: MulVal);
16166 }
16167
16168 if (IsAddSubWith1(N1)) {
16169 SDValue MulVal = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: N0, N2: MulOper);
16170 return DAG.getNode(Opcode: AddSubOpc, DL, VT, N1: N0, N2: MulVal);
16171 }
16172
16173 if (SDValue V = combineBinOpOfZExt(N, DAG))
16174 return V;
16175
16176 if (SDValue V = combineVectorMulToSraBitcast(N, DAG))
16177 return V;
16178
16179 return SDValue();
16180}
16181
16182/// Since indexed load/store instructions zero-extend their indices, try to
16183/// narrow the type of the index operand.
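/// For example (illustrative only), an index of the form
/// (shl (zext <vscale x 1 x i8> %x to <vscale x 1 x i64>), splat 2) only needs
/// 8 + 2 bits, so it can be rebuilt as
/// (shl (zext %x to <vscale x 1 x i16>), splat 2).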
16184static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
16185 if (isIndexTypeSigned(IndexType))
16186 return false;
16187
16188 if (!N->hasOneUse())
16189 return false;
16190
16191 EVT VT = N.getValueType();
16192 SDLoc DL(N);
16193
16194 // In general, what we're doing here is seeing if we can sink a truncate to
16195 // a smaller element type into the expression tree building our index.
16196 // TODO: We can generalize this and handle a bunch more cases if useful.
16197
16198 // Narrow a buildvector to the narrowest element type. This requires less
16199 // work and less register pressure at high LMUL, and creates smaller constants
16200 // which may be cheaper to materialize.
16201 if (ISD::isBuildVectorOfConstantSDNodes(N: N.getNode())) {
16202 KnownBits Known = DAG.computeKnownBits(Op: N);
16203 unsigned ActiveBits = std::max(a: 8u, b: Known.countMaxActiveBits());
16204 LLVMContext &C = *DAG.getContext();
16205 EVT ResultVT = EVT::getIntegerVT(Context&: C, BitWidth: ActiveBits).getRoundIntegerType(Context&: C);
16206 if (ResultVT.bitsLT(VT: VT.getVectorElementType())) {
16207 N = DAG.getNode(Opcode: ISD::TRUNCATE, DL,
16208 VT: VT.changeVectorElementType(EltVT: ResultVT), Operand: N);
16209 return true;
16210 }
16211 }
16212
16213 // Handle the pattern (shl (zext x to ty), C) where bits(x) + C < bits(ty).
16214 if (N.getOpcode() != ISD::SHL)
16215 return false;
16216
16217 SDValue N0 = N.getOperand(i: 0);
16218 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
16219 N0.getOpcode() != RISCVISD::VZEXT_VL)
16220 return false;
16221 if (!N0->hasOneUse())
16222 return false;
16223
16224 APInt ShAmt;
16225 SDValue N1 = N.getOperand(i: 1);
16226 if (!ISD::isConstantSplatVector(N: N1.getNode(), SplatValue&: ShAmt))
16227 return false;
16228
16229 SDValue Src = N0.getOperand(i: 0);
16230 EVT SrcVT = Src.getValueType();
16231 unsigned SrcElen = SrcVT.getScalarSizeInBits();
16232 unsigned ShAmtV = ShAmt.getZExtValue();
16233 unsigned NewElen = PowerOf2Ceil(A: SrcElen + ShAmtV);
16234 NewElen = std::max(a: NewElen, b: 8U);
16235
16236 // Skip if NewElen is not narrower than the original extended type.
16237 if (NewElen >= N0.getValueType().getScalarSizeInBits())
16238 return false;
16239
16240 EVT NewEltVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NewElen);
16241 EVT NewVT = SrcVT.changeVectorElementType(EltVT: NewEltVT);
16242
16243 SDValue NewExt = DAG.getNode(Opcode: N0->getOpcode(), DL, VT: NewVT, Ops: N0->ops());
16244 SDValue NewShAmtVec = DAG.getConstant(Val: ShAmtV, DL, VT: NewVT);
16245 N = DAG.getNode(Opcode: ISD::SHL, DL, VT: NewVT, N1: NewExt, N2: NewShAmtVec);
16246 return true;
16247}
16248
16249/// Try to map an integer comparison with size > XLEN to vector instructions
16250/// before type legalization splits it up into chunks.
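/// For example (illustrative only), an i256 equality compare on RV64 can be
/// bitcast to two v32i8 operands, compared lane-wise with a VP setcc ne, and
/// reduced with a VP reduce.or whose result is then compared against zero,
/// provided the maximum fixed-length vector size allows a v32i8 type.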
16251static SDValue
16252combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC,
16253 const SDLoc &DL, SelectionDAG &DAG,
16254 const RISCVSubtarget &Subtarget) {
16255 assert(ISD::isIntEqualitySetCC(CC) && "Bad comparison predicate");
16256
16257 if (!Subtarget.hasVInstructions())
16258 return SDValue();
16259
16260 MVT XLenVT = Subtarget.getXLenVT();
16261 EVT OpVT = X.getValueType();
16262 // We're looking for an oversized integer equality comparison.
16263 if (!OpVT.isScalarInteger())
16264 return SDValue();
16265
16266 unsigned OpSize = OpVT.getSizeInBits();
16267 // The size should be larger than XLen and no larger than the maximum vector
16268 // size.
16269 if (OpSize <= Subtarget.getXLen() ||
16270 OpSize > Subtarget.getRealMinVLen() *
16271 Subtarget.getMaxLMULForFixedLengthVectors())
16272 return SDValue();
16273
16274 // Don't perform this combine if constructing the vector will be expensive.
16275 auto IsVectorBitCastCheap = [](SDValue X) {
16276 X = peekThroughBitcasts(V: X);
16277 return isa<ConstantSDNode>(Val: X) || X.getValueType().isVector() ||
16278 X.getOpcode() == ISD::LOAD;
16279 };
16280 if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y))
16281 return SDValue();
16282
16283 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
16284 Kind: Attribute::NoImplicitFloat))
16285 return SDValue();
16286
16287 // Bail out for non-byte-sized types.
16288 if (!OpVT.isByteSized())
16289 return SDValue();
16290
16291 unsigned VecSize = OpSize / 8;
16292 EVT VecVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: MVT::i8, NumElements: VecSize);
16293 EVT CmpVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: MVT::i1, NumElements: VecSize);
16294
16295 SDValue VecX = DAG.getBitcast(VT: VecVT, V: X);
16296 SDValue VecY = DAG.getBitcast(VT: VecVT, V: Y);
16297 SDValue Mask = DAG.getAllOnesConstant(DL, VT: CmpVT);
16298 SDValue VL = DAG.getConstant(Val: VecSize, DL, VT: XLenVT);
16299
16300 SDValue Cmp = DAG.getNode(Opcode: ISD::VP_SETCC, DL, VT: CmpVT, N1: VecX, N2: VecY,
16301 N3: DAG.getCondCode(Cond: ISD::SETNE), N4: Mask, N5: VL);
16302 return DAG.getSetCC(DL, VT,
16303 LHS: DAG.getNode(Opcode: ISD::VP_REDUCE_OR, DL, VT: XLenVT,
16304 N1: DAG.getConstant(Val: 0, DL, VT: XLenVT), N2: Cmp, N3: Mask,
16305 N4: VL),
16306 RHS: DAG.getConstant(Val: 0, DL, VT: XLenVT), Cond: CC);
16307}
16308
16309// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
16310// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
16311// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
16312// can become a sext.w instead of a shift pair.
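// For example (illustrative only),
//   (seteq (and X, 0xffffffff), 0xfffff000)
// becomes
//   (seteq (sext_inreg X, i32), 0xfffffffffffff000)
// and the new constant may be materialized with a single LUI.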
16313static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
16314 const RISCVSubtarget &Subtarget) {
16315 SDLoc dl(N);
16316 SDValue N0 = N->getOperand(Num: 0);
16317 SDValue N1 = N->getOperand(Num: 1);
16318 EVT VT = N->getValueType(ResNo: 0);
16319 EVT OpVT = N0.getValueType();
16320
16321 ISD::CondCode Cond = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get();
16322 // Looking for an equality compare.
16323 if (!isIntEqualitySetCC(Code: Cond))
16324 return SDValue();
16325
16326 if (SDValue V =
16327 combineVectorSizedSetCCEquality(VT, X: N0, Y: N1, CC: Cond, DL: dl, DAG, Subtarget))
16328 return V;
16329
16330 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
16331 return SDValue();
16332
16333 // RHS needs to be a constant.
16334 auto *N1C = dyn_cast<ConstantSDNode>(Val&: N1);
16335 if (!N1C)
16336 return SDValue();
16337
16338 // LHS needs to be (and X, 0xffffffff).
16339 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
16340 !isa<ConstantSDNode>(Val: N0.getOperand(i: 1)) ||
16341 N0.getConstantOperandVal(i: 1) != UINT64_C(0xffffffff))
16342 return SDValue();
16343
16344 // Don't do this if the sign bit is provably zero; it will be turned back
16345 // into an AND.
16346 APInt SignMask = APInt::getOneBitSet(numBits: 64, BitNo: 31);
16347 if (DAG.MaskedValueIsZero(Op: N0.getOperand(i: 0), Mask: SignMask))
16348 return SDValue();
16349
16350 const APInt &C1 = N1C->getAPIntValue();
16351
16352 // If the constant is larger than 2^32 - 1, it is impossible for both sides
16353 // to be equal.
16354 if (C1.getActiveBits() > 32)
16355 return DAG.getBoolConstant(V: Cond == ISD::SETNE, DL: dl, VT, OpVT);
16356
16357 SDValue SExtOp = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL: N, VT: OpVT,
16358 N1: N0.getOperand(i: 0), N2: DAG.getValueType(MVT::i32));
16359 return DAG.getSetCC(DL: dl, VT, LHS: SExtOp, RHS: DAG.getConstant(Val: C1.trunc(width: 32).sext(width: 64),
16360 DL: dl, VT: OpVT), Cond);
16361}
16362
16363static SDValue
16364performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
16365 const RISCVSubtarget &Subtarget) {
16366 SDValue Src = N->getOperand(Num: 0);
16367 EVT VT = N->getValueType(ResNo: 0);
16368 EVT SrcVT = cast<VTSDNode>(Val: N->getOperand(Num: 1))->getVT();
16369 unsigned Opc = Src.getOpcode();
16370
16371 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
16372 // Don't do this with Zhinx. We need to explicitly sign extend the GPR.
16373 if (Opc == RISCVISD::FMV_X_ANYEXTH && SrcVT.bitsGE(VT: MVT::i16) &&
16374 Subtarget.hasStdExtZfhmin())
16375 return DAG.getNode(Opcode: RISCVISD::FMV_X_SIGNEXTH, DL: SDLoc(N), VT,
16376 Operand: Src.getOperand(i: 0));
16377
16378 // Fold (sext_inreg (shl X, Y), i32) -> (sllw X, Y) iff Y u< 32
16379 if (Opc == ISD::SHL && Subtarget.is64Bit() && SrcVT == MVT::i32 &&
16380 VT == MVT::i64 && !isa<ConstantSDNode>(Val: Src.getOperand(i: 1)) &&
16381 DAG.computeKnownBits(Op: Src.getOperand(i: 1)).countMaxActiveBits() <= 5)
16382 return DAG.getNode(Opcode: RISCVISD::SLLW, DL: SDLoc(N), VT, N1: Src.getOperand(i: 0),
16383 N2: Src.getOperand(i: 1));
16384
16385 return SDValue();
16386}
16387
16388namespace {
16389// Forward declaration of the structure holding the necessary information to
16390// apply a combine.
16391struct CombineResult;
16392
16393enum ExtKind : uint8_t {
16394 ZExt = 1 << 0,
16395 SExt = 1 << 1,
16396 FPExt = 1 << 2,
16397 BF16Ext = 1 << 3
16398};
16399/// Helper class for folding sign/zero extensions.
16400/// In particular, this class is used for the following combines:
16401/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
16402/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
16403/// mul | mul_vl -> vwmul(u) | vwmul_su
16404/// shl | shl_vl -> vwsll
16405/// fadd -> vfwadd | vfwadd_w
16406/// fsub -> vfwsub | vfwsub_w
16407/// fmul -> vfwmul
16408/// An object of this class represents an operand of the operation we want to
16409/// combine.
16410/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
16411/// NodeExtensionHelper for `a` and one for `b`.
16412///
16413/// This class abstracts away how the extension is materialized and
16414 /// how its number of users affects the combines.
16415///
16416/// In particular:
16417/// - VWADD_W is conceptually == add(op0, sext(op1))
16418/// - VWADDU_W == add(op0, zext(op1))
16419/// - VWSUB_W == sub(op0, sext(op1))
16420/// - VWSUBU_W == sub(op0, zext(op1))
16421/// - VFWADD_W == fadd(op0, fpext(op1))
16422/// - VFWSUB_W == fsub(op0, fpext(op1))
16423/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
16424/// zext|sext(smaller_value).
16425struct NodeExtensionHelper {
16426 /// Records if this operand behaves like a zero-extended value.
16427 bool SupportsZExt;
16428 /// Records if this operand behaves like a sign-extended value.
16429 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
16430 /// instance, a splat constant (e.g., 3) would support being both sign and
16431 /// zero extended.
16432 bool SupportsSExt;
16433 /// Records if this operand behaves like a floating-point-extended value.
16434 bool SupportsFPExt;
16435 /// Records if this operand is extended from bf16.
16436 bool SupportsBF16Ext;
16437 /// This boolean captures whether we care if this operand would still be
16438 /// around after the folding happens.
16439 bool EnforceOneUse;
16440 /// Original value that this NodeExtensionHelper represents.
16441 SDValue OrigOperand;
16442
16443 /// Get the value feeding the extension or the value itself.
16444 /// E.g., for zext(a), this would return a.
16445 SDValue getSource() const {
16446 switch (OrigOperand.getOpcode()) {
16447 case ISD::ZERO_EXTEND:
16448 case ISD::SIGN_EXTEND:
16449 case RISCVISD::VSEXT_VL:
16450 case RISCVISD::VZEXT_VL:
16451 case RISCVISD::FP_EXTEND_VL:
16452 return OrigOperand.getOperand(i: 0);
16453 default:
16454 return OrigOperand;
16455 }
16456 }
16457
16458 /// Check if this instance represents a splat.
16459 bool isSplat() const {
16460 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
16461 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
16462 }
16463
16464 /// Get the extended opcode.
16465 unsigned getExtOpc(ExtKind SupportsExt) const {
16466 switch (SupportsExt) {
16467 case ExtKind::SExt:
16468 return RISCVISD::VSEXT_VL;
16469 case ExtKind::ZExt:
16470 return RISCVISD::VZEXT_VL;
16471 case ExtKind::FPExt:
16472 case ExtKind::BF16Ext:
16473 return RISCVISD::FP_EXTEND_VL;
16474 }
16475 llvm_unreachable("Unknown ExtKind enum");
16476 }
16477
16478 /// Get or create a value that can feed \p Root with the given extension \p
16479 /// SupportsExt. If \p SupportsExt is std::nullopt, this returns the source of this
16480 /// operand. \see ::getSource().
16481 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
16482 const RISCVSubtarget &Subtarget,
16483 std::optional<ExtKind> SupportsExt) const {
16484 if (!SupportsExt.has_value())
16485 return OrigOperand;
16486
16487 MVT NarrowVT = getNarrowType(Root, SupportsExt: *SupportsExt);
16488
16489 SDValue Source = getSource();
16490 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
16491 if (Source.getValueType() == NarrowVT)
16492 return Source;
16493
16494 unsigned ExtOpc = getExtOpc(SupportsExt: *SupportsExt);
16495
16496 // If we need an extension, we should be changing the type.
16497 SDLoc DL(OrigOperand);
16498 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
16499 switch (OrigOperand.getOpcode()) {
16500 case ISD::ZERO_EXTEND:
16501 case ISD::SIGN_EXTEND:
16502 case RISCVISD::VSEXT_VL:
16503 case RISCVISD::VZEXT_VL:
16504 case RISCVISD::FP_EXTEND_VL:
16505 return DAG.getNode(Opcode: ExtOpc, DL, VT: NarrowVT, N1: Source, N2: Mask, N3: VL);
16506 case ISD::SPLAT_VECTOR:
16507 return DAG.getSplat(VT: NarrowVT, DL, Op: Source.getOperand(i: 0));
16508 case RISCVISD::VMV_V_X_VL:
16509 return DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT: NarrowVT,
16510 N1: DAG.getUNDEF(VT: NarrowVT), N2: Source.getOperand(i: 1), N3: VL);
16511 case RISCVISD::VFMV_V_F_VL:
16512 Source = Source.getOperand(i: 1);
16513 assert(Source.getOpcode() == ISD::FP_EXTEND && "Unexpected source");
16514 Source = Source.getOperand(i: 0);
16515 assert(Source.getValueType() == NarrowVT.getVectorElementType());
16516 return DAG.getNode(Opcode: RISCVISD::VFMV_V_F_VL, DL, VT: NarrowVT,
16517 N1: DAG.getUNDEF(VT: NarrowVT), N2: Source, N3: VL);
16518 default:
16519 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
16520 // and that operand should already have the right NarrowVT so no
16521 // extension should be required at this point.
16522 llvm_unreachable("Unsupported opcode");
16523 }
16524 }
16525
16526 /// Helper function to get the narrow type for \p Root.
16527 /// The narrow type is the type of \p Root where we divided the size of each
16528 /// element by 2. E.g., if Root's type is <2xi16>, the narrow type is <2xi8>.
16529 /// \pre Both the narrow type and the original type should be legal.
16530 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
16531 MVT VT = Root->getSimpleValueType(ResNo: 0);
16532
16533 // Determine the narrow size.
16534 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
16535
16536 MVT EltVT = SupportsExt == ExtKind::BF16Ext ? MVT::bf16
16537 : SupportsExt == ExtKind::FPExt
16538 ? MVT::getFloatingPointVT(BitWidth: NarrowSize)
16539 : MVT::getIntegerVT(BitWidth: NarrowSize);
16540
16541 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
16542 "Trying to extend something we can't represent");
16543 MVT NarrowVT = MVT::getVectorVT(VT: EltVT, EC: VT.getVectorElementCount());
16544 return NarrowVT;
16545 }
16546
16547 /// Get the opcode to materialize:
16548 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
16549 static unsigned getSExtOpcode(unsigned Opcode) {
16550 switch (Opcode) {
16551 case ISD::ADD:
16552 case RISCVISD::ADD_VL:
16553 case RISCVISD::VWADD_W_VL:
16554 case RISCVISD::VWADDU_W_VL:
16555 case ISD::OR:
16556 case RISCVISD::OR_VL:
16557 return RISCVISD::VWADD_VL;
16558 case ISD::SUB:
16559 case RISCVISD::SUB_VL:
16560 case RISCVISD::VWSUB_W_VL:
16561 case RISCVISD::VWSUBU_W_VL:
16562 return RISCVISD::VWSUB_VL;
16563 case ISD::MUL:
16564 case RISCVISD::MUL_VL:
16565 return RISCVISD::VWMUL_VL;
16566 default:
16567 llvm_unreachable("Unexpected opcode");
16568 }
16569 }
16570
16571 /// Get the opcode to materialize:
16572 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
16573 static unsigned getZExtOpcode(unsigned Opcode) {
16574 switch (Opcode) {
16575 case ISD::ADD:
16576 case RISCVISD::ADD_VL:
16577 case RISCVISD::VWADD_W_VL:
16578 case RISCVISD::VWADDU_W_VL:
16579 case ISD::OR:
16580 case RISCVISD::OR_VL:
16581 return RISCVISD::VWADDU_VL;
16582 case ISD::SUB:
16583 case RISCVISD::SUB_VL:
16584 case RISCVISD::VWSUB_W_VL:
16585 case RISCVISD::VWSUBU_W_VL:
16586 return RISCVISD::VWSUBU_VL;
16587 case ISD::MUL:
16588 case RISCVISD::MUL_VL:
16589 return RISCVISD::VWMULU_VL;
16590 case ISD::SHL:
16591 case RISCVISD::SHL_VL:
16592 return RISCVISD::VWSLL_VL;
16593 default:
16594 llvm_unreachable("Unexpected opcode");
16595 }
16596 }
16597
16598 /// Get the opcode to materialize:
16599 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
16600 static unsigned getFPExtOpcode(unsigned Opcode) {
16601 switch (Opcode) {
16602 case RISCVISD::FADD_VL:
16603 case RISCVISD::VFWADD_W_VL:
16604 return RISCVISD::VFWADD_VL;
16605 case RISCVISD::FSUB_VL:
16606 case RISCVISD::VFWSUB_W_VL:
16607 return RISCVISD::VFWSUB_VL;
16608 case RISCVISD::FMUL_VL:
16609 return RISCVISD::VFWMUL_VL;
16610 case RISCVISD::VFMADD_VL:
16611 return RISCVISD::VFWMADD_VL;
16612 case RISCVISD::VFMSUB_VL:
16613 return RISCVISD::VFWMSUB_VL;
16614 case RISCVISD::VFNMADD_VL:
16615 return RISCVISD::VFWNMADD_VL;
16616 case RISCVISD::VFNMSUB_VL:
16617 return RISCVISD::VFWNMSUB_VL;
16618 default:
16619 llvm_unreachable("Unexpected opcode");
16620 }
16621 }
16622
16623 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
16624 /// newOpcode(a, b).
16625 static unsigned getSUOpcode(unsigned Opcode) {
16626 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
16627 "SU is only supported for MUL");
16628 return RISCVISD::VWMULSU_VL;
16629 }
16630
16631 /// Get the opcode to materialize
16632 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
16633 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
16634 switch (Opcode) {
16635 case ISD::ADD:
16636 case RISCVISD::ADD_VL:
16637 case ISD::OR:
16638 case RISCVISD::OR_VL:
16639 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
16640 : RISCVISD::VWADDU_W_VL;
16641 case ISD::SUB:
16642 case RISCVISD::SUB_VL:
16643 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
16644 : RISCVISD::VWSUBU_W_VL;
16645 case RISCVISD::FADD_VL:
16646 return RISCVISD::VFWADD_W_VL;
16647 case RISCVISD::FSUB_VL:
16648 return RISCVISD::VFWSUB_W_VL;
16649 default:
16650 llvm_unreachable("Unexpected opcode");
16651 }
16652 }
16653
16654 using CombineToTry = std::function<std::optional<CombineResult>(
16655 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
16656 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
16657 const RISCVSubtarget &)>;
16658
16659 /// Check if this node needs to be fully folded or extended for all users.
16660 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
16661
16662 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
16663 const RISCVSubtarget &Subtarget) {
16664 unsigned Opc = OrigOperand.getOpcode();
16665 MVT VT = OrigOperand.getSimpleValueType();
16666
16667 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
16668 "Unexpected Opcode");
16669
16670 // The passthru must be undef for tail agnostic.
16671 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(i: 0).isUndef())
16672 return;
16673
16674 // Get the scalar value.
16675 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(i: 0)
16676 : OrigOperand.getOperand(i: 1);
16677
16678 // See if we have enough sign bits or zero bits in the scalar to use a
16679 // widening opcode by splatting to smaller element size.
16680 unsigned EltBits = VT.getScalarSizeInBits();
16681 unsigned ScalarBits = Op.getValueSizeInBits();
16682 // If we're not getting all bits from the element, we need special handling.
16683 if (ScalarBits < EltBits) {
16684 // This should only occur on RV32.
16685 assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 &&
16686 !Subtarget.is64Bit() && "Unexpected splat");
16687 // vmv.v.x sign extends narrow inputs.
16688 SupportsSExt = true;
16689
16690 // If the input is positive, then sign extend is also zero extend.
16691 if (DAG.SignBitIsZero(Op))
16692 SupportsZExt = true;
16693
16694 EnforceOneUse = false;
16695 return;
16696 }
16697
16698 unsigned NarrowSize = EltBits / 2;
16699 // If the narrow type cannot be expressed with a legal VMV,
16700 // this is not a valid candidate.
16701 if (NarrowSize < 8)
16702 return;
16703
16704 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
16705 SupportsSExt = true;
16706
16707 if (DAG.MaskedValueIsZero(Op,
16708 Mask: APInt::getBitsSetFrom(numBits: ScalarBits, loBit: NarrowSize)))
16709 SupportsZExt = true;
16710
16711 EnforceOneUse = false;
16712 }
16713
16714 bool isSupportedFPExtend(MVT NarrowEltVT, const RISCVSubtarget &Subtarget) {
16715 return (NarrowEltVT == MVT::f32 ||
16716 (NarrowEltVT == MVT::f16 && Subtarget.hasVInstructionsF16()));
16717 }
16718
16719 bool isSupportedBF16Extend(MVT NarrowEltVT, const RISCVSubtarget &Subtarget) {
16720 return NarrowEltVT == MVT::bf16 && Subtarget.hasStdExtZvfbfwma();
16721 }
16722
16723 /// Helper method to set the various fields of this struct based on the
16724 /// type of \p Root.
16725 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
16726 const RISCVSubtarget &Subtarget) {
16727 SupportsZExt = false;
16728 SupportsSExt = false;
16729 SupportsFPExt = false;
16730 SupportsBF16Ext = false;
16731 EnforceOneUse = true;
16732 unsigned Opc = OrigOperand.getOpcode();
16733 // For the nodes we handle below, we end up using their inputs directly: see
16734 // getSource(). However since they either don't have a passthru or we check
16735 // that their passthru is undef, we can safely ignore their mask and VL.
16736 switch (Opc) {
16737 case ISD::ZERO_EXTEND:
16738 case ISD::SIGN_EXTEND: {
16739 MVT VT = OrigOperand.getSimpleValueType();
16740 if (!VT.isVector())
16741 break;
16742
16743 SDValue NarrowElt = OrigOperand.getOperand(i: 0);
16744 MVT NarrowVT = NarrowElt.getSimpleValueType();
16745 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
16746 if (NarrowVT.getVectorElementType() == MVT::i1)
16747 break;
16748
16749 SupportsZExt = Opc == ISD::ZERO_EXTEND;
16750 SupportsSExt = Opc == ISD::SIGN_EXTEND;
16751 break;
16752 }
16753 case RISCVISD::VZEXT_VL:
16754 SupportsZExt = true;
16755 break;
16756 case RISCVISD::VSEXT_VL:
16757 SupportsSExt = true;
16758 break;
16759 case RISCVISD::FP_EXTEND_VL: {
16760 MVT NarrowEltVT =
16761 OrigOperand.getOperand(i: 0).getSimpleValueType().getVectorElementType();
16762 if (isSupportedFPExtend(NarrowEltVT, Subtarget))
16763 SupportsFPExt = true;
16764 if (isSupportedBF16Extend(NarrowEltVT, Subtarget))
16765 SupportsBF16Ext = true;
16766
16767 break;
16768 }
16769 case ISD::SPLAT_VECTOR:
16770 case RISCVISD::VMV_V_X_VL:
16771 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
16772 break;
16773 case RISCVISD::VFMV_V_F_VL: {
16774 MVT VT = OrigOperand.getSimpleValueType();
16775
16776 if (!OrigOperand.getOperand(i: 0).isUndef())
16777 break;
16778
16779 SDValue Op = OrigOperand.getOperand(i: 1);
16780 if (Op.getOpcode() != ISD::FP_EXTEND)
16781 break;
16782
16783 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
16784 unsigned ScalarBits = Op.getOperand(i: 0).getValueSizeInBits();
16785 if (NarrowSize != ScalarBits)
16786 break;
16787
16788 if (isSupportedFPExtend(NarrowEltVT: Op.getOperand(i: 0).getSimpleValueType(), Subtarget))
16789 SupportsFPExt = true;
16790 if (isSupportedBF16Extend(NarrowEltVT: Op.getOperand(i: 0).getSimpleValueType(),
16791 Subtarget))
16792 SupportsBF16Ext = true;
16793 break;
16794 }
16795 default:
16796 break;
16797 }
16798 }
16799
16800 /// Check if \p Root supports any extension folding combines.
16801 static bool isSupportedRoot(const SDNode *Root,
16802 const RISCVSubtarget &Subtarget) {
16803 switch (Root->getOpcode()) {
16804 case ISD::ADD:
16805 case ISD::SUB:
16806 case ISD::MUL: {
16807 return Root->getValueType(ResNo: 0).isScalableVector();
16808 }
16809 case ISD::OR: {
16810 return Root->getValueType(ResNo: 0).isScalableVector() &&
16811 Root->getFlags().hasDisjoint();
16812 }
16813 // Vector Widening Integer Add/Sub/Mul Instructions
16814 case RISCVISD::ADD_VL:
16815 case RISCVISD::MUL_VL:
16816 case RISCVISD::VWADD_W_VL:
16817 case RISCVISD::VWADDU_W_VL:
16818 case RISCVISD::SUB_VL:
16819 case RISCVISD::VWSUB_W_VL:
16820 case RISCVISD::VWSUBU_W_VL:
16821 // Vector Widening Floating-Point Add/Sub/Mul Instructions
16822 case RISCVISD::FADD_VL:
16823 case RISCVISD::FSUB_VL:
16824 case RISCVISD::FMUL_VL:
16825 case RISCVISD::VFWADD_W_VL:
16826 case RISCVISD::VFWSUB_W_VL:
16827 return true;
16828 case RISCVISD::OR_VL:
16829 return Root->getFlags().hasDisjoint();
16830 case ISD::SHL:
16831 return Root->getValueType(ResNo: 0).isScalableVector() &&
16832 Subtarget.hasStdExtZvbb();
16833 case RISCVISD::SHL_VL:
16834 return Subtarget.hasStdExtZvbb();
16835 case RISCVISD::VFMADD_VL:
16836 case RISCVISD::VFNMSUB_VL:
16837 case RISCVISD::VFNMADD_VL:
16838 case RISCVISD::VFMSUB_VL:
16839 return true;
16840 default:
16841 return false;
16842 }
16843 }
16844
16845 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
16846 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
16847 const RISCVSubtarget &Subtarget) {
16848 assert(isSupportedRoot(Root, Subtarget) &&
16849 "Trying to build a helper with an "
16850 "unsupported root");
16851 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
16852 assert(DAG.getTargetLoweringInfo().isTypeLegal(Root->getValueType(0)));
16853 OrigOperand = Root->getOperand(Num: OperandIdx);
16854
16855 unsigned Opc = Root->getOpcode();
16856 switch (Opc) {
16857 // We consider
16858 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
16859 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
16860 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
16861 case RISCVISD::VWADD_W_VL:
16862 case RISCVISD::VWADDU_W_VL:
16863 case RISCVISD::VWSUB_W_VL:
16864 case RISCVISD::VWSUBU_W_VL:
16865 case RISCVISD::VFWADD_W_VL:
16866 case RISCVISD::VFWSUB_W_VL:
16867 if (OperandIdx == 1) {
16868 SupportsZExt =
16869 Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
16870 SupportsSExt =
16871 Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWSUB_W_VL;
16872 SupportsFPExt =
16873 Opc == RISCVISD::VFWADD_W_VL || Opc == RISCVISD::VFWSUB_W_VL;
16874 // There's no existing extension here, so we don't have to worry about
16875 // making sure it gets removed.
16876 EnforceOneUse = false;
16877 break;
16878 }
16879 [[fallthrough]];
16880 default:
16881 fillUpExtensionSupport(Root, DAG, Subtarget);
16882 break;
16883 }
16884 }
16885
16886 /// Helper function to get the Mask and VL from \p Root.
16887 static std::pair<SDValue, SDValue>
16888 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
16889 const RISCVSubtarget &Subtarget) {
16890 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
16891 switch (Root->getOpcode()) {
16892 case ISD::ADD:
16893 case ISD::SUB:
16894 case ISD::MUL:
16895 case ISD::OR:
16896 case ISD::SHL: {
16897 SDLoc DL(Root);
16898 MVT VT = Root->getSimpleValueType(ResNo: 0);
16899 return getDefaultScalableVLOps(VecVT: VT, DL, DAG, Subtarget);
16900 }
16901 default:
16902 return std::make_pair(x: Root->getOperand(Num: 3), y: Root->getOperand(Num: 4));
16903 }
16904 }
16905
16906 /// Helper function to check if \p N is commutative with respect to the
16907 /// foldings that are supported by this class.
16908 static bool isCommutative(const SDNode *N) {
16909 switch (N->getOpcode()) {
16910 case ISD::ADD:
16911 case ISD::MUL:
16912 case ISD::OR:
16913 case RISCVISD::ADD_VL:
16914 case RISCVISD::MUL_VL:
16915 case RISCVISD::OR_VL:
16916 case RISCVISD::VWADD_W_VL:
16917 case RISCVISD::VWADDU_W_VL:
16918 case RISCVISD::FADD_VL:
16919 case RISCVISD::FMUL_VL:
16920 case RISCVISD::VFWADD_W_VL:
16921 case RISCVISD::VFMADD_VL:
16922 case RISCVISD::VFNMSUB_VL:
16923 case RISCVISD::VFNMADD_VL:
16924 case RISCVISD::VFMSUB_VL:
16925 return true;
16926 case ISD::SUB:
16927 case RISCVISD::SUB_VL:
16928 case RISCVISD::VWSUB_W_VL:
16929 case RISCVISD::VWSUBU_W_VL:
16930 case RISCVISD::FSUB_VL:
16931 case RISCVISD::VFWSUB_W_VL:
16932 case ISD::SHL:
16933 case RISCVISD::SHL_VL:
16934 return false;
16935 default:
16936 llvm_unreachable("Unexpected opcode");
16937 }
16938 }
16939
16940 /// Get a list of combine to try for folding extensions in \p Root.
16941 /// Note that each returned CombineToTry function doesn't actually modify
16942 /// anything. Instead it produces an optional CombineResult that, if not
16943 /// std::nullopt, needs to be materialized for the combine to be applied.
16944 /// \see CombineResult::materialize.
16945 /// If the related CombineToTry function returns std::nullopt, that means the
16946 /// combine didn't match.
16947 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
16948};
16949
16950/// Helper structure that holds all the necessary information to materialize a
16951/// combine that does some extension folding.
16952struct CombineResult {
16953 /// Opcode to be generated when materializing the combine.
16954 unsigned TargetOpcode;
16955 // No value means no extension is needed.
16956 std::optional<ExtKind> LHSExt;
16957 std::optional<ExtKind> RHSExt;
16958 /// Root of the combine.
16959 SDNode *Root;
16960 /// LHS of the TargetOpcode.
16961 NodeExtensionHelper LHS;
16962 /// RHS of the TargetOpcode.
16963 NodeExtensionHelper RHS;
16964
16965 CombineResult(unsigned TargetOpcode, SDNode *Root,
16966 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
16967 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
16968 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
16969 LHS(LHS), RHS(RHS) {}
16970
16971 /// Return a value that uses TargetOpcode and that can be used to replace
16972 /// Root.
16973 /// The actual replacement is *not* done in that method.
16974 SDValue materialize(SelectionDAG &DAG,
16975 const RISCVSubtarget &Subtarget) const {
16976 SDValue Mask, VL, Passthru;
16977 std::tie(args&: Mask, args&: VL) =
16978 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
16979 switch (Root->getOpcode()) {
16980 default:
16981 Passthru = Root->getOperand(Num: 2);
16982 break;
16983 case ISD::ADD:
16984 case ISD::SUB:
16985 case ISD::MUL:
16986 case ISD::OR:
16987 case ISD::SHL:
16988 Passthru = DAG.getUNDEF(VT: Root->getValueType(ResNo: 0));
16989 break;
16990 }
16991 return DAG.getNode(Opcode: TargetOpcode, DL: SDLoc(Root), VT: Root->getValueType(ResNo: 0),
16992 N1: LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SupportsExt: LHSExt),
16993 N2: RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SupportsExt: RHSExt),
16994 N3: Passthru, N4: Mask, N5: VL);
16995 }
16996};
16997
16998/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
16999/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
17000/// are zext) and LHS and RHS can be folded into Root.
17001/// AllowExtMask defines which form `ext` can take in this pattern.
17002///
17003/// \note If the pattern can match with both zext and sext, the returned
17004/// CombineResult will feature the zext result.
17005///
17006/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17007/// can be used to apply the pattern.
17008static std::optional<CombineResult>
17009canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
17010 const NodeExtensionHelper &RHS,
17011 uint8_t AllowExtMask, SelectionDAG &DAG,
17012 const RISCVSubtarget &Subtarget) {
17013 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
17014 return CombineResult(NodeExtensionHelper::getZExtOpcode(Opcode: Root->getOpcode()),
17015 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
17016 /*RHSExt=*/{ExtKind::ZExt});
17017 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
17018 return CombineResult(NodeExtensionHelper::getSExtOpcode(Opcode: Root->getOpcode()),
17019 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
17020 /*RHSExt=*/{ExtKind::SExt});
17021 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
17022 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Opcode: Root->getOpcode()),
17023 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
17024 /*RHSExt=*/{ExtKind::FPExt});
17025 if ((AllowExtMask & ExtKind::BF16Ext) && LHS.SupportsBF16Ext &&
17026 RHS.SupportsBF16Ext)
17027 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Opcode: Root->getOpcode()),
17028 Root, LHS, /*LHSExt=*/{ExtKind::BF16Ext}, RHS,
17029 /*RHSExt=*/{ExtKind::BF16Ext});
17030 return std::nullopt;
17031}
17032
17033/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
17034/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
17035/// are zext) and LHS and RHS can be folded into Root.
17036///
17037/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17038/// can be used to apply the pattern.
17039static std::optional<CombineResult>
17040canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
17041 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17042 const RISCVSubtarget &Subtarget) {
17043 return canFoldToVWWithSameExtensionImpl(
17044 Root, LHS, RHS, AllowExtMask: ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
17045 Subtarget);
17046}
17047
17048/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
17049///
17050/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17051/// can be used to apply the pattern.
17052static std::optional<CombineResult>
17053canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
17054 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17055 const RISCVSubtarget &Subtarget) {
17056 if (RHS.SupportsFPExt)
17057 return CombineResult(
17058 NodeExtensionHelper::getWOpcode(Opcode: Root->getOpcode(), SupportsExt: ExtKind::FPExt),
17059 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
17060
17061 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
17062 // sext/zext?
17063 // Control this behavior behind an option (AllowSplatInVW_W) for testing
17064 // purposes.
17065 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
17066 return CombineResult(
17067 NodeExtensionHelper::getWOpcode(Opcode: Root->getOpcode(), SupportsExt: ExtKind::ZExt), Root,
17068 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
17069 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
17070 return CombineResult(
17071 NodeExtensionHelper::getWOpcode(Opcode: Root->getOpcode(), SupportsExt: ExtKind::SExt), Root,
17072 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
17073 return std::nullopt;
17074}
17075
17076/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
17077///
17078/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17079/// can be used to apply the pattern.
17080static std::optional<CombineResult>
17081canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17082 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17083 const RISCVSubtarget &Subtarget) {
17084 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, AllowExtMask: ExtKind::SExt, DAG,
17085 Subtarget);
17086}
17087
17088/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
17089///
17090/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17091/// can be used to apply the pattern.
17092static std::optional<CombineResult>
17093canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17094 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17095 const RISCVSubtarget &Subtarget) {
17096 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, AllowExtMask: ExtKind::ZExt, DAG,
17097 Subtarget);
17098}
17099
17100/// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS))
17101///
17102/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17103/// can be used to apply the pattern.
17104static std::optional<CombineResult>
17105canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17106 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17107 const RISCVSubtarget &Subtarget) {
17108 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, AllowExtMask: ExtKind::FPExt, DAG,
17109 Subtarget);
17110}
17111
17112/// Check if \p Root follows a pattern Root(bf16ext(LHS), bf16ext(RHS))
17113///
17114/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17115/// can be used to apply the pattern.
17116static std::optional<CombineResult>
17117canFoldToVWWithBF16EXT(SDNode *Root, const NodeExtensionHelper &LHS,
17118 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17119 const RISCVSubtarget &Subtarget) {
17120 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, AllowExtMask: ExtKind::BF16Ext, DAG,
17121 Subtarget);
17122}
17123
17124/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
17125///
17126/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17127/// can be used to apply the pattern.
17128static std::optional<CombineResult>
17129canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
17130 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17131 const RISCVSubtarget &Subtarget) {
17132
17133 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
17134 return std::nullopt;
17135 return CombineResult(NodeExtensionHelper::getSUOpcode(Opcode: Root->getOpcode()),
17136 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
17137 /*RHSExt=*/{ExtKind::ZExt});
17138}
17139
17140SmallVector<NodeExtensionHelper::CombineToTry>
17141NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
17142 SmallVector<CombineToTry> Strategies;
17143 switch (Root->getOpcode()) {
17144 case ISD::ADD:
17145 case ISD::SUB:
17146 case ISD::OR:
17147 case RISCVISD::ADD_VL:
17148 case RISCVISD::SUB_VL:
17149 case RISCVISD::OR_VL:
17150 case RISCVISD::FADD_VL:
17151 case RISCVISD::FSUB_VL:
17152 // add|sub|fadd|fsub -> vwadd(u)|vwsub(u)|vfwadd|vfwsub
17153 Strategies.push_back(Elt: canFoldToVWWithSameExtension);
17154 // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w|vfwadd_w|vfwsub_w
17155 Strategies.push_back(Elt: canFoldToVW_W);
17156 break;
17157 case RISCVISD::FMUL_VL:
17158 case RISCVISD::VFMADD_VL:
17159 case RISCVISD::VFMSUB_VL:
17160 case RISCVISD::VFNMADD_VL:
17161 case RISCVISD::VFNMSUB_VL:
17162 Strategies.push_back(Elt: canFoldToVWWithSameExtension);
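// BF16 widening is only supported for FMA (it ultimately selects to
// vfwmaccbf16 from Zvfbfwma), so the BF16 strategy is only tried for
// VFMADD_VL.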
17163 if (Root->getOpcode() == RISCVISD::VFMADD_VL)
17164 Strategies.push_back(Elt: canFoldToVWWithBF16EXT);
17165 break;
17166 case ISD::MUL:
17167 case RISCVISD::MUL_VL:
17168 // mul -> vwmul(u)
17169 Strategies.push_back(Elt: canFoldToVWWithSameExtension);
17170 // mul -> vwmulsu
17171 Strategies.push_back(Elt: canFoldToVW_SU);
17172 break;
17173 case ISD::SHL:
17174 case RISCVISD::SHL_VL:
17175 // shl -> vwsll
17176 Strategies.push_back(Elt: canFoldToVWWithZEXT);
17177 break;
17178 case RISCVISD::VWADD_W_VL:
17179 case RISCVISD::VWSUB_W_VL:
17180 // vwadd_w|vwsub_w -> vwadd|vwsub
17181 Strategies.push_back(Elt: canFoldToVWWithSEXT);
17182 break;
17183 case RISCVISD::VWADDU_W_VL:
17184 case RISCVISD::VWSUBU_W_VL:
17185 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
17186 Strategies.push_back(Elt: canFoldToVWWithZEXT);
17187 break;
17188 case RISCVISD::VFWADD_W_VL:
17189 case RISCVISD::VFWSUB_W_VL:
17190 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
17191 Strategies.push_back(Elt: canFoldToVWWithFPEXT);
17192 break;
17193 default:
17194 llvm_unreachable("Unexpected opcode");
17195 }
17196 return Strategies;
17197}
17198} // End anonymous namespace.
17199
17200static SDValue simplifyOp_VL(SDNode *N) {
17201 // TODO: Extend this to other binops using generic identity logic
17202 assert(N->getOpcode() == RISCVISD::ADD_VL);
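// Currently this only handles the identity add_vl X, zero-splat -> X.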
17203 SDValue A = N->getOperand(Num: 0);
17204 SDValue B = N->getOperand(Num: 1);
17205 SDValue Passthru = N->getOperand(Num: 2);
17206 if (!Passthru.isUndef())
17207 // TODO: This could be a vmerge instead.
17208 return SDValue();
17210 if (ISD::isConstantSplatVectorAllZeros(N: B.getNode()))
17211 return A;
17212 // Peek through the fixed-to-scalable conversion (a zero splat inserted into an undef scalable vector).
17213 if (B.getOpcode() == ISD::INSERT_SUBVECTOR && B.getOperand(i: 0).isUndef() &&
17214 ISD::isConstantSplatVectorAllZeros(N: B.getOperand(i: 1).getNode()))
17215 return A;
17216 return SDValue();
17217}
17218
17219/// Combine a binary or FMA operation to its equivalent VW or VW_W form.
17220/// The supported combines are:
17221/// add | add_vl | or disjoint | or_vl disjoint -> vwadd(u) | vwadd(u)_w
17222/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
17223/// mul | mul_vl -> vwmul(u) | vwmul_su
17224/// shl | shl_vl -> vwsll
17225/// fadd_vl -> vfwadd | vfwadd_w
17226/// fsub_vl -> vfwsub | vfwsub_w
17227/// fmul_vl -> vfwmul
17228 /// vwadd(u)_w -> vwadd(u)
17229 /// vwsub(u)_w -> vwsub(u)
17230/// vfwadd_w -> vfwadd
17231/// vfwsub_w -> vfwsub
17232static SDValue combineOp_VLToVWOp_VL(SDNode *N,
17233 TargetLowering::DAGCombinerInfo &DCI,
17234 const RISCVSubtarget &Subtarget) {
17235 SelectionDAG &DAG = DCI.DAG;
17236 if (DCI.isBeforeLegalize())
17237 return SDValue();
17238
17239 if (!NodeExtensionHelper::isSupportedRoot(Root: N, Subtarget))
17240 return SDValue();
17241
17242 SmallVector<SDNode *> Worklist;
17243 SmallSet<SDNode *, 8> Inserted;
17244 Worklist.push_back(Elt: N);
17245 Inserted.insert(Ptr: N);
17246 SmallVector<CombineResult> CombinesToApply;
17247
17248 while (!Worklist.empty()) {
17249 SDNode *Root = Worklist.pop_back_val();
17250
17251 NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
17252 NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
17253 auto AppendUsersIfNeeded = [&Worklist, &Subtarget,
17254 &Inserted](const NodeExtensionHelper &Op) {
17255 if (Op.needToPromoteOtherUsers()) {
17256 for (SDUse &Use : Op.OrigOperand->uses()) {
17257 SDNode *TheUser = Use.getUser();
17258 if (!NodeExtensionHelper::isSupportedRoot(Root: TheUser, Subtarget))
17259 return false;
17260 // We only support the first 2 operands of FMA.
17261 if (Use.getOperandNo() >= 2)
17262 return false;
17263 if (Inserted.insert(Ptr: TheUser).second)
17264 Worklist.push_back(Elt: TheUser);
17265 }
17266 }
17267 return true;
17268 };
17269
17270 // Control the compile time by limiting the number of nodes we look at in
17271 // total.
17272 if (Inserted.size() > ExtensionMaxWebSize)
17273 return SDValue();
17274
17275 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
17276 NodeExtensionHelper::getSupportedFoldings(Root);
17277
17278 assert(!FoldingStrategies.empty() && "Nothing to be folded");
17279 bool Matched = false;
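    // For commutative roots, make a second attempt with LHS and RHS swapped
    // (see the std::swap at the end of the loop body).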
17280 for (int Attempt = 0;
17281 (Attempt != 1 + NodeExtensionHelper::isCommutative(N: Root)) && !Matched;
17282 ++Attempt) {
17283
17284 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
17285 FoldingStrategies) {
17286 std::optional<CombineResult> Res =
17287 FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
17288 if (Res) {
17289 Matched = true;
17290 CombinesToApply.push_back(Elt: *Res);
17291 // All the inputs that are extended need to be folded, otherwise
17292 // we would be left with both the old input (since it may still be
17293 // used) and the new one.
17294 if (Res->LHSExt.has_value())
17295 if (!AppendUsersIfNeeded(LHS))
17296 return SDValue();
17297 if (Res->RHSExt.has_value())
17298 if (!AppendUsersIfNeeded(RHS))
17299 return SDValue();
17300 break;
17301 }
17302 }
17303 std::swap(a&: LHS, b&: RHS);
17304 }
17305 // Right now we do an all or nothing approach.
17306 if (!Matched)
17307 return SDValue();
17308 }
17309 // Store the value for the replacement of the input node separately.
17310 SDValue InputRootReplacement;
17311 // We do the RAUW after we materialize all the combines, because some replaced
17312 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
17313 // some of these nodes may appear in the NodeExtensionHelpers of some of the
17314 // yet-to-be-visited CombinesToApply roots.
17315 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
17316 ValuesToReplace.reserve(N: CombinesToApply.size());
17317 for (CombineResult Res : CombinesToApply) {
17318 SDValue NewValue = Res.materialize(DAG, Subtarget);
17319 if (!InputRootReplacement) {
17320 assert(Res.Root == N &&
17321 "First element is expected to be the current node");
17322 InputRootReplacement = NewValue;
17323 } else {
17324 ValuesToReplace.emplace_back(Args: SDValue(Res.Root, 0), Args&: NewValue);
17325 }
17326 }
17327 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
17328 DAG.ReplaceAllUsesOfValueWith(From: OldNewValues.first, To: OldNewValues.second);
17329 DCI.AddToWorklist(N: OldNewValues.second.getNode());
17330 }
17331 return InputRootReplacement;
17332}
17333
17334// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
17335// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
17336// y will be the Passthru and cond will be the Mask.
17337static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
17338 unsigned Opc = N->getOpcode();
17339 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
17340 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
17341
17342 SDValue Y = N->getOperand(Num: 0);
17343 SDValue MergeOp = N->getOperand(Num: 1);
17344 unsigned MergeOpc = MergeOp.getOpcode();
17345
17346 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
17347 return SDValue();
17348
17349 SDValue X = MergeOp->getOperand(Num: 1);
17350
17351 if (!MergeOp.hasOneUse())
17352 return SDValue();
17353
17354 // Passthru should be undef
17355 SDValue Passthru = N->getOperand(Num: 2);
17356 if (!Passthru.isUndef())
17357 return SDValue();
17358
17359 // Mask should be all ones
17360 SDValue Mask = N->getOperand(Num: 3);
17361 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
17362 return SDValue();
17363
17364 // False value of MergeOp should be all zeros
17365 SDValue Z = MergeOp->getOperand(Num: 2);
17366
17367 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
17368 (isNullOrNullSplat(V: Z.getOperand(i: 0)) || Z.getOperand(i: 0).isUndef()))
17369 Z = Z.getOperand(i: 1);
17370
17371 if (!ISD::isConstantSplatVectorAllZeros(N: Z.getNode()))
17372 return SDValue();
17373
17374 return DAG.getNode(Opcode: Opc, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
17375 Ops: {Y, X, Y, MergeOp->getOperand(Num: 0), N->getOperand(Num: 4)},
17376 Flags: N->getFlags());
17377}
17378
17379static SDValue performVWADDSUBW_VLCombine(SDNode *N,
17380 TargetLowering::DAGCombinerInfo &DCI,
17381 const RISCVSubtarget &Subtarget) {
17382 [[maybe_unused]] unsigned Opc = N->getOpcode();
17383 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
17384 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
17385
17386 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17387 return V;
17388
17389 return combineVWADDSUBWSelect(N, DAG&: DCI.DAG);
17390}
17391
17392// Helper function for performMemPairCombine.
17393// Try to combine the memory loads/stores LSNode1 and LSNode2
17394// into a single memory pair operation.
17395static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
17396 LSBaseSDNode *LSNode2, SDValue BasePtr,
17397 uint64_t Imm) {
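  // Make sure neither node is a predecessor of the other; merging them into a
  // single paired operation would otherwise create a cycle in the DAG.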
17398 SmallPtrSet<const SDNode *, 32> Visited;
17399 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
17400
17401 if (SDNode::hasPredecessorHelper(N: LSNode1, Visited, Worklist) ||
17402 SDNode::hasPredecessorHelper(N: LSNode2, Visited, Worklist))
17403 return SDValue();
17404
17405 MachineFunction &MF = DAG.getMachineFunction();
17406 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
17407
17408 // The new operation has twice the width.
17409 MVT XLenVT = Subtarget.getXLenVT();
17410 EVT MemVT = LSNode1->getMemoryVT();
17411 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
17412 MachineMemOperand *MMO = LSNode1->getMemOperand();
17413 MachineMemOperand *NewMMO = MF.getMachineMemOperand(
17414 MMO, PtrInfo: MMO->getPointerInfo(), Size: MemVT == MVT::i32 ? 8 : 16);
17415
17416 if (LSNode1->getOpcode() == ISD::LOAD) {
17417 auto Ext = cast<LoadSDNode>(Val: LSNode1)->getExtensionType();
17418 unsigned Opcode;
17419 if (MemVT == MVT::i32)
17420 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
17421 else
17422 Opcode = RISCVISD::TH_LDD;
17423
17424 SDValue Res = DAG.getMemIntrinsicNode(
17425 Opcode, dl: SDLoc(LSNode1), VTList: DAG.getVTList(VTs: {XLenVT, XLenVT, MVT::Other}),
17426 Ops: {LSNode1->getChain(), BasePtr,
17427 DAG.getConstant(Val: Imm, DL: SDLoc(LSNode1), VT: XLenVT)},
17428 MemVT: NewMemVT, MMO: NewMMO);
17429
17430 SDValue Node1 =
17431 DAG.getMergeValues(Ops: {Res.getValue(R: 0), Res.getValue(R: 2)}, dl: SDLoc(LSNode1));
17432 SDValue Node2 =
17433 DAG.getMergeValues(Ops: {Res.getValue(R: 1), Res.getValue(R: 2)}, dl: SDLoc(LSNode2));
17434
17435 DAG.ReplaceAllUsesWith(From: LSNode2, To: Node2.getNode());
17436 return Node1;
17437 } else {
17438 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
17439
17440 SDValue Res = DAG.getMemIntrinsicNode(
17441 Opcode, dl: SDLoc(LSNode1), VTList: DAG.getVTList(VT: MVT::Other),
17442 Ops: {LSNode1->getChain(), LSNode1->getOperand(Num: 1), LSNode2->getOperand(Num: 1),
17443 BasePtr, DAG.getConstant(Val: Imm, DL: SDLoc(LSNode1), VT: XLenVT)},
17444 MemVT: NewMemVT, MMO: NewMMO);
17445
17446 DAG.ReplaceAllUsesWith(From: LSNode2, To: Res.getNode());
17447 return Res;
17448 }
17449}
17450
17451// Try to combine two adjacent loads/stores to a single pair instruction from
17452// the XTHeadMemPair vendor extension.
17453static SDValue performMemPairCombine(SDNode *N,
17454 TargetLowering::DAGCombinerInfo &DCI) {
17455 SelectionDAG &DAG = DCI.DAG;
17456 MachineFunction &MF = DAG.getMachineFunction();
17457 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
17458
17459 // Target does not support load/store pair.
17460 if (!Subtarget.hasVendorXTHeadMemPair())
17461 return SDValue();
17462
17463 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(Val: N);
17464 EVT MemVT = LSNode1->getMemoryVT();
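  // The pointer operand is operand 1 for loads and operand 2 for stores.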
17465 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
17466
17467 // No volatile, indexed or atomic loads/stores.
17468 if (!LSNode1->isSimple() || LSNode1->isIndexed())
17469 return SDValue();
17470
17471 // Function to get a base + constant representation from a memory value.
17472 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
17473 if (Ptr->getOpcode() == ISD::ADD)
17474 if (auto *C1 = dyn_cast<ConstantSDNode>(Val: Ptr->getOperand(Num: 1)))
17475 return {Ptr->getOperand(Num: 0), C1->getZExtValue()};
17476 return {Ptr, 0};
17477 };
17478
17479 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(Num: OpNum));
17480
17481 SDValue Chain = N->getOperand(Num: 0);
17482 for (SDUse &Use : Chain->uses()) {
17483 if (Use.getUser() != N && Use.getResNo() == 0 &&
17484 Use.getUser()->getOpcode() == N->getOpcode()) {
17485 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Val: Use.getUser());
17486
17487 // No volatile, indexed or atomic loads/stores.
17488 if (!LSNode2->isSimple() || LSNode2->isIndexed())
17489 continue;
17490
17491 // Check if LSNode1 and LSNode2 have the same type and extension.
17492 if (LSNode1->getOpcode() == ISD::LOAD)
17493 if (cast<LoadSDNode>(Val: LSNode2)->getExtensionType() !=
17494 cast<LoadSDNode>(Val: LSNode1)->getExtensionType())
17495 continue;
17496
17497 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
17498 continue;
17499
17500 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(Num: OpNum));
17501
17502 // Check if the base pointer is the same for both instructions.
17503 if (Base1 != Base2)
17504 continue;
17505
17506 // Check if the offsets match the XTHeadMemPair encoding constraints.
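      // Per the isShiftedUInt checks below, the first offset must be a 2-bit
      // index scaled by the pair size: 0/8/16/24 for i32 pairs and
      // 0/16/32/48 for i64 pairs.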
17507 bool Valid = false;
17508 if (MemVT == MVT::i32) {
17509 // Check for adjacent i32 values and a 2-bit index.
17510 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(x: Offset1))
17511 Valid = true;
17512 } else if (MemVT == MVT::i64) {
17513 // Check for adjacent i64 values and a 2-bit index.
17514 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(x: Offset1))
17515 Valid = true;
17516 }
17517
17518 if (!Valid)
17519 continue;
17520
17521 // Try to combine.
17522 if (SDValue Res =
17523 tryMemPairCombine(DAG, LSNode1, LSNode2, BasePtr: Base1, Imm: Offset1))
17524 return Res;
17525 }
17526 }
17527
17528 return SDValue();
17529}
17530
17531// Fold
17532// (fp_to_int (froundeven X)) -> fcvt X, rne
17533// (fp_to_int (ftrunc X)) -> fcvt X, rtz
17534// (fp_to_int (ffloor X)) -> fcvt X, rdn
17535// (fp_to_int (fceil X)) -> fcvt X, rup
17536// (fp_to_int (fround X)) -> fcvt X, rmm
17537// (fp_to_int (frint X)) -> fcvt X
17538static SDValue performFP_TO_INTCombine(SDNode *N,
17539 TargetLowering::DAGCombinerInfo &DCI,
17540 const RISCVSubtarget &Subtarget) {
17541 SelectionDAG &DAG = DCI.DAG;
17542 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17543 MVT XLenVT = Subtarget.getXLenVT();
17544
17545 SDValue Src = N->getOperand(Num: 0);
17546
17547 // Don't do this for strict-fp Src.
17548 if (Src->isStrictFPOpcode())
17549 return SDValue();
17550
17551 // Ensure the FP type is legal.
17552 if (!TLI.isTypeLegal(VT: Src.getValueType()))
17553 return SDValue();
17554
17555 // Don't do this for f16 with Zfhmin and not Zfh.
17556 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
17557 return SDValue();
17558
17559 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Opc: Src.getOpcode());
17560 // If the result is invalid, we didn't find a foldable instruction.
17561 if (FRM == RISCVFPRndMode::Invalid)
17562 return SDValue();
17563
17564 SDLoc DL(N);
17565 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
17566 EVT VT = N->getValueType(ResNo: 0);
17567
17568 if (VT.isVector() && TLI.isTypeLegal(VT)) {
17569 MVT SrcVT = Src.getSimpleValueType();
17570 MVT SrcContainerVT = SrcVT;
17571 MVT ContainerVT = VT.getSimpleVT();
17572 SDValue XVal = Src.getOperand(i: 0);
17573
17574 // For widening and narrowing conversions, we just combine this into a
17575 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
17576 // end up getting lowered to their appropriate pseudo instructions based on
17577 // their operand types.
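    // The widening/narrowing conversions only go between SEW and 2*SEW, so
    // bail out if the element size changes by more than a factor of two.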
17578 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
17579 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
17580 return SDValue();
17581
17582 // Make fixed-length vectors scalable first
17583 if (SrcVT.isFixedLengthVector()) {
17584 SrcContainerVT = getContainerForFixedLengthVector(DAG, VT: SrcVT, Subtarget);
17585 XVal = convertToScalableVector(VT: SrcContainerVT, V: XVal, DAG, Subtarget);
17586 ContainerVT =
17587 getContainerForFixedLengthVector(DAG, VT: ContainerVT, Subtarget);
17588 }
17589
17590 auto [Mask, VL] =
17591 getDefaultVLOps(VecVT: SrcVT, ContainerVT: SrcContainerVT, DL, DAG, Subtarget);
17592
17593 SDValue FpToInt;
17594 if (FRM == RISCVFPRndMode::RTZ) {
17595 // Use the dedicated trunc static rounding mode if we're truncating so we
17596 // don't need to generate calls to fsrmi/fsrm
17597 unsigned Opc =
17598 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
17599 FpToInt = DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: XVal, N2: Mask, N3: VL);
17600 } else {
17601 unsigned Opc =
17602 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
17603 FpToInt = DAG.getNode(Opcode: Opc, DL, VT: ContainerVT, N1: XVal, N2: Mask,
17604 N3: DAG.getTargetConstant(Val: FRM, DL, VT: XLenVT), N4: VL);
17605 }
17606
17607 // If converted from fixed-length to scalable, convert back
17608 if (VT.isFixedLengthVector())
17609 FpToInt = convertFromScalableVector(VT, V: FpToInt, DAG, Subtarget);
17610
17611 return FpToInt;
17612 }
17613
17614 // Only handle XLen or i32 types. Other types narrower than XLen will
17615 // eventually be legalized to XLenVT.
17616 if (VT != MVT::i32 && VT != XLenVT)
17617 return SDValue();
17618
17619 unsigned Opc;
17620 if (VT == XLenVT)
17621 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
17622 else
17623 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
17624
17625 SDValue FpToInt = DAG.getNode(Opcode: Opc, DL, VT: XLenVT, N1: Src.getOperand(i: 0),
17626 N2: DAG.getTargetConstant(Val: FRM, DL, VT: XLenVT));
17627 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: FpToInt);
17628}
17629
17630// Fold
17631// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
17632// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
17633// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
17634// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
17635// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
17636// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
17637static SDValue performFP_TO_INT_SATCombine(SDNode *N,
17638 TargetLowering::DAGCombinerInfo &DCI,
17639 const RISCVSubtarget &Subtarget) {
17640 SelectionDAG &DAG = DCI.DAG;
17641 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17642 MVT XLenVT = Subtarget.getXLenVT();
17643
17644 // Only handle XLen types. Other types narrower than XLen will eventually be
17645 // legalized to XLenVT.
17646 EVT DstVT = N->getValueType(ResNo: 0);
17647 if (DstVT != XLenVT)
17648 return SDValue();
17649
17650 SDValue Src = N->getOperand(Num: 0);
17651
17652 // Don't do this for strict-fp Src.
17653 if (Src->isStrictFPOpcode())
17654 return SDValue();
17655
17656 // Ensure the FP type is also legal.
17657 if (!TLI.isTypeLegal(VT: Src.getValueType()))
17658 return SDValue();
17659
17660 // Don't do this for f16 with Zfhmin and not Zfh.
17661 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
17662 return SDValue();
17663
17664 EVT SatVT = cast<VTSDNode>(Val: N->getOperand(Num: 1))->getVT();
17665
17666 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Opc: Src.getOpcode());
17667 if (FRM == RISCVFPRndMode::Invalid)
17668 return SDValue();
17669
17670 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
17671
17672 unsigned Opc;
17673 if (SatVT == DstVT)
17674 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
17675 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
17676 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
17677 else
17678 return SDValue();
17679 // FIXME: Support other SatVTs by clamping before or after the conversion.
17680
17681 Src = Src.getOperand(i: 0);
17682
17683 SDLoc DL(N);
17684 SDValue FpToInt = DAG.getNode(Opcode: Opc, DL, VT: XLenVT, N1: Src,
17685 N2: DAG.getTargetConstant(Val: FRM, DL, VT: XLenVT));
17686
17687 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
17688 // extend.
17689 if (Opc == RISCVISD::FCVT_WU_RV64)
17690 FpToInt = DAG.getZeroExtendInReg(Op: FpToInt, DL, VT: MVT::i32);
17691
17692 // RISC-V FP-to-int conversions saturate to the destination register size, but
17693 // don't produce 0 for nan.
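  // (Src, Src, SETUO) is true exactly when Src is NaN, so we select 0 in that
  // case as fp_to_sint_sat/fp_to_uint_sat require.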
17694 SDValue ZeroInt = DAG.getConstant(Val: 0, DL, VT: DstVT);
17695 return DAG.getSelectCC(DL, LHS: Src, RHS: Src, True: ZeroInt, False: FpToInt, Cond: ISD::CondCode::SETUO);
17696}
17697
17698// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
17699// smaller than XLenVT.
17700static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
17701 const RISCVSubtarget &Subtarget) {
17702 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
17703
17704 SDValue Src = N->getOperand(Num: 0);
17705 if (Src.getOpcode() != ISD::BSWAP)
17706 return SDValue();
17707
17708 EVT VT = N->getValueType(ResNo: 0);
17709 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
17710 !llvm::has_single_bit<uint32_t>(Value: VT.getSizeInBits()))
17711 return SDValue();
17712
17713 SDLoc DL(N);
17714 return DAG.getNode(Opcode: RISCVISD::BREV8, DL, VT, Operand: Src.getOperand(i: 0));
17715}
17716
17717static SDValue performVP_REVERSECombine(SDNode *N, SelectionDAG &DAG,
17718 const RISCVSubtarget &Subtarget) {
17719 // Fold:
17720 // vp.reverse(vp.load(ADDR, MASK)) -> vp.strided.load(ADDR, -1, MASK)
17721
17722 // Check if its first operand is a vp.load.
17723 auto *VPLoad = dyn_cast<VPLoadSDNode>(Val: N->getOperand(Num: 0));
17724 if (!VPLoad)
17725 return SDValue();
17726
17727 EVT LoadVT = VPLoad->getValueType(ResNo: 0);
17728 // We do not have a strided_load version for masks, and the evl of vp.reverse
17729 // and vp.load should always be the same.
17730 if (!LoadVT.getVectorElementType().isByteSized() ||
17731 N->getOperand(Num: 2) != VPLoad->getVectorLength() ||
17732 !N->getOperand(Num: 0).hasOneUse())
17733 return SDValue();
17734
17735 // Check if the mask of the outer vp.reverse is all 1's.
17736 if (!isOneOrOneSplat(V: N->getOperand(Num: 1)))
17737 return SDValue();
17738
17739 SDValue LoadMask = VPLoad->getMask();
17740 // If Mask is all ones, then load is unmasked and can be reversed.
17741 if (!isOneOrOneSplat(V: LoadMask)) {
17742 // If the mask is not all ones, we can reverse the load if the mask was also
17743 // reversed by an unmasked vp.reverse with the same EVL.
17744 if (LoadMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
17745 !isOneOrOneSplat(V: LoadMask.getOperand(i: 1)) ||
17746 LoadMask.getOperand(i: 2) != VPLoad->getVectorLength())
17747 return SDValue();
17748 LoadMask = LoadMask.getOperand(i: 0);
17749 }
17750
17751 // Base = LoadAddr + (NumElem - 1) * ElemWidthByte
17752 SDLoc DL(N);
17753 MVT XLenVT = Subtarget.getXLenVT();
17754 SDValue NumElem = VPLoad->getVectorLength();
17755 uint64_t ElemWidthByte = VPLoad->getValueType(ResNo: 0).getScalarSizeInBits() / 8;
17756
17757 SDValue Temp1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: XLenVT, N1: NumElem,
17758 N2: DAG.getConstant(Val: 1, DL, VT: XLenVT));
17759 SDValue Temp2 = DAG.getNode(Opcode: ISD::MUL, DL, VT: XLenVT, N1: Temp1,
17760 N2: DAG.getConstant(Val: ElemWidthByte, DL, VT: XLenVT));
17761 SDValue Base = DAG.getNode(Opcode: ISD::ADD, DL, VT: XLenVT, N1: VPLoad->getBasePtr(), N2: Temp2);
17762 SDValue Stride = DAG.getSignedConstant(Val: -ElemWidthByte, DL, VT: XLenVT);
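  // Loading from the address of the last element with a negative stride yields
  // the elements in reverse order, so no separate vp.reverse is needed.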
17763
17764 MachineFunction &MF = DAG.getMachineFunction();
17765 MachinePointerInfo PtrInfo(VPLoad->getAddressSpace());
17766 MachineMemOperand *MMO = MF.getMachineMemOperand(
17767 PtrInfo, F: VPLoad->getMemOperand()->getFlags(),
17768 Size: LocationSize::beforeOrAfterPointer(), BaseAlignment: VPLoad->getAlign());
17769
17770 SDValue Ret = DAG.getStridedLoadVP(
17771 VT: LoadVT, DL, Chain: VPLoad->getChain(), Ptr: Base, Stride, Mask: LoadMask,
17772 EVL: VPLoad->getVectorLength(), MMO, IsExpanding: VPLoad->isExpandingLoad());
17773
17774 DAG.ReplaceAllUsesOfValueWith(From: SDValue(VPLoad, 1), To: Ret.getValue(R: 1));
17775
17776 return Ret;
17777}
17778
17779static SDValue performVP_STORECombine(SDNode *N, SelectionDAG &DAG,
17780 const RISCVSubtarget &Subtarget) {
17781 // Fold:
17782 // vp.store(vp.reverse(VAL), ADDR, MASK) -> vp.strided.store(VAL, NEW_ADDR,
17783 // -1, MASK)
17784 auto *VPStore = cast<VPStoreSDNode>(Val: N);
17785
17786 if (VPStore->getValue().getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE)
17787 return SDValue();
17788
17789 SDValue VPReverse = VPStore->getValue();
17790 EVT ReverseVT = VPReverse->getValueType(ResNo: 0);
17791
17792 // We do not have a strided_store version for masks, and the evl of vp.reverse
17793 // and vp.store should always be the same.
17794 if (!ReverseVT.getVectorElementType().isByteSized() ||
17795 VPStore->getVectorLength() != VPReverse.getOperand(i: 2) ||
17796 !VPReverse.hasOneUse())
17797 return SDValue();
17798
17799 SDValue StoreMask = VPStore->getMask();
17800 // If Mask is all ones, then the store is unmasked and the reverse can be folded.
17801 if (!isOneOrOneSplat(V: StoreMask)) {
17802 // If the mask is not all ones, we can reverse the store if the mask was
17803 // also reversed by an unmasked vp.reverse with the same EVL.
17804 if (StoreMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
17805 !isOneOrOneSplat(V: StoreMask.getOperand(i: 1)) ||
17806 StoreMask.getOperand(i: 2) != VPStore->getVectorLength())
17807 return SDValue();
17808 StoreMask = StoreMask.getOperand(i: 0);
17809 }
17810
17811 // Base = StoreAddr + (NumElem - 1) * ElemWidthByte
17812 SDLoc DL(N);
17813 MVT XLenVT = Subtarget.getXLenVT();
17814 SDValue NumElem = VPStore->getVectorLength();
17815 uint64_t ElemWidthByte = VPReverse.getValueType().getScalarSizeInBits() / 8;
17816
17817 SDValue Temp1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: XLenVT, N1: NumElem,
17818 N2: DAG.getConstant(Val: 1, DL, VT: XLenVT));
17819 SDValue Temp2 = DAG.getNode(Opcode: ISD::MUL, DL, VT: XLenVT, N1: Temp1,
17820 N2: DAG.getConstant(Val: ElemWidthByte, DL, VT: XLenVT));
17821 SDValue Base =
17822 DAG.getNode(Opcode: ISD::ADD, DL, VT: XLenVT, N1: VPStore->getBasePtr(), N2: Temp2);
17823 SDValue Stride = DAG.getSignedConstant(Val: -ElemWidthByte, DL, VT: XLenVT);
17824
17825 MachineFunction &MF = DAG.getMachineFunction();
17826 MachinePointerInfo PtrInfo(VPStore->getAddressSpace());
17827 MachineMemOperand *MMO = MF.getMachineMemOperand(
17828 PtrInfo, F: VPStore->getMemOperand()->getFlags(),
17829 Size: LocationSize::beforeOrAfterPointer(), BaseAlignment: VPStore->getAlign());
17830
17831 return DAG.getStridedStoreVP(
17832 Chain: VPStore->getChain(), DL, Val: VPReverse.getOperand(i: 0), Ptr: Base,
17833 Offset: VPStore->getOffset(), Stride, Mask: StoreMask, EVL: VPStore->getVectorLength(),
17834 MemVT: VPStore->getMemoryVT(), MMO, AM: VPStore->getAddressingMode(),
17835 IsTruncating: VPStore->isTruncatingStore(), IsCompressing: VPStore->isCompressingStore());
17836}
17837
17838// Peephole avgceil pattern.
17839// %1 = zext <N x i8> %a to <N x i32>
17840// %2 = zext <N x i8> %b to <N x i32>
17841// %3 = add nuw nsw <N x i32> %1, splat (i32 1)
17842// %4 = add nuw nsw <N x i32> %3, %2
17843// %5 = lshr <N x i32> %4, splat (i32 1)
17844// %6 = trunc <N x i32> %5 to <N x i8>
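// i.e. %6 computes trunc(((zext %a) + (zext %b) + 1) >> 1), the unsigned
// ceiling average of %a and %b, which maps to vaaddu with RNU rounding.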
17845static SDValue performVP_TRUNCATECombine(SDNode *N, SelectionDAG &DAG,
17846 const RISCVSubtarget &Subtarget) {
17847 EVT VT = N->getValueType(ResNo: 0);
17848
17849 // Ignore fixed vectors.
17850 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17851 if (!VT.isScalableVector() || !TLI.isTypeLegal(VT))
17852 return SDValue();
17853
17854 SDValue In = N->getOperand(Num: 0);
17855 SDValue Mask = N->getOperand(Num: 1);
17856 SDValue VL = N->getOperand(Num: 2);
17857
17858 // Input should be a vp_srl with same mask and VL.
17859 if (In.getOpcode() != ISD::VP_SRL || In.getOperand(i: 2) != Mask ||
17860 In.getOperand(i: 3) != VL)
17861 return SDValue();
17862
17863 // Shift amount should be 1.
17864 if (!isOneOrOneSplat(V: In.getOperand(i: 1)))
17865 return SDValue();
17866
17867 // Shifted value should be a vp_add with same mask and VL.
17868 SDValue LHS = In.getOperand(i: 0);
17869 if (LHS.getOpcode() != ISD::VP_ADD || LHS.getOperand(i: 2) != Mask ||
17870 LHS.getOperand(i: 3) != VL)
17871 return SDValue();
17872
17873 SDValue Operands[3];
17874
17875 // Matches another VP_ADD with same VL and Mask.
17876 auto FindAdd = [&](SDValue V, SDValue Other) {
17877 if (V.getOpcode() != ISD::VP_ADD || V.getOperand(i: 2) != Mask ||
17878 V.getOperand(i: 3) != VL)
17879 return false;
17880
17881 Operands[0] = Other;
17882 Operands[1] = V.getOperand(i: 1);
17883 Operands[2] = V.getOperand(i: 0);
17884 return true;
17885 };
17886
17887 // We need to find another VP_ADD in one of the operands.
17888 SDValue LHS0 = LHS.getOperand(i: 0);
17889 SDValue LHS1 = LHS.getOperand(i: 1);
17890 if (!FindAdd(LHS0, LHS1) && !FindAdd(LHS1, LHS0))
17891 return SDValue();
17892
17893 // Now we have three operands of two additions. Check that one of them is a
17894 // constant vector with ones.
17895 auto I = llvm::find_if(Range&: Operands,
17896 P: [](const SDValue &Op) { return isOneOrOneSplat(V: Op); });
17897 if (I == std::end(arr&: Operands))
17898 return SDValue();
17899 // We found a vector with ones; move it to the end of the Operands array.
17900 std::swap(a&: *I, b&: Operands[2]);
17901
17902 // Make sure the other 2 operands can be promoted from the result type.
17903 for (SDValue Op : drop_end(RangeOrContainer&: Operands)) {
17904 if (Op.getOpcode() != ISD::VP_ZERO_EXTEND || Op.getOperand(i: 1) != Mask ||
17905 Op.getOperand(i: 2) != VL)
17906 return SDValue();
17907 // Input must be the same size or smaller than our result.
17908 if (Op.getOperand(i: 0).getScalarValueSizeInBits() > VT.getScalarSizeInBits())
17909 return SDValue();
17910 }
17911
17912 // Pattern is detected.
17913 // Rebuild the zero extends in case the inputs are smaller than our result.
17914 SDValue NewOp0 = DAG.getNode(Opcode: ISD::VP_ZERO_EXTEND, DL: SDLoc(Operands[0]), VT,
17915 N1: Operands[0].getOperand(i: 0), N2: Mask, N3: VL);
17916 SDValue NewOp1 = DAG.getNode(Opcode: ISD::VP_ZERO_EXTEND, DL: SDLoc(Operands[1]), VT,
17917 N1: Operands[1].getOperand(i: 0), N2: Mask, N3: VL);
17918 // Build an AVGCEILU_VL which will be selected as a VAADDU with RNU rounding
17919 // mode.
17920 SDLoc DL(N);
17921 return DAG.getNode(Opcode: RISCVISD::AVGCEILU_VL, DL, VT,
17922 Ops: {NewOp0, NewOp1, DAG.getUNDEF(VT), Mask, VL});
17923}
17924
17925// Convert from one FMA opcode to another based on whether we are negating the
17926// multiply result and/or the accumulator.
17927// NOTE: Only supports RVV operations with VL.
17928static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
17929 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
17930 if (NegMul) {
17931 // clang-format off
17932 switch (Opcode) {
17933 default: llvm_unreachable("Unexpected opcode");
17934 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
17935 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
17936 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
17937 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
17938 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
17939 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
17940 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
17941 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
17942 }
17943 // clang-format on
17944 }
17945
17946 // Negating the accumulator changes ADD<->SUB.
17947 if (NegAcc) {
17948 // clang-format off
17949 switch (Opcode) {
17950 default: llvm_unreachable("Unexpected opcode");
17951 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
17952 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
17953 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
17954 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
17955 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
17956 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
17957 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
17958 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
17959 }
17960 // clang-format on
17961 }
17962
17963 return Opcode;
17964}
17965
17966static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
17967 // Fold FNEG_VL into FMA opcodes.
17968 // The first operand of strict-fp is chain.
17969 bool IsStrict =
17970 DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(Opcode: N->getOpcode());
17971 unsigned Offset = IsStrict ? 1 : 0;
17972 SDValue A = N->getOperand(Num: 0 + Offset);
17973 SDValue B = N->getOperand(Num: 1 + Offset);
17974 SDValue C = N->getOperand(Num: 2 + Offset);
17975 SDValue Mask = N->getOperand(Num: 3 + Offset);
17976 SDValue VL = N->getOperand(Num: 4 + Offset);
17977
17978 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
17979 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(i: 1) == Mask &&
17980 V.getOperand(i: 2) == VL) {
17981 // Return the negated input.
17982 V = V.getOperand(i: 0);
17983 return true;
17984 }
17985
17986 return false;
17987 };
17988
17989 bool NegA = invertIfNegative(A);
17990 bool NegB = invertIfNegative(B);
17991 bool NegC = invertIfNegative(C);
17992
17993 // If no operands are negated, we're done.
17994 if (!NegA && !NegB && !NegC)
17995 return SDValue();
17996
17997 unsigned NewOpcode = negateFMAOpcode(Opcode: N->getOpcode(), NegMul: NegA != NegB, NegAcc: NegC);
17998 if (IsStrict)
17999 return DAG.getNode(Opcode: NewOpcode, DL: SDLoc(N), VTList: N->getVTList(),
18000 Ops: {N->getOperand(Num: 0), A, B, C, Mask, VL});
18001 return DAG.getNode(Opcode: NewOpcode, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), N1: A, N2: B, N3: C, N4: Mask,
18002 N5: VL);
18003}
18004
18005static SDValue performVFMADD_VLCombine(SDNode *N,
18006 TargetLowering::DAGCombinerInfo &DCI,
18007 const RISCVSubtarget &Subtarget) {
18008 SelectionDAG &DAG = DCI.DAG;
18009
18010 if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
18011 return V;
18012
18013 // FIXME: Ignore strict opcodes for now.
18014 if (DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(Opcode: N->getOpcode()))
18015 return SDValue();
18016
18017 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
18018}
18019
18020static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
18021 const RISCVSubtarget &Subtarget) {
18022 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
18023
18024 EVT VT = N->getValueType(ResNo: 0);
18025
18026 if (VT != Subtarget.getXLenVT())
18027 return SDValue();
18028
18029 if (!isa<ConstantSDNode>(Val: N->getOperand(Num: 1)))
18030 return SDValue();
18031 uint64_t ShAmt = N->getConstantOperandVal(Num: 1);
18032
18033 SDValue N0 = N->getOperand(Num: 0);
18034
18035 // Combine (sra (sext_inreg (shl X, C1), iX), C2) ->
18036 // (sra (shl X, C1+(XLen-iX)), C2+(XLen-iX)) so it gets selected as SLLI+SRAI.
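  // For example, on RV64: (sra (sext_inreg (shl X, 3), i32), 2)
  //                    -> (sra (shl X, 35), 34)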
18037 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse()) {
18038 unsigned ExtSize =
18039 cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT().getSizeInBits();
18040 if (ShAmt < ExtSize && N0.getOperand(i: 0).getOpcode() == ISD::SHL &&
18041 N0.getOperand(i: 0).hasOneUse() &&
18042 isa<ConstantSDNode>(Val: N0.getOperand(i: 0).getOperand(i: 1))) {
18043 uint64_t LShAmt = N0.getOperand(i: 0).getConstantOperandVal(i: 1);
18044 if (LShAmt < ExtSize) {
18045 unsigned Size = VT.getSizeInBits();
18046 SDLoc ShlDL(N0.getOperand(i: 0));
18047 SDValue Shl =
18048 DAG.getNode(Opcode: ISD::SHL, DL: ShlDL, VT, N1: N0.getOperand(i: 0).getOperand(i: 0),
18049 N2: DAG.getConstant(Val: LShAmt + (Size - ExtSize), DL: ShlDL, VT));
18050 SDLoc DL(N);
18051 return DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: Shl,
18052 N2: DAG.getConstant(Val: ShAmt + (Size - ExtSize), DL, VT));
18053 }
18054 }
18055 }
18056
18057 if (ShAmt > 32 || VT != MVT::i64)
18058 return SDValue();
18059
18060 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
18061 // FIXME: Should this be a generic combine? There's a similar combine on X86.
18062 //
18063 // Also try these folds where an add or sub is in the middle.
18064 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1)), C)
18065 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X)), C)
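  // For example: (sra (shl X, 32), 27) -> (shl (sext_inreg X, i32), 5).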
18066 SDValue Shl;
18067 ConstantSDNode *AddC = nullptr;
18068
18069 // We might have an ADD or SUB between the SRA and SHL.
18070 bool IsAdd = N0.getOpcode() == ISD::ADD;
18071 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
18072 // Other operand needs to be a constant we can modify.
18073 AddC = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: IsAdd ? 1 : 0));
18074 if (!AddC)
18075 return SDValue();
18076
18077 // AddC needs to have at least 32 trailing zeros.
18078 if (llvm::countr_zero(Val: AddC->getZExtValue()) < 32)
18079 return SDValue();
18080
18081 // All users should be a shift by constant less than or equal to 32. This
18082 // ensures we'll do this optimization for each of them to produce an
18083 // add/sub+sext_inreg they can all share.
18084 for (SDNode *U : N0->users()) {
18085 if (U->getOpcode() != ISD::SRA ||
18086 !isa<ConstantSDNode>(Val: U->getOperand(Num: 1)) ||
18087 U->getConstantOperandVal(Num: 1) > 32)
18088 return SDValue();
18089 }
18090
18091 Shl = N0.getOperand(i: IsAdd ? 0 : 1);
18092 } else {
18093 // Not an ADD or SUB.
18094 Shl = N0;
18095 }
18096
18097 // Look for a shift left by 32.
18098 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Val: Shl.getOperand(i: 1)) ||
18099 Shl.getConstantOperandVal(i: 1) != 32)
18100 return SDValue();
18101
18102 // If we didn't look through an add/sub, then the shl should have one use.
18103 // If we did look through an add/sub, the sext_inreg we create is free so
18104 // we're only creating 2 new instructions. It's enough to only remove the
18105 // original sra+add/sub.
18106 if (!AddC && !Shl.hasOneUse())
18107 return SDValue();
18108
18109 SDLoc DL(N);
18110 SDValue In = Shl.getOperand(i: 0);
18111
18112 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
18113 // constant.
18114 if (AddC) {
18115 SDValue ShiftedAddC =
18116 DAG.getConstant(Val: AddC->getZExtValue() >> 32, DL, VT: MVT::i64);
18117 if (IsAdd)
18118 In = DAG.getNode(Opcode: ISD::ADD, DL, VT: MVT::i64, N1: In, N2: ShiftedAddC);
18119 else
18120 In = DAG.getNode(Opcode: ISD::SUB, DL, VT: MVT::i64, N1: ShiftedAddC, N2: In);
18121 }
18122
18123 SDValue SExt = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: MVT::i64, N1: In,
18124 N2: DAG.getValueType(MVT::i32));
18125 if (ShAmt == 32)
18126 return SExt;
18127
18128 return DAG.getNode(
18129 Opcode: ISD::SHL, DL, VT: MVT::i64, N1: SExt,
18130 N2: DAG.getConstant(Val: 32 - ShAmt, DL, VT: MVT::i64));
18131}
18132
18133// Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y)), Z) if
18134// the result is used as the condition of a br_cc or select_cc we can invert,
18135// inverting the setcc is free, and Z is 0/1. Caller will invert the
18136// br_cc/select_cc.
18137static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
18138 bool IsAnd = Cond.getOpcode() == ISD::AND;
18139 if (!IsAnd && Cond.getOpcode() != ISD::OR)
18140 return SDValue();
18141
18142 if (!Cond.hasOneUse())
18143 return SDValue();
18144
18145 SDValue Setcc = Cond.getOperand(i: 0);
18146 SDValue Xor = Cond.getOperand(i: 1);
18147 // Canonicalize setcc to LHS.
18148 if (Setcc.getOpcode() != ISD::SETCC)
18149 std::swap(a&: Setcc, b&: Xor);
18150 // LHS should be a setcc and RHS should be an xor.
18151 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
18152 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
18153 return SDValue();
18154
18155 // If the condition is an And, SimplifyDemandedBits may have changed
18156 // (xor Z, 1) to (not Z).
18157 SDValue Xor1 = Xor.getOperand(i: 1);
18158 if (!isOneConstant(V: Xor1) && !(IsAnd && isAllOnesConstant(V: Xor1)))
18159 return SDValue();
18160
18161 EVT VT = Cond.getValueType();
18162 SDValue Xor0 = Xor.getOperand(i: 0);
18163
18164 // The LHS of the xor needs to be 0/1.
18165 APInt Mask = APInt::getBitsSetFrom(numBits: VT.getSizeInBits(), loBit: 1);
18166 if (!DAG.MaskedValueIsZero(Op: Xor0, Mask))
18167 return SDValue();
18168
18169 // We can only invert integer setccs.
18170 EVT SetCCOpVT = Setcc.getOperand(i: 0).getValueType();
18171 if (!SetCCOpVT.isScalarInteger())
18172 return SDValue();
18173
18174 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: Setcc.getOperand(i: 2))->get();
18175 if (ISD::isIntEqualitySetCC(Code: CCVal)) {
18176 CCVal = ISD::getSetCCInverse(Operation: CCVal, Type: SetCCOpVT);
18177 Setcc = DAG.getSetCC(DL: SDLoc(Setcc), VT, LHS: Setcc.getOperand(i: 0),
18178 RHS: Setcc.getOperand(i: 1), Cond: CCVal);
18179 } else if (CCVal == ISD::SETLT && isNullConstant(V: Setcc.getOperand(i: 0))) {
18180 // Invert (setlt 0, X) by converting to (setlt X, 1).
18181 Setcc = DAG.getSetCC(DL: SDLoc(Setcc), VT, LHS: Setcc.getOperand(i: 1),
18182 RHS: DAG.getConstant(Val: 1, DL: SDLoc(Setcc), VT), Cond: CCVal);
18183 } else if (CCVal == ISD::SETLT && isOneConstant(V: Setcc.getOperand(i: 1))) {
18184 // Invert (setlt X, 1) by converting to (setlt 0, X).
18185 Setcc = DAG.getSetCC(DL: SDLoc(Setcc), VT,
18186 LHS: DAG.getConstant(Val: 0, DL: SDLoc(Setcc), VT),
18187 RHS: Setcc.getOperand(i: 0), Cond: CCVal);
18188 } else
18189 return SDValue();
18190
18191 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
18192 return DAG.getNode(Opcode: Opc, DL: SDLoc(Cond), VT, N1: Setcc, N2: Xor.getOperand(i: 0));
18193}
18194
18195// Perform common combines for BR_CC and SELECT_CC conditions.
18196static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
18197 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
18198 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val&: CC)->get();
18199
18200 // Since an arithmetic right shift always preserves the sign bit, the
18201 // shift can be omitted when comparing against zero.
18202 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
18203 // setge (sra X, N), 0 -> setge X, 0
18204 if (isNullConstant(V: RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
18205 LHS.getOpcode() == ISD::SRA) {
18206 LHS = LHS.getOperand(i: 0);
18207 return true;
18208 }
18209
18210 if (!ISD::isIntEqualitySetCC(Code: CCVal))
18211 return false;
18212
18213 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
18214 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
18215 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(V: RHS) &&
18216 LHS.getOperand(i: 0).getValueType() == Subtarget.getXLenVT()) {
18217 // If we're looking for eq 0 instead of ne 0, we need to invert the
18218 // condition.
18219 bool Invert = CCVal == ISD::SETEQ;
18220 CCVal = cast<CondCodeSDNode>(Val: LHS.getOperand(i: 2))->get();
18221 if (Invert)
18222 CCVal = ISD::getSetCCInverse(Operation: CCVal, Type: LHS.getValueType());
18223
18224 RHS = LHS.getOperand(i: 1);
18225 LHS = LHS.getOperand(i: 0);
18226 translateSetCCForBranch(DL, LHS, RHS, CC&: CCVal, DAG, Subtarget);
18227
18228 CC = DAG.getCondCode(Cond: CCVal);
18229 return true;
18230 }
18231
18232 // If XOR is reused and has an immediate that will fit in XORI,
18233 // do not fold.
18234 auto isXorImmediate = [](const SDValue &Op) -> bool {
18235 if (const auto *XorCnst = dyn_cast<ConstantSDNode>(Val: Op))
18236 return isInt<12>(x: XorCnst->getSExtValue());
18237 return false;
18238 };
18239 // Fold (X(i1) ^ 1) == 0 -> X != 0
18240 auto singleBitOp = [&DAG](const SDValue &VarOp,
18241 const SDValue &ConstOp) -> bool {
18242 if (const auto *XorCnst = dyn_cast<ConstantSDNode>(Val: ConstOp)) {
18243 const APInt Mask = APInt::getBitsSetFrom(numBits: VarOp.getValueSizeInBits(), loBit: 1);
18244 return (XorCnst->getSExtValue() == 1) &&
18245 DAG.MaskedValueIsZero(Op: VarOp, Mask);
18246 }
18247 return false;
18248 };
18249 auto onlyUsedBySelectOrBR = [](const SDValue &Op) -> bool {
18250 for (const SDNode *UserNode : Op->users()) {
18251 const unsigned Opcode = UserNode->getOpcode();
18252 if (Opcode != RISCVISD::SELECT_CC && Opcode != RISCVISD::BR_CC)
18253 return false;
18254 }
18255 return true;
18256 };
18257 auto isFoldableXorEq = [isXorImmediate, singleBitOp, onlyUsedBySelectOrBR](
18258 const SDValue &LHS, const SDValue &RHS) -> bool {
18259 return LHS.getOpcode() == ISD::XOR && isNullConstant(V: RHS) &&
18260 (!isXorImmediate(LHS.getOperand(i: 1)) ||
18261 singleBitOp(LHS.getOperand(i: 0), LHS.getOperand(i: 1)) ||
18262 onlyUsedBySelectOrBR(LHS));
18263 };
18264 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
18265 if (isFoldableXorEq(LHS, RHS)) {
18266 RHS = LHS.getOperand(i: 1);
18267 LHS = LHS.getOperand(i: 0);
18268 return true;
18269 }
18270 // Fold ((sext (xor X, C)), 0, eq/ne) -> ((sext X), C, eq/ne)
18271 if (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG) {
18272 const SDValue LHS0 = LHS.getOperand(i: 0);
18273 if (isFoldableXorEq(LHS0, RHS) && isa<ConstantSDNode>(Val: LHS0.getOperand(i: 1))) {
18274 // SEXT(XOR(X, Y)) -> XOR(SEXT(X), SEXT(Y)))
18275 RHS = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: LHS.getValueType(),
18276 N1: LHS0.getOperand(i: 1), N2: LHS.getOperand(i: 1));
18277 LHS = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: LHS.getValueType(),
18278 N1: LHS0.getOperand(i: 0), N2: LHS.getOperand(i: 1));
18279 return true;
18280 }
18281 }
18282
18283 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
18284 if (isNullConstant(V: RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
18285 LHS.getOperand(i: 1).getOpcode() == ISD::Constant) {
18286 SDValue LHS0 = LHS.getOperand(i: 0);
18287 if (LHS0.getOpcode() == ISD::AND &&
18288 LHS0.getOperand(i: 1).getOpcode() == ISD::Constant) {
18289 uint64_t Mask = LHS0.getConstantOperandVal(i: 1);
18290 uint64_t ShAmt = LHS.getConstantOperandVal(i: 1);
18291 if (isPowerOf2_64(Value: Mask) && Log2_64(Value: Mask) == ShAmt) {
18292 // XAndesPerf supports branch on test bit.
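        // Keeping the AND with the mask lets instruction selection emit a
        // single bit-test branch instead of a shift plus a compare-with-zero.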
18293 if (Subtarget.hasVendorXAndesPerf()) {
18294 LHS =
18295 DAG.getNode(Opcode: ISD::AND, DL, VT: LHS.getValueType(), N1: LHS0.getOperand(i: 0),
18296 N2: DAG.getConstant(Val: Mask, DL, VT: LHS.getValueType()));
18297 return true;
18298 }
18299
18300 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
18301 CC = DAG.getCondCode(Cond: CCVal);
18302
18303 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
18304 LHS = LHS0.getOperand(i: 0);
18305 if (ShAmt != 0)
18306 LHS =
18307 DAG.getNode(Opcode: ISD::SHL, DL, VT: LHS.getValueType(), N1: LHS0.getOperand(i: 0),
18308 N2: DAG.getConstant(Val: ShAmt, DL, VT: LHS.getValueType()));
18309 return true;
18310 }
18311 }
18312 }
18313
18314 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
18315 // This can occur when legalizing some floating point comparisons.
18316 APInt Mask = APInt::getBitsSetFrom(numBits: LHS.getValueSizeInBits(), loBit: 1);
18317 if (isOneConstant(V: RHS) && DAG.MaskedValueIsZero(Op: LHS, Mask)) {
18318 CCVal = ISD::getSetCCInverse(Operation: CCVal, Type: LHS.getValueType());
18319 CC = DAG.getCondCode(Cond: CCVal);
18320 RHS = DAG.getConstant(Val: 0, DL, VT: LHS.getValueType());
18321 return true;
18322 }
18323
18324 if (isNullConstant(V: RHS)) {
18325 if (SDValue NewCond = tryDemorganOfBooleanCondition(Cond: LHS, DAG)) {
18326 CCVal = ISD::getSetCCInverse(Operation: CCVal, Type: LHS.getValueType());
18327 CC = DAG.getCondCode(Cond: CCVal);
18328 LHS = NewCond;
18329 return true;
18330 }
18331 }
18332
18333 return false;
18334}
18335
18336// Fold
18337// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
18338// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
18339// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
18340// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
18341// (select C, (rotl Y, X), Y) -> (rotl Y, (select C, X, 0)).
18342// (select C, (rotr Y, X), Y) -> (rotr Y, (select C, X, 0)).
18343static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
18344 SDValue TrueVal, SDValue FalseVal,
18345 bool Swapped) {
18346 bool Commutative = true;
18347 unsigned Opc = TrueVal.getOpcode();
18348 switch (Opc) {
18349 default:
18350 return SDValue();
18351 case ISD::SHL:
18352 case ISD::SRA:
18353 case ISD::SRL:
18354 case ISD::SUB:
18355 case ISD::ROTL:
18356 case ISD::ROTR:
18357 Commutative = false;
18358 break;
18359 case ISD::ADD:
18360 case ISD::OR:
18361 case ISD::XOR:
18362 break;
18363 }
18364
18365 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(Val: FalseVal))
18366 return SDValue();
18367
18368 unsigned OpToFold;
18369 if (FalseVal == TrueVal.getOperand(i: 0))
18370 OpToFold = 0;
18371 else if (Commutative && FalseVal == TrueVal.getOperand(i: 1))
18372 OpToFold = 1;
18373 else
18374 return SDValue();
18375
18376 EVT VT = N->getValueType(ResNo: 0);
18377 SDLoc DL(N);
18378 SDValue OtherOp = TrueVal.getOperand(i: 1 - OpToFold);
18379 EVT OtherOpVT = OtherOp.getValueType();
18380 SDValue IdentityOperand =
18381 DAG.getNeutralElement(Opcode: Opc, DL, VT: OtherOpVT, Flags: N->getFlags());
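  // For the non-commutative ops handled here (shifts, rotates, sub), only the
  // RHS can be folded, and 0 is the neutral value for that operand.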
18382 if (!Commutative)
18383 IdentityOperand = DAG.getConstant(Val: 0, DL, VT: OtherOpVT);
18384 assert(IdentityOperand && "No identity operand!");
18385
18386 if (Swapped)
18387 std::swap(a&: OtherOp, b&: IdentityOperand);
18388 SDValue NewSel =
18389 DAG.getSelect(DL, VT: OtherOpVT, Cond: N->getOperand(Num: 0), LHS: OtherOp, RHS: IdentityOperand);
18390 return DAG.getNode(Opcode: TrueVal.getOpcode(), DL, VT, N1: FalseVal, N2: NewSel);
18391}
18392
18393 // This tries to get rid of the `select` and `icmp` that are being used to
18394 // handle targets that do not support `cttz(0)`/`ctlz(0)`.
18395static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
18396 SDValue Cond = N->getOperand(Num: 0);
18397
18398 // This represents either CTTZ or CTLZ instruction.
18399 SDValue CountZeroes;
18400
18401 SDValue ValOnZero;
18402
18403 if (Cond.getOpcode() != ISD::SETCC)
18404 return SDValue();
18405
18406 if (!isNullConstant(V: Cond->getOperand(Num: 1)))
18407 return SDValue();
18408
18409 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: Cond->getOperand(Num: 2))->get();
18410 if (CCVal == ISD::CondCode::SETEQ) {
18411 CountZeroes = N->getOperand(Num: 2);
18412 ValOnZero = N->getOperand(Num: 1);
18413 } else if (CCVal == ISD::CondCode::SETNE) {
18414 CountZeroes = N->getOperand(Num: 1);
18415 ValOnZero = N->getOperand(Num: 2);
18416 } else {
18417 return SDValue();
18418 }
18419
18420 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
18421 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
18422 CountZeroes = CountZeroes.getOperand(i: 0);
18423
18424 if (CountZeroes.getOpcode() != ISD::CTTZ &&
18425 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
18426 CountZeroes.getOpcode() != ISD::CTLZ &&
18427 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
18428 return SDValue();
18429
18430 if (!isNullConstant(V: ValOnZero))
18431 return SDValue();
18432
18433 SDValue CountZeroesArgument = CountZeroes->getOperand(Num: 0);
18434 if (Cond->getOperand(Num: 0) != CountZeroesArgument)
18435 return SDValue();
18436
18437 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
18438 CountZeroes = DAG.getNode(Opcode: ISD::CTTZ, DL: SDLoc(CountZeroes),
18439 VT: CountZeroes.getValueType(), Operand: CountZeroesArgument);
18440 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
18441 CountZeroes = DAG.getNode(Opcode: ISD::CTLZ, DL: SDLoc(CountZeroes),
18442 VT: CountZeroes.getValueType(), Operand: CountZeroesArgument);
18443 }
18444
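  // cttz/ctlz return the bit width when the input is zero, so masking with
  // BitWidth - 1 maps that case to 0, matching the select's value on zero.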
18445 unsigned BitWidth = CountZeroes.getValueSizeInBits();
18446 SDValue BitWidthMinusOne =
18447 DAG.getConstant(Val: BitWidth - 1, DL: SDLoc(N), VT: CountZeroes.getValueType());
18448
18449 auto AndNode = DAG.getNode(Opcode: ISD::AND, DL: SDLoc(N), VT: CountZeroes.getValueType(),
18450 N1: CountZeroes, N2: BitWidthMinusOne);
18451 return DAG.getZExtOrTrunc(Op: AndNode, DL: SDLoc(N), VT: N->getValueType(ResNo: 0));
18452}
18453
18454static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
18455 const RISCVSubtarget &Subtarget) {
18456 SDValue Cond = N->getOperand(Num: 0);
18457 SDValue True = N->getOperand(Num: 1);
18458 SDValue False = N->getOperand(Num: 2);
18459 SDLoc DL(N);
18460 EVT VT = N->getValueType(ResNo: 0);
18461 EVT CondVT = Cond.getValueType();
18462
18463 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
18464 return SDValue();
18465
18466 // Replace (setcc eq (and x, C)) with (setcc ne (and x, C)) to generate
18467 // BEXTI, where C is a power of 2.
18468 if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
18469 (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
18470 SDValue LHS = Cond.getOperand(i: 0);
18471 SDValue RHS = Cond.getOperand(i: 1);
18472 ISD::CondCode CC = cast<CondCodeSDNode>(Val: Cond.getOperand(i: 2))->get();
18473 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
18474 isa<ConstantSDNode>(Val: LHS.getOperand(i: 1)) && isNullConstant(V: RHS)) {
18475 const APInt &MaskVal = LHS.getConstantOperandAPInt(i: 1);
18476 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(N: 12))
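        // The condition is inverted from eq to ne, so the select arms are
        // swapped to preserve the original semantics.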
18477 return DAG.getSelect(DL, VT,
18478 Cond: DAG.getSetCC(DL, VT: CondVT, LHS, RHS, Cond: ISD::SETNE),
18479 LHS: False, RHS: True);
18480 }
18481 }
18482 return SDValue();
18483}
18484
18485static bool matchSelectAddSub(SDValue TrueVal, SDValue FalseVal, bool &SwapCC) {
18486 if (!TrueVal.hasOneUse() || !FalseVal.hasOneUse())
18487 return false;
18488
18489 SwapCC = false;
18490 if (TrueVal.getOpcode() == ISD::SUB && FalseVal.getOpcode() == ISD::ADD) {
18491 std::swap(a&: TrueVal, b&: FalseVal);
18492 SwapCC = true;
18493 }
18494
18495 if (TrueVal.getOpcode() != ISD::ADD || FalseVal.getOpcode() != ISD::SUB)
18496 return false;
18497
18498 SDValue A = FalseVal.getOperand(i: 0);
18499 SDValue B = FalseVal.getOperand(i: 1);
18500 // Add is commutative, so check both orders
18501 return ((TrueVal.getOperand(i: 0) == A && TrueVal.getOperand(i: 1) == B) ||
18502 (TrueVal.getOperand(i: 1) == A && TrueVal.getOperand(i: 0) == B));
18503}
18504
18505/// Convert vselect CC, (add a, b), (sub a, b) to add a, (vselect CC, -b, b).
18506 /// This allows us to match a vadd.vv fed by a masked vrsub, which reduces
18507 /// register pressure compared to the add followed by masked vsub sequence.
18508static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) {
18509 SDLoc DL(N);
18510 EVT VT = N->getValueType(ResNo: 0);
18511 SDValue CC = N->getOperand(Num: 0);
18512 SDValue TrueVal = N->getOperand(Num: 1);
18513 SDValue FalseVal = N->getOperand(Num: 2);
18514
18515 bool SwapCC;
18516 if (!matchSelectAddSub(TrueVal, FalseVal, SwapCC))
18517 return SDValue();
18518
18519 SDValue Sub = SwapCC ? TrueVal : FalseVal;
18520 SDValue A = Sub.getOperand(i: 0);
18521 SDValue B = Sub.getOperand(i: 1);
18522
18523 // Arrange the select such that we can match a masked
18524 // vrsub.vi to perform the conditional negate
18525 SDValue NegB = DAG.getNegative(Val: B, DL, VT);
18526 if (!SwapCC)
18527 CC = DAG.getLogicalNOT(DL, Val: CC, VT: CC->getValueType(ResNo: 0));
18528 SDValue NewB = DAG.getNode(Opcode: ISD::VSELECT, DL, VT, N1: CC, N2: NegB, N3: B);
18529 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: A, N2: NewB);
18530}
18531
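/// Combine ISD::SELECT: fold selects of cttz/ctlz against zero, try the
/// inverted-setcc form above, and otherwise attempt to fold the select into
/// one of its arms (skipped when the target prefers fused conditional moves).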
18532static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
18533 const RISCVSubtarget &Subtarget) {
18534 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
18535 return Folded;
18536
18537 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
18538 return V;
18539
18540 if (Subtarget.hasConditionalMoveFusion())
18541 return SDValue();
18542
18543 SDValue TrueVal = N->getOperand(Num: 1);
18544 SDValue FalseVal = N->getOperand(Num: 2);
18545 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
18546 return V;
18547 return tryFoldSelectIntoOp(N, DAG, TrueVal: FalseVal, FalseVal: TrueVal, /*Swapped*/true);
18548}
18549
18550/// If we have a build_vector where each lane is binop X, C, where C
18551/// is a constant (but not necessarily the same constant on all lanes),
18552/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
18553 /// We assume that materializing a constant build vector will be no more
18554 /// expensive than performing O(n) binops.
18555static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
18556 const RISCVSubtarget &Subtarget,
18557 const RISCVTargetLowering &TLI) {
18558 SDLoc DL(N);
18559 EVT VT = N->getValueType(ResNo: 0);
18560
18561 assert(!VT.isScalableVector() && "unexpected build vector");
18562
18563 if (VT.getVectorNumElements() == 1)
18564 return SDValue();
18565
18566 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
18567 if (!TLI.isBinOp(Opcode))
18568 return SDValue();
18569
18570 if (!TLI.isOperationLegalOrCustom(Op: Opcode, VT) || !TLI.isTypeLegal(VT))
18571 return SDValue();
18572
18573 // This BUILD_VECTOR involves an implicit truncation, and sinking
18574 // truncates through binops is non-trivial.
18575 if (N->op_begin()->getValueType() != VT.getVectorElementType())
18576 return SDValue();
18577
18578 SmallVector<SDValue> LHSOps;
18579 SmallVector<SDValue> RHSOps;
18580 for (SDValue Op : N->ops()) {
18581 if (Op.isUndef()) {
18582 // We can't form a divide or remainder from undef.
18583 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
18584 return SDValue();
18585
18586 LHSOps.push_back(Elt: Op);
18587 RHSOps.push_back(Elt: Op);
18588 continue;
18589 }
18590
18591 // TODO: We can handle operations which have a neutral rhs value
18592 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
18593 // of profit in a more explicit manner.
18594 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
18595 return SDValue();
18596
18597 LHSOps.push_back(Elt: Op.getOperand(i: 0));
18598 if (!isa<ConstantSDNode>(Val: Op.getOperand(i: 1)) &&
18599 !isa<ConstantFPSDNode>(Val: Op.getOperand(i: 1)))
18600 return SDValue();
18601 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
18602 // have different LHS and RHS types.
18603 if (Op.getOperand(i: 0).getValueType() != Op.getOperand(i: 1).getValueType())
18604 return SDValue();
18605
18606 RHSOps.push_back(Elt: Op.getOperand(i: 1));
18607 }
18608
18609 return DAG.getNode(Opcode, DL, VT, N1: DAG.getBuildVector(VT, DL, Ops: LHSOps),
18610 N2: DAG.getBuildVector(VT, DL, Ops: RHSOps));
18611}
18612
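/// Map a vXi8 operand type to the i32 result type with a quarter of the
/// lanes, matching the 4:1 accumulation performed by the vqdot family of
/// instructions.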
18613static MVT getQDOTXResultType(MVT OpVT) {
18614 ElementCount OpEC = OpVT.getVectorElementCount();
18615 assert(OpEC.isKnownMultipleOf(4) && OpVT.getVectorElementType() == MVT::i8);
18616 return MVT::getVectorVT(VT: MVT::i32, EC: OpEC.divideCoefficientBy(RHS: 4));
18617}
18618
18619 /// Given fixed length vectors A and B with equal element types, but possibly
18620 /// different numbers of elements, return A + B where the narrower of the two
18621 /// is zero padded to the larger number of elements.
18622static SDValue getZeroPaddedAdd(const SDLoc &DL, SDValue A, SDValue B,
18623 SelectionDAG &DAG) {
18624 // NOTE: Manually doing the extract/add/insert scheme produces
18625 // significantly better codegen than the naive pad with zeros
18626 // and add scheme.
18627 EVT AVT = A.getValueType();
18628 EVT BVT = B.getValueType();
18629 assert(AVT.getVectorElementType() == BVT.getVectorElementType());
18630 if (AVT.getVectorMinNumElements() > BVT.getVectorMinNumElements()) {
18631 std::swap(a&: A, b&: B);
18632 std::swap(a&: AVT, b&: BVT);
18633 }
18634
18635 SDValue BPart = DAG.getExtractSubvector(DL, VT: AVT, Vec: B, Idx: 0);
18636 SDValue Res = DAG.getNode(Opcode: ISD::ADD, DL, VT: AVT, N1: A, N2: BPart);
18637 return DAG.getInsertSubvector(DL, Vec: B, SubVec: Res, Idx: 0);
18638}
18639
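/// Try to re-express an operand of an i32 vector add-reduction in terms of
/// PARTIAL_REDUCE_[S|U|SU]MLA nodes over i8 sources so that it can later be
/// selected to the Zvqdotq dot-product instructions. Plain sign/zero extends
/// are treated as a multiply by 1, widening multiplies of extends map
/// directly, and sums of such values are handled by recursing through adds.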
18640static SDValue foldReduceOperandViaVQDOT(SDValue InVec, const SDLoc &DL,
18641 SelectionDAG &DAG,
18642 const RISCVSubtarget &Subtarget,
18643 const RISCVTargetLowering &TLI) {
18644 // Note: We intentionally do not check the legality of the reduction type.
18645 // We want to handle the m4/m8 *src* types, and thus need to let illegal
18646 // intermediate types flow through here.
18647 if (InVec.getValueType().getVectorElementType() != MVT::i32 ||
18648 !InVec.getValueType().getVectorElementCount().isKnownMultipleOf(RHS: 4))
18649 return SDValue();
18650
18651 // Recurse through adds (since generic dag canonicalizes to that
18652 // form). TODO: Handle disjoint or here.
18653 if (InVec->getOpcode() == ISD::ADD) {
18654 SDValue A = InVec.getOperand(i: 0);
18655 SDValue B = InVec.getOperand(i: 1);
18656 SDValue AOpt = foldReduceOperandViaVQDOT(InVec: A, DL, DAG, Subtarget, TLI);
18657 SDValue BOpt = foldReduceOperandViaVQDOT(InVec: B, DL, DAG, Subtarget, TLI);
18658 if (AOpt || BOpt) {
18659 if (AOpt)
18660 A = AOpt;
18661 if (BOpt)
18662 B = BOpt;
18663 // From here, we're doing A + B with mixed types, implicitly zero
18664 // padded to the wider type. Note that we *don't* need the result
18665 // type to be the original VT, and in fact prefer narrower ones
18666 // if possible.
18667 return getZeroPaddedAdd(DL, A, B, DAG);
18668 }
18669 }
18670
18671 // zext a <--> partial_reduce_umla 0, a, 1
18672 // sext a <--> partial_reduce_smla 0, a, 1
18673 if (InVec.getOpcode() == ISD::ZERO_EXTEND ||
18674 InVec.getOpcode() == ISD::SIGN_EXTEND) {
18675 SDValue A = InVec.getOperand(i: 0);
18676 EVT OpVT = A.getValueType();
18677 if (OpVT.getVectorElementType() != MVT::i8 || !TLI.isTypeLegal(VT: OpVT))
18678 return SDValue();
18679
18680 MVT ResVT = getQDOTXResultType(OpVT: A.getSimpleValueType());
18681 SDValue B = DAG.getConstant(Val: 0x1, DL, VT: OpVT);
18682 bool IsSigned = InVec.getOpcode() == ISD::SIGN_EXTEND;
18683 unsigned Opc =
18684 IsSigned ? ISD::PARTIAL_REDUCE_SMLA : ISD::PARTIAL_REDUCE_UMLA;
18685 return DAG.getNode(Opcode: Opc, DL, VT: ResVT, Ops: {DAG.getConstant(Val: 0, DL, VT: ResVT), A, B});
18686 }
18687
18688 // mul (sext a, sext b) -> partial_reduce_smla 0, a, b
18689 // mul (zext a, zext b) -> partial_reduce_umla 0, a, b
18690 // mul (sext a, zext b) -> partial_reduce_sumla 0, a, b
18691 // mul (zext a, sext b) -> partial_reduce_sumla 0, b, a (swapped)
18692 if (InVec.getOpcode() != ISD::MUL)
18693 return SDValue();
18694
18695 SDValue A = InVec.getOperand(i: 0);
18696 SDValue B = InVec.getOperand(i: 1);
18697
18698 if (!ISD::isExtOpcode(Opcode: A.getOpcode()))
18699 return SDValue();
18700
18701 EVT OpVT = A.getOperand(i: 0).getValueType();
18702 if (OpVT.getVectorElementType() != MVT::i8 ||
18703 OpVT != B.getOperand(i: 0).getValueType() ||
18704 !TLI.isTypeLegal(VT: A.getValueType()))
18705 return SDValue();
18706
18707 unsigned Opc;
18708 if (A.getOpcode() == ISD::SIGN_EXTEND && B.getOpcode() == ISD::SIGN_EXTEND)
18709 Opc = ISD::PARTIAL_REDUCE_SMLA;
18710 else if (A.getOpcode() == ISD::ZERO_EXTEND &&
18711 B.getOpcode() == ISD::ZERO_EXTEND)
18712 Opc = ISD::PARTIAL_REDUCE_UMLA;
18713 else if (A.getOpcode() == ISD::SIGN_EXTEND &&
18714 B.getOpcode() == ISD::ZERO_EXTEND)
18715 Opc = ISD::PARTIAL_REDUCE_SUMLA;
18716 else if (A.getOpcode() == ISD::ZERO_EXTEND &&
18717 B.getOpcode() == ISD::SIGN_EXTEND) {
18718 Opc = ISD::PARTIAL_REDUCE_SUMLA;
18719 std::swap(a&: A, b&: B);
18720 } else
18721 return SDValue();
18722
18723 MVT ResVT = getQDOTXResultType(OpVT: OpVT.getSimpleVT());
18724 return DAG.getNode(
18725 Opcode: Opc, DL, VT: ResVT,
18726 Ops: {DAG.getConstant(Val: 0, DL, VT: ResVT), A.getOperand(i: 0), B.getOperand(i: 0)});
18727}
18728
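/// If Zvqdotq is available, try to rewrite the operand of a vecreduce.add so
/// that the reduction is fed by partial-reduce dot-product nodes.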
18729static SDValue performVECREDUCECombine(SDNode *N, SelectionDAG &DAG,
18730 const RISCVSubtarget &Subtarget,
18731 const RISCVTargetLowering &TLI) {
18732 if (!Subtarget.hasStdExtZvqdotq())
18733 return SDValue();
18734
18735 SDLoc DL(N);
18736 EVT VT = N->getValueType(ResNo: 0);
18737 SDValue InVec = N->getOperand(Num: 0);
18738 if (SDValue V = foldReduceOperandViaVQDOT(InVec, DL, DAG, Subtarget, TLI))
18739 return DAG.getNode(Opcode: ISD::VECREDUCE_ADD, DL, VT, Operand: V);
18740 return SDValue();
18741}
18742
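/// Combine insert_vector_elt by pushing the insert into the operands of a
/// matching binop, or into the relevant source operand of a concat_vectors
/// (see the per-pattern comments below).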
18743static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
18744 const RISCVSubtarget &Subtarget,
18745 const RISCVTargetLowering &TLI) {
18746 SDValue InVec = N->getOperand(Num: 0);
18747 SDValue InVal = N->getOperand(Num: 1);
18748 SDValue EltNo = N->getOperand(Num: 2);
18749 SDLoc DL(N);
18750
18751 EVT VT = InVec.getValueType();
18752 if (VT.isScalableVector())
18753 return SDValue();
18754
18755 if (!InVec.hasOneUse())
18756 return SDValue();
18757
18758 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
18759 // move the insert_vector_elts into the arms of the binop. Note that
18760 // the new RHS must be a constant.
18761 const unsigned InVecOpcode = InVec->getOpcode();
18762 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(Opcode: InVecOpcode) &&
18763 InVal.hasOneUse()) {
18764 SDValue InVecLHS = InVec->getOperand(Num: 0);
18765 SDValue InVecRHS = InVec->getOperand(Num: 1);
18766 SDValue InValLHS = InVal->getOperand(Num: 0);
18767 SDValue InValRHS = InVal->getOperand(Num: 1);
18768
18769 if (!ISD::isBuildVectorOfConstantSDNodes(N: InVecRHS.getNode()))
18770 return SDValue();
18771 if (!isa<ConstantSDNode>(Val: InValRHS) && !isa<ConstantFPSDNode>(Val: InValRHS))
18772 return SDValue();
18773 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
18774 // have different LHS and RHS types.
18775 if (InVec.getOperand(i: 0).getValueType() != InVec.getOperand(i: 1).getValueType())
18776 return SDValue();
18777 SDValue LHS = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT,
18778 N1: InVecLHS, N2: InValLHS, N3: EltNo);
18779 SDValue RHS = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT,
18780 N1: InVecRHS, N2: InValRHS, N3: EltNo);
18781 return DAG.getNode(Opcode: InVecOpcode, DL, VT, N1: LHS, N2: RHS);
18782 }
18783
18784 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
18785 // move the insert_vector_elt to the source operand of the concat_vector.
18786 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
18787 return SDValue();
18788
18789 auto *IndexC = dyn_cast<ConstantSDNode>(Val&: EltNo);
18790 if (!IndexC)
18791 return SDValue();
18792 unsigned Elt = IndexC->getZExtValue();
18793
18794 EVT ConcatVT = InVec.getOperand(i: 0).getValueType();
18795 if (ConcatVT.getVectorElementType() != InVal.getValueType())
18796 return SDValue();
18797 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
18798 unsigned NewIdx = Elt % ConcatNumElts;
18799
18800 unsigned ConcatOpIdx = Elt / ConcatNumElts;
18801 SDValue ConcatOp = InVec.getOperand(i: ConcatOpIdx);
18802 ConcatOp = DAG.getInsertVectorElt(DL, Vec: ConcatOp, Elt: InVal, Idx: NewIdx);
18803
18804 SmallVector<SDValue> ConcatOps(InVec->ops());
18805 ConcatOps[ConcatOpIdx] = ConcatOp;
18806 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT, Ops: ConcatOps);
18807}
18808
18809// If we're concatenating a series of vector loads like
18810// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
18811// Then we can turn this into a strided load by widening the vector elements
18812// vlse32 p, stride=n
18813static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
18814 const RISCVSubtarget &Subtarget,
18815 const RISCVTargetLowering &TLI) {
18816 SDLoc DL(N);
18817 EVT VT = N->getValueType(ResNo: 0);
18818
18819 // Only perform this combine on legal MVTs.
18820 if (!TLI.isTypeLegal(VT))
18821 return SDValue();
18822
18823 // TODO: Potentially extend this to scalable vectors
18824 if (VT.isScalableVector())
18825 return SDValue();
18826
18827 auto *BaseLd = dyn_cast<LoadSDNode>(Val: N->getOperand(Num: 0));
18828 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(N: BaseLd) ||
18829 !SDValue(BaseLd, 0).hasOneUse())
18830 return SDValue();
18831
18832 EVT BaseLdVT = BaseLd->getValueType(ResNo: 0);
18833
18834 // Go through the loads and check that they're strided
18835 SmallVector<LoadSDNode *> Lds;
18836 Lds.push_back(Elt: BaseLd);
18837 Align Align = BaseLd->getAlign();
18838 for (SDValue Op : N->ops().drop_front()) {
18839 auto *Ld = dyn_cast<LoadSDNode>(Val&: Op);
18840 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
18841 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(N: Ld) ||
18842 Ld->getValueType(ResNo: 0) != BaseLdVT)
18843 return SDValue();
18844
18845 Lds.push_back(Elt: Ld);
18846
18847 // The common alignment is the most restrictive (smallest) of all the loads
18848 Align = std::min(a: Align, b: Ld->getAlign());
18849 }
18850
18851 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
18852 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
18853 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
18854 // If the load ptrs can be decomposed into a common (Base + Index) with a
18855 // common constant stride, then return the constant stride.
18856 BaseIndexOffset BIO1 = BaseIndexOffset::match(N: Ld1, DAG);
18857 BaseIndexOffset BIO2 = BaseIndexOffset::match(N: Ld2, DAG);
18858 if (BIO1.equalBaseIndex(Other: BIO2, DAG))
18859 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
18860
18861 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
18862 SDValue P1 = Ld1->getBasePtr();
18863 SDValue P2 = Ld2->getBasePtr();
18864 if (P2.getOpcode() == ISD::ADD && P2.getOperand(i: 0) == P1)
18865 return {{P2.getOperand(i: 1), false}};
18866 if (P1.getOpcode() == ISD::ADD && P1.getOperand(i: 0) == P2)
18867 return {{P1.getOperand(i: 1), true}};
18868
18869 return std::nullopt;
18870 };
18871
18872 // Get the distance between the first and second loads
18873 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
18874 if (!BaseDiff)
18875 return SDValue();
18876
18877 // Check all the loads are the same distance apart
18878 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
18879 if (GetPtrDiff(*It, *std::next(x: It)) != BaseDiff)
18880 return SDValue();
18881
18882 // TODO: At this point, we've successfully matched a generalized gather
18883 // load. Maybe we should emit that, and then move the specialized
18884 // matchers above and below into a DAG combine?
18885
18886 // Get the widened scalar type, e.g. v4i8 -> i64
18887 unsigned WideScalarBitWidth =
18888 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
18889 MVT WideScalarVT = MVT::getIntegerVT(BitWidth: WideScalarBitWidth);
18890
18891 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64
18892 MVT WideVecVT = MVT::getVectorVT(VT: WideScalarVT, NumElements: N->getNumOperands());
18893 if (!TLI.isTypeLegal(VT: WideVecVT))
18894 return SDValue();
18895
18896 // Check that the operation is legal
18897 if (!TLI.isLegalStridedLoadStore(DataType: WideVecVT, Alignment: Align))
18898 return SDValue();
18899
18900 auto [StrideVariant, MustNegateStride] = *BaseDiff;
18901 SDValue Stride =
18902 std::holds_alternative<SDValue>(v: StrideVariant)
18903 ? std::get<SDValue>(v&: StrideVariant)
18904 : DAG.getSignedConstant(Val: std::get<int64_t>(v&: StrideVariant), DL,
18905 VT: Lds[0]->getOffset().getValueType());
18906 if (MustNegateStride)
18907 Stride = DAG.getNegative(Val: Stride, DL, VT: Stride.getValueType());
18908
18909 SDValue AllOneMask =
18910 DAG.getSplat(VT: WideVecVT.changeVectorElementType(EltVT: MVT::i1), DL,
18911 Op: DAG.getConstant(Val: 1, DL, VT: MVT::i1));
18912
18913 uint64_t MemSize;
18914 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Val&: Stride);
18915 ConstStride && ConstStride->getSExtValue() >= 0)
18916 // total size = (elsize * n) + (stride - elsize) * (n-1)
18917 // = elsize + stride * (n-1)
18918 MemSize = WideScalarVT.getSizeInBits() +
18919 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
18920 else
18921 // If Stride isn't constant, then we can't know how much it will load
18922 MemSize = MemoryLocation::UnknownSize;
18923
18924 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
18925 PtrInfo: BaseLd->getPointerInfo(), F: BaseLd->getMemOperand()->getFlags(), Size: MemSize,
18926 BaseAlignment: Align);
18927
18928 SDValue StridedLoad = DAG.getStridedLoadVP(
18929 VT: WideVecVT, DL, Chain: BaseLd->getChain(), Ptr: BaseLd->getBasePtr(), Stride,
18930 Mask: AllOneMask,
18931 EVL: DAG.getConstant(Val: N->getNumOperands(), DL, VT: Subtarget.getXLenVT()), MMO);
18932
18933 for (SDValue Ld : N->ops())
18934 DAG.makeEquivalentMemoryOrdering(OldLoad: cast<LoadSDNode>(Val&: Ld), NewMemOp: StridedLoad);
18935
18936 return DAG.getBitcast(VT: VT.getSimpleVT(), V: StridedLoad);
18937}
18938
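/// Shuffle combines: recognize a select-like shuffle of (add a, b) and
/// (sub a, b) and rewrite it as an add of a conditionally negated operand,
/// and custom legalize shuffles whose element type is wider than ELEN by
/// bitcasting to twice as many half-width elements.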
18939static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG,
18940 const RISCVSubtarget &Subtarget,
18941 const RISCVTargetLowering &TLI) {
18942 SDLoc DL(N);
18943 EVT VT = N->getValueType(ResNo: 0);
18944 const unsigned ElementSize = VT.getScalarSizeInBits();
18945 const unsigned NumElts = VT.getVectorNumElements();
18946 SDValue V1 = N->getOperand(Num: 0);
18947 SDValue V2 = N->getOperand(Num: 1);
18948 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Val: N)->getMask();
18949 MVT XLenVT = Subtarget.getXLenVT();
18950
18951 // Recognize a disguised select of add/sub.
18952 bool SwapCC;
18953 if (ShuffleVectorInst::isSelectMask(Mask, NumSrcElts: NumElts) &&
18954 matchSelectAddSub(TrueVal: V1, FalseVal: V2, SwapCC)) {
18955 SDValue Sub = SwapCC ? V1 : V2;
18956 SDValue A = Sub.getOperand(i: 0);
18957 SDValue B = Sub.getOperand(i: 1);
18958
18959 SmallVector<SDValue> MaskVals;
18960 for (int MaskIndex : Mask) {
18961 bool SelectMaskVal = (MaskIndex < (int)NumElts);
18962 MaskVals.push_back(Elt: DAG.getConstant(Val: SelectMaskVal, DL, VT: XLenVT));
18963 }
18964 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
18965 EVT MaskVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: MVT::i1, NumElements: NumElts);
18966 SDValue CC = DAG.getBuildVector(VT: MaskVT, DL, Ops: MaskVals);
18967
18968 // Arrange the select such that we can match a masked
18969 // vrsub.vi to perform the conditional negate
18970 SDValue NegB = DAG.getNegative(Val: B, DL, VT);
18971 if (!SwapCC)
18972 CC = DAG.getLogicalNOT(DL, Val: CC, VT: CC->getValueType(ResNo: 0));
18973 SDValue NewB = DAG.getNode(Opcode: ISD::VSELECT, DL, VT, N1: CC, N2: NegB, N3: B);
18974 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: A, N2: NewB);
18975 }
18976
18977 // Custom legalize <N x i128> or <N x i256> to <M x ELEN>. This runs
18978 // during the combine phase before type legalization, and relies on
18979 // DAGCombine not undoing the transform if isShuffleMaskLegal returns false
18980 // for the source mask.
18981 if (TLI.isTypeLegal(VT) || ElementSize <= Subtarget.getELen() ||
18982 !isPowerOf2_64(Value: ElementSize) || VT.getVectorNumElements() % 2 != 0 ||
18983 VT.isFloatingPoint() || TLI.isShuffleMaskLegal(M: Mask, VT))
18984 return SDValue();
18985
18986 SmallVector<int, 8> NewMask;
18987 narrowShuffleMaskElts(Scale: 2, Mask, ScaledMask&: NewMask);
18988
18989 LLVMContext &C = *DAG.getContext();
18990 EVT NewEltVT = EVT::getIntegerVT(Context&: C, BitWidth: ElementSize / 2);
18991 EVT NewVT = EVT::getVectorVT(Context&: C, VT: NewEltVT, NumElements: VT.getVectorNumElements() * 2);
18992 SDValue Res = DAG.getVectorShuffle(VT: NewVT, dl: DL, N1: DAG.getBitcast(VT: NewVT, V: V1),
18993 N2: DAG.getBitcast(VT: NewVT, V: V2), Mask: NewMask);
18994 return DAG.getBitcast(VT, V: Res);
18995}
18996
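/// Fold (add (vwmul[u|su] x, y), z), or the ADD_VL equivalent, into a
/// widening multiply-accumulate node, provided the passthru operands are
/// undef and the mask and VL of the add and the multiply agree.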
18997static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
18998 const RISCVSubtarget &Subtarget) {
18999 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
19000
19001 if (N->getValueType(ResNo: 0).isFixedLengthVector())
19002 return SDValue();
19003
19004 SDValue Addend = N->getOperand(Num: 0);
19005 SDValue MulOp = N->getOperand(Num: 1);
19006
19007 if (N->getOpcode() == RISCVISD::ADD_VL) {
19008 SDValue AddPassthruOp = N->getOperand(Num: 2);
19009 if (!AddPassthruOp.isUndef())
19010 return SDValue();
19011 }
19012
19013 auto IsVWMulOpc = [](unsigned Opc) {
19014 switch (Opc) {
19015 case RISCVISD::VWMUL_VL:
19016 case RISCVISD::VWMULU_VL:
19017 case RISCVISD::VWMULSU_VL:
19018 return true;
19019 default:
19020 return false;
19021 }
19022 };
19023
19024 if (!IsVWMulOpc(MulOp.getOpcode()))
19025 std::swap(a&: Addend, b&: MulOp);
19026
19027 if (!IsVWMulOpc(MulOp.getOpcode()))
19028 return SDValue();
19029
19030 SDValue MulPassthruOp = MulOp.getOperand(i: 2);
19031
19032 if (!MulPassthruOp.isUndef())
19033 return SDValue();
19034
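 // For a plain ISD::ADD, use the default mask and VL for the scalable type;
 // for ADD_VL, reuse its explicit mask and VL operands.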
19035 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
19036 const RISCVSubtarget &Subtarget) {
19037 if (N->getOpcode() == ISD::ADD) {
19038 SDLoc DL(N);
19039 return getDefaultScalableVLOps(VecVT: N->getSimpleValueType(ResNo: 0), DL, DAG,
19040 Subtarget);
19041 }
19042 return std::make_pair(x: N->getOperand(Num: 3), y: N->getOperand(Num: 4));
19043 }(N, DAG, Subtarget);
19044
19045 SDValue MulMask = MulOp.getOperand(i: 3);
19046 SDValue MulVL = MulOp.getOperand(i: 4);
19047
19048 if (AddMask != MulMask || AddVL != MulVL)
19049 return SDValue();
19050
19051 const auto &TSInfo =
19052 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
19053 unsigned Opc = TSInfo.getMAccOpcode(MulOpcode: MulOp.getOpcode());
19054
19055 SDLoc DL(N);
19056 EVT VT = N->getValueType(ResNo: 0);
19057 SDValue Ops[] = {MulOp.getOperand(i: 0), MulOp.getOperand(i: 1), Addend, AddMask,
19058 AddVL};
19059 return DAG.getNode(Opcode: Opc, DL, VT, Ops);
19060}
19061
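/// Fold (add (vqdot[u|su] a, b, accum), z), or the ADD_VL equivalent, by
/// summing the addend into the dot product's accumulator operand. Requires
/// an all-ones add mask and matching VLs.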
19062static SDValue combineVqdotAccum(SDNode *N, SelectionDAG &DAG,
19063 const RISCVSubtarget &Subtarget) {
19064
19065 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
19066
19067 if (!N->getValueType(ResNo: 0).isVector())
19068 return SDValue();
19069
19070 SDValue Addend = N->getOperand(Num: 0);
19071 SDValue DotOp = N->getOperand(Num: 1);
19072
19073 if (N->getOpcode() == RISCVISD::ADD_VL) {
19074 SDValue AddPassthruOp = N->getOperand(Num: 2);
19075 if (!AddPassthruOp.isUndef())
19076 return SDValue();
19077 }
19078
19079 auto IsVqdotqOpc = [](unsigned Opc) {
19080 switch (Opc) {
19081 case RISCVISD::VQDOT_VL:
19082 case RISCVISD::VQDOTU_VL:
19083 case RISCVISD::VQDOTSU_VL:
19084 return true;
19085 default:
19086 return false;
19087 }
19088 };
19089
19090 if (!IsVqdotqOpc(DotOp.getOpcode()))
19091 std::swap(a&: Addend, b&: DotOp);
19092
19093 if (!IsVqdotqOpc(DotOp.getOpcode()))
19094 return SDValue();
19095
19096 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
19097 const RISCVSubtarget &Subtarget) {
19098 if (N->getOpcode() == ISD::ADD) {
19099 SDLoc DL(N);
19100 return getDefaultScalableVLOps(VecVT: N->getSimpleValueType(ResNo: 0), DL, DAG,
19101 Subtarget);
19102 }
19103 return std::make_pair(x: N->getOperand(Num: 3), y: N->getOperand(Num: 4));
19104 }(N, DAG, Subtarget);
19105
19106 SDValue MulVL = DotOp.getOperand(i: 4);
19107 if (AddVL != MulVL)
19108 return SDValue();
19109
19110 if (AddMask.getOpcode() != RISCVISD::VMSET_VL ||
19111 AddMask.getOperand(i: 0) != MulVL)
19112 return SDValue();
19113
19114 SDValue AccumOp = DotOp.getOperand(i: 2);
19115 SDLoc DL(N);
19116 EVT VT = N->getValueType(ResNo: 0);
19117 Addend = DAG.getNode(Opcode: RISCVISD::ADD_VL, DL, VT, N1: Addend, N2: AccumOp,
19118 N3: DAG.getUNDEF(VT), N4: AddMask, N5: AddVL);
19119
19120 SDValue Ops[] = {DotOp.getOperand(i: 0), DotOp.getOperand(i: 1), Addend,
19121 DotOp.getOperand(i: 3), DotOp->getOperand(Num: 4)};
19122 return DAG.getNode(Opcode: DotOp->getOpcode(), DL, VT, Ops);
19123}
19124
19125static bool
19126legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
19127 ISD::MemIndexType &IndexType,
19128 RISCVTargetLowering::DAGCombinerInfo &DCI) {
19129 if (!DCI.isBeforeLegalize())
19130 return false;
19131
19132 SelectionDAG &DAG = DCI.DAG;
19133 const MVT XLenVT =
19134 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
19135
19136 const EVT IndexVT = Index.getValueType();
19137
19138 // RISC-V indexed loads/stores only support the "unsigned unscaled"
19139 // addressing mode, so anything else must be manually legalized.
19140 if (!isIndexTypeSigned(IndexType))
19141 return false;
19142
19143 if (IndexVT.getVectorElementType().bitsLT(VT: XLenVT)) {
19144 // Any index legalization should first promote to XLenVT, so we don't lose
19145 // bits when scaling. This may create an illegal index type so we let
19146 // LLVM's legalization take care of the splitting.
19147 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
19148 Index = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL,
19149 VT: IndexVT.changeVectorElementType(EltVT: XLenVT), Operand: Index);
19150 }
19151 IndexType = ISD::UNSIGNED_SCALED;
19152 return true;
19153}
19154
19155/// Match the index vector of a scatter or gather node as the shuffle mask
19156/// which performs the rearrangement if possible. Will only match if
19157/// all lanes are touched, and thus replacing the scatter or gather with
19158/// a unit strided access and shuffle is legal.
19159static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
19160 SmallVector<int> &ShuffleMask) {
19161 if (!ISD::isConstantSplatVectorAllOnes(N: Mask.getNode()))
19162 return false;
19163 if (!ISD::isBuildVectorOfConstantSDNodes(N: Index.getNode()))
19164 return false;
19165
19166 const unsigned ElementSize = VT.getScalarStoreSize();
19167 const unsigned NumElems = VT.getVectorNumElements();
19168
19169 // Create the shuffle mask and check all bits active
19170 assert(ShuffleMask.empty());
19171 BitVector ActiveLanes(NumElems);
19172 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
19173 // TODO: We've found an active bit of UB, and could be
19174 // more aggressive here if desired.
19175 if (Index->getOperand(Num: i)->isUndef())
19176 return false;
19177 uint64_t C = Index->getConstantOperandVal(Num: i);
19178 if (C % ElementSize != 0)
19179 return false;
19180 C = C / ElementSize;
19181 if (C >= NumElems)
19182 return false;
19183 ShuffleMask.push_back(Elt: C);
19184 ActiveLanes.set(C);
19185 }
19186 return ActiveLanes.all();
19187}
19188
19189/// Match the index of a gather or scatter operation as an operation
19190/// with twice the element width and half the number of elements. This is
19191/// generally profitable (if legal) because these operations are linear
19192 /// in VL, so even if we cause some extra VTYPE/VL toggles, we still
19193/// come out ahead.
19194static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
19195 Align BaseAlign, const RISCVSubtarget &ST) {
19196 if (!ISD::isConstantSplatVectorAllOnes(N: Mask.getNode()))
19197 return false;
19198 if (!ISD::isBuildVectorOfConstantSDNodes(N: Index.getNode()))
19199 return false;
19200
19201 // Attempt a doubling. If we can use an element type 4x or 8x in
19202 // size, this will happen via multiple iterations of the transform.
19203 const unsigned NumElems = VT.getVectorNumElements();
19204 if (NumElems % 2 != 0)
19205 return false;
19206
19207 const unsigned ElementSize = VT.getScalarStoreSize();
19208 const unsigned WiderElementSize = ElementSize * 2;
19209 if (WiderElementSize > ST.getELen()/8)
19210 return false;
19211
19212 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
19213 return false;
19214
19215 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
19216 // TODO: We've found an active bit of UB, and could be
19217 // more aggressive here if desired.
19218 if (Index->getOperand(Num: i)->isUndef())
19219 return false;
19220 // TODO: This offset check is too strict if we support fully
19221 // misaligned memory operations.
19222 uint64_t C = Index->getConstantOperandVal(Num: i);
19223 if (i % 2 == 0) {
19224 if (C % WiderElementSize != 0)
19225 return false;
19226 continue;
19227 }
19228 uint64_t Last = Index->getConstantOperandVal(Num: i-1);
19229 if (C != Last + ElementSize)
19230 return false;
19231 }
19232 return true;
19233}
19234
19235// trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
19236 // This benefits the cases where X and Y are both low-precision vectors of
19237 // the same value type. Since the truncate would be lowered into n levels of
19238 // TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate restriction, such
19239 // a pattern would otherwise be expanded into a series of "vsetvli" and
19240 // "vnsrl" instructions later on.
19241static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG) {
19242 SDValue Mask = N->getOperand(Num: 1);
19243 SDValue VL = N->getOperand(Num: 2);
19244
19245 bool IsVLMAX = isAllOnesConstant(V: VL) ||
19246 (isa<RegisterSDNode>(Val: VL) &&
19247 cast<RegisterSDNode>(Val&: VL)->getReg() == RISCV::X0);
19248 if (!IsVLMAX || Mask.getOpcode() != RISCVISD::VMSET_VL ||
19249 Mask.getOperand(i: 0) != VL)
19250 return SDValue();
19251
19252 auto IsTruncNode = [&](SDValue V) {
19253 return V.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
19254 V.getOperand(i: 1) == Mask && V.getOperand(i: 2) == VL;
19255 };
19256
19257 SDValue Op = N->getOperand(Num: 0);
19258
19259 // We need to first find the innermost TRUNCATE_VECTOR_VL node
19260 // to distinguish such a pattern.
19261 while (IsTruncNode(Op)) {
19262 if (!Op.hasOneUse())
19263 return SDValue();
19264 Op = Op.getOperand(i: 0);
19265 }
19266
19267 if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse())
19268 return SDValue();
19269
19270 SDValue N0 = Op.getOperand(i: 0);
19271 SDValue N1 = Op.getOperand(i: 1);
19272 if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() ||
19273 N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse())
19274 return SDValue();
19275
19276 SDValue N00 = N0.getOperand(i: 0);
19277 SDValue N10 = N1.getOperand(i: 0);
19278 if (!N00.getValueType().isVector() ||
19279 N00.getValueType() != N10.getValueType() ||
19280 N->getValueType(ResNo: 0) != N10.getValueType())
19281 return SDValue();
19282
19283 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
19284 SDValue SMin =
19285 DAG.getNode(Opcode: ISD::SMIN, DL: SDLoc(N1), VT: N->getValueType(ResNo: 0), N1: N10,
19286 N2: DAG.getConstant(Val: MaxShAmt, DL: SDLoc(N1), VT: N->getValueType(ResNo: 0)));
19287 return DAG.getNode(Opcode: ISD::SRA, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), N1: N00, N2: SMin);
19288}
19289
19290// Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the
19291// maximum value for the truncated type.
19292// Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1
19293// is the signed maximum value for the truncated type and C2 is the signed
19294// minimum value.
19295static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
19296 const RISCVSubtarget &Subtarget) {
19297 assert(N->getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL);
19298
19299 MVT VT = N->getSimpleValueType(ResNo: 0);
19300
19301 SDValue Mask = N->getOperand(Num: 1);
19302 SDValue VL = N->getOperand(Num: 2);
19303
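 // Helper to match V against either the generic min/max opcode Opc or its VL
 // form OpcVL (with undef passthru and matching mask/VL). On success the
 // splatted constant operand is returned in SplatVal and the other operand is
 // the return value.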
19304 auto MatchMinMax = [&VL, &Mask](SDValue V, unsigned Opc, unsigned OpcVL,
19305 APInt &SplatVal) {
19306 if (V.getOpcode() != Opc &&
19307 !(V.getOpcode() == OpcVL && V.getOperand(i: 2).isUndef() &&
19308 V.getOperand(i: 3) == Mask && V.getOperand(i: 4) == VL))
19309 return SDValue();
19310
19311 SDValue Op = V.getOperand(i: 1);
19312
19313 // Peek through conversion between fixed and scalable vectors.
19314 if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(i: 0).isUndef() &&
19315 isNullConstant(V: Op.getOperand(i: 2)) &&
19316 Op.getOperand(i: 1).getValueType().isFixedLengthVector() &&
19317 Op.getOperand(i: 1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
19318 Op.getOperand(i: 1).getOperand(i: 0).getValueType() == Op.getValueType() &&
19319 isNullConstant(V: Op.getOperand(i: 1).getOperand(i: 1)))
19320 Op = Op.getOperand(i: 1).getOperand(i: 0);
19321
19322 if (ISD::isConstantSplatVector(N: Op.getNode(), SplatValue&: SplatVal))
19323 return V.getOperand(i: 0);
19324
19325 if (Op.getOpcode() == RISCVISD::VMV_V_X_VL && Op.getOperand(i: 0).isUndef() &&
19326 Op.getOperand(i: 2) == VL) {
19327 if (auto *Op1 = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 1))) {
19328 SplatVal =
19329 Op1->getAPIntValue().sextOrTrunc(width: Op.getScalarValueSizeInBits());
19330 return V.getOperand(i: 0);
19331 }
19332 }
19333
19334 return SDValue();
19335 };
19336
19337 SDLoc DL(N);
19338
19339 auto DetectUSatPattern = [&](SDValue V) {
19340 APInt LoC, HiC;
19341
19342 // Simple case, V is a UMIN.
19343 if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC))
19344 if (HiC.isMask(numBits: VT.getScalarSizeInBits()))
19345 return UMinOp;
19346
19347 // If we have an SMAX that removes negative numbers first, then we can match
19348 // SMIN instead of UMIN.
19349 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
19350 if (SDValue SMaxOp =
19351 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
19352 if (LoC.isNonNegative() && HiC.isMask(numBits: VT.getScalarSizeInBits()))
19353 return SMinOp;
19354
19355 // If we have an SMIN before an SMAX and the SMAX constant is less than or
19356 // equal to the SMIN constant, we can use vnclipu if we insert a new SMAX
19357 // first.
19358 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
19359 if (SDValue SMinOp =
19360 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
19361 if (LoC.isNonNegative() && HiC.isMask(numBits: VT.getScalarSizeInBits()) &&
19362 HiC.uge(RHS: LoC))
19363 return DAG.getNode(Opcode: RISCVISD::SMAX_VL, DL, VT: V.getValueType(), N1: SMinOp,
19364 N2: V.getOperand(i: 1), N3: DAG.getUNDEF(VT: V.getValueType()),
19365 N4: Mask, N5: VL);
19366
19367 return SDValue();
19368 };
19369
19370 auto DetectSSatPattern = [&](SDValue V) {
19371 unsigned NumDstBits = VT.getScalarSizeInBits();
19372 unsigned NumSrcBits = V.getScalarValueSizeInBits();
19373 APInt SignedMax = APInt::getSignedMaxValue(numBits: NumDstBits).sext(width: NumSrcBits);
19374 APInt SignedMin = APInt::getSignedMinValue(numBits: NumDstBits).sext(width: NumSrcBits);
19375
19376 APInt HiC, LoC;
19377 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
19378 if (SDValue SMaxOp =
19379 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
19380 if (HiC == SignedMax && LoC == SignedMin)
19381 return SMaxOp;
19382
19383 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
19384 if (SDValue SMinOp =
19385 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
19386 if (HiC == SignedMax && LoC == SignedMin)
19387 return SMinOp;
19388
19389 return SDValue();
19390 };
19391
19392 SDValue Src = N->getOperand(Num: 0);
19393
19394 // Look through multiple layers of truncates.
19395 while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
19396 Src.getOperand(i: 1) == Mask && Src.getOperand(i: 2) == VL &&
19397 Src.hasOneUse())
19398 Src = Src.getOperand(i: 0);
19399
19400 SDValue Val;
19401 unsigned ClipOpc;
19402 if ((Val = DetectUSatPattern(Src)))
19403 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
19404 else if ((Val = DetectSSatPattern(Src)))
19405 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
19406 else
19407 return SDValue();
19408
19409 MVT ValVT = Val.getSimpleValueType();
19410
19411 do {
19412 MVT ValEltVT = MVT::getIntegerVT(BitWidth: ValVT.getScalarSizeInBits() / 2);
19413 ValVT = ValVT.changeVectorElementType(EltVT: ValEltVT);
19414 Val = DAG.getNode(Opcode: ClipOpc, DL, VT: ValVT, N1: Val, N2: Mask, N3: VL);
19415 } while (ValVT != VT);
19416
19417 return Val;
19418}
19419
19420// Convert
19421// (iX ctpop (bitcast (vXi1 A)))
19422// ->
19423// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
19424// and
19425// (iN reduce.add (zext (vXi1 A to vXiN))
19426// ->
19427// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
19428// FIXME: It's complicated to match all the variations of this after type
19429// legalization so we only handle the pre-type legalization pattern, but that
19430// requires the fixed vector type to be legal.
19431static SDValue combineToVCPOP(SDNode *N, SelectionDAG &DAG,
19432 const RISCVSubtarget &Subtarget) {
19433 unsigned Opc = N->getOpcode();
19434 assert((Opc == ISD::CTPOP || Opc == ISD::VECREDUCE_ADD) &&
19435 "Unexpected opcode");
19436 EVT VT = N->getValueType(ResNo: 0);
19437 if (!VT.isScalarInteger())
19438 return SDValue();
19439
19440 SDValue Src = N->getOperand(Num: 0);
19441
19442 if (Opc == ISD::CTPOP) {
19443 // Peek through zero_extend. It doesn't change the count.
19444 if (Src.getOpcode() == ISD::ZERO_EXTEND)
19445 Src = Src.getOperand(i: 0);
19446
19447 if (Src.getOpcode() != ISD::BITCAST)
19448 return SDValue();
19449 Src = Src.getOperand(i: 0);
19450 } else if (Opc == ISD::VECREDUCE_ADD) {
19451 if (Src.getOpcode() != ISD::ZERO_EXTEND)
19452 return SDValue();
19453 Src = Src.getOperand(i: 0);
19454 }
19455
19456 EVT SrcEVT = Src.getValueType();
19457 if (!SrcEVT.isSimple())
19458 return SDValue();
19459
19460 MVT SrcMVT = SrcEVT.getSimpleVT();
19461 // Make sure the input is an i1 vector.
19462 if (!SrcMVT.isVector() || SrcMVT.getVectorElementType() != MVT::i1)
19463 return SDValue();
19464
19465 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19466 if (!TLI.isTypeLegal(VT: SrcMVT))
19467 return SDValue();
19468
19469 // Check that destination type is large enough to hold result without
19470 // overflow.
19471 if (Opc == ISD::VECREDUCE_ADD) {
19472 unsigned EltSize = SrcMVT.getScalarSizeInBits();
19473 unsigned MinSize = SrcMVT.getSizeInBits().getKnownMinValue();
19474 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
19475 unsigned MaxVLMAX = SrcMVT.isFixedLengthVector()
19476 ? SrcMVT.getVectorNumElements()
19477 : RISCVTargetLowering::computeVLMAX(
19478 VectorBits: VectorBitsMax, EltSize, MinSize);
19479 if (VT.getFixedSizeInBits() < Log2_32(Value: MaxVLMAX) + 1)
19480 return SDValue();
19481 }
19482
19483 MVT ContainerVT = SrcMVT;
19484 if (SrcMVT.isFixedLengthVector()) {
19485 ContainerVT = getContainerForFixedLengthVector(DAG, VT: SrcMVT, Subtarget);
19486 Src = convertToScalableVector(VT: ContainerVT, V: Src, DAG, Subtarget);
19487 }
19488
19489 SDLoc DL(N);
19490 auto [Mask, VL] = getDefaultVLOps(VecVT: SrcMVT, ContainerVT, DL, DAG, Subtarget);
19491
19492 MVT XLenVT = Subtarget.getXLenVT();
19493 SDValue Pop = DAG.getNode(Opcode: RISCVISD::VCPOP_VL, DL, VT: XLenVT, N1: Src, N2: Mask, N3: VL);
19494 return DAG.getZExtOrTrunc(Op: Pop, DL, VT);
19495}
19496
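/// Combine vector shifts of extended values into widening operations: first
/// try the generic VL widening combine (e.g. forming vwsll), then turn a
/// constant shift (by more than 1) of a half-width sign/zero-extended source
/// into a widening multiply by 1 << shift amount.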
19497static SDValue performSHLCombine(SDNode *N,
19498 TargetLowering::DAGCombinerInfo &DCI,
19499 const RISCVSubtarget &Subtarget) {
19500 // (shl (zext x), y) -> (vwsll x, y)
19501 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
19502 return V;
19503
19504 // (shl (sext x), C) -> (vwmulsu x, 1u << C)
19505 // (shl (zext x), C) -> (vwmulu x, 1u << C)
19506
19507 if (!DCI.isAfterLegalizeDAG())
19508 return SDValue();
19509
19510 SDValue LHS = N->getOperand(Num: 0);
19511 if (!LHS.hasOneUse())
19512 return SDValue();
19513 unsigned Opcode;
19514 switch (LHS.getOpcode()) {
19515 case ISD::SIGN_EXTEND:
19516 case RISCVISD::VSEXT_VL:
19517 Opcode = RISCVISD::VWMULSU_VL;
19518 break;
19519 case ISD::ZERO_EXTEND:
19520 case RISCVISD::VZEXT_VL:
19521 Opcode = RISCVISD::VWMULU_VL;
19522 break;
19523 default:
19524 return SDValue();
19525 }
19526
19527 SDValue RHS = N->getOperand(Num: 1);
19528 APInt ShAmt;
19529 uint64_t ShAmtInt;
19530 if (ISD::isConstantSplatVector(N: RHS.getNode(), SplatValue&: ShAmt))
19531 ShAmtInt = ShAmt.getZExtValue();
19532 else if (RHS.getOpcode() == RISCVISD::VMV_V_X_VL &&
19533 RHS.getOperand(i: 1).getOpcode() == ISD::Constant)
19534 ShAmtInt = RHS.getConstantOperandVal(i: 1);
19535 else
19536 return SDValue();
19537
19538 // Better foldings:
19539 // (shl (sext x), 1) -> (vwadd x, x)
19540 // (shl (zext x), 1) -> (vwaddu x, x)
19541 if (ShAmtInt <= 1)
19542 return SDValue();
19543
19544 SDValue NarrowOp = LHS.getOperand(i: 0);
19545 MVT NarrowVT = NarrowOp.getSimpleValueType();
19546 uint64_t NarrowBits = NarrowVT.getScalarSizeInBits();
19547 if (ShAmtInt >= NarrowBits)
19548 return SDValue();
19549 MVT VT = N->getSimpleValueType(ResNo: 0);
19550 if (NarrowBits * 2 != VT.getScalarSizeInBits())
19551 return SDValue();
19552
19553 SelectionDAG &DAG = DCI.DAG;
19554 SDLoc DL(N);
19555 SDValue Passthru, Mask, VL;
19556 switch (N->getOpcode()) {
19557 case ISD::SHL:
19558 Passthru = DAG.getUNDEF(VT);
19559 std::tie(args&: Mask, args&: VL) = getDefaultScalableVLOps(VecVT: VT, DL, DAG, Subtarget);
19560 break;
19561 case RISCVISD::SHL_VL:
19562 Passthru = N->getOperand(Num: 2);
19563 Mask = N->getOperand(Num: 3);
19564 VL = N->getOperand(Num: 4);
19565 break;
19566 default:
19567 llvm_unreachable("Expected SHL");
19568 }
19569 return DAG.getNode(Opcode, DL, VT, N1: NarrowOp,
19570 N2: DAG.getConstant(Val: 1ULL << ShAmtInt, DL: SDLoc(RHS), VT: NarrowVT),
19571 N3: Passthru, N4: Mask, N5: VL);
19572}
19573
19574SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
19575 DAGCombinerInfo &DCI) const {
19576 SelectionDAG &DAG = DCI.DAG;
19577 const MVT XLenVT = Subtarget.getXLenVT();
19578 SDLoc DL(N);
19579
19580 // Helper to call SimplifyDemandedBits on an operand of N where only some low
19581 // bits are demanded. N will be added to the Worklist if it was not deleted.
19582 // Caller should return SDValue(N, 0) if this returns true.
19583 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
19584 SDValue Op = N->getOperand(Num: OpNo);
19585 APInt Mask = APInt::getLowBitsSet(numBits: Op.getValueSizeInBits(), loBitsSet: LowBits);
19586 if (!SimplifyDemandedBits(Op, DemandedBits: Mask, DCI))
19587 return false;
19588
19589 if (N->getOpcode() != ISD::DELETED_NODE)
19590 DCI.AddToWorklist(N);
19591 return true;
19592 };
19593
19594 switch (N->getOpcode()) {
19595 default:
19596 break;
19597 case RISCVISD::SplitF64: {
19598 SDValue Op0 = N->getOperand(Num: 0);
19599 // If the input to SplitF64 is just BuildPairF64 then the operation is
19600 // redundant. Instead, use BuildPairF64's operands directly.
19601 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
19602 return DCI.CombineTo(N, Res0: Op0.getOperand(i: 0), Res1: Op0.getOperand(i: 1));
19603
19604 if (Op0->isUndef()) {
19605 SDValue Lo = DAG.getUNDEF(VT: MVT::i32);
19606 SDValue Hi = DAG.getUNDEF(VT: MVT::i32);
19607 return DCI.CombineTo(N, Res0: Lo, Res1: Hi);
19608 }
19609
19610 // It's cheaper to materialise two 32-bit integers than to load a double
19611 // from the constant pool and transfer it to integer registers through the
19612 // stack.
19613 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val&: Op0)) {
19614 APInt V = C->getValueAPF().bitcastToAPInt();
19615 SDValue Lo = DAG.getConstant(Val: V.trunc(width: 32), DL, VT: MVT::i32);
19616 SDValue Hi = DAG.getConstant(Val: V.lshr(shiftAmt: 32).trunc(width: 32), DL, VT: MVT::i32);
19617 return DCI.CombineTo(N, Res0: Lo, Res1: Hi);
19618 }
19619
19620 // This is a target-specific version of a DAGCombine performed in
19621 // DAGCombiner::visitBITCAST. It performs the equivalent of:
19622 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
19623 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
19624 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
19625 !Op0.getNode()->hasOneUse() || Subtarget.hasStdExtZdinx())
19626 break;
19627 SDValue NewSplitF64 =
19628 DAG.getNode(Opcode: RISCVISD::SplitF64, DL, VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32),
19629 N: Op0.getOperand(i: 0));
19630 SDValue Lo = NewSplitF64.getValue(R: 0);
19631 SDValue Hi = NewSplitF64.getValue(R: 1);
19632 APInt SignBit = APInt::getSignMask(BitWidth: 32);
19633 if (Op0.getOpcode() == ISD::FNEG) {
19634 SDValue NewHi = DAG.getNode(Opcode: ISD::XOR, DL, VT: MVT::i32, N1: Hi,
19635 N2: DAG.getConstant(Val: SignBit, DL, VT: MVT::i32));
19636 return DCI.CombineTo(N, Res0: Lo, Res1: NewHi);
19637 }
19638 assert(Op0.getOpcode() == ISD::FABS);
19639 SDValue NewHi = DAG.getNode(Opcode: ISD::AND, DL, VT: MVT::i32, N1: Hi,
19640 N2: DAG.getConstant(Val: ~SignBit, DL, VT: MVT::i32));
19641 return DCI.CombineTo(N, Res0: Lo, Res1: NewHi);
19642 }
19643 case RISCVISD::SLLW:
19644 case RISCVISD::SRAW:
19645 case RISCVISD::SRLW:
19646 case RISCVISD::RORW:
19647 case RISCVISD::ROLW: {
19648 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
19649 if (SimplifyDemandedLowBitsHelper(0, 32) ||
19650 SimplifyDemandedLowBitsHelper(1, 5))
19651 return SDValue(N, 0);
19652
19653 break;
19654 }
19655 case RISCVISD::CLZW:
19656 case RISCVISD::CTZW: {
19657 // Only the lower 32 bits of the first operand are read
19658 if (SimplifyDemandedLowBitsHelper(0, 32))
19659 return SDValue(N, 0);
19660 break;
19661 }
19662 case RISCVISD::FMV_W_X_RV64: {
19663 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
19664 // conversion is unnecessary and can be replaced with the
19665 // FMV_X_ANYEXTW_RV64 operand.
19666 SDValue Op0 = N->getOperand(Num: 0);
19667 if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
19668 return Op0.getOperand(i: 0);
19669 break;
19670 }
19671 case RISCVISD::FMV_X_ANYEXTH:
19672 case RISCVISD::FMV_X_ANYEXTW_RV64: {
19673 SDLoc DL(N);
19674 SDValue Op0 = N->getOperand(Num: 0);
19675 MVT VT = N->getSimpleValueType(ResNo: 0);
19676
19677 // Constant fold.
19678 if (auto *CFP = dyn_cast<ConstantFPSDNode>(Val&: Op0)) {
19679 APInt Val = CFP->getValueAPF().bitcastToAPInt().sext(width: VT.getSizeInBits());
19680 return DAG.getConstant(Val, DL, VT);
19681 }
19682
19683 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
19684 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
19685 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
19686 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
19687 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
19688 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
19689 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
19690 assert(Op0.getOperand(0).getValueType() == VT &&
19691 "Unexpected value type!");
19692 return Op0.getOperand(i: 0);
19693 }
19694
19695 if (ISD::isNormalLoad(N: Op0.getNode()) && Op0.hasOneUse() &&
19696 cast<LoadSDNode>(Val&: Op0)->isSimple()) {
19697 MVT IVT = MVT::getIntegerVT(BitWidth: Op0.getValueSizeInBits());
19698 auto *LN0 = cast<LoadSDNode>(Val&: Op0);
19699 SDValue Load =
19700 DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl: SDLoc(N), VT, Chain: LN0->getChain(),
19701 Ptr: LN0->getBasePtr(), MemVT: IVT, MMO: LN0->getMemOperand());
19702 DAG.ReplaceAllUsesOfValueWith(From: Op0.getValue(R: 1), To: Load.getValue(R: 1));
19703 return Load;
19704 }
19705
19706 // This is a target-specific version of a DAGCombine performed in
19707 // DAGCombiner::visitBITCAST. It performs the equivalent of:
19708 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
19709 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
19710 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
19711 !Op0.getNode()->hasOneUse())
19712 break;
19713 SDValue NewFMV = DAG.getNode(Opcode: N->getOpcode(), DL, VT, Operand: Op0.getOperand(i: 0));
19714 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
19715 APInt SignBit = APInt::getSignMask(BitWidth: FPBits).sext(width: VT.getSizeInBits());
19716 if (Op0.getOpcode() == ISD::FNEG)
19717 return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: NewFMV,
19718 N2: DAG.getConstant(Val: SignBit, DL, VT));
19719
19720 assert(Op0.getOpcode() == ISD::FABS);
19721 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: NewFMV,
19722 N2: DAG.getConstant(Val: ~SignBit, DL, VT));
19723 }
19724 case ISD::ABS: {
19725 EVT VT = N->getValueType(ResNo: 0);
19726 SDValue N0 = N->getOperand(Num: 0);
19727 // abs (sext) -> zext (abs)
19728 // abs (zext) -> zext (handled elsewhere)
19729 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
19730 SDValue Src = N0.getOperand(i: 0);
19731 SDLoc DL(N);
19732 return DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT,
19733 Operand: DAG.getNode(Opcode: ISD::ABS, DL, VT: Src.getValueType(), Operand: Src));
19734 }
19735 break;
19736 }
19737 case ISD::ADD: {
19738 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
19739 return V;
19740 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
19741 return V;
19742 if (SDValue V = combineVqdotAccum(N, DAG, Subtarget))
19743 return V;
19744 return performADDCombine(N, DCI, Subtarget);
19745 }
19746 case ISD::SUB: {
19747 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
19748 return V;
19749 return performSUBCombine(N, DAG, Subtarget);
19750 }
19751 case ISD::AND:
19752 return performANDCombine(N, DCI, Subtarget);
19753 case ISD::OR: {
19754 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
19755 return V;
19756 return performORCombine(N, DCI, Subtarget);
19757 }
19758 case ISD::XOR:
19759 return performXORCombine(N, DAG, Subtarget);
19760 case ISD::MUL:
19761 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
19762 return V;
19763 return performMULCombine(N, DAG, DCI, Subtarget);
19764 case ISD::SDIV:
19765 case ISD::UDIV:
19766 case ISD::SREM:
19767 case ISD::UREM:
19768 if (SDValue V = combineBinOpOfZExt(N, DAG))
19769 return V;
19770 break;
19771 case ISD::FMUL: {
19772 // fmul X, (copysign 1.0, Y) -> fsgnjx X, Y
19773 SDValue N0 = N->getOperand(Num: 0);
19774 SDValue N1 = N->getOperand(Num: 1);
19775 if (N0->getOpcode() != ISD::FCOPYSIGN)
19776 std::swap(a&: N0, b&: N1);
19777 if (N0->getOpcode() != ISD::FCOPYSIGN)
19778 return SDValue();
19779 ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val: N0->getOperand(Num: 0));
19780 if (!C || !C->getValueAPF().isExactlyValue(V: +1.0))
19781 return SDValue();
19782 EVT VT = N->getValueType(ResNo: 0);
19783 if (VT.isVector() || !isOperationLegal(Op: ISD::FCOPYSIGN, VT))
19784 return SDValue();
19785 SDValue Sign = N0->getOperand(Num: 1);
19786 if (Sign.getValueType() != VT)
19787 return SDValue();
19788 return DAG.getNode(Opcode: RISCVISD::FSGNJX, DL: SDLoc(N), VT, N1, N2: N0->getOperand(Num: 1));
19789 }
19790 case ISD::FADD:
19791 case ISD::UMAX:
19792 case ISD::UMIN:
19793 case ISD::SMAX:
19794 case ISD::SMIN:
19795 case ISD::FMAXNUM:
19796 case ISD::FMINNUM: {
19797 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
19798 return V;
19799 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
19800 return V;
19801 return SDValue();
19802 }
19803 case ISD::SETCC:
19804 return performSETCCCombine(N, DAG, Subtarget);
19805 case ISD::SIGN_EXTEND_INREG:
19806 return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
19807 case ISD::ZERO_EXTEND:
19808 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
19809 // type legalization. This is safe because fp_to_uint produces poison if
19810 // it overflows.
19811 if (N->getValueType(ResNo: 0) == MVT::i64 && Subtarget.is64Bit()) {
19812 SDValue Src = N->getOperand(Num: 0);
19813 if (Src.getOpcode() == ISD::FP_TO_UINT &&
19814 isTypeLegal(VT: Src.getOperand(i: 0).getValueType()))
19815 return DAG.getNode(Opcode: ISD::FP_TO_UINT, DL: SDLoc(N), VT: MVT::i64,
19816 Operand: Src.getOperand(i: 0));
19817 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
19818 isTypeLegal(VT: Src.getOperand(i: 1).getValueType())) {
19819 SDVTList VTs = DAG.getVTList(VT1: MVT::i64, VT2: MVT::Other);
19820 SDValue Res = DAG.getNode(Opcode: ISD::STRICT_FP_TO_UINT, DL: SDLoc(N), VTList: VTs,
19821 N1: Src.getOperand(i: 0), N2: Src.getOperand(i: 1));
19822 DCI.CombineTo(N, Res);
19823 DAG.ReplaceAllUsesOfValueWith(From: Src.getValue(R: 1), To: Res.getValue(R: 1));
19824 DCI.recursivelyDeleteUnusedNodes(N: Src.getNode());
19825 return SDValue(N, 0); // Return N so it doesn't get rechecked.
19826 }
19827 }
19828 return SDValue();
19829 case RISCVISD::TRUNCATE_VECTOR_VL:
19830 if (SDValue V = combineTruncOfSraSext(N, DAG))
19831 return V;
19832 return combineTruncToVnclip(N, DAG, Subtarget);
19833 case ISD::VP_TRUNCATE:
19834 return performVP_TRUNCATECombine(N, DAG, Subtarget);
19835 case ISD::TRUNCATE:
19836 return performTRUNCATECombine(N, DAG, Subtarget);
19837 case ISD::SELECT:
19838 return performSELECTCombine(N, DAG, Subtarget);
19839 case ISD::VSELECT:
19840 return performVSELECTCombine(N, DAG);
19841 case RISCVISD::CZERO_EQZ:
19842 case RISCVISD::CZERO_NEZ: {
19843 SDValue Val = N->getOperand(Num: 0);
19844 SDValue Cond = N->getOperand(Num: 1);
19845
19846 unsigned Opc = N->getOpcode();
19847
19848 // czero_eqz x, x -> x
19849 if (Opc == RISCVISD::CZERO_EQZ && Val == Cond)
19850 return Val;
19851
19852 unsigned InvOpc =
19853 Opc == RISCVISD::CZERO_EQZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ;
19854
19855 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
19856 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
19857 if (Cond.getOpcode() == ISD::XOR && isOneConstant(V: Cond.getOperand(i: 1))) {
19858 SDValue NewCond = Cond.getOperand(i: 0);
19859 APInt Mask = APInt::getBitsSetFrom(numBits: NewCond.getValueSizeInBits(), loBit: 1);
19860 if (DAG.MaskedValueIsZero(Op: NewCond, Mask))
19861 return DAG.getNode(Opcode: InvOpc, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), N1: Val, N2: NewCond);
19862 }
19863 // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
19864 // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
19865 // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
19866 // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
19867 if (Cond.getOpcode() == ISD::SETCC && isNullConstant(V: Cond.getOperand(i: 1))) {
19868 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: Cond.getOperand(i: 2))->get();
19869 if (ISD::isIntEqualitySetCC(Code: CCVal))
19870 return DAG.getNode(Opcode: CCVal == ISD::SETNE ? Opc : InvOpc, DL: SDLoc(N),
19871 VT: N->getValueType(ResNo: 0), N1: Val, N2: Cond.getOperand(i: 0));
19872 }
19873 return SDValue();
19874 }
19875 case RISCVISD::SELECT_CC: {
19876 // Transform a SELECT_CC node into simpler forms where possible.
19877 SDValue LHS = N->getOperand(Num: 0);
19878 SDValue RHS = N->getOperand(Num: 1);
19879 SDValue CC = N->getOperand(Num: 2);
19880 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val&: CC)->get();
19881 SDValue TrueV = N->getOperand(Num: 3);
19882 SDValue FalseV = N->getOperand(Num: 4);
19883 SDLoc DL(N);
19884 EVT VT = N->getValueType(ResNo: 0);
19885
19886 // If the True and False values are the same, we don't need a select_cc.
19887 if (TrueV == FalseV)
19888 return TrueV;
19889
19890 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
19891 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
19892 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(Val: TrueV) &&
19893 isa<ConstantSDNode>(Val: FalseV) && isNullConstant(V: RHS) &&
19894 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
19895 if (CCVal == ISD::CondCode::SETGE)
19896 std::swap(a&: TrueV, b&: FalseV);
19897
19898 int64_t TrueSImm = cast<ConstantSDNode>(Val&: TrueV)->getSExtValue();
19899 int64_t FalseSImm = cast<ConstantSDNode>(Val&: FalseV)->getSExtValue();
19900 // Only handle simm12; if the immediate is not in this range, it can be
19901 // considered as a register operand.
19902 if (isInt<12>(x: TrueSImm) && isInt<12>(x: FalseSImm) &&
19903 isInt<12>(x: TrueSImm - FalseSImm)) {
19904 SDValue SRA =
19905 DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: LHS,
19906 N2: DAG.getConstant(Val: Subtarget.getXLen() - 1, DL, VT));
19907 SDValue AND =
19908 DAG.getNode(Opcode: ISD::AND, DL, VT, N1: SRA,
19909 N2: DAG.getSignedConstant(Val: TrueSImm - FalseSImm, DL, VT));
19910 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: AND, N2: FalseV);
19911 }
19912
19913 if (CCVal == ISD::CondCode::SETGE)
19914 std::swap(a&: TrueV, b&: FalseV);
19915 }
19916
19917 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
19918 return DAG.getNode(Opcode: RISCVISD::SELECT_CC, DL, VT: N->getValueType(ResNo: 0),
19919 Ops: {LHS, RHS, CC, TrueV, FalseV});
19920
19921 if (!Subtarget.hasConditionalMoveFusion()) {
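// In the folds below, the setcc result c is 0 or 1, so -c (respectively -!c)
// is either all ones or zero and acts as a select mask.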
19922 // (select c, -1, y) -> -c | y
19923 if (isAllOnesConstant(V: TrueV)) {
19924 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, Cond: CCVal);
19925 SDValue Neg = DAG.getNegative(Val: C, DL, VT);
19926 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Neg, N2: FalseV);
19927 }
19928 // (select c, y, -1) -> -!c | y
19929 if (isAllOnesConstant(V: FalseV)) {
19930 SDValue C =
19931 DAG.getSetCC(DL, VT, LHS, RHS, Cond: ISD::getSetCCInverse(Operation: CCVal, Type: VT));
19932 SDValue Neg = DAG.getNegative(Val: C, DL, VT);
19933 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Neg, N2: TrueV);
19934 }
19935
19936 // (select c, 0, y) -> -!c & y
19937 if (isNullConstant(V: TrueV)) {
19938 SDValue C =
19939 DAG.getSetCC(DL, VT, LHS, RHS, Cond: ISD::getSetCCInverse(Operation: CCVal, Type: VT));
19940 SDValue Neg = DAG.getNegative(Val: C, DL, VT);
19941 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Neg, N2: FalseV);
19942 }
19943 // (select c, y, 0) -> -c & y
19944 if (isNullConstant(V: FalseV)) {
19945 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, Cond: CCVal);
19946 SDValue Neg = DAG.getNegative(Val: C, DL, VT);
19947 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Neg, N2: TrueV);
19948 }
19949 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
19950 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
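// This works because when x != 0 the setcc is 0 and the result is x, and
// when x == 0 the setcc is 1 and the result is 0 + 1 = 1.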
19951 if (((isOneConstant(V: FalseV) && LHS == TrueV &&
19952 CCVal == ISD::CondCode::SETNE) ||
19953 (isOneConstant(V: TrueV) && LHS == FalseV &&
19954 CCVal == ISD::CondCode::SETEQ)) &&
19955 isNullConstant(V: RHS)) {
19956 // LHS is now used twice; freeze it so both uses observe the same value.
19957 LHS = DAG.getFreeze(V: LHS);
19958 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, Cond: ISD::CondCode::SETEQ);
19959 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: LHS, N2: C);
19960 }
19961 }
19962
19963 // If both true/false are an xor with 1, pull through the select.
19964 // This can occur after op legalization if both operands are setccs that
19965 // require an xor to invert.
19966 // FIXME: Generalize to other binary ops with identical operand?
19967 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
19968 TrueV.getOperand(i: 1) == FalseV.getOperand(i: 1) &&
19969 isOneConstant(V: TrueV.getOperand(i: 1)) &&
19970 TrueV.hasOneUse() && FalseV.hasOneUse()) {
19971 SDValue NewSel = DAG.getNode(Opcode: RISCVISD::SELECT_CC, DL, VT, N1: LHS, N2: RHS, N3: CC,
19972 N4: TrueV.getOperand(i: 0), N5: FalseV.getOperand(i: 0));
19973 return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: NewSel, N2: TrueV.getOperand(i: 1));
19974 }
19975
19976 return SDValue();
19977 }
19978 case RISCVISD::BR_CC: {
19979 SDValue LHS = N->getOperand(Num: 1);
19980 SDValue RHS = N->getOperand(Num: 2);
19981 SDValue CC = N->getOperand(Num: 3);
19982 SDLoc DL(N);
19983
19984 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
19985 return DAG.getNode(Opcode: RISCVISD::BR_CC, DL, VT: N->getValueType(ResNo: 0),
19986 N1: N->getOperand(Num: 0), N2: LHS, N3: RHS, N4: CC, N5: N->getOperand(Num: 4));
19987
19988 return SDValue();
19989 }
19990 case ISD::BITREVERSE:
19991 return performBITREVERSECombine(N, DAG, Subtarget);
19992 case ISD::FP_TO_SINT:
19993 case ISD::FP_TO_UINT:
19994 return performFP_TO_INTCombine(N, DCI, Subtarget);
19995 case ISD::FP_TO_SINT_SAT:
19996 case ISD::FP_TO_UINT_SAT:
19997 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
19998 case ISD::FCOPYSIGN: {
19999 EVT VT = N->getValueType(ResNo: 0);
20000 if (!VT.isVector())
20001 break;
20002 // There is a form of VFSGNJ which injects the negated sign of its second
20003 // operand. Try and bubble any FNEG up after the extend/round to produce
20004 // this optimized pattern. Avoid modifying FP_ROUND cases where the
20005 // truncating flag (TRUNC) is 1.
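// For example, (fcopysign X, (fp_extend (fneg Y))) becomes
// (fcopysign X, (fneg (fp_extend Y))), which can then select to vfsgnjn.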
20006 SDValue In2 = N->getOperand(Num: 1);
20007 // Avoid cases where the extend/round has multiple uses, as duplicating
20008 // those is typically more expensive than removing a fneg.
20009 if (!In2.hasOneUse())
20010 break;
20011 if (In2.getOpcode() != ISD::FP_EXTEND &&
20012 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(i: 1) != 0))
20013 break;
20014 In2 = In2.getOperand(i: 0);
20015 if (In2.getOpcode() != ISD::FNEG)
20016 break;
20017 SDLoc DL(N);
20018 SDValue NewFPExtRound = DAG.getFPExtendOrRound(Op: In2.getOperand(i: 0), DL, VT);
20019 return DAG.getNode(Opcode: ISD::FCOPYSIGN, DL, VT, N1: N->getOperand(Num: 0),
20020 N2: DAG.getNode(Opcode: ISD::FNEG, DL, VT, Operand: NewFPExtRound));
20021 }
20022 case ISD::MGATHER: {
20023 const auto *MGN = cast<MaskedGatherSDNode>(Val: N);
20024 const EVT VT = N->getValueType(ResNo: 0);
20025 SDValue Index = MGN->getIndex();
20026 SDValue ScaleOp = MGN->getScale();
20027 ISD::MemIndexType IndexType = MGN->getIndexType();
20028 assert(!MGN->isIndexScaled() &&
20029 "Scaled gather/scatter should not be formed");
20030
20031 SDLoc DL(N);
20032 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20033 return DAG.getMaskedGather(
20034 VTs: N->getVTList(), MemVT: MGN->getMemoryVT(), dl: DL,
20035 Ops: {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
20036 MGN->getBasePtr(), Index, ScaleOp},
20037 MMO: MGN->getMemOperand(), IndexType, ExtTy: MGN->getExtensionType());
20038
20039 if (narrowIndex(N&: Index, IndexType, DAG))
20040 return DAG.getMaskedGather(
20041 VTs: N->getVTList(), MemVT: MGN->getMemoryVT(), dl: DL,
20042 Ops: {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
20043 MGN->getBasePtr(), Index, ScaleOp},
20044 MMO: MGN->getMemOperand(), IndexType, ExtTy: MGN->getExtensionType());
20045
20046 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
20047 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
20048 // The generated sequence will be in XLenVT, not the element type of Index.
20049 // Tell isSimpleVIDSequence this so we avoid overflow.
20050 if (std::optional<VIDSequence> SimpleVID =
20051 isSimpleVIDSequence(Op: Index, EltSizeInBits: Subtarget.getXLen());
20052 SimpleVID && SimpleVID->StepDenominator == 1) {
20053 const int64_t StepNumerator = SimpleVID->StepNumerator;
20054 const int64_t Addend = SimpleVID->Addend;
20055
20056 // Note: We don't need to check alignment here since (by assumption
20057 // from the existence of the gather), our offsets must be sufficiently
20058 // aligned.
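// Here a gather whose index vector is the arithmetic sequence
// Addend, Addend + Step, Addend + 2 * Step, ... is rewritten as a strided
// load from BasePtr + Addend with stride equal to StepNumerator.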
20059
20060 const EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
20061 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
20062 assert(IndexType == ISD::UNSIGNED_SCALED);
20063 SDValue BasePtr = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: MGN->getBasePtr(),
20064 N2: DAG.getSignedConstant(Val: Addend, DL, VT: PtrVT));
20065
20066 SDValue EVL = DAG.getElementCount(DL, VT: Subtarget.getXLenVT(),
20067 EC: VT.getVectorElementCount());
20068 SDValue StridedLoad = DAG.getStridedLoadVP(
20069 VT, DL, Chain: MGN->getChain(), Ptr: BasePtr,
20070 Stride: DAG.getSignedConstant(Val: StepNumerator, DL, VT: XLenVT), Mask: MGN->getMask(),
20071 EVL, MMO: MGN->getMemOperand());
20072 SDValue VPSelect = DAG.getNode(Opcode: ISD::VP_SELECT, DL, VT, N1: MGN->getMask(),
20073 N2: StridedLoad, N3: MGN->getPassThru(), N4: EVL);
20074 return DAG.getMergeValues(Ops: {VPSelect, SDValue(StridedLoad.getNode(), 1)},
20075 dl: DL);
20076 }
20077 }
20078
20079 SmallVector<int> ShuffleMask;
20080 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
20081 matchIndexAsShuffle(VT, Index, Mask: MGN->getMask(), ShuffleMask)) {
20082 SDValue Load = DAG.getMaskedLoad(VT, dl: DL, Chain: MGN->getChain(),
20083 Base: MGN->getBasePtr(), Offset: DAG.getUNDEF(VT: XLenVT),
20084 Mask: MGN->getMask(), Src0: DAG.getUNDEF(VT),
20085 MemVT: MGN->getMemoryVT(), MMO: MGN->getMemOperand(),
20086 AM: ISD::UNINDEXED, ISD::NON_EXTLOAD);
20087 SDValue Shuffle =
20088 DAG.getVectorShuffle(VT, dl: DL, N1: Load, N2: DAG.getUNDEF(VT), Mask: ShuffleMask);
20089 return DAG.getMergeValues(Ops: {Shuffle, Load.getValue(R: 1)}, dl: DL);
20090 }
20091
20092 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
20093 matchIndexAsWiderOp(VT, Index, Mask: MGN->getMask(),
20094 BaseAlign: MGN->getMemOperand()->getBaseAlign(), ST: Subtarget)) {
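// Adjacent pairs of indices address contiguous memory, so the gather can be
// done at twice the element width with half as many elements, e.g.
// (illustrative) an i32 gather with byte offsets <0, 4, 16, 20> becomes an
// i64 gather with offsets <0, 16>.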
20095 SmallVector<SDValue> NewIndices;
20096 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
20097 NewIndices.push_back(Elt: Index.getOperand(i));
20098 EVT IndexVT = Index.getValueType()
20099 .getHalfNumVectorElementsVT(Context&: *DAG.getContext());
20100 Index = DAG.getBuildVector(VT: IndexVT, DL, Ops: NewIndices);
20101
20102 unsigned ElementSize = VT.getScalarStoreSize();
20103 EVT WideScalarVT = MVT::getIntegerVT(BitWidth: ElementSize * 8 * 2);
20104 auto EltCnt = VT.getVectorElementCount();
20105 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
20106 EVT WideVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: WideScalarVT,
20107 EC: EltCnt.divideCoefficientBy(RHS: 2));
20108 SDValue Passthru = DAG.getBitcast(VT: WideVT, V: MGN->getPassThru());
20109 EVT MaskVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: MVT::i1,
20110 EC: EltCnt.divideCoefficientBy(RHS: 2));
20111 SDValue Mask = DAG.getSplat(VT: MaskVT, DL, Op: DAG.getConstant(Val: 1, DL, VT: MVT::i1));
20112
20113 SDValue Gather =
20114 DAG.getMaskedGather(VTs: DAG.getVTList(VT1: WideVT, VT2: MVT::Other), MemVT: WideVT, dl: DL,
20115 Ops: {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
20116 Index, ScaleOp},
20117 MMO: MGN->getMemOperand(), IndexType, ExtTy: ISD::NON_EXTLOAD);
20118 SDValue Result = DAG.getBitcast(VT, V: Gather.getValue(R: 0));
20119 return DAG.getMergeValues(Ops: {Result, Gather.getValue(R: 1)}, dl: DL);
20120 }
20121 break;
20122 }
20123 case ISD::MSCATTER: {
20124 const auto *MSN = cast<MaskedScatterSDNode>(Val: N);
20125 SDValue Index = MSN->getIndex();
20126 SDValue ScaleOp = MSN->getScale();
20127 ISD::MemIndexType IndexType = MSN->getIndexType();
20128 assert(!MSN->isIndexScaled() &&
20129 "Scaled gather/scatter should not be formed");
20130
20131 SDLoc DL(N);
20132 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20133 return DAG.getMaskedScatter(
20134 VTs: N->getVTList(), MemVT: MSN->getMemoryVT(), dl: DL,
20135 Ops: {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
20136 Index, ScaleOp},
20137 MMO: MSN->getMemOperand(), IndexType, IsTruncating: MSN->isTruncatingStore());
20138
20139 if (narrowIndex(N&: Index, IndexType, DAG))
20140 return DAG.getMaskedScatter(
20141 VTs: N->getVTList(), MemVT: MSN->getMemoryVT(), dl: DL,
20142 Ops: {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
20143 Index, ScaleOp},
20144 MMO: MSN->getMemOperand(), IndexType, IsTruncating: MSN->isTruncatingStore());
20145
20146 EVT VT = MSN->getValue()->getValueType(ResNo: 0);
20147 SmallVector<int> ShuffleMask;
20148 if (!MSN->isTruncatingStore() &&
20149 matchIndexAsShuffle(VT, Index, Mask: MSN->getMask(), ShuffleMask)) {
20150 SDValue Shuffle = DAG.getVectorShuffle(VT, dl: DL, N1: MSN->getValue(),
20151 N2: DAG.getUNDEF(VT), Mask: ShuffleMask);
20152 return DAG.getMaskedStore(Chain: MSN->getChain(), dl: DL, Val: Shuffle, Base: MSN->getBasePtr(),
20153 Offset: DAG.getUNDEF(VT: XLenVT), Mask: MSN->getMask(),
20154 MemVT: MSN->getMemoryVT(), MMO: MSN->getMemOperand(),
20155 AM: ISD::UNINDEXED, IsTruncating: false);
20156 }
20157 break;
20158 }
20159 case ISD::VP_GATHER: {
20160 const auto *VPGN = cast<VPGatherSDNode>(Val: N);
20161 SDValue Index = VPGN->getIndex();
20162 SDValue ScaleOp = VPGN->getScale();
20163 ISD::MemIndexType IndexType = VPGN->getIndexType();
20164 assert(!VPGN->isIndexScaled() &&
20165 "Scaled gather/scatter should not be formed");
20166
20167 SDLoc DL(N);
20168 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20169 return DAG.getGatherVP(VTs: N->getVTList(), VT: VPGN->getMemoryVT(), dl: DL,
20170 Ops: {VPGN->getChain(), VPGN->getBasePtr(), Index,
20171 ScaleOp, VPGN->getMask(),
20172 VPGN->getVectorLength()},
20173 MMO: VPGN->getMemOperand(), IndexType);
20174
20175 if (narrowIndex(N&: Index, IndexType, DAG))
20176 return DAG.getGatherVP(VTs: N->getVTList(), VT: VPGN->getMemoryVT(), dl: DL,
20177 Ops: {VPGN->getChain(), VPGN->getBasePtr(), Index,
20178 ScaleOp, VPGN->getMask(),
20179 VPGN->getVectorLength()},
20180 MMO: VPGN->getMemOperand(), IndexType);
20181
20182 break;
20183 }
20184 case ISD::VP_SCATTER: {
20185 const auto *VPSN = cast<VPScatterSDNode>(Val: N);
20186 SDValue Index = VPSN->getIndex();
20187 SDValue ScaleOp = VPSN->getScale();
20188 ISD::MemIndexType IndexType = VPSN->getIndexType();
20189 assert(!VPSN->isIndexScaled() &&
20190 "Scaled gather/scatter should not be formed");
20191
20192 SDLoc DL(N);
20193 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20194 return DAG.getScatterVP(VTs: N->getVTList(), VT: VPSN->getMemoryVT(), dl: DL,
20195 Ops: {VPSN->getChain(), VPSN->getValue(),
20196 VPSN->getBasePtr(), Index, ScaleOp,
20197 VPSN->getMask(), VPSN->getVectorLength()},
20198 MMO: VPSN->getMemOperand(), IndexType);
20199
20200 if (narrowIndex(N&: Index, IndexType, DAG))
20201 return DAG.getScatterVP(VTs: N->getVTList(), VT: VPSN->getMemoryVT(), dl: DL,
20202 Ops: {VPSN->getChain(), VPSN->getValue(),
20203 VPSN->getBasePtr(), Index, ScaleOp,
20204 VPSN->getMask(), VPSN->getVectorLength()},
20205 MMO: VPSN->getMemOperand(), IndexType);
20206 break;
20207 }
20208 case RISCVISD::SHL_VL:
20209 if (SDValue V = performSHLCombine(N, DCI, Subtarget))
20210 return V;
20211 [[fallthrough]];
20212 case RISCVISD::SRA_VL:
20213 case RISCVISD::SRL_VL: {
20214 SDValue ShAmt = N->getOperand(Num: 1);
20215 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
20216 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
20217 SDLoc DL(N);
20218 SDValue VL = N->getOperand(Num: 4);
20219 EVT VT = N->getValueType(ResNo: 0);
20220 ShAmt = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: DAG.getUNDEF(VT),
20221 N2: ShAmt.getOperand(i: 1), N3: VL);
20222 return DAG.getNode(Opcode: N->getOpcode(), DL, VT, N1: N->getOperand(Num: 0), N2: ShAmt,
20223 N3: N->getOperand(Num: 2), N4: N->getOperand(Num: 3), N5: N->getOperand(Num: 4));
20224 }
20225 break;
20226 }
20227 case ISD::SRA:
20228 if (SDValue V = performSRACombine(N, DAG, Subtarget))
20229 return V;
20230 [[fallthrough]];
20231 case ISD::SRL:
20232 case ISD::SHL: {
20233 if (N->getOpcode() == ISD::SHL) {
20234 if (SDValue V = performSHLCombine(N, DCI, Subtarget))
20235 return V;
20236 }
20237 SDValue ShAmt = N->getOperand(Num: 1);
20238 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
20239 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
20240 SDLoc DL(N);
20241 EVT VT = N->getValueType(ResNo: 0);
20242 ShAmt = DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: DAG.getUNDEF(VT),
20243 N2: ShAmt.getOperand(i: 1),
20244 N3: DAG.getRegister(Reg: RISCV::X0, VT: Subtarget.getXLenVT()));
20245 return DAG.getNode(Opcode: N->getOpcode(), DL, VT, N1: N->getOperand(Num: 0), N2: ShAmt);
20246 }
20247 break;
20248 }
20249 case RISCVISD::ADD_VL:
20250 if (SDValue V = simplifyOp_VL(N))
20251 return V;
20252 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20253 return V;
20254 if (SDValue V = combineVqdotAccum(N, DAG, Subtarget))
20255 return V;
20256 return combineToVWMACC(N, DAG, Subtarget);
20257 case RISCVISD::VWADD_W_VL:
20258 case RISCVISD::VWADDU_W_VL:
20259 case RISCVISD::VWSUB_W_VL:
20260 case RISCVISD::VWSUBU_W_VL:
20261 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
20262 case RISCVISD::OR_VL:
20263 case RISCVISD::SUB_VL:
20264 case RISCVISD::MUL_VL:
20265 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
20266 case RISCVISD::VFMADD_VL:
20267 case RISCVISD::VFNMADD_VL:
20268 case RISCVISD::VFMSUB_VL:
20269 case RISCVISD::VFNMSUB_VL:
20270 case RISCVISD::STRICT_VFMADD_VL:
20271 case RISCVISD::STRICT_VFNMADD_VL:
20272 case RISCVISD::STRICT_VFMSUB_VL:
20273 case RISCVISD::STRICT_VFNMSUB_VL:
20274 return performVFMADD_VLCombine(N, DCI, Subtarget);
20275 case RISCVISD::FADD_VL:
20276 case RISCVISD::FSUB_VL:
20277 case RISCVISD::FMUL_VL:
20278 case RISCVISD::VFWADD_W_VL:
20279 case RISCVISD::VFWSUB_W_VL:
20280 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
20281 case ISD::LOAD:
20282 case ISD::STORE: {
20283 if (DCI.isAfterLegalizeDAG())
20284 if (SDValue V = performMemPairCombine(N, DCI))
20285 return V;
20286
20287 if (N->getOpcode() != ISD::STORE)
20288 break;
20289
20290 auto *Store = cast<StoreSDNode>(Val: N);
20291 SDValue Chain = Store->getChain();
20292 EVT MemVT = Store->getMemoryVT();
20293 SDValue Val = Store->getValue();
20294 SDLoc DL(N);
20295
20296 bool IsScalarizable =
20297 MemVT.isFixedLengthVector() && ISD::isNormalStore(N: Store) &&
20298 Store->isSimple() &&
20299 MemVT.getVectorElementType().bitsLE(VT: Subtarget.getXLenVT()) &&
20300 isPowerOf2_64(Value: MemVT.getSizeInBits()) &&
20301 MemVT.getSizeInBits() <= Subtarget.getXLen();
20302
20303 // If sufficiently aligned we can scalarize stores of constant vectors of
20304 // any power-of-two size up to XLen bits, provided that they aren't too
20305 // expensive to materialize.
20306 // vsetivli zero, 2, e8, m1, ta, ma
20307 // vmv.v.i v8, 4
20308 // vse64.v v8, (a0)
20309 // ->
20310 // li a1, 1028
20311 // sh a1, 0(a0)
20312 if (DCI.isBeforeLegalize() && IsScalarizable &&
20313 ISD::isBuildVectorOfConstantSDNodes(N: Val.getNode())) {
20314 // Get the constant vector bits
20315 APInt NewC(Val.getValueSizeInBits(), 0);
20316 uint64_t EltSize = Val.getScalarValueSizeInBits();
20317 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
20318 if (Val.getOperand(i).isUndef())
20319 continue;
20320 NewC.insertBits(SubBits: Val.getConstantOperandAPInt(i).trunc(width: EltSize),
20321 bitPosition: i * EltSize);
20322 }
20323 MVT NewVT = MVT::getIntegerVT(BitWidth: MemVT.getSizeInBits());
20324
20325 if (RISCVMatInt::getIntMatCost(Val: NewC, Size: Subtarget.getXLen(), STI: Subtarget,
20326 CompressionCost: true) <= 2 &&
20327 allowsMemoryAccessForAlignment(Context&: *DAG.getContext(), DL: DAG.getDataLayout(),
20328 VT: NewVT, MMO: *Store->getMemOperand())) {
20329 SDValue NewV = DAG.getConstant(Val: NewC, DL, VT: NewVT);
20330 return DAG.getStore(Chain, dl: DL, Val: NewV, Ptr: Store->getBasePtr(),
20331 PtrInfo: Store->getPointerInfo(), Alignment: Store->getBaseAlign(),
20332 MMOFlags: Store->getMemOperand()->getFlags());
20333 }
20334 }
20335
20336 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
20337 // vsetivli zero, 2, e16, m1, ta, ma
20338 // vle16.v v8, (a0)
20339 // vse16.v v8, (a1)
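// ->
// lw a2, 0(a0)
// sw a2, 0(a1)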
20340 if (auto *L = dyn_cast<LoadSDNode>(Val);
20341 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
20342 L->hasNUsesOfValue(NUses: 1, Value: 0) && L->hasNUsesOfValue(NUses: 1, Value: 1) &&
20343 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(N: L) &&
20344 L->getMemoryVT() == MemVT) {
20345 MVT NewVT = MVT::getIntegerVT(BitWidth: MemVT.getSizeInBits());
20346 if (allowsMemoryAccessForAlignment(Context&: *DAG.getContext(), DL: DAG.getDataLayout(),
20347 VT: NewVT, MMO: *Store->getMemOperand()) &&
20348 allowsMemoryAccessForAlignment(Context&: *DAG.getContext(), DL: DAG.getDataLayout(),
20349 VT: NewVT, MMO: *L->getMemOperand())) {
20350 SDValue NewL = DAG.getLoad(VT: NewVT, dl: DL, Chain: L->getChain(), Ptr: L->getBasePtr(),
20351 PtrInfo: L->getPointerInfo(), Alignment: L->getBaseAlign(),
20352 MMOFlags: L->getMemOperand()->getFlags());
20353 return DAG.getStore(Chain, dl: DL, Val: NewL, Ptr: Store->getBasePtr(),
20354 PtrInfo: Store->getPointerInfo(), Alignment: Store->getBaseAlign(),
20355 MMOFlags: Store->getMemOperand()->getFlags());
20356 }
20357 }
20358
20359 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
20360 // vfmv.f.s is represented as an extract_vector_elt at index 0. Match it
20361 // late to avoid any illegal types.
20362 if (Val.getOpcode() == RISCVISD::VMV_X_S ||
20363 (DCI.isAfterLegalizeDAG() &&
20364 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20365 isNullConstant(V: Val.getOperand(i: 1)))) {
20366 SDValue Src = Val.getOperand(i: 0);
20367 MVT VecVT = Src.getSimpleValueType();
20368 // VecVT should be scalable and memory VT should match the element type.
20369 if (!Store->isIndexed() && VecVT.isScalableVector() &&
20370 MemVT == VecVT.getVectorElementType()) {
20371 SDLoc DL(N);
20372 MVT MaskVT = getMaskTypeFor(VecVT);
20373 return DAG.getStoreVP(
20374 Chain: Store->getChain(), dl: DL, Val: Src, Ptr: Store->getBasePtr(), Offset: Store->getOffset(),
20375 Mask: DAG.getConstant(Val: 1, DL, VT: MaskVT),
20376 EVL: DAG.getConstant(Val: 1, DL, VT: Subtarget.getXLenVT()), MemVT,
20377 MMO: Store->getMemOperand(), AM: Store->getAddressingMode(),
20378 IsTruncating: Store->isTruncatingStore(), /*IsCompress*/ IsCompressing: false);
20379 }
20380 }
20381
20382 break;
20383 }
20384 case ISD::SPLAT_VECTOR: {
20385 EVT VT = N->getValueType(ResNo: 0);
20386 // Only perform this combine on legal MVT types.
20387 if (!isTypeLegal(VT))
20388 break;
20389 if (auto Gather = matchSplatAsGather(SplatVal: N->getOperand(Num: 0), VT: VT.getSimpleVT(), DL: N,
20390 DAG, Subtarget))
20391 return Gather;
20392 break;
20393 }
20394 case ISD::BUILD_VECTOR:
20395 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, TLI: *this))
20396 return V;
20397 break;
20398 case ISD::CONCAT_VECTORS:
20399 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, TLI: *this))
20400 return V;
20401 break;
20402 case ISD::VECTOR_SHUFFLE:
20403 if (SDValue V = performVECTOR_SHUFFLECombine(N, DAG, Subtarget, TLI: *this))
20404 return V;
20405 break;
20406 case ISD::INSERT_VECTOR_ELT:
20407 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, TLI: *this))
20408 return V;
20409 break;
20410 case RISCVISD::VFMV_V_F_VL: {
20411 const MVT VT = N->getSimpleValueType(ResNo: 0);
20412 SDValue Passthru = N->getOperand(Num: 0);
20413 SDValue Scalar = N->getOperand(Num: 1);
20414 SDValue VL = N->getOperand(Num: 2);
20415
20416 // If VL is 1, we can use vfmv.s.f.
20417 if (isOneConstant(V: VL))
20418 return DAG.getNode(Opcode: RISCVISD::VFMV_S_F_VL, DL, VT, N1: Passthru, N2: Scalar, N3: VL);
20419 break;
20420 }
20421 case RISCVISD::VMV_V_X_VL: {
20422 const MVT VT = N->getSimpleValueType(ResNo: 0);
20423 SDValue Passthru = N->getOperand(Num: 0);
20424 SDValue Scalar = N->getOperand(Num: 1);
20425 SDValue VL = N->getOperand(Num: 2);
20426
20427 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
20428 // scalar input.
20429 unsigned ScalarSize = Scalar.getValueSizeInBits();
20430 unsigned EltWidth = VT.getScalarSizeInBits();
20431 if (ScalarSize > EltWidth && Passthru.isUndef())
20432 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
20433 return SDValue(N, 0);
20434
20435 // If VL is 1 and the scalar is not a nonzero simm5 (which would favor
20436 // vmv.v.i), we can use vmv.s.x.
20437 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Val&: Scalar);
20438 if (isOneConstant(V: VL) &&
20439 (!Const || Const->isZero() ||
20440 !Const->getAPIntValue().sextOrTrunc(width: EltWidth).isSignedIntN(N: 5)))
20441 return DAG.getNode(Opcode: RISCVISD::VMV_S_X_VL, DL, VT, N1: Passthru, N2: Scalar, N3: VL);
20442
20443 break;
20444 }
20445 case RISCVISD::VFMV_S_F_VL: {
20446 SDValue Src = N->getOperand(Num: 1);
20447 // Try to remove vector->scalar->vector if the scalar->vector is inserting
20448 // into an undef vector.
20449 // TODO: Could use a vslide or vmv.v.v for non-undef.
20450 if (N->getOperand(Num: 0).isUndef() &&
20451 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20452 isNullConstant(V: Src.getOperand(i: 1)) &&
20453 Src.getOperand(i: 0).getValueType().isScalableVector()) {
20454 EVT VT = N->getValueType(ResNo: 0);
20455 EVT SrcVT = Src.getOperand(i: 0).getValueType();
20456 assert(SrcVT.getVectorElementType() == VT.getVectorElementType());
20457 // Widths match, just return the original vector.
20458 if (SrcVT == VT)
20459 return Src.getOperand(i: 0);
20460 // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
20461 }
20462 [[fallthrough]];
20463 }
20464 case RISCVISD::VMV_S_X_VL: {
20465 const MVT VT = N->getSimpleValueType(ResNo: 0);
20466 SDValue Passthru = N->getOperand(Num: 0);
20467 SDValue Scalar = N->getOperand(Num: 1);
20468 SDValue VL = N->getOperand(Num: 2);
20469
20470 // The vmv.s.x instruction copies the scalar integer register to element 0
20471 // of the destination vector register. If SEW < XLEN, the least-significant
20472 // bits are copied and the upper XLEN-SEW bits are ignored.
20473 unsigned ScalarSize = Scalar.getValueSizeInBits();
20474 unsigned EltWidth = VT.getScalarSizeInBits();
20475 if (ScalarSize > EltWidth && SimplifyDemandedLowBitsHelper(1, EltWidth))
20476 return SDValue(N, 0);
20477
20478 if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
20479 Scalar.getOperand(i: 0).getValueType() == N->getValueType(ResNo: 0))
20480 return Scalar.getOperand(i: 0);
20481
20482 // Use M1 or smaller to avoid over-constraining register allocation.
20483 const MVT M1VT = RISCVTargetLowering::getM1VT(VT);
20484 if (M1VT.bitsLT(VT)) {
20485 SDValue M1Passthru = DAG.getExtractSubvector(DL, VT: M1VT, Vec: Passthru, Idx: 0);
20486 SDValue Result =
20487 DAG.getNode(Opcode: N->getOpcode(), DL, VT: M1VT, N1: M1Passthru, N2: Scalar, N3: VL);
20488 Result = DAG.getInsertSubvector(DL, Vec: Passthru, SubVec: Result, Idx: 0);
20489 return Result;
20490 }
20491
20492 // Use a vmv.v.i if possible, but limit it to LMUL1; LMUL2 or
20493 // higher would involve overly constraining the register allocator
20494 // for no benefit.
20495 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Val&: Scalar);
20496 Const && !Const->isZero() && isInt<5>(x: Const->getSExtValue()) &&
20497 VT.bitsLE(VT: RISCVTargetLowering::getM1VT(VT)) && Passthru.isUndef())
20498 return DAG.getNode(Opcode: RISCVISD::VMV_V_X_VL, DL, VT, N1: Passthru, N2: Scalar, N3: VL);
20499
20500 break;
20501 }
20502 case RISCVISD::VMV_X_S: {
20503 SDValue Vec = N->getOperand(Num: 0);
20504 MVT VecVT = N->getOperand(Num: 0).getSimpleValueType();
20505 const MVT M1VT = RISCVTargetLowering::getM1VT(VT: VecVT);
20506 if (M1VT.bitsLT(VT: VecVT)) {
20507 Vec = DAG.getExtractSubvector(DL, VT: M1VT, Vec, Idx: 0);
20508 return DAG.getNode(Opcode: RISCVISD::VMV_X_S, DL, VT: N->getSimpleValueType(ResNo: 0), Operand: Vec);
20509 }
20510 break;
20511 }
20512 case ISD::INTRINSIC_VOID:
20513 case ISD::INTRINSIC_W_CHAIN:
20514 case ISD::INTRINSIC_WO_CHAIN: {
20515 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
20516 unsigned IntNo = N->getConstantOperandVal(Num: IntOpNo);
20517 switch (IntNo) {
20518 // By default we do not combine any intrinsic.
20519 default:
20520 return SDValue();
20521 case Intrinsic::riscv_vcpop:
20522 case Intrinsic::riscv_vcpop_mask:
20523 case Intrinsic::riscv_vfirst:
20524 case Intrinsic::riscv_vfirst_mask: {
20525 SDValue VL = N->getOperand(Num: 2);
20526 if (IntNo == Intrinsic::riscv_vcpop_mask ||
20527 IntNo == Intrinsic::riscv_vfirst_mask)
20528 VL = N->getOperand(Num: 3);
20529 if (!isNullConstant(V: VL))
20530 return SDValue();
20531 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
20532 SDLoc DL(N);
20533 EVT VT = N->getValueType(ResNo: 0);
20534 if (IntNo == Intrinsic::riscv_vfirst ||
20535 IntNo == Intrinsic::riscv_vfirst_mask)
20536 return DAG.getAllOnesConstant(DL, VT);
20537 return DAG.getConstant(Val: 0, DL, VT);
20538 }
20539 }
20540 }
20541 case ISD::EXPERIMENTAL_VP_REVERSE:
20542 return performVP_REVERSECombine(N, DAG, Subtarget);
20543 case ISD::VP_STORE:
20544 return performVP_STORECombine(N, DAG, Subtarget);
20545 case ISD::BITCAST: {
20546 assert(Subtarget.useRVVForFixedLengthVectors());
20547 SDValue N0 = N->getOperand(Num: 0);
20548 EVT VT = N->getValueType(ResNo: 0);
20549 EVT SrcVT = N0.getValueType();
20550 if (VT.isRISCVVectorTuple() && N0->getOpcode() == ISD::SPLAT_VECTOR) {
20551 unsigned NF = VT.getRISCVVectorTupleNumFields();
20552 unsigned NumScalElts = VT.getSizeInBits().getKnownMinValue() / (NF * 8);
20553 SDValue EltVal = DAG.getConstant(Val: 0, DL, VT: Subtarget.getXLenVT());
20554 MVT ScalTy = MVT::getScalableVectorVT(VT: MVT::getIntegerVT(BitWidth: 8), NumElements: NumScalElts);
20555
20556 SDValue Splat = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL, VT: ScalTy, Operand: EltVal);
20557
20558 SDValue Result = DAG.getUNDEF(VT);
20559 for (unsigned i = 0; i < NF; ++i)
20560 Result = DAG.getNode(Opcode: RISCVISD::TUPLE_INSERT, DL, VT, N1: Result, N2: Splat,
20561 N3: DAG.getVectorIdxConstant(Val: i, DL));
20562 return Result;
20563 }
20564 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
20565 // type, widen both sides to avoid a trip through memory.
20566 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
20567 VT.isScalarInteger()) {
20568 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
20569 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(VT: SrcVT));
20570 Ops[0] = N0;
20571 SDLoc DL(N);
20572 N0 = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: MVT::v8i1, Ops);
20573 N0 = DAG.getBitcast(VT: MVT::i8, V: N0);
20574 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: N0);
20575 }
20576
20577 return SDValue();
20578 }
20579 case ISD::VECREDUCE_ADD:
20580 if (SDValue V = performVECREDUCECombine(N, DAG, Subtarget, TLI: *this))
20581 return V;
20582 [[fallthrough]];
20583 case ISD::CTPOP:
20584 if (SDValue V = combineToVCPOP(N, DAG, Subtarget))
20585 return V;
20586 break;
20587 case RISCVISD::VRGATHER_VX_VL: {
20588 // Note this assumes that out-of-bounds indices produce poison
20589 // and can thus be replaced without having to prove them in bounds.
20590 EVT VT = N->getValueType(ResNo: 0);
20591 SDValue Src = N->getOperand(Num: 0);
20592 SDValue Idx = N->getOperand(Num: 1);
20593 SDValue Passthru = N->getOperand(Num: 2);
20594 SDValue VL = N->getOperand(Num: 4);
20595
20596 // Warning: Unlike most cases where we strip an insert_subvector, this one
20597 // does not require the first operand to be undef.
20598 if (Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
20599 isNullConstant(V: Src.getOperand(i: 2)))
20600 Src = Src.getOperand(i: 1);
20601
20602 switch (Src.getOpcode()) {
20603 default:
20604 break;
20605 case RISCVISD::VMV_V_X_VL:
20606 case RISCVISD::VFMV_V_F_VL:
20607 // Drop a redundant vrgather_vx.
20608 // TODO: Remove the type restriction if we find a motivating
20609 // test case?
20610 if (Passthru.isUndef() && VL == Src.getOperand(i: 2) &&
20611 Src.getValueType() == VT)
20612 return Src;
20613 break;
20614 case RISCVISD::VMV_S_X_VL:
20615 case RISCVISD::VFMV_S_F_VL:
20616 // If this use only demands lane zero from the source vmv.s.x, and
20617 // doesn't have a passthru, then this vrgather.vi/vx is equivalent to
20618 // a vmv.v.x. Note that there can be other uses of the original
20619 // vmv.s.x and thus we can't eliminate it. (vfmv.s.f is analogous)
20620 if (isNullConstant(V: Idx) && Passthru.isUndef() &&
20621 VL == Src.getOperand(i: 2)) {
20622 unsigned Opc =
20623 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
20624 return DAG.getNode(Opcode: Opc, DL, VT, N1: DAG.getUNDEF(VT), N2: Src.getOperand(i: 1),
20625 N3: VL);
20626 }
20627 break;
20628 }
20629 break;
20630 }
20631 }
20632
20633 return SDValue();
20634}
20635
20636bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
20637 EVT XVT, unsigned KeptBits) const {
20638 // For vectors, we don't have a preference.
20639 if (XVT.isVector())
20640 return false;
20641
20642 if (XVT != MVT::i32 && XVT != MVT::i64)
20643 return false;
20644
20645 // We can use sext.w for RV64 or an srai 31 on RV32.
20646 if (KeptBits == 32 || KeptBits == 64)
20647 return true;
20648
20649 // With Zbb we can use sext.h/sext.b.
20650 return Subtarget.hasStdExtZbb() &&
20651 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
20652 KeptBits == 16);
20653}
20654
20655bool RISCVTargetLowering::isDesirableToCommuteWithShift(
20656 const SDNode *N, CombineLevel Level) const {
20657 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
20658 N->getOpcode() == ISD::SRL) &&
20659 "Expected shift op");
20660
20661 // The following folds are only desirable if `(OP _, c1 << c2)` can be
20662 // materialised in fewer instructions than `(OP _, c1)`:
20663 //
20664 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
20665 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
20666 SDValue N0 = N->getOperand(Num: 0);
20667 EVT Ty = N0.getValueType();
20668
20669 // LD/ST will optimize constant Offset extraction, so when AddNode is used by
20670 // LD/ST, it can still complete the folding optimization operation performed
20671 // above.
20672 auto isUsedByLdSt = [](const SDNode *X, const SDNode *User) {
20673 for (SDNode *Use : X->users()) {
20674 // This use is the one we're on right now. Skip it.
20675 if (Use == User || Use->getOpcode() == ISD::SELECT)
20676 continue;
20677 if (!isa<StoreSDNode>(Val: Use) && !isa<LoadSDNode>(Val: Use))
20678 return false;
20679 }
20680 return true;
20681 };
20682
20683 if (Ty.isScalarInteger() &&
20684 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
20685 if (N0.getOpcode() == ISD::ADD && !N0->hasOneUse())
20686 return isUsedByLdSt(N0.getNode(), N);
20687
20688 auto *C1 = dyn_cast<ConstantSDNode>(Val: N0->getOperand(Num: 1));
20689 auto *C2 = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1));
20690
20691 // Bail if we might break a sh{1,2,3}add pattern.
20692 if ((Subtarget.hasStdExtZba() || Subtarget.hasVendorXAndesPerf()) && C2 &&
20693 C2->getZExtValue() >= 1 && C2->getZExtValue() <= 3 && N->hasOneUse() &&
20694 N->user_begin()->getOpcode() == ISD::ADD &&
20695 !isUsedByLdSt(*N->user_begin(), nullptr) &&
20696 !isa<ConstantSDNode>(Val: N->user_begin()->getOperand(Num: 1)))
20697 return false;
20698
20699 if (C1 && C2) {
20700 const APInt &C1Int = C1->getAPIntValue();
20701 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
20702
20703 // We can materialise `c1 << c2` into an add immediate, so it's "free",
20704 // and the combine should happen, to potentially allow further combines
20705 // later.
20706 if (ShiftedC1Int.getSignificantBits() <= 64 &&
20707 isLegalAddImmediate(Imm: ShiftedC1Int.getSExtValue()))
20708 return true;
20709
20710 // We can materialise `c1` in an add immediate, so it's "free", and the
20711 // combine should be prevented.
20712 if (C1Int.getSignificantBits() <= 64 &&
20713 isLegalAddImmediate(Imm: C1Int.getSExtValue()))
20714 return false;
20715
20716 // Neither constant will fit into an immediate, so find materialisation
20717 // costs.
20718 int C1Cost =
20719 RISCVMatInt::getIntMatCost(Val: C1Int, Size: Ty.getSizeInBits(), STI: Subtarget,
20720 /*CompressionCost*/ true);
20721 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
20722 Val: ShiftedC1Int, Size: Ty.getSizeInBits(), STI: Subtarget,
20723 /*CompressionCost*/ true);
20724
20725 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
20726 // combine should be prevented.
20727 if (C1Cost < ShiftedC1Cost)
20728 return false;
20729 }
20730 }
20731
20732 if (!N0->hasOneUse())
20733 return false;
20734
20735 if (N0->getOpcode() == ISD::SIGN_EXTEND &&
20736 N0->getOperand(Num: 0)->getOpcode() == ISD::ADD &&
20737 !N0->getOperand(Num: 0)->hasOneUse())
20738 return isUsedByLdSt(N0->getOperand(Num: 0).getNode(), N0.getNode());
20739
20740 return true;
20741}
20742
20743bool RISCVTargetLowering::targetShrinkDemandedConstant(
20744 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
20745 TargetLoweringOpt &TLO) const {
20746 // Delay this optimization as late as possible.
20747 if (!TLO.LegalOps)
20748 return false;
20749
20750 EVT VT = Op.getValueType();
20751 if (VT.isVector())
20752 return false;
20753
20754 unsigned Opcode = Op.getOpcode();
20755 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
20756 return false;
20757
20758 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 1));
20759 if (!C)
20760 return false;
20761
20762 const APInt &Mask = C->getAPIntValue();
20763
20764 // Clear all non-demanded bits initially.
20765 APInt ShrunkMask = Mask & DemandedBits;
20766
20767 // Try to make a smaller immediate by setting undemanded bits.
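// For example (illustrative), (and X, 0x0FFFFFF0) where only the low 12 bits
// are demanded can instead use the mask -16 (0x...FFF0), which fits in a
// single ANDI immediate.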
20768
20769 APInt ExpandedMask = Mask | ~DemandedBits;
20770
20771 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
20772 return ShrunkMask.isSubsetOf(RHS: Mask) && Mask.isSubsetOf(RHS: ExpandedMask);
20773 };
20774 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
20775 if (NewMask == Mask)
20776 return true;
20777 SDLoc DL(Op);
20778 SDValue NewC = TLO.DAG.getConstant(Val: NewMask, DL, VT: Op.getValueType());
20779 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL, VT: Op.getValueType(),
20780 N1: Op.getOperand(i: 0), N2: NewC);
20781 return TLO.CombineTo(O: Op, N: NewOp);
20782 };
20783
20784 // If the shrunk mask fits in sign extended 12 bits, let the target
20785 // independent code apply it.
20786 if (ShrunkMask.isSignedIntN(N: 12))
20787 return false;
20788
20789 // And has a few special cases for zext.
20790 if (Opcode == ISD::AND) {
20791 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
20792 // otherwise use SLLI + SRLI.
20793 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
20794 if (IsLegalMask(NewMask))
20795 return UseMask(NewMask);
20796
20797 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
20798 if (VT == MVT::i64) {
20799 APInt NewMask = APInt(64, 0xffffffff);
20800 if (IsLegalMask(NewMask))
20801 return UseMask(NewMask);
20802 }
20803 }
20804
20805 // For the remaining optimizations, we need to be able to make a negative
20806 // number through a combination of mask and undemanded bits.
20807 if (!ExpandedMask.isNegative())
20808 return false;
20809
20810 // Compute the fewest number of bits needed to represent the negative number.
20811 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
20812
20813 // Try to make a 12 bit negative immediate. If that fails try to make a 32
20814 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
20815 // If we can't create a simm12, we shouldn't change opaque constants.
20816 APInt NewMask = ShrunkMask;
20817 if (MinSignedBits <= 12)
20818 NewMask.setBitsFrom(11);
20819 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(N: 32))
20820 NewMask.setBitsFrom(31);
20821 else
20822 return false;
20823
20824 // Check that our new mask is a subset of the demanded mask.
20825 assert(IsLegalMask(NewMask));
20826 return UseMask(NewMask);
20827}
20828
20829static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
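// This emulates the GREV/GORC butterfly network: each stage exchanges bit
// groups of width 1 << Stage when the corresponding bit of ShAmt is set, and
// for GORC the original value is ORed back in. ShAmt == 7 matches brev8/orc.b.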
20830 static const uint64_t GREVMasks[] = {
20831 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
20832 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
20833
20834 for (unsigned Stage = 0; Stage != 6; ++Stage) {
20835 unsigned Shift = 1 << Stage;
20836 if (ShAmt & Shift) {
20837 uint64_t Mask = GREVMasks[Stage];
20838 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
20839 if (IsGORC)
20840 Res |= x;
20841 x = Res;
20842 }
20843 }
20844
20845 return x;
20846}
20847
20848void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
20849 KnownBits &Known,
20850 const APInt &DemandedElts,
20851 const SelectionDAG &DAG,
20852 unsigned Depth) const {
20853 unsigned BitWidth = Known.getBitWidth();
20854 unsigned Opc = Op.getOpcode();
20855 assert((Opc >= ISD::BUILTIN_OP_END ||
20856 Opc == ISD::INTRINSIC_WO_CHAIN ||
20857 Opc == ISD::INTRINSIC_W_CHAIN ||
20858 Opc == ISD::INTRINSIC_VOID) &&
20859 "Should use MaskedValueIsZero if you don't know whether Op"
20860 " is a target node!");
20861
20862 Known.resetAll();
20863 switch (Opc) {
20864 default: break;
20865 case RISCVISD::SELECT_CC: {
20866 Known = DAG.computeKnownBits(Op: Op.getOperand(i: 4), Depth: Depth + 1);
20867 // If we don't know any bits, early out.
20868 if (Known.isUnknown())
20869 break;
20870 KnownBits Known2 = DAG.computeKnownBits(Op: Op.getOperand(i: 3), Depth: Depth + 1);
20871
20872 // Only known if known in both the LHS and RHS.
20873 Known = Known.intersectWith(RHS: Known2);
20874 break;
20875 }
20876 case RISCVISD::VCPOP_VL: {
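// The result is a count of set mask bits and is therefore at most VL
// (operand 2), so it can have no more active bits than VL itself.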
20877 KnownBits Known2 = DAG.computeKnownBits(Op: Op.getOperand(i: 2), Depth: Depth + 1);
20878 Known.Zero.setBitsFrom(Known2.countMaxActiveBits());
20879 break;
20880 }
20881 case RISCVISD::CZERO_EQZ:
20882 case RISCVISD::CZERO_NEZ:
20883 Known = DAG.computeKnownBits(Op: Op.getOperand(i: 0), Depth: Depth + 1);
20884 // Result is either all zero or operand 0. We can propagate zeros, but not
20885 // ones.
20886 Known.One.clearAllBits();
20887 break;
20888 case RISCVISD::REMUW: {
20889 KnownBits Known2;
20890 Known = DAG.computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1);
20891 Known2 = DAG.computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1);
20892 // We only care about the lower 32 bits.
20893 Known = KnownBits::urem(LHS: Known.trunc(BitWidth: 32), RHS: Known2.trunc(BitWidth: 32));
20894 // Restore the original width by sign extending.
20895 Known = Known.sext(BitWidth);
20896 break;
20897 }
20898 case RISCVISD::DIVUW: {
20899 KnownBits Known2;
20900 Known = DAG.computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1);
20901 Known2 = DAG.computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1);
20902 // We only care about the lower 32 bits.
20903 Known = KnownBits::udiv(LHS: Known.trunc(BitWidth: 32), RHS: Known2.trunc(BitWidth: 32));
20904 // Restore the original width by sign extending.
20905 Known = Known.sext(BitWidth);
20906 break;
20907 }
20908 case RISCVISD::SLLW: {
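// Only the low 32 bits of the value and the low 5 bits of the shift amount
// are used; the 32-bit result is then sign extended to XLEN.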
20909 KnownBits Known2;
20910 Known = DAG.computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1);
20911 Known2 = DAG.computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1);
20912 Known = KnownBits::shl(LHS: Known.trunc(BitWidth: 32), RHS: Known2.trunc(BitWidth: 5).zext(BitWidth: 32));
20913 // Restore the original width by sign extending.
20914 Known = Known.sext(BitWidth);
20915 break;
20916 }
20917 case RISCVISD::CTZW: {
20918 KnownBits Known2 = DAG.computeKnownBits(Op: Op.getOperand(i: 0), Depth: Depth + 1);
20919 unsigned PossibleTZ = Known2.trunc(BitWidth: 32).countMaxTrailingZeros();
20920 unsigned LowBits = llvm::bit_width(Value: PossibleTZ);
20921 Known.Zero.setBitsFrom(LowBits);
20922 break;
20923 }
20924 case RISCVISD::CLZW: {
20925 KnownBits Known2 = DAG.computeKnownBits(Op: Op.getOperand(i: 0), Depth: Depth + 1);
20926 unsigned PossibleLZ = Known2.trunc(BitWidth: 32).countMaxLeadingZeros();
20927 unsigned LowBits = llvm::bit_width(Value: PossibleLZ);
20928 Known.Zero.setBitsFrom(LowBits);
20929 break;
20930 }
20931 case RISCVISD::BREV8:
20932 case RISCVISD::ORC_B: {
20933 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
20934 // control value of 7 is equivalent to brev8 and orc.b.
20935 Known = DAG.computeKnownBits(Op: Op.getOperand(i: 0), Depth: Depth + 1);
20936 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
20937 // To compute zeros for ORC_B, we need to invert the value and invert it
20938 // back after. This inverting is harmless for BREV8.
20939 Known.Zero =
20940 ~computeGREVOrGORC(x: ~Known.Zero.getZExtValue(), ShAmt: 7, IsGORC);
20941 Known.One = computeGREVOrGORC(x: Known.One.getZExtValue(), ShAmt: 7, IsGORC);
20942 break;
20943 }
20944 case RISCVISD::READ_VLENB: {
20945 // We can use the minimum and maximum VLEN values to bound VLENB. We
20946 // know VLEN must be a power of two.
20947 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
20948 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
20949 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
20950 Known.Zero.setLowBits(Log2_32(Value: MinVLenB));
20951 Known.Zero.setBitsFrom(Log2_32(Value: MaxVLenB) + 1);
20952 if (MaxVLenB == MinVLenB)
20953 Known.One.setBit(Log2_32(Value: MinVLenB));
20954 break;
20955 }
20956 case RISCVISD::FCLASS: {
20957 // fclass will only set one of the low 10 bits.
20958 Known.Zero.setBitsFrom(10);
20959 break;
20960 }
20961 case ISD::INTRINSIC_W_CHAIN:
20962 case ISD::INTRINSIC_WO_CHAIN: {
20963 unsigned IntNo =
20964 Op.getConstantOperandVal(i: Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
20965 switch (IntNo) {
20966 default:
20967 // We can't do anything for most intrinsics.
20968 break;
20969 case Intrinsic::riscv_vsetvli:
20970 case Intrinsic::riscv_vsetvlimax: {
20971 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
20972 unsigned VSEW = Op.getConstantOperandVal(i: HasAVL + 1);
20973 RISCVVType::VLMUL VLMUL =
20974 static_cast<RISCVVType::VLMUL>(Op.getConstantOperandVal(i: HasAVL + 2));
20975 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
20976 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul: VLMUL);
20977 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
20978 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
20979
20980 // The result of vsetvli must not be larger than AVL.
20981 if (HasAVL && isa<ConstantSDNode>(Val: Op.getOperand(i: 1)))
20982 MaxVL = std::min(a: MaxVL, b: Op.getConstantOperandVal(i: 1));
20983
20984 unsigned KnownZeroFirstBit = Log2_32(Value: MaxVL) + 1;
20985 if (BitWidth > KnownZeroFirstBit)
20986 Known.Zero.setBitsFrom(KnownZeroFirstBit);
20987 break;
20988 }
20989 }
20990 break;
20991 }
20992 }
20993}
20994
20995unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
20996 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
20997 unsigned Depth) const {
20998 switch (Op.getOpcode()) {
20999 default:
21000 break;
21001 case RISCVISD::SELECT_CC: {
21002 unsigned Tmp =
21003 DAG.ComputeNumSignBits(Op: Op.getOperand(i: 3), DemandedElts, Depth: Depth + 1);
21004 if (Tmp == 1) return 1; // Early out.
21005 unsigned Tmp2 =
21006 DAG.ComputeNumSignBits(Op: Op.getOperand(i: 4), DemandedElts, Depth: Depth + 1);
21007 return std::min(a: Tmp, b: Tmp2);
21008 }
21009 case RISCVISD::CZERO_EQZ:
21010 case RISCVISD::CZERO_NEZ:
21011 // Output is either all zero or operand 0. We can propagate sign bit count
21012 // from operand 0.
21013 return DAG.ComputeNumSignBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1);
21014 case RISCVISD::ABSW: {
21015 // We expand this at isel to negw+max. The result will have 33 sign bits
21016 // if the input has at least 33 sign bits.
21017 unsigned Tmp =
21018 DAG.ComputeNumSignBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1);
21019 if (Tmp < 33) return 1;
21020 return 33;
21021 }
21022 case RISCVISD::SLLW:
21023 case RISCVISD::SRAW:
21024 case RISCVISD::SRLW:
21025 case RISCVISD::DIVW:
21026 case RISCVISD::DIVUW:
21027 case RISCVISD::REMUW:
21028 case RISCVISD::ROLW:
21029 case RISCVISD::RORW:
21030 case RISCVISD::FCVT_W_RV64:
21031 case RISCVISD::FCVT_WU_RV64:
21032 case RISCVISD::STRICT_FCVT_W_RV64:
21033 case RISCVISD::STRICT_FCVT_WU_RV64:
21034 // TODO: As the result is sign-extended, this is conservatively correct. A
21035 // more precise answer could be calculated for SRAW depending on known
21036 // bits in the shift amount.
21037 return 33;
21038 case RISCVISD::VMV_X_S: {
21039 // The number of sign bits of the scalar result is computed by obtaining the
21040 // element type of the input vector operand, subtracting its width from the
21041 // XLEN, and then adding one (sign bit within the element type). If the
21042 // element type is wider than XLen, the least-significant XLEN bits are
21043 // taken.
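// For example, extracting from a vector of i8 elements on RV64 gives
// 64 - 8 + 1 = 57 known sign bits.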
21044 unsigned XLen = Subtarget.getXLen();
21045 unsigned EltBits = Op.getOperand(i: 0).getScalarValueSizeInBits();
21046 if (EltBits <= XLen)
21047 return XLen - EltBits + 1;
21048 break;
21049 }
21050 case ISD::INTRINSIC_W_CHAIN: {
21051 unsigned IntNo = Op.getConstantOperandVal(i: 1);
21052 switch (IntNo) {
21053 default:
21054 break;
21055 case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
21056 case Intrinsic::riscv_masked_atomicrmw_add_i64:
21057 case Intrinsic::riscv_masked_atomicrmw_sub_i64:
21058 case Intrinsic::riscv_masked_atomicrmw_nand_i64:
21059 case Intrinsic::riscv_masked_atomicrmw_max_i64:
21060 case Intrinsic::riscv_masked_atomicrmw_min_i64:
21061 case Intrinsic::riscv_masked_atomicrmw_umax_i64:
21062 case Intrinsic::riscv_masked_atomicrmw_umin_i64:
21063 case Intrinsic::riscv_masked_cmpxchg_i64:
21064 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
21065 // narrow atomic operation. These are implemented using atomic
21066 // operations at the minimum supported atomicrmw/cmpxchg width whose
21067 // result is then sign extended to XLEN. With +A, the minimum width is
21068 // 32 for both RV64 and RV32.
21069 assert(Subtarget.getXLen() == 64);
21070 assert(getMinCmpXchgSizeInBits() == 32);
21071 assert(Subtarget.hasStdExtA());
21072 return 33;
21073 }
21074 break;
21075 }
21076 }
21077
21078 return 1;
21079}
21080
21081bool RISCVTargetLowering::SimplifyDemandedBitsForTargetNode(
21082 SDValue Op, const APInt &OriginalDemandedBits,
21083 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
21084 unsigned Depth) const {
21085 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
21086
21087 switch (Op.getOpcode()) {
21088 case RISCVISD::BREV8:
21089 case RISCVISD::ORC_B: {
21090 KnownBits Known2;
21091 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
21092 // For BREV8, we need to do BREV8 on the demanded bits.
21093 // For ORC_B, any bit in the output demands all bits from the same byte.
21094 // So we need to do ORC_B on the demanded bits.
21095 APInt DemandedBits =
21096 APInt(BitWidth, computeGREVOrGORC(x: OriginalDemandedBits.getZExtValue(),
21097 ShAmt: 7, IsGORC));
21098 if (SimplifyDemandedBits(Op: Op.getOperand(i: 0), DemandedBits,
21099 DemandedElts: OriginalDemandedElts, Known&: Known2, TLO, Depth: Depth + 1))
21100 return true;
21101
21102 // To compute zeros for ORC_B, we need to invert the value and invert it
21103 // back after. This inverting is harmless for BREV8.
21104 Known.Zero = ~computeGREVOrGORC(x: ~Known2.Zero.getZExtValue(), ShAmt: 7, IsGORC);
21105 Known.One = computeGREVOrGORC(x: Known2.One.getZExtValue(), ShAmt: 7, IsGORC);
21106 return false;
21107 }
21108 }
21109
21110 return TargetLowering::SimplifyDemandedBitsForTargetNode(
21111 Op, DemandedBits: OriginalDemandedBits, DemandedElts: OriginalDemandedElts, Known, TLO, Depth);
21112}
21113
21114bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
21115 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
21116 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
21117
21118 // TODO: Add more target nodes.
21119 switch (Op.getOpcode()) {
21120 case RISCVISD::SELECT_CC:
21121 // Integer select_cc cannot create poison.
21122 // TODO: What are the FP poison semantics?
21123 // TODO: This instruction blocks poison from the unselected operand, can
21124 // we do anything with that?
21125 return !Op.getValueType().isInteger();
21126 }
21127 return TargetLowering::canCreateUndefOrPoisonForTargetNode(
21128 Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
21129}
21130
21131const Constant *
21132RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
21133 assert(Ld && "Unexpected null LoadSDNode");
21134 if (!ISD::isNormalLoad(N: Ld))
21135 return nullptr;
21136
21137 SDValue Ptr = Ld->getBasePtr();
21138
21139 // Only constant pools with no offset are supported.
21140 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
21141 auto *CNode = dyn_cast<ConstantPoolSDNode>(Val&: Ptr);
21142 if (!CNode || CNode->isMachineConstantPoolEntry() ||
21143 CNode->getOffset() != 0)
21144 return nullptr;
21145
21146 return CNode;
21147 };
21148
21149 // Simple case, LLA.
21150 if (Ptr.getOpcode() == RISCVISD::LLA) {
21151 auto *CNode = GetSupportedConstantPool(Ptr.getOperand(i: 0));
21152 if (!CNode || CNode->getTargetFlags() != 0)
21153 return nullptr;
21154
21155 return CNode->getConstVal();
21156 }
21157
21158 // Look for a HI and ADD_LO pair.
21159 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
21160 Ptr.getOperand(i: 0).getOpcode() != RISCVISD::HI)
21161 return nullptr;
21162
21163 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(i: 1));
21164 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(i: 0).getOperand(i: 0));
21165
21166 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
21167 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
21168 return nullptr;
21169
21170 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
21171 return nullptr;
21172
21173 return CNodeLo->getConstVal();
21174}
21175
21176static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI,
21177 MachineBasicBlock *BB) {
21178 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
21179
21180 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
21181 // Should the count have wrapped while it was being read, we need to try
21182 // again.
21183 // For example:
21184 // ```
21185 // read:
21186 // csrrs x3, counterh # load high word of counter
21187 // csrrs x2, counter # load low word of counter
21188 // csrrs x4, counterh # load high word of counter
21189 // bne x3, x4, read # check if high word reads match, otherwise try again
21190 // ```
21191
21192 MachineFunction &MF = *BB->getParent();
21193 const BasicBlock *LLVMBB = BB->getBasicBlock();
21194 MachineFunction::iterator It = ++BB->getIterator();
21195
21196 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(BB: LLVMBB);
21197 MF.insert(MBBI: It, MBB: LoopMBB);
21198
21199 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(BB: LLVMBB);
21200 MF.insert(MBBI: It, MBB: DoneMBB);
21201
21202 // Transfer the remainder of BB and its successor edges to DoneMBB.
21203 DoneMBB->splice(Where: DoneMBB->begin(), Other: BB,
21204 From: std::next(x: MachineBasicBlock::iterator(MI)), To: BB->end());
21205 DoneMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
21206
21207 BB->addSuccessor(Succ: LoopMBB);
21208
21209 MachineRegisterInfo &RegInfo = MF.getRegInfo();
21210 Register ReadAgainReg = RegInfo.createVirtualRegister(RegClass: &RISCV::GPRRegClass);
21211 Register LoReg = MI.getOperand(i: 0).getReg();
21212 Register HiReg = MI.getOperand(i: 1).getReg();
21213 int64_t LoCounter = MI.getOperand(i: 2).getImm();
21214 int64_t HiCounter = MI.getOperand(i: 3).getImm();
21215 DebugLoc DL = MI.getDebugLoc();
21216
21217 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
21218 BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII->get(Opcode: RISCV::CSRRS), DestReg: HiReg)
21219 .addImm(Val: HiCounter)
21220 .addReg(RegNo: RISCV::X0);
21221 BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII->get(Opcode: RISCV::CSRRS), DestReg: LoReg)
21222 .addImm(Val: LoCounter)
21223 .addReg(RegNo: RISCV::X0);
21224 BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII->get(Opcode: RISCV::CSRRS), DestReg: ReadAgainReg)
21225 .addImm(Val: HiCounter)
21226 .addReg(RegNo: RISCV::X0);
21227
21228 BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII->get(Opcode: RISCV::BNE))
21229 .addReg(RegNo: HiReg)
21230 .addReg(RegNo: ReadAgainReg)
21231 .addMBB(MBB: LoopMBB);
21232
21233 LoopMBB->addSuccessor(Succ: LoopMBB);
21234 LoopMBB->addSuccessor(Succ: DoneMBB);
21235
21236 MI.eraseFromParent();
21237
21238 return DoneMBB;
21239}
21240
21241static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
21242 MachineBasicBlock *BB,
21243 const RISCVSubtarget &Subtarget) {
21244 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
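// Split the f64 source by spilling it to a stack slot and reloading the two
// 32-bit halves into the GPR destinations (lo at offset 0, hi at offset 4).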
21245
21246 MachineFunction &MF = *BB->getParent();
21247 DebugLoc DL = MI.getDebugLoc();
21248 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
21249 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
21250 Register LoReg = MI.getOperand(i: 0).getReg();
21251 Register HiReg = MI.getOperand(i: 1).getReg();
21252 Register SrcReg = MI.getOperand(i: 2).getReg();
21253
21254 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
21255 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
21256
21257 TII.storeRegToStackSlot(MBB&: *BB, MI, SrcReg, isKill: MI.getOperand(i: 2).isKill(), FrameIndex: FI, RC: SrcRC,
21258 TRI: RI, VReg: Register());
21259 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
21260 MachineMemOperand *MMOLo =
21261 MF.getMachineMemOperand(PtrInfo: MPI, F: MachineMemOperand::MOLoad, Size: 4, BaseAlignment: Align(8));
21262 MachineMemOperand *MMOHi = MF.getMachineMemOperand(
21263 PtrInfo: MPI.getWithOffset(O: 4), F: MachineMemOperand::MOLoad, Size: 4, BaseAlignment: Align(8));
21264 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: RISCV::LW), DestReg: LoReg)
21265 .addFrameIndex(Idx: FI)
21266 .addImm(Val: 0)
21267 .addMemOperand(MMO: MMOLo);
21268 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: RISCV::LW), DestReg: HiReg)
21269 .addFrameIndex(Idx: FI)
21270 .addImm(Val: 4)
21271 .addMemOperand(MMO: MMOHi);
21272 MI.eraseFromParent(); // The pseudo instruction is gone now.
21273 return BB;
21274}
21275
21276static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
21277 MachineBasicBlock *BB,
21278 const RISCVSubtarget &Subtarget) {
21279 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
21280 "Unexpected instruction");
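  // BuildPairF64Pseudo is the inverse of SplitF64Pseudo: it combines two GPRs
  // holding the low and high words of an f64 into a single FPR64, again by
  // round-tripping through the MoveF64 frame slot. Rough sketch with
  // illustrative registers and the frame slot shown at offset 0:
  // ```
  // sw  a0, 0(sp)    # store low 32 bits
  // sw  a1, 4(sp)    # store high 32 bits
  // fld fa0, 0(sp)   # reload as a single f64 (loadRegFromStackSlot)
  // ```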
21281
21282 MachineFunction &MF = *BB->getParent();
21283 DebugLoc DL = MI.getDebugLoc();
21284 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
21285 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
21286 Register DstReg = MI.getOperand(i: 0).getReg();
21287 Register LoReg = MI.getOperand(i: 1).getReg();
21288 Register HiReg = MI.getOperand(i: 2).getReg();
21289
21290 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
21291 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
21292
21293 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
21294 MachineMemOperand *MMOLo =
21295 MF.getMachineMemOperand(PtrInfo: MPI, F: MachineMemOperand::MOStore, Size: 4, BaseAlignment: Align(8));
21296 MachineMemOperand *MMOHi = MF.getMachineMemOperand(
21297 PtrInfo: MPI.getWithOffset(O: 4), F: MachineMemOperand::MOStore, Size: 4, BaseAlignment: Align(8));
21298 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: RISCV::SW))
21299 .addReg(RegNo: LoReg, flags: getKillRegState(B: MI.getOperand(i: 1).isKill()))
21300 .addFrameIndex(Idx: FI)
21301 .addImm(Val: 0)
21302 .addMemOperand(MMO: MMOLo);
21303 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: RISCV::SW))
21304 .addReg(RegNo: HiReg, flags: getKillRegState(B: MI.getOperand(i: 2).isKill()))
21305 .addFrameIndex(Idx: FI)
21306 .addImm(Val: 4)
21307 .addMemOperand(MMO: MMOHi);
21308 TII.loadRegFromStackSlot(MBB&: *BB, MI, DestReg: DstReg, FrameIndex: FI, RC: DstRC, TRI: RI, VReg: Register());
21309 MI.eraseFromParent(); // The pseudo instruction is gone now.
21310 return BB;
21311}
21312
21313static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
21314 unsigned RelOpcode, unsigned EqOpcode,
21315 const RISCVSubtarget &Subtarget) {
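  // FLT/FLE are signaling comparisons: they raise the invalid operation
  // exception for any NaN operand, while the quiet pseudos must only do so
  // for signaling NaNs. The expansion therefore saves and restores FFLAGS
  // around the relational compare, discarding any spurious flags, and then
  // issues an FEQ (a quiet compare) purely for its exception side effect.
  // Rough sketch for PseudoQuietFLT_S with illustrative registers:
  // ```
  // frflags t0           # save FFLAGS
  // flt.s   a0, fa0, fa1 # relational compare
  // fsflags t0           # restore FFLAGS
  // feq.s   x0, fa0, fa1 # raises invalid only for signaling NaNs
  // ```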
21316 DebugLoc DL = MI.getDebugLoc();
21317 Register DstReg = MI.getOperand(i: 0).getReg();
21318 Register Src1Reg = MI.getOperand(i: 1).getReg();
21319 Register Src2Reg = MI.getOperand(i: 2).getReg();
21320 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
21321 Register SavedFFlags = MRI.createVirtualRegister(RegClass: &RISCV::GPRRegClass);
21322 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
21323
21324 // Save the current FFLAGS.
21325 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: RISCV::ReadFFLAGS), DestReg: SavedFFlags);
21326
21327 auto MIB = BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: RelOpcode), DestReg: DstReg)
21328 .addReg(RegNo: Src1Reg)
21329 .addReg(RegNo: Src2Reg);
21330 if (MI.getFlag(Flag: MachineInstr::MIFlag::NoFPExcept))
21331 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
21332
21333 // Restore the FFLAGS.
21334 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: RISCV::WriteFFLAGS))
21335 .addReg(RegNo: SavedFFlags, flags: RegState::Kill);
21336
  // Issue a dummy FEQ opcode to raise an exception for signaling NaNs.
21338 auto MIB2 = BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: EqOpcode), DestReg: RISCV::X0)
21339 .addReg(RegNo: Src1Reg, flags: getKillRegState(B: MI.getOperand(i: 1).isKill()))
21340 .addReg(RegNo: Src2Reg, flags: getKillRegState(B: MI.getOperand(i: 2).isKill()));
21341 if (MI.getFlag(Flag: MachineInstr::MIFlag::NoFPExcept))
21342 MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept);
21343
21344 // Erase the pseudoinstruction.
21345 MI.eraseFromParent();
21346 return BB;
21347}
21348
21349static MachineBasicBlock *
21350EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
21351 MachineBasicBlock *ThisMBB,
21352 const RISCVSubtarget &Subtarget) {
  // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
21354 // Without this, custom-inserter would have generated:
21355 //
21356 // A
21357 // | \
21358 // | B
21359 // | /
21360 // C
21361 // | \
21362 // | D
21363 // | /
21364 // E
21365 //
21366 // A: X = ...; Y = ...
21367 // B: empty
21368 // C: Z = PHI [X, A], [Y, B]
21369 // D: empty
21370 // E: PHI [X, C], [Z, D]
21371 //
21372 // If we lower both Select_FPRX_ in a single step, we can instead generate:
21373 //
21374 // A
21375 // | \
21376 // | C
21377 // | /|
21378 // |/ |
21379 // | |
21380 // | D
21381 // | /
21382 // E
21383 //
21384 // A: X = ...; Y = ...
21385 // D: empty
21386 // E: PHI [X, A], [X, C], [Y, D]
21387
21388 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
21389 const DebugLoc &DL = First.getDebugLoc();
21390 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
21391 MachineFunction *F = ThisMBB->getParent();
21392 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
21393 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
21394 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
21395 MachineFunction::iterator It = ++ThisMBB->getIterator();
21396 F->insert(MBBI: It, MBB: FirstMBB);
21397 F->insert(MBBI: It, MBB: SecondMBB);
21398 F->insert(MBBI: It, MBB: SinkMBB);
21399
21400 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
21401 SinkMBB->splice(Where: SinkMBB->begin(), Other: ThisMBB,
21402 From: std::next(x: MachineBasicBlock::iterator(First)),
21403 To: ThisMBB->end());
21404 SinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: ThisMBB);
21405
21406 // Fallthrough block for ThisMBB.
21407 ThisMBB->addSuccessor(Succ: FirstMBB);
21408 // Fallthrough block for FirstMBB.
21409 FirstMBB->addSuccessor(Succ: SecondMBB);
21410 ThisMBB->addSuccessor(Succ: SinkMBB);
21411 FirstMBB->addSuccessor(Succ: SinkMBB);
  // SecondMBB falls through to SinkMBB.
21413 SecondMBB->addSuccessor(Succ: SinkMBB);
21414
21415 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(i: 3).getImm());
21416 Register FLHS = First.getOperand(i: 1).getReg();
21417 Register FRHS = First.getOperand(i: 2).getReg();
21418 // Insert appropriate branch.
21419 BuildMI(BB: FirstMBB, MIMD: DL, MCID: TII.get(Opcode: RISCVCC::getBrCond(CC: FirstCC, SelectOpc: First.getOpcode())))
21420 .addReg(RegNo: FLHS)
21421 .addReg(RegNo: FRHS)
21422 .addMBB(MBB: SinkMBB);
21423
21424 Register SLHS = Second.getOperand(i: 1).getReg();
21425 Register SRHS = Second.getOperand(i: 2).getReg();
21426 Register Op1Reg4 = First.getOperand(i: 4).getReg();
21427 Register Op1Reg5 = First.getOperand(i: 5).getReg();
21428
21429 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(i: 3).getImm());
21430 // Insert appropriate branch.
21431 BuildMI(BB: ThisMBB, MIMD: DL,
21432 MCID: TII.get(Opcode: RISCVCC::getBrCond(CC: SecondCC, SelectOpc: Second.getOpcode())))
21433 .addReg(RegNo: SLHS)
21434 .addReg(RegNo: SRHS)
21435 .addMBB(MBB: SinkMBB);
21436
21437 Register DestReg = Second.getOperand(i: 0).getReg();
21438 Register Op2Reg4 = Second.getOperand(i: 4).getReg();
21439 BuildMI(BB&: *SinkMBB, I: SinkMBB->begin(), MIMD: DL, MCID: TII.get(Opcode: RISCV::PHI), DestReg)
21440 .addReg(RegNo: Op2Reg4)
21441 .addMBB(MBB: ThisMBB)
21442 .addReg(RegNo: Op1Reg4)
21443 .addMBB(MBB: FirstMBB)
21444 .addReg(RegNo: Op1Reg5)
21445 .addMBB(MBB: SecondMBB);
21446
21447 // Now remove the Select_FPRX_s.
21448 First.eraseFromParent();
21449 Second.eraseFromParent();
21450 return SinkMBB;
21451}
21452
21453static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
21454 MachineBasicBlock *BB,
21455 const RISCVSubtarget &Subtarget) {
  // To "insert" Select_* instructions, we actually have to insert the triangle
  // control-flow pattern. The incoming instructions know the destination vreg
  // to set, the registers (or immediate) to compare for the branch, the
  // true/false values to select between, and the condition code to use for
  // the branch.
21460 //
21461 // We produce the following control flow:
21462 // HeadMBB
21463 // | \
21464 // | IfFalseMBB
21465 // | /
21466 // TailMBB
21467 //
21468 // When we find a sequence of selects we attempt to optimize their emission
21469 // by sharing the control flow. Currently we only handle cases where we have
21470 // multiple selects with the exact same condition (same LHS, RHS and CC).
21471 // The selects may be interleaved with other instructions if the other
21472 // instructions meet some requirements we deem safe:
  // - They are debug instructions (which are simply skipped). Otherwise,
  // - They are not pseudo instructions, do not have side-effects, do not
  //   access memory, and their inputs do not depend on the results of the
  //   select pseudo-instructions.
21477 // The TrueV/FalseV operands of the selects cannot depend on the result of
21478 // previous selects in the sequence.
21479 // These conditions could be further relaxed. See the X86 target for a
21480 // related approach and more information.
21481 //
21482 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
21483 // is checked here and handled by a separate function -
21484 // EmitLoweredCascadedSelect.
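  //
  // For a single select, the emitted code is schematically (using BLT for
  // RISCVCC::COND_LT as an illustrative condition):
  //
  // HeadMBB:
  //   blt lhs, rhs, TailMBB    # condition true: skip IfFalseMBB
  // IfFalseMBB:                # fallthrough, empty
  // TailMBB:
  //   %dst = PHI [ %TrueV, HeadMBB ], [ %FalseV, IfFalseMBB ]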
21485
21486 auto Next = next_nodbg(It: MI.getIterator(), End: BB->instr_end());
21487 if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
21488 MI.getOperand(i: 1).isReg() && MI.getOperand(i: 2).isReg() &&
21489 Next != BB->end() && Next->getOpcode() == MI.getOpcode() &&
21490 Next->getOperand(i: 5).getReg() == MI.getOperand(i: 0).getReg() &&
21491 Next->getOperand(i: 5).isKill())
21492 return EmitLoweredCascadedSelect(First&: MI, Second&: *Next, ThisMBB: BB, Subtarget);
21493
21494 Register LHS = MI.getOperand(i: 1).getReg();
21495 Register RHS;
21496 if (MI.getOperand(i: 2).isReg())
21497 RHS = MI.getOperand(i: 2).getReg();
21498 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(i: 3).getImm());
21499
21500 SmallVector<MachineInstr *, 4> SelectDebugValues;
21501 SmallSet<Register, 4> SelectDests;
21502 SelectDests.insert(V: MI.getOperand(i: 0).getReg());
21503
21504 MachineInstr *LastSelectPseudo = &MI;
21505 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
21506 SequenceMBBI != E; ++SequenceMBBI) {
21507 if (SequenceMBBI->isDebugInstr())
21508 continue;
21509 if (RISCVInstrInfo::isSelectPseudo(MI: *SequenceMBBI)) {
21510 if (SequenceMBBI->getOperand(i: 1).getReg() != LHS ||
21511 !SequenceMBBI->getOperand(i: 2).isReg() ||
21512 SequenceMBBI->getOperand(i: 2).getReg() != RHS ||
21513 SequenceMBBI->getOperand(i: 3).getImm() != CC ||
21514 SelectDests.count(V: SequenceMBBI->getOperand(i: 4).getReg()) ||
21515 SelectDests.count(V: SequenceMBBI->getOperand(i: 5).getReg()))
21516 break;
21517 LastSelectPseudo = &*SequenceMBBI;
21518 SequenceMBBI->collectDebugValues(DbgValues&: SelectDebugValues);
21519 SelectDests.insert(V: SequenceMBBI->getOperand(i: 0).getReg());
21520 continue;
21521 }
21522 if (SequenceMBBI->hasUnmodeledSideEffects() ||
21523 SequenceMBBI->mayLoadOrStore() ||
21524 SequenceMBBI->usesCustomInsertionHook())
21525 break;
21526 if (llvm::any_of(Range: SequenceMBBI->operands(), P: [&](MachineOperand &MO) {
21527 return MO.isReg() && MO.isUse() && SelectDests.count(V: MO.getReg());
21528 }))
21529 break;
21530 }
21531
21532 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
21533 const BasicBlock *LLVM_BB = BB->getBasicBlock();
21534 DebugLoc DL = MI.getDebugLoc();
21535 MachineFunction::iterator I = ++BB->getIterator();
21536
21537 MachineBasicBlock *HeadMBB = BB;
21538 MachineFunction *F = BB->getParent();
21539 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
21540 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
21541
21542 F->insert(MBBI: I, MBB: IfFalseMBB);
21543 F->insert(MBBI: I, MBB: TailMBB);
21544
21545 // Set the call frame size on entry to the new basic blocks.
21546 unsigned CallFrameSize = TII.getCallFrameSizeAt(MI&: *LastSelectPseudo);
21547 IfFalseMBB->setCallFrameSize(CallFrameSize);
21548 TailMBB->setCallFrameSize(CallFrameSize);
21549
21550 // Transfer debug instructions associated with the selects to TailMBB.
21551 for (MachineInstr *DebugInstr : SelectDebugValues) {
21552 TailMBB->push_back(MI: DebugInstr->removeFromParent());
21553 }
21554
21555 // Move all instructions after the sequence to TailMBB.
21556 TailMBB->splice(Where: TailMBB->end(), Other: HeadMBB,
21557 From: std::next(x: LastSelectPseudo->getIterator()), To: HeadMBB->end());
21558 // Update machine-CFG edges by transferring all successors of the current
21559 // block to the new block which will contain the Phi nodes for the selects.
21560 TailMBB->transferSuccessorsAndUpdatePHIs(FromMBB: HeadMBB);
21561 // Set the successors for HeadMBB.
21562 HeadMBB->addSuccessor(Succ: IfFalseMBB);
21563 HeadMBB->addSuccessor(Succ: TailMBB);
21564
21565 // Insert appropriate branch.
21566 if (MI.getOperand(i: 2).isImm())
21567 BuildMI(BB: HeadMBB, MIMD: DL, MCID: TII.get(Opcode: RISCVCC::getBrCond(CC, SelectOpc: MI.getOpcode())))
21568 .addReg(RegNo: LHS)
21569 .addImm(Val: MI.getOperand(i: 2).getImm())
21570 .addMBB(MBB: TailMBB);
21571 else
21572 BuildMI(BB: HeadMBB, MIMD: DL, MCID: TII.get(Opcode: RISCVCC::getBrCond(CC, SelectOpc: MI.getOpcode())))
21573 .addReg(RegNo: LHS)
21574 .addReg(RegNo: RHS)
21575 .addMBB(MBB: TailMBB);
21576
21577 // IfFalseMBB just falls through to TailMBB.
21578 IfFalseMBB->addSuccessor(Succ: TailMBB);
21579
21580 // Create PHIs for all of the select pseudo-instructions.
21581 auto SelectMBBI = MI.getIterator();
21582 auto SelectEnd = std::next(x: LastSelectPseudo->getIterator());
21583 auto InsertionPoint = TailMBB->begin();
21584 while (SelectMBBI != SelectEnd) {
21585 auto Next = std::next(x: SelectMBBI);
21586 if (RISCVInstrInfo::isSelectPseudo(MI: *SelectMBBI)) {
21587 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
21588 BuildMI(BB&: *TailMBB, I: InsertionPoint, MIMD: SelectMBBI->getDebugLoc(),
21589 MCID: TII.get(Opcode: RISCV::PHI), DestReg: SelectMBBI->getOperand(i: 0).getReg())
21590 .addReg(RegNo: SelectMBBI->getOperand(i: 4).getReg())
21591 .addMBB(MBB: HeadMBB)
21592 .addReg(RegNo: SelectMBBI->getOperand(i: 5).getReg())
21593 .addMBB(MBB: IfFalseMBB);
21594 SelectMBBI->eraseFromParent();
21595 }
21596 SelectMBBI = Next;
21597 }
21598
21599 F->getProperties().resetNoPHIs();
21600 return TailMBB;
21601}
21602
21603// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
21604static const RISCV::RISCVMaskedPseudoInfo *
21605lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVVType::VLMUL LMul, unsigned SEW) {
21606 const RISCVVInversePseudosTable::PseudoInfo *Inverse =
21607 RISCVVInversePseudosTable::getBaseInfo(BaseInstr: MCOpcode, VLMul: LMul, SEW);
21608 assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
21609 const RISCV::RISCVMaskedPseudoInfo *Masked =
21610 RISCV::lookupMaskedIntrinsicByUnmasked(UnmaskedPseudo: Inverse->Pseudo);
21611 assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
21612 return Masked;
21613}
21614
21615static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
21616 MachineBasicBlock *BB,
21617 unsigned CVTXOpc) {
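  // The VFROUND_NOEXCEPT pseudos round each active element to an
  // integer-valued FP number using the dynamic rounding mode, without leaving
  // any exception flags behind. This is implemented as a masked FP->integer
  // conversion followed by a masked integer->FP conversion, with FFLAGS saved
  // before and restored after the pair so that any flags raised by the
  // conversions are discarded. Rough sketch with illustrative registers:
  // ```
  // frflags     t0             # save FFLAGS
  // vfcvt.x.f.v v8, v9, v0.t   # FP -> integer, dynamic rounding mode
  // vfcvt.f.x.v v8, v8, v0.t   # integer -> FP
  // fsflags     t0             # restore FFLAGS
  // ```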
21618 DebugLoc DL = MI.getDebugLoc();
21619
21620 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
21621
21622 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
21623 Register SavedFFLAGS = MRI.createVirtualRegister(RegClass: &RISCV::GPRRegClass);
21624
21625 // Save the old value of FFLAGS.
21626 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: RISCV::ReadFFLAGS), DestReg: SavedFFLAGS);
21627
21628 assert(MI.getNumOperands() == 7);
21629
21630 // Emit a VFCVT_X_F
21631 const TargetRegisterInfo *TRI =
21632 BB->getParent()->getSubtarget().getRegisterInfo();
21633 const TargetRegisterClass *RC = MI.getRegClassConstraint(OpIdx: 0, TII: &TII, TRI);
21634 Register Tmp = MRI.createVirtualRegister(RegClass: RC);
21635 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: CVTXOpc), DestReg: Tmp)
21636 .add(MO: MI.getOperand(i: 1))
21637 .add(MO: MI.getOperand(i: 2))
21638 .add(MO: MI.getOperand(i: 3))
21639 .add(MO: MachineOperand::CreateImm(Val: 7)) // frm = DYN
21640 .add(MO: MI.getOperand(i: 4))
21641 .add(MO: MI.getOperand(i: 5))
21642 .add(MO: MI.getOperand(i: 6))
21643 .add(MO: MachineOperand::CreateReg(Reg: RISCV::FRM,
21644 /*IsDef*/ isDef: false,
21645 /*IsImp*/ isImp: true));
21646
21647 // Emit a VFCVT_F_X
21648 RISCVVType::VLMUL LMul = RISCVII::getLMul(TSFlags: MI.getDesc().TSFlags);
21649 unsigned Log2SEW = MI.getOperand(i: RISCVII::getSEWOpNum(Desc: MI.getDesc())).getImm();
21650 // There is no E8 variant for VFCVT_F_X.
21651 assert(Log2SEW >= 4);
21652 unsigned CVTFOpc =
21653 lookupMaskedIntrinsic(MCOpcode: RISCV::VFCVT_F_X_V, LMul, SEW: 1 << Log2SEW)
21654 ->MaskedPseudo;
21655
21656 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: CVTFOpc))
21657 .add(MO: MI.getOperand(i: 0))
21658 .add(MO: MI.getOperand(i: 1))
21659 .addReg(RegNo: Tmp)
21660 .add(MO: MI.getOperand(i: 3))
21661 .add(MO: MachineOperand::CreateImm(Val: 7)) // frm = DYN
21662 .add(MO: MI.getOperand(i: 4))
21663 .add(MO: MI.getOperand(i: 5))
21664 .add(MO: MI.getOperand(i: 6))
21665 .add(MO: MachineOperand::CreateReg(Reg: RISCV::FRM,
21666 /*IsDef*/ isDef: false,
21667 /*IsImp*/ isImp: true));
21668
21669 // Restore FFLAGS.
21670 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: RISCV::WriteFFLAGS))
21671 .addReg(RegNo: SavedFFLAGS, flags: RegState::Kill);
21672
21673 // Erase the pseudoinstruction.
21674 MI.eraseFromParent();
21675 return BB;
21676}
21677
21678static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
21679 const RISCVSubtarget &Subtarget) {
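  // The PseudoFROUND_* pseudos round a scalar FP value to an integral value
  // in FP format using the rounding mode in operand 3. Operand 2 holds a
  // precomputed threshold at or above which every finite value is already
  // integral, so such values (and NaNs, for which the compare is false)
  // bypass the conversion. Everything else is converted to integer and back,
  // and the original sign is reapplied so that values rounding to zero keep
  // their sign. Rough sketch for the f32 case with illustrative registers:
  // ```
  // fsgnjx.s ft0, fa0, fa0   # ft0 = |src|
  // flt.s    t0, ft0, fa1    # |src| < threshold?
  // beq      t0, x0, done    # no: the input is already the result
  // fcvt.w.s t1, fa0, rm     # convert to integer with rounding mode rm
  // fcvt.s.w ft1, t1, rm     # and back to FP
  // fsgnj.s  fa0, ft1, fa0   # reapply the original sign
  // done:
  // ```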
21680 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
21681 const TargetRegisterClass *RC;
21682 switch (MI.getOpcode()) {
21683 default:
21684 llvm_unreachable("Unexpected opcode");
21685 case RISCV::PseudoFROUND_H:
21686 CmpOpc = RISCV::FLT_H;
21687 F2IOpc = RISCV::FCVT_W_H;
21688 I2FOpc = RISCV::FCVT_H_W;
21689 FSGNJOpc = RISCV::FSGNJ_H;
21690 FSGNJXOpc = RISCV::FSGNJX_H;
21691 RC = &RISCV::FPR16RegClass;
21692 break;
21693 case RISCV::PseudoFROUND_H_INX:
21694 CmpOpc = RISCV::FLT_H_INX;
21695 F2IOpc = RISCV::FCVT_W_H_INX;
21696 I2FOpc = RISCV::FCVT_H_W_INX;
21697 FSGNJOpc = RISCV::FSGNJ_H_INX;
21698 FSGNJXOpc = RISCV::FSGNJX_H_INX;
21699 RC = &RISCV::GPRF16RegClass;
21700 break;
21701 case RISCV::PseudoFROUND_S:
21702 CmpOpc = RISCV::FLT_S;
21703 F2IOpc = RISCV::FCVT_W_S;
21704 I2FOpc = RISCV::FCVT_S_W;
21705 FSGNJOpc = RISCV::FSGNJ_S;
21706 FSGNJXOpc = RISCV::FSGNJX_S;
21707 RC = &RISCV::FPR32RegClass;
21708 break;
21709 case RISCV::PseudoFROUND_S_INX:
21710 CmpOpc = RISCV::FLT_S_INX;
21711 F2IOpc = RISCV::FCVT_W_S_INX;
21712 I2FOpc = RISCV::FCVT_S_W_INX;
21713 FSGNJOpc = RISCV::FSGNJ_S_INX;
21714 FSGNJXOpc = RISCV::FSGNJX_S_INX;
21715 RC = &RISCV::GPRF32RegClass;
21716 break;
21717 case RISCV::PseudoFROUND_D:
21718 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
21719 CmpOpc = RISCV::FLT_D;
21720 F2IOpc = RISCV::FCVT_L_D;
21721 I2FOpc = RISCV::FCVT_D_L;
21722 FSGNJOpc = RISCV::FSGNJ_D;
21723 FSGNJXOpc = RISCV::FSGNJX_D;
21724 RC = &RISCV::FPR64RegClass;
21725 break;
21726 case RISCV::PseudoFROUND_D_INX:
21727 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
21728 CmpOpc = RISCV::FLT_D_INX;
21729 F2IOpc = RISCV::FCVT_L_D_INX;
21730 I2FOpc = RISCV::FCVT_D_L_INX;
21731 FSGNJOpc = RISCV::FSGNJ_D_INX;
21732 FSGNJXOpc = RISCV::FSGNJX_D_INX;
21733 RC = &RISCV::GPRRegClass;
21734 break;
21735 }
21736
21737 const BasicBlock *BB = MBB->getBasicBlock();
21738 DebugLoc DL = MI.getDebugLoc();
21739 MachineFunction::iterator I = ++MBB->getIterator();
21740
21741 MachineFunction *F = MBB->getParent();
21742 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
21743 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
21744
21745 F->insert(MBBI: I, MBB: CvtMBB);
21746 F->insert(MBBI: I, MBB: DoneMBB);
  // Move MI and all instructions after it to DoneMBB.
21748 DoneMBB->splice(Where: DoneMBB->end(), Other: MBB, From: MachineBasicBlock::iterator(MI),
21749 To: MBB->end());
  // Update machine-CFG edges by transferring all successors of the current
  // block to DoneMBB, which will contain the PHI node merging the two results.
21752 DoneMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB);
21753 // Set the successors for MBB.
21754 MBB->addSuccessor(Succ: CvtMBB);
21755 MBB->addSuccessor(Succ: DoneMBB);
21756
21757 Register DstReg = MI.getOperand(i: 0).getReg();
21758 Register SrcReg = MI.getOperand(i: 1).getReg();
21759 Register MaxReg = MI.getOperand(i: 2).getReg();
21760 int64_t FRM = MI.getOperand(i: 3).getImm();
21761
21762 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
21763 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
21764
21765 Register FabsReg = MRI.createVirtualRegister(RegClass: RC);
21766 BuildMI(BB: MBB, MIMD: DL, MCID: TII.get(Opcode: FSGNJXOpc), DestReg: FabsReg).addReg(RegNo: SrcReg).addReg(RegNo: SrcReg);
21767
21768 // Compare the FP value to the max value.
21769 Register CmpReg = MRI.createVirtualRegister(RegClass: &RISCV::GPRRegClass);
21770 auto MIB =
21771 BuildMI(BB: MBB, MIMD: DL, MCID: TII.get(Opcode: CmpOpc), DestReg: CmpReg).addReg(RegNo: FabsReg).addReg(RegNo: MaxReg);
21772 if (MI.getFlag(Flag: MachineInstr::MIFlag::NoFPExcept))
21773 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
21774
21775 // Insert branch.
21776 BuildMI(BB: MBB, MIMD: DL, MCID: TII.get(Opcode: RISCV::BEQ))
21777 .addReg(RegNo: CmpReg)
21778 .addReg(RegNo: RISCV::X0)
21779 .addMBB(MBB: DoneMBB);
21780
21781 CvtMBB->addSuccessor(Succ: DoneMBB);
21782
21783 // Convert to integer.
21784 Register F2IReg = MRI.createVirtualRegister(RegClass: &RISCV::GPRRegClass);
21785 MIB = BuildMI(BB: CvtMBB, MIMD: DL, MCID: TII.get(Opcode: F2IOpc), DestReg: F2IReg).addReg(RegNo: SrcReg).addImm(Val: FRM);
21786 if (MI.getFlag(Flag: MachineInstr::MIFlag::NoFPExcept))
21787 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
21788
21789 // Convert back to FP.
21790 Register I2FReg = MRI.createVirtualRegister(RegClass: RC);
21791 MIB = BuildMI(BB: CvtMBB, MIMD: DL, MCID: TII.get(Opcode: I2FOpc), DestReg: I2FReg).addReg(RegNo: F2IReg).addImm(Val: FRM);
21792 if (MI.getFlag(Flag: MachineInstr::MIFlag::NoFPExcept))
21793 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
21794
21795 // Restore the sign bit.
21796 Register CvtReg = MRI.createVirtualRegister(RegClass: RC);
21797 BuildMI(BB: CvtMBB, MIMD: DL, MCID: TII.get(Opcode: FSGNJOpc), DestReg: CvtReg).addReg(RegNo: I2FReg).addReg(RegNo: SrcReg);
21798
21799 // Merge the results.
21800 BuildMI(BB&: *DoneMBB, I: DoneMBB->begin(), MIMD: DL, MCID: TII.get(Opcode: RISCV::PHI), DestReg: DstReg)
21801 .addReg(RegNo: SrcReg)
21802 .addMBB(MBB)
21803 .addReg(RegNo: CvtReg)
21804 .addMBB(MBB: CvtMBB);
21805
21806 MI.eraseFromParent();
21807 return DoneMBB;
21808}
21809
21810MachineBasicBlock *
21811RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
21812 MachineBasicBlock *BB) const {
21813 switch (MI.getOpcode()) {
21814 default:
21815 llvm_unreachable("Unexpected instr type to insert");
21816 case RISCV::ReadCounterWide:
21817 assert(!Subtarget.is64Bit() &&
21818 "ReadCounterWide is only to be used on riscv32");
21819 return emitReadCounterWidePseudo(MI, BB);
21820 case RISCV::Select_GPR_Using_CC_GPR:
21821 case RISCV::Select_GPR_Using_CC_SImm5_CV:
21822 case RISCV::Select_GPRNoX0_Using_CC_SImm5NonZero_QC:
21823 case RISCV::Select_GPRNoX0_Using_CC_UImm5NonZero_QC:
21824 case RISCV::Select_GPRNoX0_Using_CC_SImm16NonZero_QC:
21825 case RISCV::Select_GPRNoX0_Using_CC_UImm16NonZero_QC:
21826 case RISCV::Select_GPR_Using_CC_UImmLog2XLen_NDS:
21827 case RISCV::Select_GPR_Using_CC_UImm7_NDS:
21828 case RISCV::Select_FPR16_Using_CC_GPR:
21829 case RISCV::Select_FPR16INX_Using_CC_GPR:
21830 case RISCV::Select_FPR32_Using_CC_GPR:
21831 case RISCV::Select_FPR32INX_Using_CC_GPR:
21832 case RISCV::Select_FPR64_Using_CC_GPR:
21833 case RISCV::Select_FPR64INX_Using_CC_GPR:
21834 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
21835 return emitSelectPseudo(MI, BB, Subtarget);
21836 case RISCV::BuildPairF64Pseudo:
21837 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
21838 case RISCV::SplitF64Pseudo:
21839 return emitSplitF64Pseudo(MI, BB, Subtarget);
21840 case RISCV::PseudoQuietFLE_H:
21841 return emitQuietFCMP(MI, BB, RelOpcode: RISCV::FLE_H, EqOpcode: RISCV::FEQ_H, Subtarget);
21842 case RISCV::PseudoQuietFLE_H_INX:
21843 return emitQuietFCMP(MI, BB, RelOpcode: RISCV::FLE_H_INX, EqOpcode: RISCV::FEQ_H_INX, Subtarget);
21844 case RISCV::PseudoQuietFLT_H:
21845 return emitQuietFCMP(MI, BB, RelOpcode: RISCV::FLT_H, EqOpcode: RISCV::FEQ_H, Subtarget);
21846 case RISCV::PseudoQuietFLT_H_INX:
21847 return emitQuietFCMP(MI, BB, RelOpcode: RISCV::FLT_H_INX, EqOpcode: RISCV::FEQ_H_INX, Subtarget);
21848 case RISCV::PseudoQuietFLE_S:
21849 return emitQuietFCMP(MI, BB, RelOpcode: RISCV::FLE_S, EqOpcode: RISCV::FEQ_S, Subtarget);
21850 case RISCV::PseudoQuietFLE_S_INX:
21851 return emitQuietFCMP(MI, BB, RelOpcode: RISCV::FLE_S_INX, EqOpcode: RISCV::FEQ_S_INX, Subtarget);
21852 case RISCV::PseudoQuietFLT_S:
21853 return emitQuietFCMP(MI, BB, RelOpcode: RISCV::FLT_S, EqOpcode: RISCV::FEQ_S, Subtarget);
21854 case RISCV::PseudoQuietFLT_S_INX:
21855 return emitQuietFCMP(MI, BB, RelOpcode: RISCV::FLT_S_INX, EqOpcode: RISCV::FEQ_S_INX, Subtarget);
21856 case RISCV::PseudoQuietFLE_D:
21857 return emitQuietFCMP(MI, BB, RelOpcode: RISCV::FLE_D, EqOpcode: RISCV::FEQ_D, Subtarget);
21858 case RISCV::PseudoQuietFLE_D_INX:
21859 return emitQuietFCMP(MI, BB, RelOpcode: RISCV::FLE_D_INX, EqOpcode: RISCV::FEQ_D_INX, Subtarget);
21860 case RISCV::PseudoQuietFLE_D_IN32X:
21861 return emitQuietFCMP(MI, BB, RelOpcode: RISCV::FLE_D_IN32X, EqOpcode: RISCV::FEQ_D_IN32X,
21862 Subtarget);
21863 case RISCV::PseudoQuietFLT_D:
21864 return emitQuietFCMP(MI, BB, RelOpcode: RISCV::FLT_D, EqOpcode: RISCV::FEQ_D, Subtarget);
21865 case RISCV::PseudoQuietFLT_D_INX:
21866 return emitQuietFCMP(MI, BB, RelOpcode: RISCV::FLT_D_INX, EqOpcode: RISCV::FEQ_D_INX, Subtarget);
21867 case RISCV::PseudoQuietFLT_D_IN32X:
21868 return emitQuietFCMP(MI, BB, RelOpcode: RISCV::FLT_D_IN32X, EqOpcode: RISCV::FEQ_D_IN32X,
21869 Subtarget);
21870
21871 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
21872 return emitVFROUND_NOEXCEPT_MASK(MI, BB, CVTXOpc: RISCV::PseudoVFCVT_X_F_V_M1_MASK);
21873 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
21874 return emitVFROUND_NOEXCEPT_MASK(MI, BB, CVTXOpc: RISCV::PseudoVFCVT_X_F_V_M2_MASK);
21875 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
21876 return emitVFROUND_NOEXCEPT_MASK(MI, BB, CVTXOpc: RISCV::PseudoVFCVT_X_F_V_M4_MASK);
21877 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
21878 return emitVFROUND_NOEXCEPT_MASK(MI, BB, CVTXOpc: RISCV::PseudoVFCVT_X_F_V_M8_MASK);
21879 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
21880 return emitVFROUND_NOEXCEPT_MASK(MI, BB, CVTXOpc: RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
21881 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
21882 return emitVFROUND_NOEXCEPT_MASK(MI, BB, CVTXOpc: RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
21883 case RISCV::PseudoFROUND_H:
21884 case RISCV::PseudoFROUND_H_INX:
21885 case RISCV::PseudoFROUND_S:
21886 case RISCV::PseudoFROUND_S_INX:
21887 case RISCV::PseudoFROUND_D:
21888 case RISCV::PseudoFROUND_D_INX:
21889 case RISCV::PseudoFROUND_D_IN32X:
21890 return emitFROUND(MI, MBB: BB, Subtarget);
21891 case RISCV::PROBED_STACKALLOC_DYN:
21892 return emitDynamicProbedAlloc(MI, MBB: BB);
21893 case TargetOpcode::STATEPOINT:
    // STATEPOINT is a pseudo instruction which has no implicit defs/uses,
    // while the jal call instruction (which the statepoint is eventually
    // lowered to) has an implicit def of the return address register. This
    // def is early-clobber as it is written at the moment of the call, before
    // any use is read. Add this implicit dead def here as a workaround.
21899 MI.addOperand(MF&: *MI.getMF(),
21900 Op: MachineOperand::CreateReg(
21901 Reg: RISCV::X1, /*isDef*/ true,
21902 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
21903 /*isUndef*/ false, /*isEarlyClobber*/ true));
21904 [[fallthrough]];
21905 case TargetOpcode::STACKMAP:
21906 case TargetOpcode::PATCHPOINT:
21907 if (!Subtarget.is64Bit())
21908 report_fatal_error(reason: "STACKMAP, PATCHPOINT and STATEPOINT are only "
21909 "supported on 64-bit targets");
21910 return emitPatchPoint(MI, MBB: BB);
21911 }
21912}
21913
21914void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
21915 SDNode *Node) const {
  // If the instruction defines the FRM register, conservatively mark the def
  // as non-dead to express a data dependency with FRM users and prevent
  // incorrect instruction reordering.
21919 if (auto *FRMDef = MI.findRegisterDefOperand(Reg: RISCV::FRM, /*TRI=*/nullptr)) {
21920 FRMDef->setIsDead(false);
21921 return;
21922 }
21923 // Add FRM dependency to any instructions with dynamic rounding mode.
21924 int Idx = RISCV::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: RISCV::OpName::frm);
21925 if (Idx < 0) {
21926 // Vector pseudos have FRM index indicated by TSFlags.
21927 Idx = RISCVII::getFRMOpNum(Desc: MI.getDesc());
21928 if (Idx < 0)
21929 return;
21930 }
21931 if (MI.getOperand(i: Idx).getImm() != RISCVFPRndMode::DYN)
21932 return;
21933 // If the instruction already reads FRM, don't add another read.
21934 if (MI.readsRegister(Reg: RISCV::FRM, /*TRI=*/nullptr))
21935 return;
21936 MI.addOperand(
21937 Op: MachineOperand::CreateReg(Reg: RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
21938}
21939
21940void RISCVTargetLowering::analyzeInputArgs(
21941 MachineFunction &MF, CCState &CCInfo,
21942 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
21943 RISCVCCAssignFn Fn) const {
21944 unsigned NumArgs = Ins.size();
21945 FunctionType *FType = MF.getFunction().getFunctionType();
21946
21947 for (unsigned i = 0; i != NumArgs; ++i) {
21948 MVT ArgVT = Ins[i].VT;
21949 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
21950
21951 Type *ArgTy = nullptr;
21952 if (IsRet)
21953 ArgTy = FType->getReturnType();
21954 else if (Ins[i].isOrigArg())
21955 ArgTy = FType->getParamType(i: Ins[i].getOrigArgIndex());
21956
21957 if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo,
21958 /*IsFixed=*/true, IsRet, ArgTy)) {
21959 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
21960 << ArgVT << '\n');
21961 llvm_unreachable(nullptr);
21962 }
21963 }
21964}
21965
21966void RISCVTargetLowering::analyzeOutputArgs(
21967 MachineFunction &MF, CCState &CCInfo,
21968 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
21969 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
21970 unsigned NumArgs = Outs.size();
21971
21972 for (unsigned i = 0; i != NumArgs; i++) {
21973 MVT ArgVT = Outs[i].VT;
21974 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
21975 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
21976
21977 if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo,
21978 Outs[i].IsFixed, IsRet, OrigTy)) {
21979 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
21980 << ArgVT << "\n");
21981 llvm_unreachable(nullptr);
21982 }
21983 }
21984}
21985
// Convert Val from its location type (LocVT) to the value type (ValVT) of VA.
// Should not be called for CCValAssign::Indirect values.
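// For example, an f16/bf16 value that arrived in an integer register is moved
// back into an FP register with FMV_H_X, an f32 that arrived in the low bits
// of an i64 register on RV64 uses FMV_W_X_RV64, and a fixed-length vector
// passed in a scalable container is extracted with convertFromScalableVector.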
21988static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
21989 const CCValAssign &VA, const SDLoc &DL,
21990 const RISCVSubtarget &Subtarget) {
21991 if (VA.needsCustom()) {
21992 if (VA.getLocVT().isInteger() &&
21993 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
21994 return DAG.getNode(Opcode: RISCVISD::FMV_H_X, DL, VT: VA.getValVT(), Operand: Val);
21995 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
21996 return DAG.getNode(Opcode: RISCVISD::FMV_W_X_RV64, DL, VT: MVT::f32, Operand: Val);
21997 if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
21998 return convertFromScalableVector(VT: VA.getValVT(), V: Val, DAG, Subtarget);
21999 llvm_unreachable("Unexpected Custom handling.");
22000 }
22001
22002 switch (VA.getLocInfo()) {
22003 default:
22004 llvm_unreachable("Unexpected CCValAssign::LocInfo");
22005 case CCValAssign::Full:
22006 break;
22007 case CCValAssign::BCvt:
22008 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: VA.getValVT(), Operand: Val);
22009 break;
22010 }
22011 return Val;
22012}
22013
22014// The caller is responsible for loading the full value if the argument is
22015// passed with CCValAssign::Indirect.
22016static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
22017 const CCValAssign &VA, const SDLoc &DL,
22018 const ISD::InputArg &In,
22019 const RISCVTargetLowering &TLI) {
22020 MachineFunction &MF = DAG.getMachineFunction();
22021 MachineRegisterInfo &RegInfo = MF.getRegInfo();
22022 EVT LocVT = VA.getLocVT();
22023 SDValue Val;
22024 const TargetRegisterClass *RC = TLI.getRegClassFor(VT: LocVT.getSimpleVT());
22025 Register VReg = RegInfo.createVirtualRegister(RegClass: RC);
22026 RegInfo.addLiveIn(Reg: VA.getLocReg(), vreg: VReg);
22027 Val = DAG.getCopyFromReg(Chain, dl: DL, Reg: VReg, VT: LocVT);
22028
22029 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
22030 if (In.isOrigArg()) {
22031 Argument *OrigArg = MF.getFunction().getArg(i: In.getOrigArgIndex());
22032 if (OrigArg->getType()->isIntegerTy()) {
22033 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
22034 // An input zero extended from i31 can also be considered sign extended.
22035 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
22036 (BitWidth < 32 && In.Flags.isZExt())) {
22037 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
22038 RVFI->addSExt32Register(Reg: VReg);
22039 }
22040 }
22041 }
22042
22043 if (VA.getLocInfo() == CCValAssign::Indirect)
22044 return Val;
22045
22046 return convertLocVTToValVT(DAG, Val, VA, DL, Subtarget: TLI.getSubtarget());
22047}
22048
22049static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
22050 const CCValAssign &VA, const SDLoc &DL,
22051 const RISCVSubtarget &Subtarget) {
22052 EVT LocVT = VA.getLocVT();
22053
22054 if (VA.needsCustom()) {
22055 if (LocVT.isInteger() &&
22056 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
22057 return DAG.getNode(Opcode: RISCVISD::FMV_X_ANYEXTH, DL, VT: LocVT, Operand: Val);
22058 if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32)
22059 return DAG.getNode(Opcode: RISCVISD::FMV_X_ANYEXTW_RV64, DL, VT: MVT::i64, Operand: Val);
22060 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
22061 return convertToScalableVector(VT: LocVT, V: Val, DAG, Subtarget);
22062 llvm_unreachable("Unexpected Custom handling.");
22063 }
22064
22065 switch (VA.getLocInfo()) {
22066 default:
22067 llvm_unreachable("Unexpected CCValAssign::LocInfo");
22068 case CCValAssign::Full:
22069 break;
22070 case CCValAssign::BCvt:
22071 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: LocVT, Operand: Val);
22072 break;
22073 }
22074 return Val;
22075}
22076
22077// The caller is responsible for loading the full value if the argument is
22078// passed with CCValAssign::Indirect.
22079static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
22080 const CCValAssign &VA, const SDLoc &DL) {
22081 MachineFunction &MF = DAG.getMachineFunction();
22082 MachineFrameInfo &MFI = MF.getFrameInfo();
22083 EVT LocVT = VA.getLocVT();
22084 EVT ValVT = VA.getValVT();
22085 EVT PtrVT = MVT::getIntegerVT(BitWidth: DAG.getDataLayout().getPointerSizeInBits(AS: 0));
22086 if (VA.getLocInfo() == CCValAssign::Indirect) {
    // When the value is a scalable vector, the stack slot holds a pointer to
    // the actual value rather than the value itself, so the type to load here
    // is the pointer type (LocVT) instead of the scalable vector type.
22090 ValVT = LocVT;
22091 }
22092 int FI = MFI.CreateFixedObject(Size: ValVT.getStoreSize(), SPOffset: VA.getLocMemOffset(),
22093 /*IsImmutable=*/true);
22094 SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
22095 SDValue Val;
22096
22097 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
22098 switch (VA.getLocInfo()) {
22099 default:
22100 llvm_unreachable("Unexpected CCValAssign::LocInfo");
22101 case CCValAssign::Full:
22102 case CCValAssign::Indirect:
22103 case CCValAssign::BCvt:
22104 break;
22105 }
22106 Val = DAG.getExtLoad(
22107 ExtType, dl: DL, VT: LocVT, Chain, Ptr: FIN,
22108 PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI), MemVT: ValVT);
22109 return Val;
22110}
22111
22112static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
22113 const CCValAssign &VA,
22114 const CCValAssign &HiVA,
22115 const SDLoc &DL) {
22116 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
22117 "Unexpected VA");
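  // With the soft-float ABI on RV32, an f64 argument is split into two 32-bit
  // pieces: the low half always arrives in a GPR (VA), while the high half
  // (HiVA) arrives either in the next GPR or on the stack. Reassemble the two
  // halves with RISCVISD::BuildPairF64.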
22118 MachineFunction &MF = DAG.getMachineFunction();
22119 MachineFrameInfo &MFI = MF.getFrameInfo();
22120 MachineRegisterInfo &RegInfo = MF.getRegInfo();
22121
22122 assert(VA.isRegLoc() && "Expected register VA assignment");
22123
22124 Register LoVReg = RegInfo.createVirtualRegister(RegClass: &RISCV::GPRRegClass);
22125 RegInfo.addLiveIn(Reg: VA.getLocReg(), vreg: LoVReg);
22126 SDValue Lo = DAG.getCopyFromReg(Chain, dl: DL, Reg: LoVReg, VT: MVT::i32);
22127 SDValue Hi;
22128 if (HiVA.isMemLoc()) {
22129 // Second half of f64 is passed on the stack.
22130 int FI = MFI.CreateFixedObject(Size: 4, SPOffset: HiVA.getLocMemOffset(),
22131 /*IsImmutable=*/true);
22132 SDValue FIN = DAG.getFrameIndex(FI, VT: MVT::i32);
22133 Hi = DAG.getLoad(VT: MVT::i32, dl: DL, Chain, Ptr: FIN,
22134 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI));
22135 } else {
22136 // Second half of f64 is passed in another GPR.
22137 Register HiVReg = RegInfo.createVirtualRegister(RegClass: &RISCV::GPRRegClass);
22138 RegInfo.addLiveIn(Reg: HiVA.getLocReg(), vreg: HiVReg);
22139 Hi = DAG.getCopyFromReg(Chain, dl: DL, Reg: HiVReg, VT: MVT::i32);
22140 }
22141 return DAG.getNode(Opcode: RISCVISD::BuildPairF64, DL, VT: MVT::f64, N1: Lo, N2: Hi);
22142}
22143
22144// Transform physical registers into virtual registers.
22145SDValue RISCVTargetLowering::LowerFormalArguments(
22146 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
22147 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
22148 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
22149
22150 MachineFunction &MF = DAG.getMachineFunction();
22151
22152 switch (CallConv) {
22153 default:
22154 report_fatal_error(reason: "Unsupported calling convention");
22155 case CallingConv::C:
22156 case CallingConv::Fast:
22157 case CallingConv::SPIR_KERNEL:
22158 case CallingConv::GRAAL:
22159 case CallingConv::RISCV_VectorCall:
22160#define CC_VLS_CASE(ABI_VLEN) case CallingConv::RISCV_VLSCall_##ABI_VLEN:
22161 CC_VLS_CASE(32)
22162 CC_VLS_CASE(64)
22163 CC_VLS_CASE(128)
22164 CC_VLS_CASE(256)
22165 CC_VLS_CASE(512)
22166 CC_VLS_CASE(1024)
22167 CC_VLS_CASE(2048)
22168 CC_VLS_CASE(4096)
22169 CC_VLS_CASE(8192)
22170 CC_VLS_CASE(16384)
22171 CC_VLS_CASE(32768)
22172 CC_VLS_CASE(65536)
22173#undef CC_VLS_CASE
22174 break;
22175 case CallingConv::GHC:
22176 if (Subtarget.hasStdExtE())
22177 report_fatal_error(reason: "GHC calling convention is not supported on RVE!");
22178 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
22179 report_fatal_error(reason: "GHC calling convention requires the (Zfinx/F) and "
22180 "(Zdinx/D) instruction set extensions");
22181 }
22182
22183 const Function &Func = MF.getFunction();
22184 if (Func.hasFnAttribute(Kind: "interrupt")) {
22185 if (!Func.arg_empty())
22186 report_fatal_error(
22187 reason: "Functions with the interrupt attribute cannot have arguments!");
22188
22189 StringRef Kind =
22190 MF.getFunction().getFnAttribute(Kind: "interrupt").getValueAsString();
22191
22192 constexpr StringLiteral SupportedInterruptKinds[] = {
22193 "machine",
22194 "supervisor",
22195 "qci-nest",
22196 "qci-nonest",
22197 "SiFive-CLIC-preemptible",
22198 "SiFive-CLIC-stack-swap",
22199 "SiFive-CLIC-preemptible-stack-swap",
22200 };
22201 if (!llvm::is_contained(Range: SupportedInterruptKinds, Element: Kind))
22202 report_fatal_error(
22203 reason: "Function interrupt attribute argument not supported!");
22204
22205 if (Kind.starts_with(Prefix: "qci-") && !Subtarget.hasVendorXqciint())
22206 report_fatal_error(reason: "'qci-*' interrupt kinds require Xqciint extension");
22207
22208 if (Kind.starts_with(Prefix: "SiFive-CLIC-") && !Subtarget.hasVendorXSfmclic())
22209 reportFatalUsageError(
22210 reason: "'SiFive-CLIC-*' interrupt kinds require XSfmclic extension");
22211
22212 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
22213 if (Kind.starts_with(Prefix: "SiFive-CLIC-preemptible") && TFI->hasFP(MF))
22214 reportFatalUsageError(reason: "'SiFive-CLIC-preemptible' interrupt kinds cannot "
22215 "have a frame pointer");
22216 }
22217
22218 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
22219 MVT XLenVT = Subtarget.getXLenVT();
22220 unsigned XLenInBytes = Subtarget.getXLen() / 8;
22221 // Used with vargs to accumulate store chains.
22222 std::vector<SDValue> OutChains;
22223
22224 // Assign locations to all of the incoming arguments.
22225 SmallVector<CCValAssign, 16> ArgLocs;
22226 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
22227
22228 if (CallConv == CallingConv::GHC)
22229 CCInfo.AnalyzeFormalArguments(Ins, Fn: CC_RISCV_GHC);
22230 else
22231 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
22232 Fn: CallConv == CallingConv::Fast ? CC_RISCV_FastCC
22233 : CC_RISCV);
22234
22235 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
22236 CCValAssign &VA = ArgLocs[i];
22237 SDValue ArgValue;
22238 // Passing f64 on RV32D with a soft float ABI must be handled as a special
22239 // case.
22240 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
22241 assert(VA.needsCustom());
22242 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, HiVA: ArgLocs[++i], DL);
22243 } else if (VA.isRegLoc())
22244 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, In: Ins[InsIdx], TLI: *this);
22245 else
22246 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
22247
22248 if (VA.getLocInfo() == CCValAssign::Indirect) {
22249 // If the original argument was split and passed by reference (e.g. i128
22250 // on RV32), we need to load all parts of it here (using the same
22251 // address). Vectors may be partly split to registers and partly to the
22252 // stack, in which case the base address is partly offset and subsequent
22253 // stores are relative to that.
22254 InVals.push_back(Elt: DAG.getLoad(VT: VA.getValVT(), dl: DL, Chain, Ptr: ArgValue,
22255 PtrInfo: MachinePointerInfo()));
22256 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
22257 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
22258 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
22259 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
22260 CCValAssign &PartVA = ArgLocs[i + 1];
22261 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
22262 SDValue Offset = DAG.getIntPtrConstant(Val: PartOffset, DL);
22263 if (PartVA.getValVT().isScalableVector())
22264 Offset = DAG.getNode(Opcode: ISD::VSCALE, DL, VT: XLenVT, Operand: Offset);
22265 SDValue Address = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: ArgValue, N2: Offset);
22266 InVals.push_back(Elt: DAG.getLoad(VT: PartVA.getValVT(), dl: DL, Chain, Ptr: Address,
22267 PtrInfo: MachinePointerInfo()));
22268 ++i;
22269 ++InsIdx;
22270 }
22271 continue;
22272 }
22273 InVals.push_back(Elt: ArgValue);
22274 }
22275
22276 if (any_of(Range&: ArgLocs,
22277 P: [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
22278 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
22279
22280 if (IsVarArg) {
22281 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(ABI: Subtarget.getTargetABI());
22282 unsigned Idx = CCInfo.getFirstUnallocated(Regs: ArgRegs);
22283 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
22284 MachineFrameInfo &MFI = MF.getFrameInfo();
22285 MachineRegisterInfo &RegInfo = MF.getRegInfo();
22286 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
22287
22288 // Size of the vararg save area. For now, the varargs save area is either
22289 // zero or large enough to hold a0-a7.
22290 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
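    // For example, on RV64 with the standard a0-a7 argument registers and two
    // of them consumed by fixed arguments (Idx == 2), the save area must hold
    // a2-a7, i.e. 6 * XLenInBytes == 48 bytes.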
22291 int FI;
22292
22293 // If all registers are allocated, then all varargs must be passed on the
22294 // stack and we don't need to save any argregs.
22295 if (VarArgsSaveSize == 0) {
22296 int VaArgOffset = CCInfo.getStackSize();
22297 FI = MFI.CreateFixedObject(Size: XLenInBytes, SPOffset: VaArgOffset, IsImmutable: true);
22298 } else {
22299 int VaArgOffset = -VarArgsSaveSize;
22300 FI = MFI.CreateFixedObject(Size: VarArgsSaveSize, SPOffset: VaArgOffset, IsImmutable: true);
22301
22302 // If saving an odd number of registers then create an extra stack slot to
22303 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
      // offsets to even-numbered registers remain 2*XLEN-aligned.
22305 if (Idx % 2) {
22306 MFI.CreateFixedObject(
22307 Size: XLenInBytes, SPOffset: VaArgOffset - static_cast<int>(XLenInBytes), IsImmutable: true);
22308 VarArgsSaveSize += XLenInBytes;
22309 }
22310
22311 SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
22312
22313 // Copy the integer registers that may have been used for passing varargs
22314 // to the vararg save area.
22315 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
22316 const Register Reg = RegInfo.createVirtualRegister(RegClass: RC);
22317 RegInfo.addLiveIn(Reg: ArgRegs[I], vreg: Reg);
22318 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl: DL, Reg, VT: XLenVT);
22319 SDValue Store = DAG.getStore(
22320 Chain, dl: DL, Val: ArgValue, Ptr: FIN,
22321 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI, Offset: (I - Idx) * XLenInBytes));
22322 OutChains.push_back(x: Store);
22323 FIN =
22324 DAG.getMemBasePlusOffset(Base: FIN, Offset: TypeSize::getFixed(ExactSize: XLenInBytes), DL);
22325 }
22326 }
22327
    // Record the frame index of the first variable argument, which is needed
    // when lowering VASTART.
22330 RVFI->setVarArgsFrameIndex(FI);
22331 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
22332 }
22333
  // All stores are grouped into one token factor node so that the sizes of
  // Ins and InVals still match. This only happens for vararg functions.
22336 if (!OutChains.empty()) {
22337 OutChains.push_back(x: Chain);
22338 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
22339 }
22340
22341 return Chain;
22342}
22343
22344/// isEligibleForTailCallOptimization - Check whether the call is eligible
22345/// for tail call optimization.
22346/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
22347bool RISCVTargetLowering::isEligibleForTailCallOptimization(
22348 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
22349 const SmallVector<CCValAssign, 16> &ArgLocs) const {
22350
22351 auto CalleeCC = CLI.CallConv;
22352 auto &Outs = CLI.Outs;
22353 auto &Caller = MF.getFunction();
22354 auto CallerCC = Caller.getCallingConv();
22355
22356 // Exception-handling functions need a special set of instructions to
22357 // indicate a return to the hardware. Tail-calling another function would
22358 // probably break this.
22359 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
22360 // should be expanded as new function attributes are introduced.
22361 if (Caller.hasFnAttribute(Kind: "interrupt"))
22362 return false;
22363
22364 // Do not tail call opt if the stack is used to pass parameters.
22365 if (CCInfo.getStackSize() != 0)
22366 return false;
22367
22368 // Do not tail call opt if any parameters need to be passed indirectly.
22369 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
22370 // passed indirectly. So the address of the value will be passed in a
22371 // register, or if not available, then the address is put on the stack. In
22372 // order to pass indirectly, space on the stack often needs to be allocated
  // in order to store the value. In this case the CCInfo.getStackSize() != 0
  // check is not enough and we need to check if any CCValAssign in ArgLocs is
  // passed CCValAssign::Indirect.
22376 for (auto &VA : ArgLocs)
22377 if (VA.getLocInfo() == CCValAssign::Indirect)
22378 return false;
22379
22380 // Do not tail call opt if either caller or callee uses struct return
22381 // semantics.
22382 auto IsCallerStructRet = Caller.hasStructRetAttr();
22383 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
22384 if (IsCallerStructRet || IsCalleeStructRet)
22385 return false;
22386
22387 // The callee has to preserve all registers the caller needs to preserve.
22388 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
22389 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
22390 if (CalleeCC != CallerCC) {
22391 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
22392 if (!TRI->regmaskSubsetEqual(mask0: CallerPreserved, mask1: CalleePreserved))
22393 return false;
22394 }
22395
22396 // Byval parameters hand the function a pointer directly into the stack area
22397 // we want to reuse during a tail call. Working around this *is* possible
22398 // but less efficient and uglier in LowerCall.
22399 for (auto &Arg : Outs)
22400 if (Arg.Flags.isByVal())
22401 return false;
22402
22403 return true;
22404}
22405
22406static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
22407 return DAG.getDataLayout().getPrefTypeAlign(
22408 Ty: VT.getTypeForEVT(Context&: *DAG.getContext()));
22409}
22410
22411// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
22412// and output parameter nodes.
22413SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
22414 SmallVectorImpl<SDValue> &InVals) const {
22415 SelectionDAG &DAG = CLI.DAG;
22416 SDLoc &DL = CLI.DL;
22417 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
22418 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
22419 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
22420 SDValue Chain = CLI.Chain;
22421 SDValue Callee = CLI.Callee;
22422 bool &IsTailCall = CLI.IsTailCall;
22423 CallingConv::ID CallConv = CLI.CallConv;
22424 bool IsVarArg = CLI.IsVarArg;
22425 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
22426 MVT XLenVT = Subtarget.getXLenVT();
22427
22428 MachineFunction &MF = DAG.getMachineFunction();
22429
22430 // Analyze the operands of the call, assigning locations to each operand.
22431 SmallVector<CCValAssign, 16> ArgLocs;
22432 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
22433
22434 if (CallConv == CallingConv::GHC) {
22435 if (Subtarget.hasStdExtE())
22436 report_fatal_error(reason: "GHC calling convention is not supported on RVE!");
22437 ArgCCInfo.AnalyzeCallOperands(Outs, Fn: CC_RISCV_GHC);
22438 } else
22439 analyzeOutputArgs(MF, CCInfo&: ArgCCInfo, Outs, /*IsRet=*/false, CLI: &CLI,
22440 Fn: CallConv == CallingConv::Fast ? CC_RISCV_FastCC
22441 : CC_RISCV);
22442
22443 // Check if it's really possible to do a tail call.
22444 if (IsTailCall)
22445 IsTailCall = isEligibleForTailCallOptimization(CCInfo&: ArgCCInfo, CLI, MF, ArgLocs);
22446
22447 if (IsTailCall)
22448 ++NumTailCalls;
22449 else if (CLI.CB && CLI.CB->isMustTailCall())
22450 report_fatal_error(reason: "failed to perform tail call elimination on a call "
22451 "site marked musttail");
22452
22453 // Get a count of how many bytes are to be pushed on the stack.
22454 unsigned NumBytes = ArgCCInfo.getStackSize();
22455
22456 // Create local copies for byval args
22457 SmallVector<SDValue, 8> ByValArgs;
22458 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
22459 ISD::ArgFlagsTy Flags = Outs[i].Flags;
22460 if (!Flags.isByVal())
22461 continue;
22462
22463 SDValue Arg = OutVals[i];
22464 unsigned Size = Flags.getByValSize();
22465 Align Alignment = Flags.getNonZeroByValAlign();
22466
22467 int FI =
22468 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/isSpillSlot: false);
22469 SDValue FIPtr = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout()));
22470 SDValue SizeNode = DAG.getConstant(Val: Size, DL, VT: XLenVT);
22471
22472 Chain = DAG.getMemcpy(Chain, dl: DL, Dst: FIPtr, Src: Arg, Size: SizeNode, Alignment,
22473 /*IsVolatile=*/isVol: false,
22474 /*AlwaysInline=*/false, /*CI*/ nullptr, OverrideTailCall: IsTailCall,
22475 DstPtrInfo: MachinePointerInfo(), SrcPtrInfo: MachinePointerInfo());
22476 ByValArgs.push_back(Elt: FIPtr);
22477 }
22478
22479 if (!IsTailCall)
22480 Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: 0, DL: CLI.DL);
22481
22482 // Copy argument values to their designated locations.
22483 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
22484 SmallVector<SDValue, 8> MemOpChains;
22485 SDValue StackPtr;
22486 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
22487 ++i, ++OutIdx) {
22488 CCValAssign &VA = ArgLocs[i];
22489 SDValue ArgValue = OutVals[OutIdx];
22490 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
22491
22492 // Handle passing f64 on RV32D with a soft float ABI as a special case.
22493 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
22494 assert(VA.isRegLoc() && "Expected register VA assignment");
22495 assert(VA.needsCustom());
22496 SDValue SplitF64 = DAG.getNode(
22497 Opcode: RISCVISD::SplitF64, DL, VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32), N: ArgValue);
22498 SDValue Lo = SplitF64.getValue(R: 0);
22499 SDValue Hi = SplitF64.getValue(R: 1);
22500
22501 Register RegLo = VA.getLocReg();
22502 RegsToPass.push_back(Elt: std::make_pair(x&: RegLo, y&: Lo));
22503
22504 // Get the CCValAssign for the Hi part.
22505 CCValAssign &HiVA = ArgLocs[++i];
22506
22507 if (HiVA.isMemLoc()) {
22508 // Second half of f64 is passed on the stack.
22509 if (!StackPtr.getNode())
22510 StackPtr = DAG.getCopyFromReg(Chain, dl: DL, Reg: RISCV::X2, VT: PtrVT);
22511 SDValue Address =
22512 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr,
22513 N2: DAG.getIntPtrConstant(Val: HiVA.getLocMemOffset(), DL));
22514 // Emit the store.
22515 MemOpChains.push_back(Elt: DAG.getStore(
22516 Chain, dl: DL, Val: Hi, Ptr: Address,
22517 PtrInfo: MachinePointerInfo::getStack(MF, Offset: HiVA.getLocMemOffset())));
22518 } else {
22519 // Second half of f64 is passed in another GPR.
22520 Register RegHigh = HiVA.getLocReg();
22521 RegsToPass.push_back(Elt: std::make_pair(x&: RegHigh, y&: Hi));
22522 }
22523 continue;
22524 }
22525
22526 // Promote the value if needed.
22527 // For now, only handle fully promoted and indirect arguments.
22528 if (VA.getLocInfo() == CCValAssign::Indirect) {
22529 // Store the argument in a stack slot and pass its address.
22530 Align StackAlign =
22531 std::max(a: getPrefTypeAlign(VT: Outs[OutIdx].ArgVT, DAG),
22532 b: getPrefTypeAlign(VT: ArgValue.getValueType(), DAG));
22533 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
22534 // If the original argument was split (e.g. i128), we need
22535 // to store the required parts of it here (and pass just one address).
22536 // Vectors may be partly split to registers and partly to the stack, in
22537 // which case the base address is partly offset and subsequent stores are
22538 // relative to that.
22539 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
22540 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
22541 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
      // Calculate the total size to store. We don't know exactly what we are
      // storing without performing the loop below and collecting the info.
22545 SmallVector<std::pair<SDValue, SDValue>> Parts;
22546 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
22547 SDValue PartValue = OutVals[OutIdx + 1];
22548 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
22549 SDValue Offset = DAG.getIntPtrConstant(Val: PartOffset, DL);
22550 EVT PartVT = PartValue.getValueType();
22551 if (PartVT.isScalableVector())
22552 Offset = DAG.getNode(Opcode: ISD::VSCALE, DL, VT: XLenVT, Operand: Offset);
22553 StoredSize += PartVT.getStoreSize();
22554 StackAlign = std::max(a: StackAlign, b: getPrefTypeAlign(VT: PartVT, DAG));
22555 Parts.push_back(Elt: std::make_pair(x&: PartValue, y&: Offset));
22556 ++i;
22557 ++OutIdx;
22558 }
22559 SDValue SpillSlot = DAG.CreateStackTemporary(Bytes: StoredSize, Alignment: StackAlign);
22560 int FI = cast<FrameIndexSDNode>(Val&: SpillSlot)->getIndex();
22561 MemOpChains.push_back(
22562 Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: SpillSlot,
22563 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)));
22564 for (const auto &Part : Parts) {
22565 SDValue PartValue = Part.first;
22566 SDValue PartOffset = Part.second;
22567 SDValue Address =
22568 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: SpillSlot, N2: PartOffset);
22569 MemOpChains.push_back(
22570 Elt: DAG.getStore(Chain, dl: DL, Val: PartValue, Ptr: Address,
22571 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)));
22572 }
22573 ArgValue = SpillSlot;
22574 } else {
22575 ArgValue = convertValVTToLocVT(DAG, Val: ArgValue, VA, DL, Subtarget);
22576 }
22577
22578 // Use local copy if it is a byval arg.
22579 if (Flags.isByVal())
22580 ArgValue = ByValArgs[j++];
22581
22582 if (VA.isRegLoc()) {
22583 // Queue up the argument copies and emit them at the end.
22584 RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: ArgValue));
22585 } else {
22586 assert(VA.isMemLoc() && "Argument not register or memory");
22587 assert(!IsTailCall && "Tail call not allowed if stack is used "
22588 "for passing parameters");
22589
22590 // Work out the address of the stack slot.
22591 if (!StackPtr.getNode())
22592 StackPtr = DAG.getCopyFromReg(Chain, dl: DL, Reg: RISCV::X2, VT: PtrVT);
22593 SDValue Address =
22594 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr,
22595 N2: DAG.getIntPtrConstant(Val: VA.getLocMemOffset(), DL));
22596
22597 // Emit the store.
22598 MemOpChains.push_back(
22599 Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: Address,
22600 PtrInfo: MachinePointerInfo::getStack(MF, Offset: VA.getLocMemOffset())));
22601 }
22602 }
22603
22604 // Join the stores, which are independent of one another.
22605 if (!MemOpChains.empty())
22606 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: MemOpChains);
22607
22608 SDValue Glue;
22609
22610 // Build a sequence of copy-to-reg nodes, chained and glued together.
22611 for (auto &Reg : RegsToPass) {
22612 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: Reg.first, N: Reg.second, Glue);
22613 Glue = Chain.getValue(R: 1);
22614 }
22615
22616 // Validate that none of the argument registers have been marked as
22617 // reserved; if any have been, report an error. Do the same for the return
22618 // address register if this is not a tail call.
22619 validateCCReservedRegs(Regs: RegsToPass, MF);
22620 if (!IsTailCall && MF.getSubtarget().isRegisterReservedByUser(R: RISCV::X1))
22621 MF.getFunction().getContext().diagnose(DI: DiagnosticInfoUnsupported{
22622 MF.getFunction(),
22623 "Return address register required, but has been reserved."});
22624
22625 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
22626 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
22627 // split it, and so that the direct call can be matched by PseudoCALL.
22628 bool CalleeIsLargeExternalSymbol = false;
22629 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
22630 if (auto *S = dyn_cast<GlobalAddressSDNode>(Val&: Callee))
22631 Callee = getLargeGlobalAddress(N: S, DL, Ty: PtrVT, DAG);
22632 else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Val&: Callee)) {
22633 Callee = getLargeExternalSymbol(N: S, DL, Ty: PtrVT, DAG);
22634 CalleeIsLargeExternalSymbol = true;
22635 }
22636 } else if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Val&: Callee)) {
22637 const GlobalValue *GV = S->getGlobal();
22638 Callee = DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: 0, TargetFlags: RISCVII::MO_CALL);
22639 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Val&: Callee)) {
22640 Callee = DAG.getTargetExternalSymbol(Sym: S->getSymbol(), VT: PtrVT, TargetFlags: RISCVII::MO_CALL);
22641 }
22642
22643 // The first call operand is the chain and the second is the target address.
22644 SmallVector<SDValue, 8> Ops;
22645 Ops.push_back(Elt: Chain);
22646 Ops.push_back(Elt: Callee);
22647
22648 // Add argument registers to the end of the list so that they are
22649 // known live into the call.
22650 for (auto &Reg : RegsToPass)
22651 Ops.push_back(Elt: DAG.getRegister(Reg: Reg.first, VT: Reg.second.getValueType()));
22652
22653 // Add a register mask operand representing the call-preserved registers.
22654 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
22655 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
22656 assert(Mask && "Missing call preserved mask for calling convention");
22657 Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask));
22658
22659 // Glue the call to the argument copies, if any.
22660 if (Glue.getNode())
22661 Ops.push_back(Elt: Glue);
22662
22663 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
22664 "Unexpected CFI type for a direct call");
22665
22666 // Emit the call.
22667 SDVTList NodeTys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue);
22668
22669 // Use a software-guarded branch for non-indirect calls under the large code
22670 // model. A tail call to an external symbol will have a null CLI.CB, so we
22671 // need another way to determine the callsite type.
22672 bool NeedSWGuarded = false;
22673 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
22674 Subtarget.hasStdExtZicfilp() &&
22675 ((CLI.CB && !CLI.CB->isIndirectCall()) || CalleeIsLargeExternalSymbol))
22676 NeedSWGuarded = true;
22677
22678 if (IsTailCall) {
22679 MF.getFrameInfo().setHasTailCall();
22680 unsigned CallOpc =
22681 NeedSWGuarded ? RISCVISD::SW_GUARDED_TAIL : RISCVISD::TAIL;
22682 SDValue Ret = DAG.getNode(Opcode: CallOpc, DL, VTList: NodeTys, Ops);
22683 if (CLI.CFIType)
22684 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
22685 DAG.addNoMergeSiteInfo(Node: Ret.getNode(), NoMerge: CLI.NoMerge);
22686 return Ret;
22687 }
22688
22689 unsigned CallOpc = NeedSWGuarded ? RISCVISD::SW_GUARDED_CALL : RISCVISD::CALL;
22690 Chain = DAG.getNode(Opcode: CallOpc, DL, VTList: NodeTys, Ops);
22691 if (CLI.CFIType)
22692 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
22693 DAG.addNoMergeSiteInfo(Node: Chain.getNode(), NoMerge: CLI.NoMerge);
22694 Glue = Chain.getValue(R: 1);
22695
22696 // Mark the end of the call, which is glued to the call itself.
22697 Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytes, Size2: 0, Glue, DL);
22698 Glue = Chain.getValue(R: 1);
22699
22700 // Assign locations to each value returned by this call.
22701 SmallVector<CCValAssign, 16> RVLocs;
22702 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
22703 analyzeInputArgs(MF, CCInfo&: RetCCInfo, Ins, /*IsRet=*/true, Fn: CC_RISCV);
22704
22705 // Copy all of the result registers out of their specified physreg.
22706 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
22707 auto &VA = RVLocs[i];
22708 // Copy the value out
22709 SDValue RetValue =
22710 DAG.getCopyFromReg(Chain, dl: DL, Reg: VA.getLocReg(), VT: VA.getLocVT(), Glue);
22711 // Glue the RetValue to the end of the call sequence
22712 Chain = RetValue.getValue(R: 1);
22713 Glue = RetValue.getValue(R: 2);
22714
22715 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
22716 assert(VA.needsCustom());
22717 SDValue RetValue2 = DAG.getCopyFromReg(Chain, dl: DL, Reg: RVLocs[++i].getLocReg(),
22718 VT: MVT::i32, Glue);
22719 Chain = RetValue2.getValue(R: 1);
22720 Glue = RetValue2.getValue(R: 2);
22721 RetValue = DAG.getNode(Opcode: RISCVISD::BuildPairF64, DL, VT: MVT::f64, N1: RetValue,
22722 N2: RetValue2);
22723 } else
22724 RetValue = convertLocVTToValVT(DAG, Val: RetValue, VA, DL, Subtarget);
22725
22726 InVals.push_back(Elt: RetValue);
22727 }
22728
22729 return Chain;
22730}
22731
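// Return true if all of the return values described by Outs can be assigned
// to return registers by CC_RISCV; if any assignment fails, the generic code
// instead demotes the return to an sret argument.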
22732bool RISCVTargetLowering::CanLowerReturn(
22733 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
22734 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
22735 const Type *RetTy) const {
22736 SmallVector<CCValAssign, 16> RVLocs;
22737 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
22738
22739 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
22740 MVT VT = Outs[i].VT;
22741 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
22742 if (CC_RISCV(ValNo: i, ValVT: VT, LocVT: VT, LocInfo: CCValAssign::Full, ArgFlags, State&: CCInfo,
22743 /*IsFixed=*/true, /*IsRet=*/true, OrigTy: nullptr))
22744 return false;
22745 }
22746 return true;
22747}
22748
22749SDValue
22750RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
22751 bool IsVarArg,
22752 const SmallVectorImpl<ISD::OutputArg> &Outs,
22753 const SmallVectorImpl<SDValue> &OutVals,
22754 const SDLoc &DL, SelectionDAG &DAG) const {
22755 MachineFunction &MF = DAG.getMachineFunction();
22756 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
22757
22758 // Stores the assignment of the return value to a location.
22759 SmallVector<CCValAssign, 16> RVLocs;
22760
22761 // Info about the registers and stack slot.
22762 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
22763 *DAG.getContext());
22764
22765 analyzeOutputArgs(MF&: DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
22766 CLI: nullptr, Fn: CC_RISCV);
22767
22768 if (CallConv == CallingConv::GHC && !RVLocs.empty())
22769 report_fatal_error(reason: "GHC functions return void only");
22770
22771 SDValue Glue;
22772 SmallVector<SDValue, 4> RetOps(1, Chain);
22773
22774 // Copy the result values into the output registers.
22775 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
22776 SDValue Val = OutVals[OutIdx];
22777 CCValAssign &VA = RVLocs[i];
22778 assert(VA.isRegLoc() && "Can only return in registers!");
22779
22780 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
22781 // Handle returning f64 on RV32D with a soft float ABI.
22782 assert(VA.isRegLoc() && "Expected return via registers");
22783 assert(VA.needsCustom());
22784 SDValue SplitF64 = DAG.getNode(Opcode: RISCVISD::SplitF64, DL,
22785 VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32), N: Val);
22786 SDValue Lo = SplitF64.getValue(R: 0);
22787 SDValue Hi = SplitF64.getValue(R: 1);
22788 Register RegLo = VA.getLocReg();
22789 Register RegHi = RVLocs[++i].getLocReg();
22790
22791 if (STI.isRegisterReservedByUser(i: RegLo) ||
22792 STI.isRegisterReservedByUser(i: RegHi))
22793 MF.getFunction().getContext().diagnose(DI: DiagnosticInfoUnsupported{
22794 MF.getFunction(),
22795 "Return value register required, but has been reserved."});
22796
22797 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: RegLo, N: Lo, Glue);
22798 Glue = Chain.getValue(R: 1);
22799 RetOps.push_back(Elt: DAG.getRegister(Reg: RegLo, VT: MVT::i32));
22800 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: RegHi, N: Hi, Glue);
22801 Glue = Chain.getValue(R: 1);
22802 RetOps.push_back(Elt: DAG.getRegister(Reg: RegHi, VT: MVT::i32));
22803 } else {
22804 // Handle a 'normal' return.
22805 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
22806 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: VA.getLocReg(), N: Val, Glue);
22807
22808 if (STI.isRegisterReservedByUser(i: VA.getLocReg()))
22809 MF.getFunction().getContext().diagnose(DI: DiagnosticInfoUnsupported{
22810 MF.getFunction(),
22811 "Return value register required, but has been reserved."});
22812
22813 // Guarantee that all emitted copies are stuck together.
22814 Glue = Chain.getValue(R: 1);
22815 RetOps.push_back(Elt: DAG.getRegister(Reg: VA.getLocReg(), VT: VA.getLocVT()));
22816 }
22817 }
22818
22819 RetOps[0] = Chain; // Update chain.
22820
22821 // Add the glue node if we have it.
22822 if (Glue.getNode()) {
22823 RetOps.push_back(Elt: Glue);
22824 }
22825
22826 if (any_of(Range&: RVLocs,
22827 P: [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
22828 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
22829
22830 unsigned RetOpc = RISCVISD::RET_GLUE;
22831 // Interrupt service routines use different return instructions.
22832 const Function &Func = DAG.getMachineFunction().getFunction();
22833 if (Func.hasFnAttribute(Kind: "interrupt")) {
22834 if (!Func.getReturnType()->isVoidTy())
22835 report_fatal_error(
22836 reason: "Functions with the interrupt attribute must have void return type!");
22837
22838 MachineFunction &MF = DAG.getMachineFunction();
22839 StringRef Kind =
22840 MF.getFunction().getFnAttribute(Kind: "interrupt").getValueAsString();
22841
22842 if (Kind == "supervisor")
22843 RetOpc = RISCVISD::SRET_GLUE;
22844 else if (Kind == "qci-nest" || Kind == "qci-nonest") {
22845 assert(STI.hasFeature(RISCV::FeatureVendorXqciint) &&
22846 "Need Xqciint for qci-(no)nest");
22847 RetOpc = RISCVISD::QC_C_MILEAVERET_GLUE;
22848 } else
22849 RetOpc = RISCVISD::MRET_GLUE;
22850 }
22851
22852 return DAG.getNode(Opcode: RetOpc, DL, VT: MVT::Other, Ops: RetOps);
22853}
22854
22855void RISCVTargetLowering::validateCCReservedRegs(
22856 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
22857 MachineFunction &MF) const {
22858 const Function &F = MF.getFunction();
22859 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
22860
22861 if (llvm::any_of(Range: Regs, P: [&STI](auto Reg) {
22862 return STI.isRegisterReservedByUser(i: Reg.first);
22863 }))
22864 F.getContext().diagnose(DI: DiagnosticInfoUnsupported{
22865 F, "Argument register required, but has been reserved."});
22866}
22867
22868// Check if the result of the node is only used as a return value, as
22869// otherwise we can't perform a tail-call.
22870bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
22871 if (N->getNumValues() != 1)
22872 return false;
22873 if (!N->hasNUsesOfValue(NUses: 1, Value: 0))
22874 return false;
22875
22876 SDNode *Copy = *N->user_begin();
22877
22878 if (Copy->getOpcode() == ISD::BITCAST) {
22879 return isUsedByReturnOnly(N: Copy, Chain);
22880 }
22881
22882 // TODO: Handle additional opcodes in order to support tail-calling libcalls
22883 // with soft float ABIs.
22884 if (Copy->getOpcode() != ISD::CopyToReg) {
22885 return false;
22886 }
22887
22888 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
22889 // isn't safe to perform a tail call.
22890 if (Copy->getOperand(Num: Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
22891 return false;
22892
22893 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
22894 bool HasRet = false;
22895 for (SDNode *Node : Copy->users()) {
22896 if (Node->getOpcode() != RISCVISD::RET_GLUE)
22897 return false;
22898 HasRet = true;
22899 }
22900 if (!HasRet)
22901 return false;
22902
22903 Chain = Copy->getOperand(Num: 0);
22904 return true;
22905}
22906
22907bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
22908 return CI->isTailCall();
22909}
22910
22911/// getConstraintType - Given a constraint letter, return the type of
22912/// constraint it is for this target.
22913RISCVTargetLowering::ConstraintType
22914RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
22915 if (Constraint.size() == 1) {
22916 switch (Constraint[0]) {
22917 default:
22918 break;
22919 case 'f':
22920 case 'R':
22921 return C_RegisterClass;
22922 case 'I':
22923 case 'J':
22924 case 'K':
22925 return C_Immediate;
22926 case 'A':
22927 return C_Memory;
22928 case 's':
22929 case 'S': // A symbolic address
22930 return C_Other;
22931 }
22932 } else {
22933 if (Constraint == "vr" || Constraint == "vd" || Constraint == "vm")
22934 return C_RegisterClass;
22935 if (Constraint == "cr" || Constraint == "cR" || Constraint == "cf")
22936 return C_RegisterClass;
22937 }
22938 return TargetLowering::getConstraintType(Constraint);
22939}
22940
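// Map an inline-asm register constraint to a register class or an explicit
// register. Handles the single-letter classes ('r', 'f', 'R'), the RVV
// classes ("vr", "vd", "vm") and the compressed-register variants ("cr",
// "cR", "cf"), as well as explicit register names given by ABI alias
// (e.g. "{a0}", "{fa0}", "{v8}").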
22941std::pair<unsigned, const TargetRegisterClass *>
22942RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
22943 StringRef Constraint,
22944 MVT VT) const {
22945 // First, see if this is a constraint that directly corresponds to a RISC-V
22946 // register class.
22947 if (Constraint.size() == 1) {
22948 switch (Constraint[0]) {
22949 case 'r':
22950 // TODO: Support fixed vectors up to XLen for P extension?
22951 if (VT.isVector())
22952 break;
22953 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
22954 return std::make_pair(x: 0U, y: &RISCV::GPRF16NoX0RegClass);
22955 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
22956 return std::make_pair(x: 0U, y: &RISCV::GPRF32NoX0RegClass);
22957 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
22958 return std::make_pair(x: 0U, y: &RISCV::GPRPairNoX0RegClass);
22959 return std::make_pair(x: 0U, y: &RISCV::GPRNoX0RegClass);
22960 case 'f':
22961 if (VT == MVT::f16) {
22962 if (Subtarget.hasStdExtZfhmin())
22963 return std::make_pair(x: 0U, y: &RISCV::FPR16RegClass);
22964 if (Subtarget.hasStdExtZhinxmin())
22965 return std::make_pair(x: 0U, y: &RISCV::GPRF16NoX0RegClass);
22966 } else if (VT == MVT::f32) {
22967 if (Subtarget.hasStdExtF())
22968 return std::make_pair(x: 0U, y: &RISCV::FPR32RegClass);
22969 if (Subtarget.hasStdExtZfinx())
22970 return std::make_pair(x: 0U, y: &RISCV::GPRF32NoX0RegClass);
22971 } else if (VT == MVT::f64) {
22972 if (Subtarget.hasStdExtD())
22973 return std::make_pair(x: 0U, y: &RISCV::FPR64RegClass);
22974 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
22975 return std::make_pair(x: 0U, y: &RISCV::GPRPairNoX0RegClass);
22976 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
22977 return std::make_pair(x: 0U, y: &RISCV::GPRNoX0RegClass);
22978 }
22979 break;
22980 case 'R':
22981 if (((VT == MVT::i64 || VT == MVT::f64) && !Subtarget.is64Bit()) ||
22982 (VT == MVT::i128 && Subtarget.is64Bit()))
22983 return std::make_pair(x: 0U, y: &RISCV::GPRPairNoX0RegClass);
22984 break;
22985 default:
22986 break;
22987 }
22988 } else if (Constraint == "vr") {
22989 for (const auto *RC :
22990 {&RISCV::VRRegClass, &RISCV::VRM2RegClass, &RISCV::VRM4RegClass,
22991 &RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass, &RISCV::VRN3M1RegClass,
22992 &RISCV::VRN4M1RegClass, &RISCV::VRN5M1RegClass,
22993 &RISCV::VRN6M1RegClass, &RISCV::VRN7M1RegClass,
22994 &RISCV::VRN8M1RegClass, &RISCV::VRN2M2RegClass,
22995 &RISCV::VRN3M2RegClass, &RISCV::VRN4M2RegClass,
22996 &RISCV::VRN2M4RegClass}) {
22997 if (TRI->isTypeLegalForClass(RC: *RC, T: VT.SimpleTy))
22998 return std::make_pair(x: 0U, y&: RC);
22999 }
23000 } else if (Constraint == "vd") {
23001 for (const auto *RC :
23002 {&RISCV::VRNoV0RegClass, &RISCV::VRM2NoV0RegClass,
23003 &RISCV::VRM4NoV0RegClass, &RISCV::VRM8NoV0RegClass,
23004 &RISCV::VRN2M1NoV0RegClass, &RISCV::VRN3M1NoV0RegClass,
23005 &RISCV::VRN4M1NoV0RegClass, &RISCV::VRN5M1NoV0RegClass,
23006 &RISCV::VRN6M1NoV0RegClass, &RISCV::VRN7M1NoV0RegClass,
23007 &RISCV::VRN8M1NoV0RegClass, &RISCV::VRN2M2NoV0RegClass,
23008 &RISCV::VRN3M2NoV0RegClass, &RISCV::VRN4M2NoV0RegClass,
23009 &RISCV::VRN2M4NoV0RegClass}) {
23010 if (TRI->isTypeLegalForClass(RC: *RC, T: VT.SimpleTy))
23011 return std::make_pair(x: 0U, y&: RC);
23012 }
23013 } else if (Constraint == "vm") {
23014 if (TRI->isTypeLegalForClass(RC: RISCV::VMV0RegClass, T: VT.SimpleTy))
23015 return std::make_pair(x: 0U, y: &RISCV::VMV0RegClass);
23016 } else if (Constraint == "cr") {
23017 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
23018 return std::make_pair(x: 0U, y: &RISCV::GPRF16CRegClass);
23019 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
23020 return std::make_pair(x: 0U, y: &RISCV::GPRF32CRegClass);
23021 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23022 return std::make_pair(x: 0U, y: &RISCV::GPRPairCRegClass);
23023 if (!VT.isVector())
23024 return std::make_pair(x: 0U, y: &RISCV::GPRCRegClass);
23025 } else if (Constraint == "cR") {
23026 if (((VT == MVT::i64 || VT == MVT::f64) && !Subtarget.is64Bit()) ||
23027 (VT == MVT::i128 && Subtarget.is64Bit()))
23028 return std::make_pair(x: 0U, y: &RISCV::GPRPairCRegClass);
23029 } else if (Constraint == "cf") {
23030 if (VT == MVT::f16) {
23031 if (Subtarget.hasStdExtZfhmin())
23032 return std::make_pair(x: 0U, y: &RISCV::FPR16CRegClass);
23033 if (Subtarget.hasStdExtZhinxmin())
23034 return std::make_pair(x: 0U, y: &RISCV::GPRF16CRegClass);
23035 } else if (VT == MVT::f32) {
23036 if (Subtarget.hasStdExtF())
23037 return std::make_pair(x: 0U, y: &RISCV::FPR32CRegClass);
23038 if (Subtarget.hasStdExtZfinx())
23039 return std::make_pair(x: 0U, y: &RISCV::GPRF32CRegClass);
23040 } else if (VT == MVT::f64) {
23041 if (Subtarget.hasStdExtD())
23042 return std::make_pair(x: 0U, y: &RISCV::FPR64CRegClass);
23043 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23044 return std::make_pair(x: 0U, y: &RISCV::GPRPairCRegClass);
23045 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
23046 return std::make_pair(x: 0U, y: &RISCV::GPRCRegClass);
23047 }
23048 }
23049
23050 // Clang will correctly decode the usage of register name aliases into their
23051 // official names. However, other frontends like `rustc` do not. This allows
23052 // users of these frontends to use the ABI names for registers in LLVM-style
23053 // register constraints.
23054 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
23055 .Case(S: "{zero}", Value: RISCV::X0)
23056 .Case(S: "{ra}", Value: RISCV::X1)
23057 .Case(S: "{sp}", Value: RISCV::X2)
23058 .Case(S: "{gp}", Value: RISCV::X3)
23059 .Case(S: "{tp}", Value: RISCV::X4)
23060 .Case(S: "{t0}", Value: RISCV::X5)
23061 .Case(S: "{t1}", Value: RISCV::X6)
23062 .Case(S: "{t2}", Value: RISCV::X7)
23063 .Cases(S0: "{s0}", S1: "{fp}", Value: RISCV::X8)
23064 .Case(S: "{s1}", Value: RISCV::X9)
23065 .Case(S: "{a0}", Value: RISCV::X10)
23066 .Case(S: "{a1}", Value: RISCV::X11)
23067 .Case(S: "{a2}", Value: RISCV::X12)
23068 .Case(S: "{a3}", Value: RISCV::X13)
23069 .Case(S: "{a4}", Value: RISCV::X14)
23070 .Case(S: "{a5}", Value: RISCV::X15)
23071 .Case(S: "{a6}", Value: RISCV::X16)
23072 .Case(S: "{a7}", Value: RISCV::X17)
23073 .Case(S: "{s2}", Value: RISCV::X18)
23074 .Case(S: "{s3}", Value: RISCV::X19)
23075 .Case(S: "{s4}", Value: RISCV::X20)
23076 .Case(S: "{s5}", Value: RISCV::X21)
23077 .Case(S: "{s6}", Value: RISCV::X22)
23078 .Case(S: "{s7}", Value: RISCV::X23)
23079 .Case(S: "{s8}", Value: RISCV::X24)
23080 .Case(S: "{s9}", Value: RISCV::X25)
23081 .Case(S: "{s10}", Value: RISCV::X26)
23082 .Case(S: "{s11}", Value: RISCV::X27)
23083 .Case(S: "{t3}", Value: RISCV::X28)
23084 .Case(S: "{t4}", Value: RISCV::X29)
23085 .Case(S: "{t5}", Value: RISCV::X30)
23086 .Case(S: "{t6}", Value: RISCV::X31)
23087 .Default(Value: RISCV::NoRegister);
23088 if (XRegFromAlias != RISCV::NoRegister)
23089 return std::make_pair(x&: XRegFromAlias, y: &RISCV::GPRRegClass);
23090
23091 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
23092 // record rather than the AsmName to choose registers for InlineAsm
23093 // constraints, and we want to match those names to the widest floating-point
23094 // register type available, so manually select floating-point registers here.
23095 //
23096 // The second case is the ABI name of the register, so that frontends can also
23097 // use the ABI names in register constraint lists.
23098 if (Subtarget.hasStdExtF()) {
23099 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
23100 .Cases(S0: "{f0}", S1: "{ft0}", Value: RISCV::F0_F)
23101 .Cases(S0: "{f1}", S1: "{ft1}", Value: RISCV::F1_F)
23102 .Cases(S0: "{f2}", S1: "{ft2}", Value: RISCV::F2_F)
23103 .Cases(S0: "{f3}", S1: "{ft3}", Value: RISCV::F3_F)
23104 .Cases(S0: "{f4}", S1: "{ft4}", Value: RISCV::F4_F)
23105 .Cases(S0: "{f5}", S1: "{ft5}", Value: RISCV::F5_F)
23106 .Cases(S0: "{f6}", S1: "{ft6}", Value: RISCV::F6_F)
23107 .Cases(S0: "{f7}", S1: "{ft7}", Value: RISCV::F7_F)
23108 .Cases(S0: "{f8}", S1: "{fs0}", Value: RISCV::F8_F)
23109 .Cases(S0: "{f9}", S1: "{fs1}", Value: RISCV::F9_F)
23110 .Cases(S0: "{f10}", S1: "{fa0}", Value: RISCV::F10_F)
23111 .Cases(S0: "{f11}", S1: "{fa1}", Value: RISCV::F11_F)
23112 .Cases(S0: "{f12}", S1: "{fa2}", Value: RISCV::F12_F)
23113 .Cases(S0: "{f13}", S1: "{fa3}", Value: RISCV::F13_F)
23114 .Cases(S0: "{f14}", S1: "{fa4}", Value: RISCV::F14_F)
23115 .Cases(S0: "{f15}", S1: "{fa5}", Value: RISCV::F15_F)
23116 .Cases(S0: "{f16}", S1: "{fa6}", Value: RISCV::F16_F)
23117 .Cases(S0: "{f17}", S1: "{fa7}", Value: RISCV::F17_F)
23118 .Cases(S0: "{f18}", S1: "{fs2}", Value: RISCV::F18_F)
23119 .Cases(S0: "{f19}", S1: "{fs3}", Value: RISCV::F19_F)
23120 .Cases(S0: "{f20}", S1: "{fs4}", Value: RISCV::F20_F)
23121 .Cases(S0: "{f21}", S1: "{fs5}", Value: RISCV::F21_F)
23122 .Cases(S0: "{f22}", S1: "{fs6}", Value: RISCV::F22_F)
23123 .Cases(S0: "{f23}", S1: "{fs7}", Value: RISCV::F23_F)
23124 .Cases(S0: "{f24}", S1: "{fs8}", Value: RISCV::F24_F)
23125 .Cases(S0: "{f25}", S1: "{fs9}", Value: RISCV::F25_F)
23126 .Cases(S0: "{f26}", S1: "{fs10}", Value: RISCV::F26_F)
23127 .Cases(S0: "{f27}", S1: "{fs11}", Value: RISCV::F27_F)
23128 .Cases(S0: "{f28}", S1: "{ft8}", Value: RISCV::F28_F)
23129 .Cases(S0: "{f29}", S1: "{ft9}", Value: RISCV::F29_F)
23130 .Cases(S0: "{f30}", S1: "{ft10}", Value: RISCV::F30_F)
23131 .Cases(S0: "{f31}", S1: "{ft11}", Value: RISCV::F31_F)
23132 .Default(Value: RISCV::NoRegister);
23133 if (FReg != RISCV::NoRegister) {
23134 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
23135 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
23136 unsigned RegNo = FReg - RISCV::F0_F;
23137 unsigned DReg = RISCV::F0_D + RegNo;
23138 return std::make_pair(x&: DReg, y: &RISCV::FPR64RegClass);
23139 }
23140 if (VT == MVT::f32 || VT == MVT::Other)
23141 return std::make_pair(x&: FReg, y: &RISCV::FPR32RegClass);
23142 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
23143 unsigned RegNo = FReg - RISCV::F0_F;
23144 unsigned HReg = RISCV::F0_H + RegNo;
23145 return std::make_pair(x&: HReg, y: &RISCV::FPR16RegClass);
23146 }
23147 }
23148 }
23149
23150 if (Subtarget.hasVInstructions()) {
23151 Register VReg = StringSwitch<Register>(Constraint.lower())
23152 .Case(S: "{v0}", Value: RISCV::V0)
23153 .Case(S: "{v1}", Value: RISCV::V1)
23154 .Case(S: "{v2}", Value: RISCV::V2)
23155 .Case(S: "{v3}", Value: RISCV::V3)
23156 .Case(S: "{v4}", Value: RISCV::V4)
23157 .Case(S: "{v5}", Value: RISCV::V5)
23158 .Case(S: "{v6}", Value: RISCV::V6)
23159 .Case(S: "{v7}", Value: RISCV::V7)
23160 .Case(S: "{v8}", Value: RISCV::V8)
23161 .Case(S: "{v9}", Value: RISCV::V9)
23162 .Case(S: "{v10}", Value: RISCV::V10)
23163 .Case(S: "{v11}", Value: RISCV::V11)
23164 .Case(S: "{v12}", Value: RISCV::V12)
23165 .Case(S: "{v13}", Value: RISCV::V13)
23166 .Case(S: "{v14}", Value: RISCV::V14)
23167 .Case(S: "{v15}", Value: RISCV::V15)
23168 .Case(S: "{v16}", Value: RISCV::V16)
23169 .Case(S: "{v17}", Value: RISCV::V17)
23170 .Case(S: "{v18}", Value: RISCV::V18)
23171 .Case(S: "{v19}", Value: RISCV::V19)
23172 .Case(S: "{v20}", Value: RISCV::V20)
23173 .Case(S: "{v21}", Value: RISCV::V21)
23174 .Case(S: "{v22}", Value: RISCV::V22)
23175 .Case(S: "{v23}", Value: RISCV::V23)
23176 .Case(S: "{v24}", Value: RISCV::V24)
23177 .Case(S: "{v25}", Value: RISCV::V25)
23178 .Case(S: "{v26}", Value: RISCV::V26)
23179 .Case(S: "{v27}", Value: RISCV::V27)
23180 .Case(S: "{v28}", Value: RISCV::V28)
23181 .Case(S: "{v29}", Value: RISCV::V29)
23182 .Case(S: "{v30}", Value: RISCV::V30)
23183 .Case(S: "{v31}", Value: RISCV::V31)
23184 .Default(Value: RISCV::NoRegister);
23185 if (VReg != RISCV::NoRegister) {
23186 if (TRI->isTypeLegalForClass(RC: RISCV::VMRegClass, T: VT.SimpleTy))
23187 return std::make_pair(x&: VReg, y: &RISCV::VMRegClass);
23188 if (TRI->isTypeLegalForClass(RC: RISCV::VRRegClass, T: VT.SimpleTy))
23189 return std::make_pair(x&: VReg, y: &RISCV::VRRegClass);
23190 for (const auto *RC :
23191 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
23192 if (TRI->isTypeLegalForClass(RC: *RC, T: VT.SimpleTy)) {
23193 VReg = TRI->getMatchingSuperReg(Reg: VReg, SubIdx: RISCV::sub_vrm1_0, RC);
23194 return std::make_pair(x&: VReg, y&: RC);
23195 }
23196 }
23197 }
23198 }
23199
23200 std::pair<Register, const TargetRegisterClass *> Res =
23201 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
23202
23203 // If we picked one of the Zfinx register classes, remap it to the GPR class.
23204 // FIXME: When Zfinx is supported in CodeGen this will need to take the
23205 // Subtarget into account.
23206 if (Res.second == &RISCV::GPRF16RegClass ||
23207 Res.second == &RISCV::GPRF32RegClass ||
23208 Res.second == &RISCV::GPRPairRegClass)
23209 return std::make_pair(x&: Res.first, y: &RISCV::GPRRegClass);
23210
23211 return Res;
23212}
23213
23214InlineAsm::ConstraintCode
23215RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
23216 // Currently only support length 1 constraints.
23217 if (ConstraintCode.size() == 1) {
23218 switch (ConstraintCode[0]) {
23219 case 'A':
23220 return InlineAsm::ConstraintCode::A;
23221 default:
23222 break;
23223 }
23224 }
23225
23226 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
23227}
23228
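// Lower the immediate-style constraints: 'I' (simm12), 'J' (zero), and
// 'K' (uimm5) become target constants; 'S' is forwarded to the generic 's'
// (symbolic address) handling.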
23229void RISCVTargetLowering::LowerAsmOperandForConstraint(
23230 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
23231 SelectionDAG &DAG) const {
23232 // Currently only support length 1 constraints.
23233 if (Constraint.size() == 1) {
23234 switch (Constraint[0]) {
23235 case 'I':
23236 // Validate & create a 12-bit signed immediate operand.
23237 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) {
23238 uint64_t CVal = C->getSExtValue();
23239 if (isInt<12>(x: CVal))
23240 Ops.push_back(x: DAG.getSignedTargetConstant(Val: CVal, DL: SDLoc(Op),
23241 VT: Subtarget.getXLenVT()));
23242 }
23243 return;
23244 case 'J':
23245 // Validate & create an integer zero operand.
23246 if (isNullConstant(V: Op))
23247 Ops.push_back(
23248 x: DAG.getTargetConstant(Val: 0, DL: SDLoc(Op), VT: Subtarget.getXLenVT()));
23249 return;
23250 case 'K':
23251 // Validate & create a 5-bit unsigned immediate operand.
23252 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) {
23253 uint64_t CVal = C->getZExtValue();
23254 if (isUInt<5>(x: CVal))
23255 Ops.push_back(
23256 x: DAG.getTargetConstant(Val: CVal, DL: SDLoc(Op), VT: Subtarget.getXLenVT()));
23257 }
23258 return;
23259 case 'S':
23260 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint: "s", Ops, DAG);
23261 return;
23262 default:
23263 break;
23264 }
23265 }
23266 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
23267}
23268
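// Emit the leading fence for an atomic load/store, if one is required. With
// Ztso, only a sequentially consistent load needs a leading fence; otherwise
// seq_cst loads and release-or-stronger stores get one.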
23269Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
23270 Instruction *Inst,
23271 AtomicOrdering Ord) const {
23272 if (Subtarget.hasStdExtZtso()) {
23273 if (isa<LoadInst>(Val: Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
23274 return Builder.CreateFence(Ordering: Ord);
23275 return nullptr;
23276 }
23277
23278 if (isa<LoadInst>(Val: Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
23279 return Builder.CreateFence(Ordering: Ord);
23280 if (isa<StoreInst>(Val: Inst) && isReleaseOrStronger(AO: Ord))
23281 return Builder.CreateFence(Ordering: AtomicOrdering::Release);
23282 return nullptr;
23283}
23284
23285Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
23286 Instruction *Inst,
23287 AtomicOrdering Ord) const {
23288 if (Subtarget.hasStdExtZtso()) {
23289 if (isa<StoreInst>(Val: Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
23290 return Builder.CreateFence(Ordering: Ord);
23291 return nullptr;
23292 }
23293
23294 if (isa<LoadInst>(Val: Inst) && isAcquireOrStronger(AO: Ord))
23295 return Builder.CreateFence(Ordering: AtomicOrdering::Acquire);
23296 if (Subtarget.enableTrailingSeqCstFence() && isa<StoreInst>(Val: Inst) &&
23297 Ord == AtomicOrdering::SequentiallyConsistent)
23298 return Builder.CreateFence(Ordering: AtomicOrdering::SequentiallyConsistent);
23299 return nullptr;
23300}
23301
23302TargetLowering::AtomicExpansionKind
23303RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
23304 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
23305 // point operations can't be used in an lr/sc sequence without breaking the
23306 // forward-progress guarantee.
23307 if (AI->isFloatingPointOperation() ||
23308 AI->getOperation() == AtomicRMWInst::UIncWrap ||
23309 AI->getOperation() == AtomicRMWInst::UDecWrap ||
23310 AI->getOperation() == AtomicRMWInst::USubCond ||
23311 AI->getOperation() == AtomicRMWInst::USubSat)
23312 return AtomicExpansionKind::CmpXChg;
23313
23314 // Don't expand forced atomics; we want to have __sync libcalls instead.
23315 if (Subtarget.hasForcedAtomics())
23316 return AtomicExpansionKind::None;
23317
23318 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
23319 if (AI->getOperation() == AtomicRMWInst::Nand) {
23320 if (Subtarget.hasStdExtZacas() &&
23321 (Size >= 32 || Subtarget.hasStdExtZabha()))
23322 return AtomicExpansionKind::CmpXChg;
23323 if (Size < 32)
23324 return AtomicExpansionKind::MaskedIntrinsic;
23325 }
23326
23327 if (Size < 32 && !Subtarget.hasStdExtZabha())
23328 return AtomicExpansionKind::MaskedIntrinsic;
23329
23330 return AtomicExpansionKind::None;
23331}
23332
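// Select the riscv_masked_atomicrmw_* intrinsic that implements the given
// AtomicRMW operation for the given XLen.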
23333static Intrinsic::ID
23334getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
23335 if (XLen == 32) {
23336 switch (BinOp) {
23337 default:
23338 llvm_unreachable("Unexpected AtomicRMW BinOp");
23339 case AtomicRMWInst::Xchg:
23340 return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
23341 case AtomicRMWInst::Add:
23342 return Intrinsic::riscv_masked_atomicrmw_add_i32;
23343 case AtomicRMWInst::Sub:
23344 return Intrinsic::riscv_masked_atomicrmw_sub_i32;
23345 case AtomicRMWInst::Nand:
23346 return Intrinsic::riscv_masked_atomicrmw_nand_i32;
23347 case AtomicRMWInst::Max:
23348 return Intrinsic::riscv_masked_atomicrmw_max_i32;
23349 case AtomicRMWInst::Min:
23350 return Intrinsic::riscv_masked_atomicrmw_min_i32;
23351 case AtomicRMWInst::UMax:
23352 return Intrinsic::riscv_masked_atomicrmw_umax_i32;
23353 case AtomicRMWInst::UMin:
23354 return Intrinsic::riscv_masked_atomicrmw_umin_i32;
23355 }
23356 }
23357
23358 if (XLen == 64) {
23359 switch (BinOp) {
23360 default:
23361 llvm_unreachable("Unexpected AtomicRMW BinOp");
23362 case AtomicRMWInst::Xchg:
23363 return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
23364 case AtomicRMWInst::Add:
23365 return Intrinsic::riscv_masked_atomicrmw_add_i64;
23366 case AtomicRMWInst::Sub:
23367 return Intrinsic::riscv_masked_atomicrmw_sub_i64;
23368 case AtomicRMWInst::Nand:
23369 return Intrinsic::riscv_masked_atomicrmw_nand_i64;
23370 case AtomicRMWInst::Max:
23371 return Intrinsic::riscv_masked_atomicrmw_max_i64;
23372 case AtomicRMWInst::Min:
23373 return Intrinsic::riscv_masked_atomicrmw_min_i64;
23374 case AtomicRMWInst::UMax:
23375 return Intrinsic::riscv_masked_atomicrmw_umax_i64;
23376 case AtomicRMWInst::UMin:
23377 return Intrinsic::riscv_masked_atomicrmw_umin_i64;
23378 }
23379 }
23380
23381 llvm_unreachable("Unexpected XLen\n");
23382}
23383
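// Expand a part-word atomicrmw into a call to the matching
// riscv_masked_atomicrmw_* intrinsic (an LR/SC loop on the containing aligned
// word), sign-extending the operands to XLen on RV64.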
23384Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
23385 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
23386 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
23387 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
23388 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
23389 // mask, as this produces better code than the LR/SC loop emitted by
23390 // int_riscv_masked_atomicrmw_xchg.
23391 if (AI->getOperation() == AtomicRMWInst::Xchg &&
23392 isa<ConstantInt>(Val: AI->getValOperand())) {
23393 ConstantInt *CVal = cast<ConstantInt>(Val: AI->getValOperand());
23394 if (CVal->isZero())
23395 return Builder.CreateAtomicRMW(Op: AtomicRMWInst::And, Ptr: AlignedAddr,
23396 Val: Builder.CreateNot(V: Mask, Name: "Inv_Mask"),
23397 Align: AI->getAlign(), Ordering: Ord);
23398 if (CVal->isMinusOne())
23399 return Builder.CreateAtomicRMW(Op: AtomicRMWInst::Or, Ptr: AlignedAddr, Val: Mask,
23400 Align: AI->getAlign(), Ordering: Ord);
23401 }
23402
23403 unsigned XLen = Subtarget.getXLen();
23404 Value *Ordering =
23405 Builder.getIntN(N: XLen, C: static_cast<uint64_t>(AI->getOrdering()));
23406 Type *Tys[] = {AlignedAddr->getType()};
23407 Function *LrwOpScwLoop = Intrinsic::getOrInsertDeclaration(
23408 M: AI->getModule(),
23409 id: getIntrinsicForMaskedAtomicRMWBinOp(XLen, BinOp: AI->getOperation()), Tys);
23410
23411 if (XLen == 64) {
23412 Incr = Builder.CreateSExt(V: Incr, DestTy: Builder.getInt64Ty());
23413 Mask = Builder.CreateSExt(V: Mask, DestTy: Builder.getInt64Ty());
23414 ShiftAmt = Builder.CreateSExt(V: ShiftAmt, DestTy: Builder.getInt64Ty());
23415 }
23416
23417 Value *Result;
23418
23419 // For min/max we must pass the shift amount needed to sign extend the loaded
23420 // value before performing the signed comparison. ShiftAmt is the number of
23421 // bits to shift the value into position; pass XLen - ValWidth - ShiftAmt,
23422 // which is the number of bits to shift the value left and then right again
23423 // in order to sign-extend it.
23424 if (AI->getOperation() == AtomicRMWInst::Min ||
23425 AI->getOperation() == AtomicRMWInst::Max) {
23426 const DataLayout &DL = AI->getDataLayout();
23427 unsigned ValWidth =
23428 DL.getTypeStoreSizeInBits(Ty: AI->getValOperand()->getType());
23429 Value *SextShamt =
23430 Builder.CreateSub(LHS: Builder.getIntN(N: XLen, C: XLen - ValWidth), RHS: ShiftAmt);
23431 Result = Builder.CreateCall(Callee: LrwOpScwLoop,
23432 Args: {AlignedAddr, Incr, Mask, SextShamt, Ordering});
23433 } else {
23434 Result =
23435 Builder.CreateCall(Callee: LrwOpScwLoop, Args: {AlignedAddr, Incr, Mask, Ordering});
23436 }
23437
23438 if (XLen == 64)
23439 Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty());
23440 return Result;
23441}
23442
23443TargetLowering::AtomicExpansionKind
23444RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
23445 AtomicCmpXchgInst *CI) const {
23446 // Don't expand forced atomics; we want to have __sync libcalls instead.
23447 if (Subtarget.hasForcedAtomics())
23448 return AtomicExpansionKind::None;
23449
23450 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
23451 if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
23452 (Size == 8 || Size == 16))
23453 return AtomicExpansionKind::MaskedIntrinsic;
23454 return AtomicExpansionKind::None;
23455}
23456
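// Expand a part-word cmpxchg into a call to the riscv_masked_cmpxchg
// intrinsic operating on the containing aligned word.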
23457Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
23458 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
23459 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
23460 unsigned XLen = Subtarget.getXLen();
23461 Value *Ordering = Builder.getIntN(N: XLen, C: static_cast<uint64_t>(Ord));
23462 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
23463 if (XLen == 64) {
23464 CmpVal = Builder.CreateSExt(V: CmpVal, DestTy: Builder.getInt64Ty());
23465 NewVal = Builder.CreateSExt(V: NewVal, DestTy: Builder.getInt64Ty());
23466 Mask = Builder.CreateSExt(V: Mask, DestTy: Builder.getInt64Ty());
23467 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
23468 }
23469 Type *Tys[] = {AlignedAddr->getType()};
23470 Value *Result = Builder.CreateIntrinsic(
23471 ID: CmpXchgIntrID, Types: Tys, Args: {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
23472 if (XLen == 64)
23473 Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty());
23474 return Result;
23475}
23476
23477bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
23478 EVT DataVT) const {
23479 // We have indexed loads for all supported EEW types. Indices are always
23480 // zero extended.
23481 return Extend.getOpcode() == ISD::ZERO_EXTEND &&
23482 isTypeLegal(VT: Extend.getValueType()) &&
23483 isTypeLegal(VT: Extend.getOperand(i: 0).getValueType()) &&
23484 Extend.getOperand(i: 0).getValueType().getVectorElementType() != MVT::i1;
23485}
23486
23487bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
23488 EVT VT) const {
23489 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
23490 return false;
23491
23492 switch (FPVT.getSimpleVT().SimpleTy) {
23493 case MVT::f16:
23494 return Subtarget.hasStdExtZfhmin();
23495 case MVT::f32:
23496 return Subtarget.hasStdExtF();
23497 case MVT::f64:
23498 return Subtarget.hasStdExtD();
23499 default:
23500 return false;
23501 }
23502}
23503
23504unsigned RISCVTargetLowering::getJumpTableEncoding() const {
23505 // If we are using the small code model, we can reduce the size of each jump
23506 // table entry to 4 bytes.
23507 if (Subtarget.is64Bit() && !isPositionIndependent() &&
23508 getTargetMachine().getCodeModel() == CodeModel::Small) {
23509 return MachineJumpTableInfo::EK_Custom32;
23510 }
23511 return TargetLowering::getJumpTableEncoding();
23512}
23513
23514const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
23515 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
23516 unsigned uid, MCContext &Ctx) const {
23517 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
23518 getTargetMachine().getCodeModel() == CodeModel::Small);
23519 return MCSymbolRefExpr::create(Symbol: MBB->getSymbol(), Ctx);
23520}
23521
23522bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
23523 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
23524 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
23525 // a power of two as well.
23526 // FIXME: This doesn't work for zve32, but that's already broken
23527 // elsewhere for the same reason.
23528 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
23529 static_assert(RISCV::RVVBitsPerBlock == 64,
23530 "RVVBitsPerBlock changed, audit needed");
23531 return true;
23532}
23533
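// Try to match Op as a base-plus-offset address that XTHeadMemIdx can encode:
// the offset must be of the form sign_extend(imm5) << imm2 with imm2 in
// [0, 3].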
23534bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
23535 SDValue &Offset,
23536 ISD::MemIndexedMode &AM,
23537 SelectionDAG &DAG) const {
23538 // Indexed load/store addressing is only supported via the XTHeadMemIdx
23539 // extension.
23539 if (!Subtarget.hasVendorXTHeadMemIdx())
23540 return false;
23541
23542 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
23543 return false;
23544
23545 Base = Op->getOperand(Num: 0);
23546 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: Op->getOperand(Num: 1))) {
23547 int64_t RHSC = RHS->getSExtValue();
23548 if (Op->getOpcode() == ISD::SUB)
23549 RHSC = -(uint64_t)RHSC;
23550
23551 // The constants that can be encoded in the THeadMemIdx instructions
23552 // are of the form (sign_extend(imm5) << imm2).
23553 bool isLegalIndexedOffset = false;
23554 for (unsigned i = 0; i < 4; i++)
23555 if (isInt<5>(x: RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
23556 isLegalIndexedOffset = true;
23557 break;
23558 }
23559
23560 if (!isLegalIndexedOffset)
23561 return false;
23562
23563 Offset = Op->getOperand(Num: 1);
23564 return true;
23565 }
23566
23567 return false;
23568}
23569
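// Match a pre-indexed load/store: the base pointer itself must be an add/sub
// of a base and an XTHeadMemIdx-encodable offset.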
23570bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
23571 SDValue &Offset,
23572 ISD::MemIndexedMode &AM,
23573 SelectionDAG &DAG) const {
23574 EVT VT;
23575 SDValue Ptr;
23576 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N)) {
23577 VT = LD->getMemoryVT();
23578 Ptr = LD->getBasePtr();
23579 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Val: N)) {
23580 VT = ST->getMemoryVT();
23581 Ptr = ST->getBasePtr();
23582 } else
23583 return false;
23584
23585 if (!getIndexedAddressParts(Op: Ptr.getNode(), Base, Offset, AM, DAG))
23586 return false;
23587
23588 AM = ISD::PRE_INC;
23589 return true;
23590}
23591
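// Match a post-indexed load/store, either for XCVmem (rv32 only, any
// increment added to the access's base pointer) or for XTHeadMemIdx (an
// encodable immediate increment whose base is the access's pointer).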
23592bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
23593 SDValue &Base,
23594 SDValue &Offset,
23595 ISD::MemIndexedMode &AM,
23596 SelectionDAG &DAG) const {
23597 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
23598 if (Op->getOpcode() != ISD::ADD)
23599 return false;
23600
23601 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Val: N))
23602 Base = LS->getBasePtr();
23603 else
23604 return false;
23605
23606 if (Base == Op->getOperand(Num: 0))
23607 Offset = Op->getOperand(Num: 1);
23608 else if (Base == Op->getOperand(Num: 1))
23609 Offset = Op->getOperand(Num: 0);
23610 else
23611 return false;
23612
23613 AM = ISD::POST_INC;
23614 return true;
23615 }
23616
23617 EVT VT;
23618 SDValue Ptr;
23619 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N)) {
23620 VT = LD->getMemoryVT();
23621 Ptr = LD->getBasePtr();
23622 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Val: N)) {
23623 VT = ST->getMemoryVT();
23624 Ptr = ST->getBasePtr();
23625 } else
23626 return false;
23627
23628 if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
23629 return false;
23630 // Post-indexing updates the base, so the transform is only valid if the
23631 // base of the increment is the same as the load/store's pointer.
23632 if (Ptr != Base)
23633 return false;
23634
23635 AM = ISD::POST_INC;
23636 return true;
23637}
23638
23639bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
23640 EVT VT) const {
23641 EVT SVT = VT.getScalarType();
23642
23643 if (!SVT.isSimple())
23644 return false;
23645
23646 switch (SVT.getSimpleVT().SimpleTy) {
23647 case MVT::f16:
23648 return VT.isVector() ? Subtarget.hasVInstructionsF16()
23649 : Subtarget.hasStdExtZfhOrZhinx();
23650 case MVT::f32:
23651 return Subtarget.hasStdExtFOrZfinx();
23652 case MVT::f64:
23653 return Subtarget.hasStdExtDOrZdinx();
23654 default:
23655 break;
23656 }
23657
23658 return false;
23659}
23660
23661ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const {
23662 // Zacas will use amocas.w, which does not require the compare argument to be extended.
23663 return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
23664}
23665
23666Register RISCVTargetLowering::getExceptionPointerRegister(
23667 const Constant *PersonalityFn) const {
23668 return RISCV::X10;
23669}
23670
23671Register RISCVTargetLowering::getExceptionSelectorRegister(
23672 const Constant *PersonalityFn) const {
23673 return RISCV::X11;
23674}
23675
23676bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
23677 // Return false to suppress unnecessary extensions when a libcall argument or
23678 // return value is a float narrower than XLEN on a soft-float ABI.
23679 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
23680 Type.getSizeInBits() < Subtarget.getXLen()))
23681 return false;
23682
23683 return true;
23684}
23685
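// On RV64, i32 libcall arguments and return values are passed sign-extended
// to 64 bits regardless of the signedness of the original type.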
23686bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(Type *Ty,
23687 bool IsSigned) const {
23688 if (Subtarget.is64Bit() && Ty->isIntegerTy(Bitwidth: 32))
23689 return true;
23690
23691 return IsSigned;
23692}
23693
23694bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
23695 SDValue C) const {
23696 // Check integral scalar types.
23697 if (!VT.isScalarInteger())
23698 return false;
23699
23700 // Omit the optimization if the subtarget has the Zmmul extension (hardware
23701 // multiply) and the data size exceeds XLen.
23702 const bool HasZmmul = Subtarget.hasStdExtZmmul();
23703 if (HasZmmul && VT.getSizeInBits() > Subtarget.getXLen())
23704 return false;
23705
23706 auto *ConstNode = cast<ConstantSDNode>(Val&: C);
23707 const APInt &Imm = ConstNode->getAPIntValue();
23708
23709 // Break the MUL to a SLLI and an ADD/SUB.
23710 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
23711 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
23712 return true;
23713
23714 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
23715 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(N: 12) &&
23716 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
23717 (Imm - 8).isPowerOf2()))
23718 return true;
23719
23720 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
23721 // a pair of LUI/ADDI.
23722 if (!Imm.isSignedIntN(N: 12) && Imm.countr_zero() < 12 &&
23723 ConstNode->hasOneUse()) {
23724 APInt ImmS = Imm.ashr(ShiftAmt: Imm.countr_zero());
23725 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
23726 (1 - ImmS).isPowerOf2())
23727 return true;
23728 }
23729
23730 return false;
23731}
23732
23733bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
23734 SDValue ConstNode) const {
23735 // Let the DAGCombiner decide for vectors.
23736 EVT VT = AddNode.getValueType();
23737 if (VT.isVector())
23738 return true;
23739
23740 // Let the DAGCombiner decide for larger types.
23741 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
23742 return true;
23743
23744 // The combine makes things worse if c1 is simm12 while c1*c2 is not.
23745 ConstantSDNode *C1Node = cast<ConstantSDNode>(Val: AddNode.getOperand(i: 1));
23746 ConstantSDNode *C2Node = cast<ConstantSDNode>(Val&: ConstNode);
23747 const APInt &C1 = C1Node->getAPIntValue();
23748 const APInt &C2 = C2Node->getAPIntValue();
23749 if (C1.isSignedIntN(N: 12) && !(C1 * C2).isSignedIntN(N: 12))
23750 return false;
23751
23752 // Default to true and let the DAGCombiner decide.
23753 return true;
23754}
23755
23756bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
23757 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
23758 unsigned *Fast) const {
23759 if (!VT.isVector()) {
23760 if (Fast)
23761 *Fast = Subtarget.enableUnalignedScalarMem();
23762 return Subtarget.enableUnalignedScalarMem();
23763 }
23764
23765 // All vector implementations must support element alignment
23766 EVT ElemVT = VT.getVectorElementType();
23767 if (Alignment >= ElemVT.getStoreSize()) {
23768 if (Fast)
23769 *Fast = 1;
23770 return true;
23771 }
23772
23773 // Note: We lower an unmasked unaligned vector access to an equally sized
23774 // e8 element type access. Given this, we effectively support all unmasked
23775 // misaligned accesses. TODO: Work through the codegen implications of
23776 // allowing such accesses to be formed, and considered fast.
23777 if (Fast)
23778 *Fast = Subtarget.enableUnalignedVectorMem();
23779 return Subtarget.enableUnalignedVectorMem();
23780}
23782
23783EVT RISCVTargetLowering::getOptimalMemOpType(const MemOp &Op,
23784 const AttributeList &FuncAttributes) const {
23785 if (!Subtarget.hasVInstructions())
23786 return MVT::Other;
23787
23788 if (FuncAttributes.hasFnAttr(Kind: Attribute::NoImplicitFloat))
23789 return MVT::Other;
23790
23791 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
23792 // has an expansion threshold, and we want the number of hardware memory
23793 // operations to correspond roughly to that threshold. LMUL>1 operations
23794 // are typically expanded linearly internally, and thus correspond to more
23795 // than one actual memory operation. Note that store merging and load
23796 // combining will typically form larger LMUL operations from the LMUL1
23797 // operations emitted here, and that's okay because combining isn't
23798 // introducing new memory operations; it's just merging existing ones.
23799 // NOTE: We limit to 1024 bytes to avoid creating an invalid MVT.
23800 const unsigned MinVLenInBytes =
23801 std::min(a: Subtarget.getRealMinVLen() / 8, b: 1024U);
23802
23803 if (Op.size() < MinVLenInBytes)
23804 // TODO: Figure out short memops. For the moment, do the default thing
23805 // which ends up using scalar sequences.
23806 return MVT::Other;
23807
23808 // If the minimum VLEN does not exceed RISCV::RVVBitsPerBlock, we don't
23809 // support fixed vectors.
23810 if (MinVLenInBytes <= RISCV::RVVBytesPerBlock)
23811 return MVT::Other;
23812
23813 // Prefer i8 for non-zero memset as it allows us to avoid materializing
23814 // a large scalar constant and instead use vmv.v.x/i to do the
23815 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
23816 // maximize the chance we can encode the size in the vsetvli.
23817 MVT ELenVT = MVT::getIntegerVT(BitWidth: Subtarget.getELen());
23818 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
23819
23820 // Do we have sufficient alignment for our preferred VT? If not, revert
23821 // to largest size allowed by our alignment criteria.
23822 if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
23823 Align RequiredAlign(PreferredVT.getStoreSize());
23824 if (Op.isFixedDstAlign())
23825 RequiredAlign = std::min(a: RequiredAlign, b: Op.getDstAlign());
23826 if (Op.isMemcpy())
23827 RequiredAlign = std::min(a: RequiredAlign, b: Op.getSrcAlign());
23828 PreferredVT = MVT::getIntegerVT(BitWidth: RequiredAlign.value() * 8);
23829 }
23830 return MVT::getVectorVT(VT: PreferredVT, NumElements: MinVLenInBytes/PreferredVT.getStoreSize());
23831}
23832
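// Split Val into register-sized parts, handling the RISC-V specific cases the
// generic code cannot: GPR pairs passed as MVT::Untyped, [b]f16 NaN-boxed
// into an f32 FPR for the ABI, RVV vector tuples, and scalable vectors copied
// into a wider register type.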
23833bool RISCVTargetLowering::splitValueIntoRegisterParts(
23834 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
23835 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
23836 bool IsABIRegCopy = CC.has_value();
23837 EVT ValueVT = Val.getValueType();
23838
23839 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
23840 if ((ValueVT == PairVT ||
23841 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
23842 ValueVT == MVT::f64)) &&
23843 NumParts == 1 && PartVT == MVT::Untyped) {
23844 // Pairs in inline assembly, and f64 in inline assembly on rv32 with Zdinx.
23845 MVT XLenVT = Subtarget.getXLenVT();
23846 if (ValueVT == MVT::f64)
23847 Val = DAG.getBitcast(VT: MVT::i64, V: Val);
23848 auto [Lo, Hi] = DAG.SplitScalar(N: Val, DL, LoVT: XLenVT, HiVT: XLenVT);
23849 // Always creating an MVT::Untyped part, so always use
23850 // RISCVISD::BuildGPRPair.
23851 Parts[0] = DAG.getNode(Opcode: RISCVISD::BuildGPRPair, DL, VT: PartVT, N1: Lo, N2: Hi);
23852 return true;
23853 }
23854
23855 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
23856 PartVT == MVT::f32) {
23857 // Cast the [b]f16 to i16, extend to i32, pad the upper 16 bits with ones to
23858 // make a float NaN, and cast to f32.
23859 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::i16, Operand: Val);
23860 Val = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i32, Operand: Val);
23861 Val = DAG.getNode(Opcode: ISD::OR, DL, VT: MVT::i32, N1: Val,
23862 N2: DAG.getConstant(Val: 0xFFFF0000, DL, VT: MVT::i32));
23863 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: PartVT, Operand: Val);
23864 Parts[0] = Val;
23865 return true;
23866 }
23867
23868 if (ValueVT.isRISCVVectorTuple() && PartVT.isRISCVVectorTuple()) {
23869#ifndef NDEBUG
23870 unsigned ValNF = ValueVT.getRISCVVectorTupleNumFields();
23871 [[maybe_unused]] unsigned ValLMUL =
23872 divideCeil(ValueVT.getSizeInBits().getKnownMinValue(),
23873 ValNF * RISCV::RVVBitsPerBlock);
23874 unsigned PartNF = PartVT.getRISCVVectorTupleNumFields();
23875 [[maybe_unused]] unsigned PartLMUL =
23876 divideCeil(PartVT.getSizeInBits().getKnownMinValue(),
23877 PartNF * RISCV::RVVBitsPerBlock);
23878 assert(ValNF == PartNF && ValLMUL == PartLMUL &&
23879 "RISC-V vector tuple type only accepts same register class type "
23880 "TUPLE_INSERT");
23881#endif
23882
23883 Val = DAG.getNode(Opcode: RISCVISD::TUPLE_INSERT, DL, VT: PartVT, N1: DAG.getUNDEF(VT: PartVT),
23884 N2: Val, N3: DAG.getVectorIdxConstant(Val: 0, DL));
23885 Parts[0] = Val;
23886 return true;
23887 }
23888
23889 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
23890 LLVMContext &Context = *DAG.getContext();
23891 EVT ValueEltVT = ValueVT.getVectorElementType();
23892 EVT PartEltVT = PartVT.getVectorElementType();
23893 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
23894 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
23895 if (PartVTBitSize % ValueVTBitSize == 0) {
23896 assert(PartVTBitSize >= ValueVTBitSize);
23897 // If the element types are different, first widen the value (via
23898 // insert_subvector) to a vector with the same element type but PartVT's
23899 // total size, then bitcast to PartVT.
23900 // For example, to copy a <vscale x 1 x i8> value into <vscale x 4 x i16>,
23901 // we insert <vscale x 1 x i8> into <vscale x 8 x i8> as a subvector and
23902 // then bitcast that to <vscale x 4 x i16>.
23903 if (ValueEltVT != PartEltVT) {
23904 if (PartVTBitSize > ValueVTBitSize) {
23905 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
23906 assert(Count != 0 && "The number of elements should not be zero.");
23907 EVT SameEltTypeVT =
23908 EVT::getVectorVT(Context, VT: ValueEltVT, NumElements: Count, /*IsScalable=*/true);
23909 Val = DAG.getInsertSubvector(DL, Vec: DAG.getUNDEF(VT: SameEltTypeVT), SubVec: Val, Idx: 0);
23910 }
23911 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: PartVT, Operand: Val);
23912 } else {
23913 Val = DAG.getInsertSubvector(DL, Vec: DAG.getUNDEF(VT: PartVT), SubVec: Val, Idx: 0);
23914 }
23915 Parts[0] = Val;
23916 return true;
23917 }
23918 }
23919
23920 return false;
23921}
23922
23923SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
23924 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
23925 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
23926 bool IsABIRegCopy = CC.has_value();
23927
23928 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
23929 if ((ValueVT == PairVT ||
23930 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
23931 ValueVT == MVT::f64)) &&
23932 NumParts == 1 && PartVT == MVT::Untyped) {
23933 // Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
23934 MVT XLenVT = Subtarget.getXLenVT();
23935
23936 SDValue Val = Parts[0];
23937     // Always starting with an MVT::Untyped part, so always use
23938     // RISCVISD::SplitGPRPair.
23939 Val = DAG.getNode(Opcode: RISCVISD::SplitGPRPair, DL, VTList: DAG.getVTList(VT1: XLenVT, VT2: XLenVT),
23940 N: Val);
23941 Val = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: PairVT, N1: Val.getValue(R: 0),
23942 N2: Val.getValue(R: 1));
23943 if (ValueVT == MVT::f64)
23944 Val = DAG.getBitcast(VT: ValueVT, V: Val);
23945 return Val;
23946 }
23947
23948 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
23949 PartVT == MVT::f32) {
23950 SDValue Val = Parts[0];
23951
23952 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
23953 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::i32, Operand: Val);
23954 Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i16, Operand: Val);
23955 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ValueVT, Operand: Val);
23956 return Val;
23957 }
23958
23959 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
23960 LLVMContext &Context = *DAG.getContext();
23961 SDValue Val = Parts[0];
23962 EVT ValueEltVT = ValueVT.getVectorElementType();
23963 EVT PartEltVT = PartVT.getVectorElementType();
23964 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
23965 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
23966 if (PartVTBitSize % ValueVTBitSize == 0) {
23967 assert(PartVTBitSize >= ValueVTBitSize);
23968 EVT SameEltTypeVT = ValueVT;
23969       // If the element types are different, convert the value to the
23970       // element type of PartVT first.
23971       // For example, to copy a <vscale x 1 x i8> value out of
23972       // <vscale x 4 x i16>, we first bitcast <vscale x 4 x i16> to
23973       // <vscale x 8 x i8>, and then extract <vscale x 1 x i8> from the
23974       // result.
23975 if (ValueEltVT != PartEltVT) {
23976 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
23977         assert(Count != 0 && "The number of elements should not be zero.");
23978 SameEltTypeVT =
23979 EVT::getVectorVT(Context, VT: ValueEltVT, NumElements: Count, /*IsScalable=*/true);
23980 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: SameEltTypeVT, Operand: Val);
23981 }
23982 Val = DAG.getExtractSubvector(DL, VT: ValueVT, Vec: Val, Idx: 0);
23983 return Val;
23984 }
23985 }
23986 return SDValue();
23987}
23988
23989bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
23990 // When aggressively optimizing for code size, we prefer to use a div
23991 // instruction, as it is usually smaller than the alternative sequence.
23992 // TODO: Add vector division?
23993 bool OptSize = Attr.hasFnAttr(Kind: Attribute::MinSize);
23994 return OptSize && !VT.isVector();
23995}
23996
23997bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
23998   // Scalarizing zero_ext and sign_ext might prevent matching them to widening
23999   // instructions in some situations.
24000 unsigned Opc = N->getOpcode();
24001 if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
24002 return false;
24003 return true;
24004}
24005
24006static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
24007 Module *M = IRB.GetInsertBlock()->getModule();
24008 Function *ThreadPointerFunc = Intrinsic::getOrInsertDeclaration(
24009 M, id: Intrinsic::thread_pointer, Tys: IRB.getPtrTy());
24010 return IRB.CreateConstGEP1_32(Ty: IRB.getInt8Ty(),
24011 Ptr: IRB.CreateCall(Callee: ThreadPointerFunc), Idx0: Offset);
24012}
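// Conceptual sketch (illustrative, not verbatim compiler output): for a given
// Offset this helper builds IR along the lines of
//   %tp   = call ptr @llvm.thread.pointer()
//   %slot = getelementptr i8, ptr %tp, i32 Offset
// e.g. Offset = -0x10 selects the Fuchsia stack-guard slot used below.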
24013
24014Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
24015 // Fuchsia provides a fixed TLS slot for the stack cookie.
24016 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
24017 if (Subtarget.isTargetFuchsia())
24018 return useTpOffset(IRB, Offset: -0x10);
24019
24020 // Android provides a fixed TLS slot for the stack cookie. See the definition
24021 // of TLS_SLOT_STACK_GUARD in
24022 // https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/tls_defines.h
24023 if (Subtarget.isTargetAndroid())
24024 return useTpOffset(IRB, Offset: -0x18);
24025
24026 Module *M = IRB.GetInsertBlock()->getModule();
24027
24028 if (M->getStackProtectorGuard() == "tls") {
24029 // Users must specify the offset explicitly
24030 int Offset = M->getStackProtectorGuardOffset();
24031 return useTpOffset(IRB, Offset);
24032 }
24033
24034 return TargetLowering::getIRStackGuard(IRB);
24035}
24036
24037bool RISCVTargetLowering::isLegalInterleavedAccessType(
24038 VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
24039 const DataLayout &DL) const {
24040 EVT VT = getValueType(DL, Ty: VTy);
24041 // Don't lower vlseg/vsseg for vector types that can't be split.
24042 if (!isTypeLegal(VT))
24043 return false;
24044
24045 if (!isLegalElementTypeForRVV(ScalarTy: VT.getScalarType()) ||
24046 !allowsMemoryAccessForAlignment(Context&: VTy->getContext(), DL, VT, AddrSpace,
24047 Alignment))
24048 return false;
24049
24050 MVT ContainerVT = VT.getSimpleVT();
24051
24052 if (auto *FVTy = dyn_cast<FixedVectorType>(Val: VTy)) {
24053 if (!Subtarget.useRVVForFixedLengthVectors())
24054 return false;
24055 // Sometimes the interleaved access pass picks up splats as interleaves of
24056 // one element. Don't lower these.
24057 if (FVTy->getNumElements() < 2)
24058 return false;
24059
24060 ContainerVT = getContainerForFixedLengthVector(VT: VT.getSimpleVT());
24061 }
24062
24063 // Need to make sure that EMUL * NFIELDS ≤ 8
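  // For example (illustrative): a factor-4 segment access on an LMUL=2
  // container uses EMUL * NFIELDS = 2 * 4 = 8 and is accepted, whereas an
  // LMUL=4 container would need 16 and is rejected. Fractional LMULs always
  // satisfy the constraint.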
24064 auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(VLMul: getLMUL(VT: ContainerVT));
24065 if (Fractional)
24066 return true;
24067 return Factor * LMUL <= 8;
24068}
24069
24070bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
24071 Align Alignment) const {
24072 if (!Subtarget.hasVInstructions())
24073 return false;
24074
24075 // Only support fixed vectors if we know the minimum vector size.
24076 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
24077 return false;
24078
24079 EVT ScalarType = DataType.getScalarType();
24080 if (!isLegalElementTypeForRVV(ScalarTy: ScalarType))
24081 return false;
24082
24083 if (!Subtarget.enableUnalignedVectorMem() &&
24084 Alignment < ScalarType.getStoreSize())
24085 return false;
24086
24087 return true;
24088}
24089
24090static const Intrinsic::ID FixedVlsegIntrIds[] = {
24091 Intrinsic::riscv_seg2_load_mask, Intrinsic::riscv_seg3_load_mask,
24092 Intrinsic::riscv_seg4_load_mask, Intrinsic::riscv_seg5_load_mask,
24093 Intrinsic::riscv_seg6_load_mask, Intrinsic::riscv_seg7_load_mask,
24094 Intrinsic::riscv_seg8_load_mask};
24095
24096/// Lower an interleaved load into a vlsegN intrinsic.
24097///
24098/// E.g. Lower an interleaved load (Factor = 2):
24099/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
24100/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
24101/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
24102///
24103/// Into:
24104/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
24105/// %ptr, i64 4)
24106/// %vec0 = extractvalue { <4 x i32>, <4 x i32> } %ld2, 0
24107/// %vec1 = extractvalue { <4 x i32>, <4 x i32> } %ld2, 1
24108bool RISCVTargetLowering::lowerInterleavedLoad(
24109 LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
24110 ArrayRef<unsigned> Indices, unsigned Factor) const {
24111 assert(Indices.size() == Shuffles.size());
24112
24113 IRBuilder<> Builder(LI);
24114
24115 auto *VTy = cast<FixedVectorType>(Val: Shuffles[0]->getType());
24116 if (!isLegalInterleavedAccessType(VTy, Factor, Alignment: LI->getAlign(),
24117 AddrSpace: LI->getPointerAddressSpace(),
24118 DL: LI->getDataLayout()))
24119 return false;
24120
24121 auto *PtrTy = LI->getPointerOperandType();
24122 auto *XLenTy = Type::getIntNTy(C&: LI->getContext(), N: Subtarget.getXLen());
24123
24124   // If the segment load is going to be performed one segment at a time anyway
24125   // and there's only one element used, use a strided load instead. This
24126   // will be equally fast, and creates less vector register pressure.
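  // Illustrative example (assumed values): with Factor = 2, Indices = {1} and
  // i32 elements, this emits a vp.strided.load with an 8-byte stride starting
  // at %ptr + 4, instead of a two-field segment load whose other field would
  // go unused.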
24127 if (Indices.size() == 1 && !Subtarget.hasOptimizedSegmentLoadStore(NF: Factor)) {
24128 unsigned ScalarSizeInBytes = VTy->getScalarSizeInBits() / 8;
24129 Value *Stride = ConstantInt::get(Ty: XLenTy, V: Factor * ScalarSizeInBytes);
24130 Value *Offset = ConstantInt::get(Ty: XLenTy, V: Indices[0] * ScalarSizeInBytes);
24131 Value *BasePtr = Builder.CreatePtrAdd(Ptr: LI->getPointerOperand(), Offset);
24132 Value *Mask = Builder.getAllOnesMask(NumElts: VTy->getElementCount());
24133 Value *VL = Builder.getInt32(C: VTy->getNumElements());
24134
24135 CallInst *CI =
24136 Builder.CreateIntrinsic(ID: Intrinsic::experimental_vp_strided_load,
24137 Types: {VTy, BasePtr->getType(), Stride->getType()},
24138 Args: {BasePtr, Stride, Mask, VL});
24139 CI->addParamAttr(
24140 ArgNo: 0, Attr: Attribute::getWithAlignment(Context&: CI->getContext(), Alignment: LI->getAlign()));
24141 Shuffles[0]->replaceAllUsesWith(V: CI);
24142 return true;
24143   }
24144
24145 Value *VL = ConstantInt::get(Ty: XLenTy, V: VTy->getNumElements());
24146 Value *Mask = Builder.getAllOnesMask(NumElts: VTy->getElementCount());
24147 CallInst *VlsegN = Builder.CreateIntrinsic(
24148 ID: FixedVlsegIntrIds[Factor - 2], Types: {VTy, PtrTy, XLenTy},
24149 Args: {LI->getPointerOperand(), Mask, VL});
24150
24151 for (unsigned i = 0; i < Shuffles.size(); i++) {
24152 Value *SubVec = Builder.CreateExtractValue(Agg: VlsegN, Idxs: Indices[i]);
24153 Shuffles[i]->replaceAllUsesWith(V: SubVec);
24154 }
24155
24156 return true;
24157}
24158
24159static const Intrinsic::ID FixedVssegIntrIds[] = {
24160 Intrinsic::riscv_seg2_store_mask, Intrinsic::riscv_seg3_store_mask,
24161 Intrinsic::riscv_seg4_store_mask, Intrinsic::riscv_seg5_store_mask,
24162 Intrinsic::riscv_seg6_store_mask, Intrinsic::riscv_seg7_store_mask,
24163 Intrinsic::riscv_seg8_store_mask};
24164
24165/// Lower an interleaved store into a vssegN intrinsic.
24166///
24167/// E.g. Lower an interleaved store (Factor = 3):
24168/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
24169/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
24170/// store <12 x i32> %i.vec, <12 x i32>* %ptr
24171///
24172/// Into:
24173/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
24174/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
24175/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
24176/// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
24177/// %ptr, i32 4)
24178///
24179/// Note that the new shufflevectors will be removed and we'll only generate one
24180/// vsseg3 instruction in CodeGen.
24181bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
24182 ShuffleVectorInst *SVI,
24183 unsigned Factor) const {
24184 IRBuilder<> Builder(SI);
24185 auto Mask = SVI->getShuffleMask();
24186 auto *ShuffleVTy = cast<FixedVectorType>(Val: SVI->getType());
24187 // Given SVI : <n*factor x ty>, then VTy : <n x ty>
24188 auto *VTy = FixedVectorType::get(ElementType: ShuffleVTy->getElementType(),
24189 NumElts: ShuffleVTy->getNumElements() / Factor);
24190 if (!isLegalInterleavedAccessType(VTy, Factor, Alignment: SI->getAlign(),
24191 AddrSpace: SI->getPointerAddressSpace(),
24192 DL: SI->getDataLayout()))
24193 return false;
24194
24195 auto *PtrTy = SI->getPointerOperandType();
24196 auto *XLenTy = Type::getIntNTy(C&: SI->getContext(), N: Subtarget.getXLen());
24197
24198 unsigned Index;
24199 // If the segment store only has one active lane (i.e. the interleave is
24200 // just a spread shuffle), we can use a strided store instead. This will
24201   // be equally fast, and creates less vector register pressure.
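  // Illustrative example (assumed values): a factor-3 spread of i32 data with
  // Index = 1, i.e. roughly the mask <u, 0, u, u, 1, u, u, 2, u, ...>, becomes
  // a vp.strided.store with a 12-byte stride starting at %ptr + 4.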
24202 if (!Subtarget.hasOptimizedSegmentLoadStore(NF: Factor) &&
24203 isSpreadMask(Mask, Factor, Index)) {
24204 unsigned ScalarSizeInBytes = ShuffleVTy->getScalarSizeInBits() / 8;
24205 Value *Data = SVI->getOperand(i_nocapture: 0);
24206 auto *DataVTy = cast<FixedVectorType>(Val: Data->getType());
24207 Value *Stride = ConstantInt::get(Ty: XLenTy, V: Factor * ScalarSizeInBytes);
24208 Value *Offset = ConstantInt::get(Ty: XLenTy, V: Index * ScalarSizeInBytes);
24209 Value *BasePtr = Builder.CreatePtrAdd(Ptr: SI->getPointerOperand(), Offset);
24210 Value *Mask = Builder.getAllOnesMask(NumElts: DataVTy->getElementCount());
24211 Value *VL = Builder.getInt32(C: VTy->getNumElements());
24212
24213 CallInst *CI = Builder.CreateIntrinsic(
24214 ID: Intrinsic::experimental_vp_strided_store,
24215 Types: {Data->getType(), BasePtr->getType(), Stride->getType()},
24216 Args: {Data, BasePtr, Stride, Mask, VL});
24217 CI->addParamAttr(
24218 ArgNo: 1, Attr: Attribute::getWithAlignment(Context&: CI->getContext(), Alignment: SI->getAlign()));
24219
24220 return true;
24221 }
24222
24223 Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
24224 M: SI->getModule(), id: FixedVssegIntrIds[Factor - 2], Tys: {VTy, PtrTy, XLenTy});
24225
24226 SmallVector<Value *, 10> Ops;
24227 SmallVector<int, 16> NewShuffleMask;
24228
24229 for (unsigned i = 0; i < Factor; i++) {
24230 // Collect shuffle mask for this lane.
24231 for (unsigned j = 0; j < VTy->getNumElements(); j++)
24232 NewShuffleMask.push_back(Elt: Mask[i + Factor * j]);
24233
24234 Value *Shuffle = Builder.CreateShuffleVector(
24235 V1: SVI->getOperand(i_nocapture: 0), V2: SVI->getOperand(i_nocapture: 1), Mask: NewShuffleMask);
24236 Ops.push_back(Elt: Shuffle);
24237
24238 NewShuffleMask.clear();
24239 }
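  // With the factor-3 example from the function comment, the three per-lane
  // masks collected above are <0, 1, 2, 3>, <4, 5, 6, 7> and <8, 9, 10, 11>
  // (an illustrative restatement, not additional functionality).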
24240   // This VL should be OK (it should be executable in one vsseg instruction,
24241   // potentially under a larger LMUL) because we checked that the fixed vector
24242   // type fits in isLegalInterleavedAccessType.
24243 Value *VL = ConstantInt::get(Ty: XLenTy, V: VTy->getNumElements());
24244 Value *StoreMask = Builder.getAllOnesMask(NumElts: VTy->getElementCount());
24245 Ops.append(IL: {SI->getPointerOperand(), StoreMask, VL});
24246
24247 Builder.CreateCall(Callee: VssegNFunc, Args: Ops);
24248
24249 return true;
24250}
24251
24252bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
24253 LoadInst *LI, ArrayRef<Value *> DeinterleaveValues) const {
24254 unsigned Factor = DeinterleaveValues.size();
24255 if (Factor > 8)
24256 return false;
24257
24258 assert(LI->isSimple());
24259 IRBuilder<> Builder(LI);
24260
24261 auto *ResVTy = cast<VectorType>(Val: DeinterleaveValues[0]->getType());
24262
24263 const DataLayout &DL = LI->getDataLayout();
24264
24265 if (!isLegalInterleavedAccessType(VTy: ResVTy, Factor, Alignment: LI->getAlign(),
24266 AddrSpace: LI->getPointerAddressSpace(), DL))
24267 return false;
24268
24269 Value *Return;
24270 Type *PtrTy = LI->getPointerOperandType();
24271 Type *XLenTy = Type::getIntNTy(C&: LI->getContext(), N: Subtarget.getXLen());
24272
24273 if (auto *FVTy = dyn_cast<FixedVectorType>(Val: ResVTy)) {
24274 Value *VL = ConstantInt::get(Ty: XLenTy, V: FVTy->getNumElements());
24275 Value *Mask = Builder.getAllOnesMask(NumElts: FVTy->getElementCount());
24276 Return = Builder.CreateIntrinsic(ID: FixedVlsegIntrIds[Factor - 2],
24277 Types: {ResVTy, PtrTy, XLenTy},
24278 Args: {LI->getPointerOperand(), Mask, VL});
24279 } else {
24280 static const Intrinsic::ID IntrIds[] = {
24281 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
24282 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
24283 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
24284 Intrinsic::riscv_vlseg8};
24285
24286 unsigned SEW = DL.getTypeSizeInBits(Ty: ResVTy->getElementType());
24287 unsigned NumElts = ResVTy->getElementCount().getKnownMinValue();
24288 Type *VecTupTy = TargetExtType::get(
24289 Context&: LI->getContext(), Name: "riscv.vector.tuple",
24290 Types: ScalableVectorType::get(ElementType: Type::getInt8Ty(C&: LI->getContext()),
24291 MinNumElts: NumElts * SEW / 8),
24292 Ints: Factor);
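    // Sizing sketch (illustrative, assumed values): for ResVTy =
    // <vscale x 4 x i32> and Factor = 2, SEW = 32 and NumElts = 4, so each
    // tuple field is <vscale x 16 x i8> and VecTupTy is
    // target("riscv.vector.tuple", <vscale x 16 x i8>, 2).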
24293
24294 Value *VL = Constant::getAllOnesValue(Ty: XLenTy);
24295
24296 Value *Vlseg = Builder.CreateIntrinsic(
24297 ID: IntrIds[Factor - 2], Types: {VecTupTy, PtrTy, XLenTy},
24298 Args: {PoisonValue::get(T: VecTupTy), LI->getPointerOperand(), VL,
24299 ConstantInt::get(Ty: XLenTy, V: Log2_64(Value: SEW))});
24300
24301 SmallVector<Type *, 2> AggrTypes{Factor, ResVTy};
24302 Return = PoisonValue::get(T: StructType::get(Context&: LI->getContext(), Elements: AggrTypes));
24303 for (unsigned i = 0; i < Factor; ++i) {
24304 Value *VecExtract = Builder.CreateIntrinsic(
24305 ID: Intrinsic::riscv_tuple_extract, Types: {ResVTy, VecTupTy},
24306 Args: {Vlseg, Builder.getInt32(C: i)});
24307 Return = Builder.CreateInsertValue(Agg: Return, Val: VecExtract, Idxs: i);
24308 }
24309 }
24310
24311 for (auto [Idx, DIV] : enumerate(First&: DeinterleaveValues)) {
24312 // We have to create a brand new ExtractValue to replace each
24313 // of these old ExtractValue instructions.
24314 Value *NewEV =
24315 Builder.CreateExtractValue(Agg: Return, Idxs: {static_cast<unsigned>(Idx)});
24316 DIV->replaceAllUsesWith(V: NewEV);
24317 }
24318
24319 return true;
24320}
24321
24322bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
24323 StoreInst *SI, ArrayRef<Value *> InterleaveValues) const {
24324 unsigned Factor = InterleaveValues.size();
24325 if (Factor > 8)
24326 return false;
24327
24328 assert(SI->isSimple());
24329 IRBuilder<> Builder(SI);
24330
24331 auto *InVTy = cast<VectorType>(Val: InterleaveValues[0]->getType());
24332 auto *PtrTy = SI->getPointerOperandType();
24333 const DataLayout &DL = SI->getDataLayout();
24334
24335 if (!isLegalInterleavedAccessType(VTy: InVTy, Factor, Alignment: SI->getAlign(),
24336 AddrSpace: SI->getPointerAddressSpace(), DL))
24337 return false;
24338
24339 Type *XLenTy = Type::getIntNTy(C&: SI->getContext(), N: Subtarget.getXLen());
24340
24341 if (auto *FVTy = dyn_cast<FixedVectorType>(Val: InVTy)) {
24342 Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
24343 M: SI->getModule(), id: FixedVssegIntrIds[Factor - 2], Tys: {InVTy, PtrTy, XLenTy});
24344
24345 SmallVector<Value *, 10> Ops(InterleaveValues);
24346 Value *VL = ConstantInt::get(Ty: XLenTy, V: FVTy->getNumElements());
24347 Value *Mask = Builder.getAllOnesMask(NumElts: FVTy->getElementCount());
24348 Ops.append(IL: {SI->getPointerOperand(), Mask, VL});
24349
24350 Builder.CreateCall(Callee: VssegNFunc, Args: Ops);
24351 } else {
24352 static const Intrinsic::ID IntrIds[] = {
24353 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
24354 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
24355 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
24356 Intrinsic::riscv_vsseg8};
24357
24358 unsigned SEW = DL.getTypeSizeInBits(Ty: InVTy->getElementType());
24359 unsigned NumElts = InVTy->getElementCount().getKnownMinValue();
24360 Type *VecTupTy = TargetExtType::get(
24361 Context&: SI->getContext(), Name: "riscv.vector.tuple",
24362 Types: ScalableVectorType::get(ElementType: Type::getInt8Ty(C&: SI->getContext()),
24363 MinNumElts: NumElts * SEW / 8),
24364 Ints: Factor);
24365
24366 Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
24367 M: SI->getModule(), id: IntrIds[Factor - 2], Tys: {VecTupTy, PtrTy, XLenTy});
24368
24369 Value *VL = Constant::getAllOnesValue(Ty: XLenTy);
24370
24371 Value *StoredVal = PoisonValue::get(T: VecTupTy);
24372 for (unsigned i = 0; i < Factor; ++i)
24373 StoredVal = Builder.CreateIntrinsic(
24374 ID: Intrinsic::riscv_tuple_insert, Types: {VecTupTy, InVTy},
24375 Args: {StoredVal, InterleaveValues[i], Builder.getInt32(C: i)});
24376
24377 Builder.CreateCall(Callee: VssegNFunc, Args: {StoredVal, SI->getPointerOperand(), VL,
24378 ConstantInt::get(Ty: XLenTy, V: Log2_64(Value: SEW))});
24379 }
24380
24381 return true;
24382}
24383
24384static bool isMultipleOfN(const Value *V, const DataLayout &DL, unsigned N) {
24385 assert(N);
24386 if (N == 1)
24387 return true;
24388
24389 using namespace PatternMatch;
24390 // Right now we're only recognizing the simplest pattern.
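  // Illustrative behaviour (assumed IR): for N = 4,
  //   %v = mul i32 %x, 8   -> true  (8 is a multiple of 4)
  //   %v = shl i32 %x, 2   -> true  (known to have >= 2 trailing zero bits)
  // while a plain %x with no known bits returns false.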
24391 uint64_t C;
24392 if (match(V, P: m_CombineOr(L: m_ConstantInt(V&: C),
24393 R: m_c_Mul(L: m_Value(), R: m_ConstantInt(V&: C)))) &&
24394 C && C % N == 0)
24395 return true;
24396
24397 if (isPowerOf2_32(Value: N)) {
24398 KnownBits KB = llvm::computeKnownBits(V, DL);
24399 return KB.countMinTrailingZeros() >= Log2_32(Value: N);
24400 }
24401
24402 return false;
24403}
24404
24405/// Lower an interleaved vp.load into a vlsegN intrinsic.
24406///
24407/// E.g. Lower an interleaved vp.load (Factor = 2):
24408/// %l = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr %ptr,
24409/// %mask,
24410/// i32 %wide.rvl)
24411/// %dl = tail call { <vscale x 32 x i8>, <vscale x 32 x i8> }
24412/// @llvm.vector.deinterleave2.nxv64i8(
24413/// <vscale x 64 x i8> %l)
24414/// %r0 = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i8> } %dl, 0
24415/// %r1 = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i8> } %dl, 1
24416///
24417/// Into:
24418/// %rvl = udiv %wide.rvl, 2
24419/// %sl = call { <vscale x 32 x i8>, <vscale x 32 x i8> }
24420/// @llvm.riscv.vlseg2.mask.nxv32i8.i64(<vscale x 32 x i8> undef,
24421/// <vscale x 32 x i8> undef,
24422/// ptr %ptr,
24423/// %mask,
24424/// i64 %rvl,
24425/// i64 1)
24426/// %r0 = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i8> } %sl, 0
24427/// %r1 = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i8> } %sl, 1
24428///
24429/// NOTE: the deinterleave2 intrinsic won't be touched and is expected to be
24430/// removed by the caller.
24431/// TODO: We can probably loosen the dependency on matching extractvalue when
24432/// dealing with a factor of 2 (extractvalue is still required for most of the
24433/// other factors, though).
24434bool RISCVTargetLowering::lowerInterleavedVPLoad(
24435 VPIntrinsic *Load, Value *Mask,
24436 ArrayRef<Value *> DeinterleaveResults) const {
24437 assert(Mask && "Expect a valid mask");
24438 assert(Load->getIntrinsicID() == Intrinsic::vp_load &&
24439 "Unexpected intrinsic");
24440
24441 const unsigned Factor = DeinterleaveResults.size();
24442
24443 auto *VTy = dyn_cast<VectorType>(Val: DeinterleaveResults[0]->getType());
24444 if (!VTy)
24445 return false;
24446
24447 auto &DL = Load->getModule()->getDataLayout();
24448 Align Alignment = Load->getParamAlign(ArgNo: 0).value_or(
24449 u: DL.getABITypeAlign(Ty: VTy->getElementType()));
24450 if (!isLegalInterleavedAccessType(
24451 VTy, Factor, Alignment,
24452 AddrSpace: Load->getArgOperand(i: 0)->getType()->getPointerAddressSpace(), DL))
24453 return false;
24454
24455 IRBuilder<> Builder(Load);
24456
24457 Value *WideEVL = Load->getVectorLengthParam();
24458 // Conservatively check if EVL is a multiple of factor, otherwise some
24459 // (trailing) elements might be lost after the transformation.
24460 if (!isMultipleOfN(V: WideEVL, DL: Load->getDataLayout(), N: Factor))
24461 return false;
24462
24463 auto *PtrTy = Load->getArgOperand(i: 0)->getType();
24464 auto *XLenTy = Type::getIntNTy(C&: Load->getContext(), N: Subtarget.getXLen());
24465 Value *EVL = Builder.CreateZExt(
24466 V: Builder.CreateUDiv(LHS: WideEVL, RHS: ConstantInt::get(Ty: WideEVL->getType(), V: Factor)),
24467 DestTy: XLenTy);
24468
24469 Value *Return = nullptr;
24470 if (auto *FVTy = dyn_cast<FixedVectorType>(Val: VTy)) {
24471 Return = Builder.CreateIntrinsic(ID: FixedVlsegIntrIds[Factor - 2],
24472 Types: {FVTy, PtrTy, XLenTy},
24473 Args: {Load->getArgOperand(i: 0), Mask, EVL});
24474 } else {
24475 static const Intrinsic::ID IntrMaskIds[] = {
24476 Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
24477 Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
24478 Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
24479 Intrinsic::riscv_vlseg8_mask,
24480 };
24481
24482 unsigned SEW = DL.getTypeSizeInBits(Ty: VTy->getElementType());
24483 unsigned NumElts = VTy->getElementCount().getKnownMinValue();
24484 Type *VecTupTy = TargetExtType::get(
24485 Context&: Load->getContext(), Name: "riscv.vector.tuple",
24486 Types: ScalableVectorType::get(ElementType: Type::getInt8Ty(C&: Load->getContext()),
24487 MinNumElts: NumElts * SEW / 8),
24488 Ints: Factor);
24489
24490 Value *PoisonVal = PoisonValue::get(T: VecTupTy);
24491
24492 Function *VlsegNFunc = Intrinsic::getOrInsertDeclaration(
24493 M: Load->getModule(), id: IntrMaskIds[Factor - 2],
24494 Tys: {VecTupTy, PtrTy, Mask->getType(), EVL->getType()});
24495
24496 Value *Operands[] = {
24497 PoisonVal,
24498 Load->getArgOperand(i: 0),
24499 Mask,
24500 EVL,
24501 ConstantInt::get(Ty: XLenTy,
24502 V: RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC),
24503 ConstantInt::get(Ty: XLenTy, V: Log2_64(Value: SEW))};
24504
24505 CallInst *VlsegN = Builder.CreateCall(Callee: VlsegNFunc, Args: Operands);
24506
24507 SmallVector<Type *, 8> AggrTypes{Factor, VTy};
24508 Return = PoisonValue::get(T: StructType::get(Context&: Load->getContext(), Elements: AggrTypes));
24509 Function *VecExtractFunc = Intrinsic::getOrInsertDeclaration(
24510 M: Load->getModule(), id: Intrinsic::riscv_tuple_extract, Tys: {VTy, VecTupTy});
24511 for (unsigned i = 0; i < Factor; ++i) {
24512 Value *VecExtract =
24513 Builder.CreateCall(Callee: VecExtractFunc, Args: {VlsegN, Builder.getInt32(C: i)});
24514 Return = Builder.CreateInsertValue(Agg: Return, Val: VecExtract, Idxs: i);
24515 }
24516 }
24517
24518 for (auto [Idx, DIO] : enumerate(First&: DeinterleaveResults)) {
24519 if (!DIO)
24520 continue;
24521 // We have to create a brand new ExtractValue to replace each
24522 // of these old ExtractValue instructions.
24523 Value *NewEV =
24524 Builder.CreateExtractValue(Agg: Return, Idxs: {static_cast<unsigned>(Idx)});
24525 DIO->replaceAllUsesWith(V: NewEV);
24526 }
24527
24528 return true;
24529}
24530
24531/// Lower an interleaved vp.store into a vssegN intrinsic.
24532///
24533/// E.g. Lower an interleaved vp.store (Factor = 2):
24534///
24535/// %is = tail call <vscale x 64 x i8>
24536/// @llvm.vector.interleave2.nxv64i8(
24537/// <vscale x 32 x i8> %load0,
24538/// <vscale x 32 x i8> %load1
24539/// %wide.rvl = shl nuw nsw i32 %rvl, 1
24540/// tail call void @llvm.vp.store.nxv64i8.p0(
24541/// <vscale x 64 x i8> %is, ptr %ptr,
24542/// %mask,
24543/// i32 %wide.rvl)
24544///
24545/// Into:
24546/// call void @llvm.riscv.vsseg2.mask.nxv32i8.i64(
24547/// <vscale x 32 x i8> %load0,
24548/// <vscale x 32 x i8> %load1, ptr %ptr,
24549/// %mask,
24550/// i64 %rvl)
24551bool RISCVTargetLowering::lowerInterleavedVPStore(
24552 VPIntrinsic *Store, Value *Mask,
24553 ArrayRef<Value *> InterleaveOperands) const {
24554 assert(Mask && "Expect a valid mask");
24555 assert(Store->getIntrinsicID() == Intrinsic::vp_store &&
24556 "Unexpected intrinsic");
24557
24558 const unsigned Factor = InterleaveOperands.size();
24559
24560 auto *VTy = dyn_cast<VectorType>(Val: InterleaveOperands[0]->getType());
24561 if (!VTy)
24562 return false;
24563
24564 const DataLayout &DL = Store->getDataLayout();
24565 Align Alignment = Store->getParamAlign(ArgNo: 1).value_or(
24566 u: DL.getABITypeAlign(Ty: VTy->getElementType()));
24567 if (!isLegalInterleavedAccessType(
24568 VTy, Factor, Alignment,
24569 AddrSpace: Store->getArgOperand(i: 1)->getType()->getPointerAddressSpace(), DL))
24570 return false;
24571
24572 IRBuilder<> Builder(Store);
24573 Value *WideEVL = Store->getArgOperand(i: 3);
24574 // Conservatively check if EVL is a multiple of factor, otherwise some
24575 // (trailing) elements might be lost after the transformation.
24576 if (!isMultipleOfN(V: WideEVL, DL: Store->getDataLayout(), N: Factor))
24577 return false;
24578
24579 auto *PtrTy = Store->getArgOperand(i: 1)->getType();
24580 auto *XLenTy = Type::getIntNTy(C&: Store->getContext(), N: Subtarget.getXLen());
24581 Value *EVL = Builder.CreateZExt(
24582 V: Builder.CreateUDiv(LHS: WideEVL, RHS: ConstantInt::get(Ty: WideEVL->getType(), V: Factor)),
24583 DestTy: XLenTy);
24584
24585 if (auto *FVTy = dyn_cast<FixedVectorType>(Val: VTy)) {
24586 SmallVector<Value *, 8> Operands(InterleaveOperands);
24587 Operands.append(IL: {Store->getArgOperand(i: 1), Mask, EVL});
24588 Builder.CreateIntrinsic(ID: FixedVssegIntrIds[Factor - 2],
24589 Types: {FVTy, PtrTy, XLenTy}, Args: Operands);
24590 return true;
24591 }
24592
24593 static const Intrinsic::ID IntrMaskIds[] = {
24594 Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
24595 Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
24596 Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
24597 Intrinsic::riscv_vsseg8_mask,
24598 };
24599
24600 unsigned SEW = DL.getTypeSizeInBits(Ty: VTy->getElementType());
24601 unsigned NumElts = VTy->getElementCount().getKnownMinValue();
24602 Type *VecTupTy = TargetExtType::get(
24603 Context&: Store->getContext(), Name: "riscv.vector.tuple",
24604 Types: ScalableVectorType::get(ElementType: Type::getInt8Ty(C&: Store->getContext()),
24605 MinNumElts: NumElts * SEW / 8),
24606 Ints: Factor);
24607
24608 Function *VecInsertFunc = Intrinsic::getOrInsertDeclaration(
24609 M: Store->getModule(), id: Intrinsic::riscv_tuple_insert, Tys: {VecTupTy, VTy});
24610 Value *StoredVal = PoisonValue::get(T: VecTupTy);
24611 for (unsigned i = 0; i < Factor; ++i)
24612 StoredVal = Builder.CreateCall(
24613 Callee: VecInsertFunc, Args: {StoredVal, InterleaveOperands[i], Builder.getInt32(C: i)});
24614
24615 Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
24616 M: Store->getModule(), id: IntrMaskIds[Factor - 2],
24617 Tys: {VecTupTy, PtrTy, Mask->getType(), EVL->getType()});
24618
24619 Value *Operands[] = {StoredVal, Store->getArgOperand(i: 1), Mask, EVL,
24620 ConstantInt::get(Ty: XLenTy, V: Log2_64(Value: SEW))};
24621
24622 Builder.CreateCall(Callee: VssegNFunc, Args: Operands);
24623 return true;
24624}
24625
24626MachineInstr *
24627RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
24628 MachineBasicBlock::instr_iterator &MBBI,
24629 const TargetInstrInfo *TII) const {
24630 assert(MBBI->isCall() && MBBI->getCFIType() &&
24631 "Invalid call instruction for a KCFI check");
24632 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
24633 MBBI->getOpcode()));
24634
24635 MachineOperand &Target = MBBI->getOperand(i: 0);
24636 Target.setIsRenamable(false);
24637
24638 return BuildMI(BB&: MBB, I: MBBI, MIMD: MBBI->getDebugLoc(), MCID: TII->get(Opcode: RISCV::KCFI_CHECK))
24639 .addReg(RegNo: Target.getReg())
24640 .addImm(Val: MBBI->getCFIType())
24641 .getInstr();
24642}
24643
24644#define GET_REGISTER_MATCHER
24645#include "RISCVGenAsmMatcher.inc"
24646
24647Register
24648RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
24649 const MachineFunction &MF) const {
24650 Register Reg = MatchRegisterAltName(Name: RegName);
24651 if (!Reg)
24652 Reg = MatchRegisterName(Name: RegName);
24653 if (!Reg)
24654 return Reg;
24655
24656 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
24657 if (!ReservedRegs.test(Idx: Reg) && !Subtarget.isRegisterReservedByUser(i: Reg))
24658 report_fatal_error(reason: Twine("Trying to obtain non-reserved register \"" +
24659 StringRef(RegName) + "\"."));
24660 return Reg;
24661}
24662
24663MachineMemOperand::Flags
24664RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const {
24665 const MDNode *NontemporalInfo = I.getMetadata(KindID: LLVMContext::MD_nontemporal);
24666
24667 if (NontemporalInfo == nullptr)
24668 return MachineMemOperand::MONone;
24669
24670   // 1 and the default value both work as __RISCV_NTLH_ALL
24671 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
24672 // 3 -> __RISCV_NTLH_ALL_PRIVATE
24673 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
24674 // 5 -> __RISCV_NTLH_ALL
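  // Worked example (derived from the mapping below): level 5 becomes 3 after
  // the subtraction and sets both MONontemporalBit0 and MONontemporalBit1;
  // level 2 becomes 0 and sets neither bit; level 3 sets only Bit0 and
  // level 4 only Bit1.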
24675 int NontemporalLevel = 5;
24676 const MDNode *RISCVNontemporalInfo =
24677 I.getMetadata(Kind: "riscv-nontemporal-domain");
24678 if (RISCVNontemporalInfo != nullptr)
24679 NontemporalLevel =
24680 cast<ConstantInt>(
24681 Val: cast<ConstantAsMetadata>(Val: RISCVNontemporalInfo->getOperand(I: 0))
24682 ->getValue())
24683 ->getZExtValue();
24684
24685 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
24686 "RISC-V target doesn't support this non-temporal domain.");
24687
24688 NontemporalLevel -= 2;
24689 MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
24690 if (NontemporalLevel & 0b1)
24691 Flags |= MONontemporalBit0;
24692 if (NontemporalLevel & 0b10)
24693 Flags |= MONontemporalBit1;
24694
24695 return Flags;
24696}
24697
24698MachineMemOperand::Flags
24699RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const {
24700
24701 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
24702 MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
24703 TargetFlags |= (NodeFlags & MONontemporalBit0);
24704 TargetFlags |= (NodeFlags & MONontemporalBit1);
24705 return TargetFlags;
24706}
24707
24708bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
24709 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
24710 return getTargetMMOFlags(Node: NodeX) == getTargetMMOFlags(Node: NodeY);
24711}
24712
24713bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
24714 if (VT.isScalableVector())
24715 return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
24716 if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
24717 return true;
24718 return Subtarget.hasStdExtZbb() &&
24719 (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
24720}
24721
24722unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
24723 ISD::CondCode Cond) const {
24724 return isCtpopFast(VT) ? 0 : 1;
24725}
24726
24727bool RISCVTargetLowering::shouldInsertFencesForAtomic(
24728 const Instruction *I) const {
24729 if (Subtarget.hasStdExtZalasr()) {
24730 if (Subtarget.hasStdExtZtso()) {
24731 // Zalasr + TSO means that atomic_load_acquire and atomic_store_release
24732 // should be lowered to plain load/store. The easiest way to do this is
24733 // to say we should insert fences for them, and the fence insertion code
24734       // will just not insert any fences.
24735 auto *LI = dyn_cast<LoadInst>(Val: I);
24736 auto *SI = dyn_cast<StoreInst>(Val: I);
24737 if ((LI &&
24738 (LI->getOrdering() == AtomicOrdering::SequentiallyConsistent)) ||
24739 (SI &&
24740 (SI->getOrdering() == AtomicOrdering::SequentiallyConsistent))) {
24741         // Here, this is a seq_cst load or store, which needs a .aq or .rl,
24742         // so we shouldn't try to insert fences.
24743 return false;
24744 }
24745       // Here we have a Ztso instruction that isn't a seq_cst load/store.
24746 return isa<LoadInst>(Val: I) || isa<StoreInst>(Val: I);
24747 }
24748 return false;
24749 }
24750 // Note that one specific case requires fence insertion for an
24751 // AtomicCmpXchgInst but is handled via the RISCVZacasABIFix pass rather
24752 // than this hook due to limitations in the interface here.
24753 return isa<LoadInst>(Val: I) || isa<StoreInst>(Val: I);
24754}
24755
24756bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
24757
24758 // GISel support is in progress or complete for these opcodes.
24759 unsigned Op = Inst.getOpcode();
24760 if (Op == Instruction::Add || Op == Instruction::Sub ||
24761 Op == Instruction::And || Op == Instruction::Or ||
24762 Op == Instruction::Xor || Op == Instruction::InsertElement ||
24763 Op == Instruction::ShuffleVector || Op == Instruction::Load ||
24764 Op == Instruction::Freeze || Op == Instruction::Store)
24765 return false;
24766
24767 if (Inst.getType()->isScalableTy())
24768 return true;
24769
24770 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
24771 if (Inst.getOperand(i)->getType()->isScalableTy() &&
24772 !isa<ReturnInst>(Val: &Inst))
24773 return true;
24774
24775 if (const AllocaInst *AI = dyn_cast<AllocaInst>(Val: &Inst)) {
24776 if (AI->getAllocatedType()->isScalableTy())
24777 return true;
24778 }
24779
24780 return false;
24781}
24782
24783SDValue
24784RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
24785 SelectionDAG &DAG,
24786 SmallVectorImpl<SDNode *> &Created) const {
24787 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
24788 if (isIntDivCheap(VT: N->getValueType(ResNo: 0), Attr))
24789 return SDValue(N, 0); // Lower SDIV as SDIV
24790
24791 // Only perform this transform if short forward branch opt is supported.
24792 if (!Subtarget.hasShortForwardBranchOpt())
24793 return SDValue();
24794 EVT VT = N->getValueType(ResNo: 0);
24795 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
24796 return SDValue();
24797
24798 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
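  // For example (illustrative): a divisor of 2048 needs an addi of 2047, which
  // still fits in the 12-bit signed immediate, while a divisor of 4096 would
  // need 4095 and is rejected by the check below.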
24799 if (Divisor.sgt(RHS: 2048) || Divisor.slt(RHS: -2048))
24800 return SDValue();
24801 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
24802}
24803
24804bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
24805 EVT VT, const APInt &AndMask) const {
24806 if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
24807 return !Subtarget.hasStdExtZbs() && AndMask.ugt(RHS: 1024);
24808 return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
24809}
24810
24811unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
24812 return Subtarget.getMinimumJumpTableEntries();
24813}
24814
24815SDValue RISCVTargetLowering::expandIndirectJTBranch(const SDLoc &dl,
24816 SDValue Value, SDValue Addr,
24817 int JTI,
24818 SelectionDAG &DAG) const {
24819 if (Subtarget.hasStdExtZicfilp()) {
24820     // When Zicfilp is enabled, we need to use a software-guarded branch for
24821     // the jump table branch.
24822 SDValue Chain = Value;
24823 // Jump table debug info is only needed if CodeView is enabled.
24824 if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF())
24825 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, DL: dl);
24826 return DAG.getNode(Opcode: RISCVISD::SW_GUARDED_BRIND, DL: dl, VT: MVT::Other, N1: Chain, N2: Addr);
24827 }
24828 return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, JTI, DAG);
24829}
24830
24831// If an output pattern produces multiple instructions, tablegen may pick an
24832// arbitrary type from an instruction's destination register class to use for the
24833// VT of that MachineSDNode. This VT may be used to look up the representative
24834// register class. If the type isn't legal, the default implementation will
24835// not find a register class.
24836//
24837// Some integer types smaller than XLen are listed in the GPR register class to
24838// support isel patterns for GISel, but are not legal in SelectionDAG. The
24839// arbitrary type tablegen picks may be one of these smaller types.
24840//
24841// f16 and bf16 are both valid for the FPR16 or GPRF16 register class. It's
24842// possible for tablegen to pick bf16 as the arbitrary type for an f16 pattern.
24843std::pair<const TargetRegisterClass *, uint8_t>
24844RISCVTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
24845 MVT VT) const {
24846 switch (VT.SimpleTy) {
24847 default:
24848 break;
24849 case MVT::i8:
24850 case MVT::i16:
24851 case MVT::i32:
24852 return TargetLowering::findRepresentativeClass(TRI, VT: Subtarget.getXLenVT());
24853 case MVT::bf16:
24854 case MVT::f16:
24855 return TargetLowering::findRepresentativeClass(TRI, VT: MVT::f32);
24856 }
24857
24858 return TargetLowering::findRepresentativeClass(TRI, VT);
24859}
24860
24861namespace llvm::RISCVVIntrinsicsTable {
24862
24863#define GET_RISCVVIntrinsicsTable_IMPL
24864#include "RISCVGenSearchableTables.inc"
24865
24866} // namespace llvm::RISCVVIntrinsicsTable
24867
24868bool RISCVTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
24869
24870 // If the function specifically requests inline stack probes, emit them.
24871 if (MF.getFunction().hasFnAttribute(Kind: "probe-stack"))
24872 return MF.getFunction().getFnAttribute(Kind: "probe-stack").getValueAsString() ==
24873 "inline-asm";
24874
24875 return false;
24876}
24877
24878unsigned RISCVTargetLowering::getStackProbeSize(const MachineFunction &MF,
24879 Align StackAlign) const {
24880 // The default stack probe size is 4096 if the function has no
24881 // stack-probe-size attribute.
24882 const Function &Fn = MF.getFunction();
24883 unsigned StackProbeSize =
24884 Fn.getFnAttributeAsParsedInteger(Kind: "stack-probe-size", Default: 4096);
24885 // Round down to the stack alignment.
24886 StackProbeSize = alignDown(Value: StackProbeSize, Align: StackAlign.value());
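  // For example (illustrative): "stack-probe-size"=5000 with a 16-byte stack
  // alignment rounds down to 4992; a value smaller than the alignment rounds
  // down to zero and falls back to the alignment itself on the next line.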
24887 return StackProbeSize ? StackProbeSize : StackAlign.value();
24888}
24889
24890SDValue RISCVTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
24891 SelectionDAG &DAG) const {
24892 MachineFunction &MF = DAG.getMachineFunction();
24893 if (!hasInlineStackProbe(MF))
24894 return SDValue();
24895
24896 MVT XLenVT = Subtarget.getXLenVT();
24897 // Get the inputs.
24898 SDValue Chain = Op.getOperand(i: 0);
24899 SDValue Size = Op.getOperand(i: 1);
24900
24901 MaybeAlign Align =
24902 cast<ConstantSDNode>(Val: Op.getOperand(i: 2))->getMaybeAlignValue();
24903 SDLoc dl(Op);
24904 EVT VT = Op.getValueType();
24905
24906 // Construct the new SP value in a GPR.
24907 SDValue SP = DAG.getCopyFromReg(Chain, dl, Reg: RISCV::X2, VT: XLenVT);
24908 Chain = SP.getValue(R: 1);
24909 SP = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: XLenVT, N1: SP, N2: Size);
24910 if (Align)
24911 SP = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: SP.getValue(R: 0),
24912 N2: DAG.getSignedConstant(Val: -(uint64_t)Align->value(), DL: dl, VT));
24913
24914 // Set the real SP to the new value with a probing loop.
24915 Chain = DAG.getNode(Opcode: RISCVISD::PROBED_ALLOCA, DL: dl, VT: MVT::Other, N1: Chain, N2: SP);
24916 return DAG.getMergeValues(Ops: {SP, Chain}, dl);
24917}
24918
24919MachineBasicBlock *
24920RISCVTargetLowering::emitDynamicProbedAlloc(MachineInstr &MI,
24921 MachineBasicBlock *MBB) const {
24922 MachineFunction &MF = *MBB->getParent();
24923 MachineBasicBlock::iterator MBBI = MI.getIterator();
24924 DebugLoc DL = MBB->findDebugLoc(MBBI);
24925 Register TargetReg = MI.getOperand(i: 0).getReg();
24926
24927 const RISCVInstrInfo *TII = Subtarget.getInstrInfo();
24928 bool IsRV64 = Subtarget.is64Bit();
24929 Align StackAlign = Subtarget.getFrameLowering()->getStackAlign();
24930 const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
24931 uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
24932
24933 MachineFunction::iterator MBBInsertPoint = std::next(x: MBB->getIterator());
24934 MachineBasicBlock *LoopTestMBB =
24935 MF.CreateMachineBasicBlock(BB: MBB->getBasicBlock());
24936 MF.insert(MBBI: MBBInsertPoint, MBB: LoopTestMBB);
24937 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(BB: MBB->getBasicBlock());
24938 MF.insert(MBBI: MBBInsertPoint, MBB: ExitMBB);
24939 Register SPReg = RISCV::X2;
24940 Register ScratchReg =
24941 MF.getRegInfo().createVirtualRegister(RegClass: &RISCV::GPRRegClass);
24942
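  // Shape of the emitted probing sequence (an illustrative sketch):
  //       li    scratch, ProbeSize
  //   LoopTest:
  //       sub   sp, sp, scratch
  //       sd/sw zero, 0(sp)
  //       blt   target, sp, LoopTest
  //   Exit:
  //       mv    sp, target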
24943 // ScratchReg = ProbeSize
24944 TII->movImm(MBB&: *MBB, MBBI, DL, DstReg: ScratchReg, Val: ProbeSize, Flag: MachineInstr::NoFlags);
24945
24946 // LoopTest:
24947 // SUB SP, SP, ProbeSize
24948 BuildMI(BB&: *LoopTestMBB, I: LoopTestMBB->end(), MIMD: DL, MCID: TII->get(Opcode: RISCV::SUB), DestReg: SPReg)
24949 .addReg(RegNo: SPReg)
24950 .addReg(RegNo: ScratchReg);
24951
24952 // s[d|w] zero, 0(sp)
24953 BuildMI(BB&: *LoopTestMBB, I: LoopTestMBB->end(), MIMD: DL,
24954 MCID: TII->get(Opcode: IsRV64 ? RISCV::SD : RISCV::SW))
24955 .addReg(RegNo: RISCV::X0)
24956 .addReg(RegNo: SPReg)
24957 .addImm(Val: 0);
24958
24959 // BLT TargetReg, SP, LoopTest
24960 BuildMI(BB&: *LoopTestMBB, I: LoopTestMBB->end(), MIMD: DL, MCID: TII->get(Opcode: RISCV::BLT))
24961 .addReg(RegNo: TargetReg)
24962 .addReg(RegNo: SPReg)
24963 .addMBB(MBB: LoopTestMBB);
24964
24965 // Adjust with: MV SP, TargetReg.
24966 BuildMI(BB&: *ExitMBB, I: ExitMBB->end(), MIMD: DL, MCID: TII->get(Opcode: RISCV::ADDI), DestReg: SPReg)
24967 .addReg(RegNo: TargetReg)
24968 .addImm(Val: 0);
24969
24970 ExitMBB->splice(Where: ExitMBB->end(), Other: MBB, From: std::next(x: MBBI), To: MBB->end());
24971 ExitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB);
24972
24973 LoopTestMBB->addSuccessor(Succ: ExitMBB);
24974 LoopTestMBB->addSuccessor(Succ: LoopTestMBB);
24975 MBB->addSuccessor(Succ: LoopTestMBB);
24976
24977 MI.eraseFromParent();
24978 MF.getInfo<RISCVMachineFunctionInfo>()->setDynamicAllocation();
24979 return ExitMBB->begin()->getParent();
24980}
24981
24982ArrayRef<MCPhysReg> RISCVTargetLowering::getRoundingControlRegisters() const {
24983 if (Subtarget.hasStdExtFOrZfinx()) {
24984 static const MCPhysReg RCRegs[] = {RISCV::FRM, RISCV::FFLAGS};
24985 return RCRegs;
24986 }
24987 return {};
24988}
24989