1 | //=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the interfaces that LoongArch uses to lower LLVM code into |
10 | // a selection DAG. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "LoongArchISelLowering.h" |
15 | #include "LoongArch.h" |
16 | #include "LoongArchMachineFunctionInfo.h" |
17 | #include "LoongArchRegisterInfo.h" |
18 | #include "LoongArchSubtarget.h" |
19 | #include "LoongArchTargetMachine.h" |
20 | #include "MCTargetDesc/LoongArchBaseInfo.h" |
21 | #include "MCTargetDesc/LoongArchMCTargetDesc.h" |
22 | #include "llvm/ADT/Statistic.h" |
23 | #include "llvm/ADT/StringExtras.h" |
24 | #include "llvm/CodeGen/ISDOpcodes.h" |
25 | #include "llvm/CodeGen/RuntimeLibcallUtil.h" |
26 | #include "llvm/CodeGen/SelectionDAGNodes.h" |
27 | #include "llvm/IR/IRBuilder.h" |
28 | #include "llvm/IR/IntrinsicsLoongArch.h" |
29 | #include "llvm/Support/CodeGen.h" |
30 | #include "llvm/Support/Debug.h" |
31 | #include "llvm/Support/ErrorHandling.h" |
32 | #include "llvm/Support/KnownBits.h" |
33 | #include "llvm/Support/MathExtras.h" |
34 | |
35 | using namespace llvm; |
36 | |
37 | #define DEBUG_TYPE "loongarch-isel-lowering" |
38 | |
STATISTIC(NumTailCalls, "Number of tail calls");

static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
                                  cl::desc("Trap on integer division by zero."),
                                  cl::init(false));
44 | |
45 | LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, |
46 | const LoongArchSubtarget &STI) |
47 | : TargetLowering(TM), Subtarget(STI) { |
48 | |
49 | MVT GRLenVT = Subtarget.getGRLenVT(); |
50 | |
51 | // Set up the register classes. |
52 | |
53 | addRegisterClass(VT: GRLenVT, RC: &LoongArch::GPRRegClass); |
54 | if (Subtarget.hasBasicF()) |
55 | addRegisterClass(VT: MVT::f32, RC: &LoongArch::FPR32RegClass); |
56 | if (Subtarget.hasBasicD()) |
57 | addRegisterClass(VT: MVT::f64, RC: &LoongArch::FPR64RegClass); |
58 | |
59 | static const MVT::SimpleValueType LSXVTs[] = { |
60 | MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64}; |
61 | static const MVT::SimpleValueType LASXVTs[] = { |
62 | MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64}; |
63 | |
64 | if (Subtarget.hasExtLSX()) |
65 | for (MVT VT : LSXVTs) |
66 | addRegisterClass(VT, RC: &LoongArch::LSX128RegClass); |
67 | |
68 | if (Subtarget.hasExtLASX()) |
69 | for (MVT VT : LASXVTs) |
70 | addRegisterClass(VT, RC: &LoongArch::LASX256RegClass); |
71 | |
72 | // Set operations for LA32 and LA64. |
73 | |
74 | setLoadExtAction(ExtTypes: {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, ValVT: GRLenVT, |
75 | MemVT: MVT::i1, Action: Promote); |
76 | |
77 | setOperationAction(Op: ISD::SHL_PARTS, VT: GRLenVT, Action: Custom); |
78 | setOperationAction(Op: ISD::SRA_PARTS, VT: GRLenVT, Action: Custom); |
79 | setOperationAction(Op: ISD::SRL_PARTS, VT: GRLenVT, Action: Custom); |
80 | setOperationAction(Op: ISD::FP_TO_SINT, VT: GRLenVT, Action: Custom); |
81 | setOperationAction(Op: ISD::ROTL, VT: GRLenVT, Action: Expand); |
82 | setOperationAction(Op: ISD::CTPOP, VT: GRLenVT, Action: Expand); |
83 | |
84 | setOperationAction(Ops: {ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool, |
85 | ISD::JumpTable, ISD::GlobalTLSAddress}, |
86 | VT: GRLenVT, Action: Custom); |
87 | |
88 | setOperationAction(Op: ISD::EH_DWARF_CFA, VT: GRLenVT, Action: Custom); |
89 | |
90 | setOperationAction(Op: ISD::DYNAMIC_STACKALLOC, VT: GRLenVT, Action: Expand); |
91 | setOperationAction(Ops: {ISD::STACKSAVE, ISD::STACKRESTORE}, VT: MVT::Other, Action: Expand); |
92 | setOperationAction(Op: ISD::VASTART, VT: MVT::Other, Action: Custom); |
93 | setOperationAction(Ops: {ISD::VAARG, ISD::VACOPY, ISD::VAEND}, VT: MVT::Other, Action: Expand); |
94 | |
95 | setOperationAction(Op: ISD::DEBUGTRAP, VT: MVT::Other, Action: Legal); |
96 | setOperationAction(Op: ISD::TRAP, VT: MVT::Other, Action: Legal); |
97 | |
98 | setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::Other, Action: Custom); |
99 | setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::Other, Action: Custom); |
100 | setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::Other, Action: Custom); |
101 | |
  // Expand bitreverse.i16 with native-width bitrev and shift for now, until
  // we know which of sll and revb.2h is faster.
104 | setOperationAction(Op: ISD::BITREVERSE, VT: MVT::i8, Action: Custom); |
105 | setOperationAction(Op: ISD::BITREVERSE, VT: GRLenVT, Action: Legal); |
106 | |
107 | // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and |
108 | // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16 |
109 | // and i32 could still be byte-swapped relatively cheaply. |
110 | setOperationAction(Op: ISD::BSWAP, VT: MVT::i16, Action: Custom); |
111 | |
112 | setOperationAction(Op: ISD::BR_JT, VT: MVT::Other, Action: Expand); |
113 | setOperationAction(Op: ISD::BR_CC, VT: GRLenVT, Action: Expand); |
114 | setOperationAction(Op: ISD::SELECT_CC, VT: GRLenVT, Action: Expand); |
115 | setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::i1, Action: Expand); |
116 | setOperationAction(Ops: {ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT: GRLenVT, Action: Expand); |
117 | |
118 | setOperationAction(Op: ISD::FP_TO_UINT, VT: GRLenVT, Action: Custom); |
119 | setOperationAction(Op: ISD::UINT_TO_FP, VT: GRLenVT, Action: Expand); |
120 | |
121 | // Set operations for LA64 only. |
122 | |
123 | if (Subtarget.is64Bit()) { |
124 | setOperationAction(Op: ISD::ADD, VT: MVT::i32, Action: Custom); |
125 | setOperationAction(Op: ISD::SUB, VT: MVT::i32, Action: Custom); |
126 | setOperationAction(Op: ISD::SHL, VT: MVT::i32, Action: Custom); |
127 | setOperationAction(Op: ISD::SRA, VT: MVT::i32, Action: Custom); |
128 | setOperationAction(Op: ISD::SRL, VT: MVT::i32, Action: Custom); |
129 | setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::i32, Action: Custom); |
130 | setOperationAction(Op: ISD::BITCAST, VT: MVT::i32, Action: Custom); |
131 | setOperationAction(Op: ISD::ROTR, VT: MVT::i32, Action: Custom); |
132 | setOperationAction(Op: ISD::ROTL, VT: MVT::i32, Action: Custom); |
133 | setOperationAction(Op: ISD::CTTZ, VT: MVT::i32, Action: Custom); |
134 | setOperationAction(Op: ISD::CTLZ, VT: MVT::i32, Action: Custom); |
135 | setOperationAction(Op: ISD::EH_DWARF_CFA, VT: MVT::i32, Action: Custom); |
136 | setOperationAction(Op: ISD::READ_REGISTER, VT: MVT::i32, Action: Custom); |
137 | setOperationAction(Op: ISD::WRITE_REGISTER, VT: MVT::i32, Action: Custom); |
138 | setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::i32, Action: Custom); |
139 | setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::i32, Action: Custom); |
140 | setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::i32, Action: Custom); |
141 | |
142 | setOperationAction(Op: ISD::BITREVERSE, VT: MVT::i32, Action: Custom); |
143 | setOperationAction(Op: ISD::BSWAP, VT: MVT::i32, Action: Custom); |
144 | setOperationAction(Ops: {ISD::UDIV, ISD::UREM}, VT: MVT::i32, Action: Custom); |
145 | } |
146 | |
147 | // Set operations for LA32 only. |
148 | |
149 | if (!Subtarget.is64Bit()) { |
150 | setOperationAction(Op: ISD::READ_REGISTER, VT: MVT::i64, Action: Custom); |
151 | setOperationAction(Op: ISD::WRITE_REGISTER, VT: MVT::i64, Action: Custom); |
152 | setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::i64, Action: Custom); |
153 | setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::i64, Action: Custom); |
154 | setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::i64, Action: Custom); |
155 | } |
156 | |
157 | setOperationAction(Op: ISD::ATOMIC_FENCE, VT: MVT::Other, Action: Custom); |
158 | |
159 | static const ISD::CondCode FPCCToExpand[] = { |
160 | ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE, |
161 | ISD::SETGE, ISD::SETNE, ISD::SETGT}; |
162 | |
163 | // Set operations for 'F' feature. |
164 | |
165 | if (Subtarget.hasBasicF()) { |
166 | setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f32, MemVT: MVT::f16, Action: Expand); |
167 | setTruncStoreAction(ValVT: MVT::f32, MemVT: MVT::f16, Action: Expand); |
168 | setCondCodeAction(CCs: FPCCToExpand, VT: MVT::f32, Action: Expand); |
169 | |
170 | setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f32, Action: Expand); |
171 | setOperationAction(Op: ISD::BR_CC, VT: MVT::f32, Action: Expand); |
172 | setOperationAction(Op: ISD::FMA, VT: MVT::f32, Action: Legal); |
173 | setOperationAction(Op: ISD::FMINNUM_IEEE, VT: MVT::f32, Action: Legal); |
174 | setOperationAction(Op: ISD::FMAXNUM_IEEE, VT: MVT::f32, Action: Legal); |
175 | setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::f32, Action: Legal); |
176 | setOperationAction(Op: ISD::STRICT_FSETCC, VT: MVT::f32, Action: Legal); |
177 | setOperationAction(Op: ISD::IS_FPCLASS, VT: MVT::f32, Action: Legal); |
178 | setOperationAction(Op: ISD::FSIN, VT: MVT::f32, Action: Expand); |
179 | setOperationAction(Op: ISD::FCOS, VT: MVT::f32, Action: Expand); |
180 | setOperationAction(Op: ISD::FSINCOS, VT: MVT::f32, Action: Expand); |
181 | setOperationAction(Op: ISD::FPOW, VT: MVT::f32, Action: Expand); |
182 | setOperationAction(Op: ISD::FREM, VT: MVT::f32, Action: Expand); |
183 | setOperationAction(Op: ISD::FP16_TO_FP, VT: MVT::f32, Action: Expand); |
184 | setOperationAction(Op: ISD::FP_TO_FP16, VT: MVT::f32, Action: Expand); |
185 | |
186 | if (Subtarget.is64Bit()) |
187 | setOperationAction(Op: ISD::FRINT, VT: MVT::f32, Action: Legal); |
188 | |
189 | if (!Subtarget.hasBasicD()) { |
190 | setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i32, Action: Custom); |
191 | if (Subtarget.is64Bit()) { |
192 | setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i64, Action: Custom); |
193 | setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i64, Action: Custom); |
194 | } |
195 | } |
196 | } |
197 | |
198 | // Set operations for 'D' feature. |
199 | |
200 | if (Subtarget.hasBasicD()) { |
201 | setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f64, MemVT: MVT::f16, Action: Expand); |
202 | setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f64, MemVT: MVT::f32, Action: Expand); |
203 | setTruncStoreAction(ValVT: MVT::f64, MemVT: MVT::f16, Action: Expand); |
204 | setTruncStoreAction(ValVT: MVT::f64, MemVT: MVT::f32, Action: Expand); |
205 | setCondCodeAction(CCs: FPCCToExpand, VT: MVT::f64, Action: Expand); |
206 | |
207 | setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f64, Action: Expand); |
208 | setOperationAction(Op: ISD::BR_CC, VT: MVT::f64, Action: Expand); |
209 | setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::f64, Action: Legal); |
210 | setOperationAction(Op: ISD::STRICT_FSETCC, VT: MVT::f64, Action: Legal); |
211 | setOperationAction(Op: ISD::FMA, VT: MVT::f64, Action: Legal); |
212 | setOperationAction(Op: ISD::FMINNUM_IEEE, VT: MVT::f64, Action: Legal); |
213 | setOperationAction(Op: ISD::FMAXNUM_IEEE, VT: MVT::f64, Action: Legal); |
214 | setOperationAction(Op: ISD::IS_FPCLASS, VT: MVT::f64, Action: Legal); |
215 | setOperationAction(Op: ISD::FSIN, VT: MVT::f64, Action: Expand); |
216 | setOperationAction(Op: ISD::FCOS, VT: MVT::f64, Action: Expand); |
217 | setOperationAction(Op: ISD::FSINCOS, VT: MVT::f64, Action: Expand); |
218 | setOperationAction(Op: ISD::FPOW, VT: MVT::f64, Action: Expand); |
219 | setOperationAction(Op: ISD::FREM, VT: MVT::f64, Action: Expand); |
220 | setOperationAction(Op: ISD::FP16_TO_FP, VT: MVT::f64, Action: Expand); |
221 | setOperationAction(Op: ISD::FP_TO_FP16, VT: MVT::f64, Action: Expand); |
222 | |
223 | if (Subtarget.is64Bit()) |
224 | setOperationAction(Op: ISD::FRINT, VT: MVT::f64, Action: Legal); |
225 | } |
226 | |
227 | // Set operations for 'LSX' feature. |
228 | |
229 | if (Subtarget.hasExtLSX()) { |
230 | for (MVT VT : MVT::fixedlen_vector_valuetypes()) { |
231 | // Expand all truncating stores and extending loads. |
232 | for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { |
233 | setTruncStoreAction(ValVT: VT, MemVT: InnerVT, Action: Expand); |
234 | setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand); |
235 | setLoadExtAction(ExtType: ISD::ZEXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand); |
236 | setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand); |
237 | } |
238 | // By default everything must be expanded. Then we will selectively turn |
239 | // on ones that can be effectively codegen'd. |
240 | for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) |
241 | setOperationAction(Op, VT, Action: Expand); |
242 | } |
243 | |
244 | for (MVT VT : LSXVTs) { |
245 | setOperationAction(Ops: {ISD::LOAD, ISD::STORE}, VT, Action: Legal); |
246 | setOperationAction(Op: ISD::BITCAST, VT, Action: Legal); |
247 | setOperationAction(Op: ISD::UNDEF, VT, Action: Legal); |
248 | |
249 | setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT, Action: Custom); |
250 | setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT, Action: Legal); |
251 | setOperationAction(Op: ISD::BUILD_VECTOR, VT, Action: Custom); |
252 | |
253 | setOperationAction(Op: ISD::SETCC, VT, Action: Legal); |
254 | setOperationAction(Op: ISD::VSELECT, VT, Action: Legal); |
255 | setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT, Action: Custom); |
256 | } |
257 | for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { |
258 | setOperationAction(Ops: {ISD::ADD, ISD::SUB}, VT, Action: Legal); |
259 | setOperationAction(Ops: {ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT, |
260 | Action: Legal); |
261 | setOperationAction(Ops: {ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM}, |
262 | VT, Action: Legal); |
263 | setOperationAction(Ops: {ISD::AND, ISD::OR, ISD::XOR}, VT, Action: Legal); |
264 | setOperationAction(Ops: {ISD::SHL, ISD::SRA, ISD::SRL}, VT, Action: Legal); |
265 | setOperationAction(Ops: {ISD::CTPOP, ISD::CTLZ}, VT, Action: Legal); |
266 | setOperationAction(Ops: {ISD::MULHS, ISD::MULHU}, VT, Action: Legal); |
267 | setCondCodeAction( |
268 | CCs: {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, |
269 | Action: Expand); |
270 | } |
271 | for (MVT VT : {MVT::v4i32, MVT::v2i64}) { |
272 | setOperationAction(Ops: {ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Action: Legal); |
273 | setOperationAction(Ops: {ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Action: Legal); |
274 | } |
275 | for (MVT VT : {MVT::v4f32, MVT::v2f64}) { |
276 | setOperationAction(Ops: {ISD::FADD, ISD::FSUB}, VT, Action: Legal); |
277 | setOperationAction(Ops: {ISD::FMUL, ISD::FDIV}, VT, Action: Legal); |
278 | setOperationAction(Op: ISD::FMA, VT, Action: Legal); |
279 | setOperationAction(Op: ISD::FSQRT, VT, Action: Legal); |
280 | setOperationAction(Op: ISD::FNEG, VT, Action: Legal); |
281 | setCondCodeAction(CCs: {ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, |
282 | ISD::SETUGE, ISD::SETUGT}, |
283 | VT, Action: Expand); |
284 | } |
285 | } |
286 | |
287 | // Set operations for 'LASX' feature. |
288 | |
289 | if (Subtarget.hasExtLASX()) { |
290 | for (MVT VT : LASXVTs) { |
291 | setOperationAction(Ops: {ISD::LOAD, ISD::STORE}, VT, Action: Legal); |
292 | setOperationAction(Op: ISD::BITCAST, VT, Action: Legal); |
293 | setOperationAction(Op: ISD::UNDEF, VT, Action: Legal); |
294 | |
295 | setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT, Action: Custom); |
296 | setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT, Action: Custom); |
297 | setOperationAction(Op: ISD::BUILD_VECTOR, VT, Action: Custom); |
298 | |
299 | setOperationAction(Op: ISD::SETCC, VT, Action: Legal); |
300 | setOperationAction(Op: ISD::VSELECT, VT, Action: Legal); |
301 | setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT, Action: Custom); |
302 | } |
303 | for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) { |
304 | setOperationAction(Ops: {ISD::ADD, ISD::SUB}, VT, Action: Legal); |
305 | setOperationAction(Ops: {ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT, |
306 | Action: Legal); |
307 | setOperationAction(Ops: {ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM}, |
308 | VT, Action: Legal); |
309 | setOperationAction(Ops: {ISD::AND, ISD::OR, ISD::XOR}, VT, Action: Legal); |
310 | setOperationAction(Ops: {ISD::SHL, ISD::SRA, ISD::SRL}, VT, Action: Legal); |
311 | setOperationAction(Ops: {ISD::CTPOP, ISD::CTLZ}, VT, Action: Legal); |
312 | setOperationAction(Ops: {ISD::MULHS, ISD::MULHU}, VT, Action: Legal); |
313 | setCondCodeAction( |
314 | CCs: {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, |
315 | Action: Expand); |
316 | } |
317 | for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) { |
318 | setOperationAction(Ops: {ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Action: Legal); |
319 | setOperationAction(Ops: {ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Action: Legal); |
320 | } |
321 | for (MVT VT : {MVT::v8f32, MVT::v4f64}) { |
322 | setOperationAction(Ops: {ISD::FADD, ISD::FSUB}, VT, Action: Legal); |
323 | setOperationAction(Ops: {ISD::FMUL, ISD::FDIV}, VT, Action: Legal); |
324 | setOperationAction(Op: ISD::FMA, VT, Action: Legal); |
325 | setOperationAction(Op: ISD::FSQRT, VT, Action: Legal); |
326 | setOperationAction(Op: ISD::FNEG, VT, Action: Legal); |
327 | setCondCodeAction(CCs: {ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, |
328 | ISD::SETUGE, ISD::SETUGT}, |
329 | VT, Action: Expand); |
330 | } |
331 | } |
332 | |
333 | // Set DAG combine for LA32 and LA64. |
334 | |
335 | setTargetDAGCombine(ISD::AND); |
336 | setTargetDAGCombine(ISD::OR); |
337 | setTargetDAGCombine(ISD::SRL); |
338 | setTargetDAGCombine(ISD::SETCC); |
339 | |
340 | // Set DAG combine for 'LSX' feature. |
341 | |
342 | if (Subtarget.hasExtLSX()) |
343 | setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); |
344 | |
345 | // Compute derived properties from the register classes. |
346 | computeRegisterProperties(TRI: Subtarget.getRegisterInfo()); |
347 | |
348 | setStackPointerRegisterToSaveRestore(LoongArch::R3); |
349 | |
350 | setBooleanContents(ZeroOrOneBooleanContent); |
351 | setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); |
352 | |
353 | setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen()); |
354 | |
355 | setMinCmpXchgSizeInBits(32); |
356 | |
357 | // Function alignments. |
358 | setMinFunctionAlignment(Align(4)); |
359 | // Set preferred alignments. |
360 | setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment()); |
361 | setPrefLoopAlignment(Subtarget.getPrefLoopAlignment()); |
362 | setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment()); |
363 | } |
364 | |
365 | bool LoongArchTargetLowering::isOffsetFoldingLegal( |
366 | const GlobalAddressSDNode *GA) const { |
367 | // In order to maximise the opportunity for common subexpression elimination, |
368 | // keep a separate ADD node for the global address offset instead of folding |
369 | // it in the global address node. Later peephole optimisations may choose to |
370 | // fold it back in when profitable. |
371 | return false; |
372 | } |
373 | |
374 | SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, |
375 | SelectionDAG &DAG) const { |
376 | switch (Op.getOpcode()) { |
377 | case ISD::ATOMIC_FENCE: |
378 | return lowerATOMIC_FENCE(Op, DAG); |
379 | case ISD::EH_DWARF_CFA: |
380 | return lowerEH_DWARF_CFA(Op, DAG); |
381 | case ISD::GlobalAddress: |
382 | return lowerGlobalAddress(Op, DAG); |
383 | case ISD::GlobalTLSAddress: |
384 | return lowerGlobalTLSAddress(Op, DAG); |
385 | case ISD::INTRINSIC_WO_CHAIN: |
386 | return lowerINTRINSIC_WO_CHAIN(Op, DAG); |
387 | case ISD::INTRINSIC_W_CHAIN: |
388 | return lowerINTRINSIC_W_CHAIN(Op, DAG); |
389 | case ISD::INTRINSIC_VOID: |
390 | return lowerINTRINSIC_VOID(Op, DAG); |
391 | case ISD::BlockAddress: |
392 | return lowerBlockAddress(Op, DAG); |
393 | case ISD::JumpTable: |
394 | return lowerJumpTable(Op, DAG); |
395 | case ISD::SHL_PARTS: |
396 | return lowerShiftLeftParts(Op, DAG); |
397 | case ISD::SRA_PARTS: |
398 | return lowerShiftRightParts(Op, DAG, IsSRA: true); |
399 | case ISD::SRL_PARTS: |
400 | return lowerShiftRightParts(Op, DAG, IsSRA: false); |
401 | case ISD::ConstantPool: |
402 | return lowerConstantPool(Op, DAG); |
403 | case ISD::FP_TO_SINT: |
404 | return lowerFP_TO_SINT(Op, DAG); |
405 | case ISD::BITCAST: |
406 | return lowerBITCAST(Op, DAG); |
407 | case ISD::UINT_TO_FP: |
408 | return lowerUINT_TO_FP(Op, DAG); |
409 | case ISD::SINT_TO_FP: |
410 | return lowerSINT_TO_FP(Op, DAG); |
411 | case ISD::VASTART: |
412 | return lowerVASTART(Op, DAG); |
413 | case ISD::FRAMEADDR: |
414 | return lowerFRAMEADDR(Op, DAG); |
415 | case ISD::RETURNADDR: |
416 | return lowerRETURNADDR(Op, DAG); |
417 | case ISD::WRITE_REGISTER: |
418 | return lowerWRITE_REGISTER(Op, DAG); |
419 | case ISD::INSERT_VECTOR_ELT: |
420 | return lowerINSERT_VECTOR_ELT(Op, DAG); |
421 | case ISD::EXTRACT_VECTOR_ELT: |
422 | return lowerEXTRACT_VECTOR_ELT(Op, DAG); |
423 | case ISD::BUILD_VECTOR: |
424 | return lowerBUILD_VECTOR(Op, DAG); |
425 | case ISD::VECTOR_SHUFFLE: |
426 | return lowerVECTOR_SHUFFLE(Op, DAG); |
427 | } |
428 | return SDValue(); |
429 | } |
430 | |
431 | /// Determine whether a range fits a regular pattern of values. |
432 | /// This function accounts for the possibility of jumping over the End iterator. |
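/// For example, with CheckStride = 1, ExpectedIndex = 0 and
/// ExpectedIndexStride = 2, the masks <0, 2, 4, 6> and <0, -1, 4, -1> both
/// fit, since -1 (undef) entries are accepted in place of any expected value.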
433 | template <typename ValType> |
434 | static bool |
435 | fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin, |
436 | unsigned CheckStride, |
437 | typename SmallVectorImpl<ValType>::const_iterator End, |
438 | ValType ExpectedIndex, unsigned ExpectedIndexStride) { |
439 | auto &I = Begin; |
440 | |
441 | while (I != End) { |
442 | if (*I != -1 && *I != ExpectedIndex) |
443 | return false; |
444 | ExpectedIndex += ExpectedIndexStride; |
445 | |
446 | // Incrementing past End is undefined behaviour so we must increment one |
447 | // step at a time and check for End at each step. |
448 | for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I) |
449 | ; // Empty loop body. |
450 | } |
451 | return true; |
452 | } |
453 | |
454 | /// Lower VECTOR_SHUFFLE into VREPLVEI (if possible). |
455 | /// |
456 | /// VREPLVEI performs vector broadcast based on an element specified by an |
457 | /// integer immediate, with its mask being similar to: |
458 | /// <x, x, x, ...> |
459 | /// where x is any valid index. |
460 | /// |
461 | /// When undef's appear in the mask they are treated as if they were whatever |
462 | /// value is necessary in order to fit the above form. |
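///
/// For example (illustrative), the v4i32 shuffle
///   %2 = shufflevector <4 x i32> %0, <4 x i32> undef,
///                      <4 x i32> <i32 1, i32 1, i32 1, i32 1>
/// could be lowered to something like:
///   (VREPLVEI_W $v0, $v1, 1)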
463 | static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, |
464 | MVT VT, SDValue V1, SDValue V2, |
465 | SelectionDAG &DAG) { |
466 | int SplatIndex = -1; |
467 | for (const auto &M : Mask) { |
468 | if (M != -1) { |
469 | SplatIndex = M; |
470 | break; |
471 | } |
472 | } |
473 | |
474 | if (SplatIndex == -1) |
475 | return DAG.getUNDEF(VT); |
476 | |
477 | assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index" ); |
478 | if (fitsRegularPattern<int>(Begin: Mask.begin(), CheckStride: 1, End: Mask.end(), ExpectedIndex: SplatIndex, ExpectedIndexStride: 0)) { |
479 | APInt Imm(64, SplatIndex); |
480 | return DAG.getNode(Opcode: LoongArchISD::VREPLVEI, DL, VT, N1: V1, |
481 | N2: DAG.getConstant(Val: Imm, DL, VT: MVT::i64)); |
482 | } |
483 | |
484 | return SDValue(); |
485 | } |
486 | |
487 | /// Lower VECTOR_SHUFFLE into VSHUF4I (if possible). |
488 | /// |
489 | /// VSHUF4I splits the vector into blocks of four elements, then shuffles these |
490 | /// elements according to a <4 x i2> constant (encoded as an integer immediate). |
491 | /// |
492 | /// It is therefore possible to lower into VSHUF4I when the mask takes the form: |
493 | /// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...> |
494 | /// When undef's appear they are treated as if they were whatever value is |
495 | /// necessary in order to fit the above forms. |
496 | /// |
497 | /// For example: |
498 | /// %2 = shufflevector <8 x i16> %0, <8 x i16> undef, |
499 | /// <8 x i32> <i32 3, i32 2, i32 1, i32 0, |
500 | /// i32 7, i32 6, i32 5, i32 4> |
501 | /// is lowered to: |
502 | /// (VSHUF4I_H $v0, $v1, 27) |
503 | /// where the 27 comes from: |
504 | /// 3 + (2 << 2) + (1 << 4) + (0 << 6) |
505 | static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, |
506 | MVT VT, SDValue V1, SDValue V2, |
507 | SelectionDAG &DAG) { |
508 | |
509 | // When the size is less than 4, lower cost instructions may be used. |
510 | if (Mask.size() < 4) |
511 | return SDValue(); |
512 | |
513 | int SubMask[4] = {-1, -1, -1, -1}; |
514 | for (unsigned i = 0; i < 4; ++i) { |
515 | for (unsigned j = i; j < Mask.size(); j += 4) { |
516 | int Idx = Mask[j]; |
517 | |
518 | // Convert from vector index to 4-element subvector index |
519 | // If an index refers to an element outside of the subvector then give up |
520 | if (Idx != -1) { |
521 | Idx -= 4 * (j / 4); |
522 | if (Idx < 0 || Idx >= 4) |
523 | return SDValue(); |
524 | } |
525 | |
526 | // If the mask has an undef, replace it with the current index. |
527 | // Note that it might still be undef if the current index is also undef |
528 | if (SubMask[i] == -1) |
529 | SubMask[i] = Idx; |
530 | // Check that non-undef values are the same as in the mask. If they |
531 | // aren't then give up |
532 | else if (Idx != -1 && Idx != SubMask[i]) |
533 | return SDValue(); |
534 | } |
535 | } |
536 | |
537 | // Calculate the immediate. Replace any remaining undefs with zero |
538 | APInt Imm(64, 0); |
539 | for (int i = 3; i >= 0; --i) { |
540 | int Idx = SubMask[i]; |
541 | |
542 | if (Idx == -1) |
543 | Idx = 0; |
544 | |
545 | Imm <<= 2; |
546 | Imm |= Idx & 0x3; |
547 | } |
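  // The resulting immediate equals SubMask[0] | SubMask[1] << 2 |
  // SubMask[2] << 4 | SubMask[3] << 6, matching the encoding shown in the
  // example above.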
548 | |
549 | return DAG.getNode(Opcode: LoongArchISD::VSHUF4I, DL, VT, N1: V1, |
550 | N2: DAG.getConstant(Val: Imm, DL, VT: MVT::i64)); |
551 | } |
552 | |
553 | /// Lower VECTOR_SHUFFLE into VPACKEV (if possible). |
554 | /// |
555 | /// VPACKEV interleaves the even elements from each vector. |
556 | /// |
557 | /// It is possible to lower into VPACKEV when the mask consists of two of the |
558 | /// following forms interleaved: |
559 | /// <0, 2, 4, ...> |
560 | /// <n, n+2, n+4, ...> |
561 | /// where n is the number of elements in the vector. |
562 | /// For example: |
563 | /// <0, 0, 2, 2, 4, 4, ...> |
564 | /// <0, n, 2, n+2, 4, n+4, ...> |
565 | /// |
566 | /// When undef's appear in the mask they are treated as if they were whatever |
567 | /// value is necessary in order to fit the above forms. |
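///
/// For instance, for a v4i32 shuffle the mask <0, 4, 2, 6> (the even elements
/// of both inputs, interleaved) is one concrete instance of the forms above.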
568 | static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef<int> Mask, |
569 | MVT VT, SDValue V1, SDValue V2, |
570 | SelectionDAG &DAG) { |
571 | |
572 | const auto &Begin = Mask.begin(); |
573 | const auto &End = Mask.end(); |
574 | SDValue OriV1 = V1, OriV2 = V2; |
575 | |
576 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 2)) |
577 | V1 = OriV1; |
578 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2)) |
579 | V1 = OriV2; |
580 | else |
581 | return SDValue(); |
582 | |
583 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 2)) |
584 | V2 = OriV1; |
585 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2)) |
586 | V2 = OriV2; |
587 | else |
588 | return SDValue(); |
589 | |
590 | return DAG.getNode(Opcode: LoongArchISD::VPACKEV, DL, VT, N1: V2, N2: V1); |
591 | } |
592 | |
593 | /// Lower VECTOR_SHUFFLE into VPACKOD (if possible). |
594 | /// |
595 | /// VPACKOD interleaves the odd elements from each vector. |
596 | /// |
597 | /// It is possible to lower into VPACKOD when the mask consists of two of the |
598 | /// following forms interleaved: |
599 | /// <1, 3, 5, ...> |
600 | /// <n+1, n+3, n+5, ...> |
601 | /// where n is the number of elements in the vector. |
602 | /// For example: |
603 | /// <1, 1, 3, 3, 5, 5, ...> |
604 | /// <1, n+1, 3, n+3, 5, n+5, ...> |
605 | /// |
606 | /// When undef's appear in the mask they are treated as if they were whatever |
607 | /// value is necessary in order to fit the above forms. |
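///
/// For instance, for a v4i32 shuffle the mask <1, 5, 3, 7> (the odd elements
/// of both inputs, interleaved) is one concrete instance of the forms above.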
608 | static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef<int> Mask, |
609 | MVT VT, SDValue V1, SDValue V2, |
610 | SelectionDAG &DAG) { |
611 | |
612 | const auto &Begin = Mask.begin(); |
613 | const auto &End = Mask.end(); |
614 | SDValue OriV1 = V1, OriV2 = V2; |
615 | |
616 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 1, ExpectedIndexStride: 2)) |
617 | V1 = OriV1; |
618 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2)) |
619 | V1 = OriV2; |
620 | else |
621 | return SDValue(); |
622 | |
623 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 1, ExpectedIndexStride: 2)) |
624 | V2 = OriV1; |
625 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2)) |
626 | V2 = OriV2; |
627 | else |
628 | return SDValue(); |
629 | |
630 | return DAG.getNode(Opcode: LoongArchISD::VPACKOD, DL, VT, N1: V2, N2: V1); |
631 | } |
632 | |
633 | /// Lower VECTOR_SHUFFLE into VILVH (if possible). |
634 | /// |
635 | /// VILVH interleaves consecutive elements from the left (highest-indexed) half |
636 | /// of each vector. |
637 | /// |
638 | /// It is possible to lower into VILVH when the mask consists of two of the |
639 | /// following forms interleaved: |
640 | /// <x, x+1, x+2, ...> |
641 | /// <n+x, n+x+1, n+x+2, ...> |
642 | /// where n is the number of elements in the vector and x is half n. |
643 | /// For example: |
644 | /// <x, x, x+1, x+1, x+2, x+2, ...> |
645 | /// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...> |
646 | /// |
647 | /// When undef's appear in the mask they are treated as if they were whatever |
648 | /// value is necessary in order to fit the above forms. |
649 | static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef<int> Mask, |
650 | MVT VT, SDValue V1, SDValue V2, |
651 | SelectionDAG &DAG) { |
652 | |
653 | const auto &Begin = Mask.begin(); |
654 | const auto &End = Mask.end(); |
655 | unsigned HalfSize = Mask.size() / 2; |
656 | SDValue OriV1 = V1, OriV2 = V2; |
657 | |
658 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1)) |
659 | V1 = OriV1; |
660 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 1)) |
661 | V1 = OriV2; |
662 | else |
663 | return SDValue(); |
664 | |
665 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1)) |
666 | V2 = OriV1; |
667 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size() + HalfSize, |
668 | ExpectedIndexStride: 1)) |
669 | V2 = OriV2; |
670 | else |
671 | return SDValue(); |
672 | |
673 | return DAG.getNode(Opcode: LoongArchISD::VILVH, DL, VT, N1: V2, N2: V1); |
674 | } |
675 | |
676 | /// Lower VECTOR_SHUFFLE into VILVL (if possible). |
677 | /// |
678 | /// VILVL interleaves consecutive elements from the right (lowest-indexed) half |
679 | /// of each vector. |
680 | /// |
681 | /// It is possible to lower into VILVL when the mask consists of two of the |
682 | /// following forms interleaved: |
683 | /// <0, 1, 2, ...> |
684 | /// <n, n+1, n+2, ...> |
685 | /// where n is the number of elements in the vector. |
686 | /// For example: |
687 | /// <0, 0, 1, 1, 2, 2, ...> |
688 | /// <0, n, 1, n+1, 2, n+2, ...> |
689 | /// |
690 | /// When undef's appear in the mask they are treated as if they were whatever |
691 | /// value is necessary in order to fit the above forms. |
692 | static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask, |
693 | MVT VT, SDValue V1, SDValue V2, |
694 | SelectionDAG &DAG) { |
695 | |
696 | const auto &Begin = Mask.begin(); |
697 | const auto &End = Mask.end(); |
698 | SDValue OriV1 = V1, OriV2 = V2; |
699 | |
700 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 1)) |
701 | V1 = OriV1; |
702 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 1)) |
703 | V1 = OriV2; |
704 | else |
705 | return SDValue(); |
706 | |
707 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 1)) |
708 | V2 = OriV1; |
709 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 1)) |
710 | V2 = OriV2; |
711 | else |
712 | return SDValue(); |
713 | |
714 | return DAG.getNode(Opcode: LoongArchISD::VILVL, DL, VT, N1: V2, N2: V1); |
715 | } |
716 | |
717 | /// Lower VECTOR_SHUFFLE into VPICKEV (if possible). |
718 | /// |
719 | /// VPICKEV copies the even elements of each vector into the result vector. |
720 | /// |
721 | /// It is possible to lower into VPICKEV when the mask consists of two of the |
722 | /// following forms concatenated: |
723 | /// <0, 2, 4, ...> |
724 | /// <n, n+2, n+4, ...> |
725 | /// where n is the number of elements in the vector. |
726 | /// For example: |
727 | /// <0, 2, 4, ..., 0, 2, 4, ...> |
728 | /// <0, 2, 4, ..., n, n+2, n+4, ...> |
729 | /// |
730 | /// When undef's appear in the mask they are treated as if they were whatever |
731 | /// value is necessary in order to fit the above forms. |
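///
/// For instance, for a v4i32 shuffle the mask <0, 2, 4, 6> (the even elements
/// of the first input followed by the even elements of the second) is one
/// concrete instance of the forms above.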
732 | static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask, |
733 | MVT VT, SDValue V1, SDValue V2, |
734 | SelectionDAG &DAG) { |
735 | |
736 | const auto &Begin = Mask.begin(); |
737 | const auto &Mid = Mask.begin() + Mask.size() / 2; |
738 | const auto &End = Mask.end(); |
739 | SDValue OriV1 = V1, OriV2 = V2; |
740 | |
741 | if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: 0, ExpectedIndexStride: 2)) |
742 | V1 = OriV1; |
743 | else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2)) |
744 | V1 = OriV2; |
745 | else |
746 | return SDValue(); |
747 | |
748 | if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: 0, ExpectedIndexStride: 2)) |
749 | V2 = OriV1; |
750 | else if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2)) |
    V2 = OriV2;
  else
    return SDValue();
755 | |
756 | return DAG.getNode(Opcode: LoongArchISD::VPICKEV, DL, VT, N1: V2, N2: V1); |
757 | } |
758 | |
759 | /// Lower VECTOR_SHUFFLE into VPICKOD (if possible). |
760 | /// |
761 | /// VPICKOD copies the odd elements of each vector into the result vector. |
762 | /// |
763 | /// It is possible to lower into VPICKOD when the mask consists of two of the |
764 | /// following forms concatenated: |
765 | /// <1, 3, 5, ...> |
766 | /// <n+1, n+3, n+5, ...> |
767 | /// where n is the number of elements in the vector. |
768 | /// For example: |
769 | /// <1, 3, 5, ..., 1, 3, 5, ...> |
770 | /// <1, 3, 5, ..., n+1, n+3, n+5, ...> |
771 | /// |
772 | /// When undef's appear in the mask they are treated as if they were whatever |
773 | /// value is necessary in order to fit the above forms. |
774 | static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask, |
775 | MVT VT, SDValue V1, SDValue V2, |
776 | SelectionDAG &DAG) { |
777 | |
778 | const auto &Begin = Mask.begin(); |
779 | const auto &Mid = Mask.begin() + Mask.size() / 2; |
780 | const auto &End = Mask.end(); |
781 | SDValue OriV1 = V1, OriV2 = V2; |
782 | |
783 | if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: 1, ExpectedIndexStride: 2)) |
784 | V1 = OriV1; |
785 | else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2)) |
786 | V1 = OriV2; |
787 | else |
788 | return SDValue(); |
789 | |
790 | if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: 1, ExpectedIndexStride: 2)) |
791 | V2 = OriV1; |
792 | else if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2)) |
793 | V2 = OriV2; |
794 | else |
795 | return SDValue(); |
796 | |
797 | return DAG.getNode(Opcode: LoongArchISD::VPICKOD, DL, VT, N1: V2, N2: V1); |
798 | } |
799 | |
800 | /// Lower VECTOR_SHUFFLE into VSHUF. |
801 | /// |
802 | /// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and |
803 | /// adding it as an operand to the resulting VSHUF. |
804 | static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask, |
805 | MVT VT, SDValue V1, SDValue V2, |
806 | SelectionDAG &DAG) { |
807 | |
808 | SmallVector<SDValue, 16> Ops; |
809 | for (auto M : Mask) |
810 | Ops.push_back(Elt: DAG.getConstant(Val: M, DL, VT: MVT::i64)); |
811 | |
812 | EVT MaskVecTy = VT.changeVectorElementTypeToInteger(); |
813 | SDValue MaskVec = DAG.getBuildVector(VT: MaskVecTy, DL, Ops); |
814 | |
  // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
  //   <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
  // VSHUF concatenates the vectors in a bitwise fashion:
  //   <0b00, 0b01> + <0b10, 0b11> ->
  //   0b0100 + 0b1110 -> 0b01001110
  //   <0b10, 0b11, 0b00, 0b01>
  // We must therefore swap the operands to get the correct result.
822 | return DAG.getNode(Opcode: LoongArchISD::VSHUF, DL, VT, N1: MaskVec, N2: V2, N3: V1); |
823 | } |
824 | |
825 | /// Dispatching routine to lower various 128-bit LoongArch vector shuffles. |
826 | /// |
827 | /// This routine breaks down the specific type of 128-bit shuffle and |
828 | /// dispatches to the lowering routines accordingly. |
829 | static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT, |
830 | SDValue V1, SDValue V2, SelectionDAG &DAG) { |
831 | assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 || |
832 | VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 || |
833 | VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) && |
834 | "Vector type is unsupported for lsx!" ); |
835 | assert(V1.getSimpleValueType() == V2.getSimpleValueType() && |
836 | "Two operands have different types!" ); |
837 | assert(VT.getVectorNumElements() == Mask.size() && |
838 | "Unexpected mask size for shuffle!" ); |
839 | assert(Mask.size() % 2 == 0 && "Expected even mask size." ); |
840 | |
841 | SDValue Result; |
842 | // TODO: Add more comparison patterns. |
843 | if (V2.isUndef()) { |
844 | if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG))) |
845 | return Result; |
846 | if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG))) |
847 | return Result; |
848 | |
    // TODO: The commented-out assignment below may be enabled in the future to
    // better match the pattern for instruction selection.
    /* V2 = V1; */
852 | } |
853 | |
  // For better performance, it is recommended not to change the order of the
  // following pattern checks.
856 | if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG))) |
857 | return Result; |
858 | if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG))) |
859 | return Result; |
860 | if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG))) |
861 | return Result; |
862 | if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG))) |
863 | return Result; |
864 | if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG))) |
865 | return Result; |
866 | if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG))) |
867 | return Result; |
868 | if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG))) |
869 | return Result; |
870 | |
871 | return SDValue(); |
872 | } |
873 | |
874 | /// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible). |
875 | /// |
/// It is an XVREPLVEI when the mask is:
///   <x, x, x, ..., x+n, x+n, x+n, ...>
/// where x appears n times followed by n copies of x+n, and n is half the
/// number of vector elements.
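/// For example, for v8i32 the mask <1, 1, 1, 1, 5, 5, 5, 5> fits this form
/// with x = 1 and n = 4.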
879 | /// |
880 | /// When undef's appear in the mask they are treated as if they were whatever |
881 | /// value is necessary in order to fit the above form. |
882 | static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, |
883 | ArrayRef<int> Mask, MVT VT, |
884 | SDValue V1, SDValue V2, |
885 | SelectionDAG &DAG) { |
886 | int SplatIndex = -1; |
887 | for (const auto &M : Mask) { |
888 | if (M != -1) { |
889 | SplatIndex = M; |
890 | break; |
891 | } |
892 | } |
893 | |
894 | if (SplatIndex == -1) |
895 | return DAG.getUNDEF(VT); |
896 | |
897 | const auto &Begin = Mask.begin(); |
898 | const auto &End = Mask.end(); |
899 | unsigned HalfSize = Mask.size() / 2; |
900 | |
901 | assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index" ); |
902 | if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: End - HalfSize, ExpectedIndex: SplatIndex, ExpectedIndexStride: 0) && |
903 | fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 1, End, ExpectedIndex: SplatIndex + HalfSize, |
904 | ExpectedIndexStride: 0)) { |
905 | APInt Imm(64, SplatIndex); |
906 | return DAG.getNode(Opcode: LoongArchISD::VREPLVEI, DL, VT, N1: V1, |
907 | N2: DAG.getConstant(Val: Imm, DL, VT: MVT::i64)); |
908 | } |
909 | |
910 | return SDValue(); |
911 | } |
912 | |
913 | /// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible). |
914 | static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, |
915 | MVT VT, SDValue V1, SDValue V2, |
916 | SelectionDAG &DAG) { |
917 | // When the size is less than or equal to 4, lower cost instructions may be |
918 | // used. |
919 | if (Mask.size() <= 4) |
920 | return SDValue(); |
921 | return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG); |
922 | } |
923 | |
924 | /// Lower VECTOR_SHUFFLE into XVPACKEV (if possible). |
925 | static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask, |
926 | MVT VT, SDValue V1, SDValue V2, |
927 | SelectionDAG &DAG) { |
928 | return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG); |
929 | } |
930 | |
931 | /// Lower VECTOR_SHUFFLE into XVPACKOD (if possible). |
932 | static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask, |
933 | MVT VT, SDValue V1, SDValue V2, |
934 | SelectionDAG &DAG) { |
935 | return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG); |
936 | } |
937 | |
938 | /// Lower VECTOR_SHUFFLE into XVILVH (if possible). |
939 | static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask, |
940 | MVT VT, SDValue V1, SDValue V2, |
941 | SelectionDAG &DAG) { |
942 | |
943 | const auto &Begin = Mask.begin(); |
944 | const auto &End = Mask.end(); |
945 | unsigned HalfSize = Mask.size() / 2; |
946 | unsigned LeftSize = HalfSize / 2; |
947 | SDValue OriV1 = V1, OriV2 = V2; |
948 | |
949 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize, ExpectedIndex: HalfSize - LeftSize, |
950 | ExpectedIndexStride: 1) && |
951 | fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize + LeftSize, ExpectedIndexStride: 1)) |
952 | V1 = OriV1; |
953 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize, |
954 | ExpectedIndex: Mask.size() + HalfSize - LeftSize, ExpectedIndexStride: 1) && |
955 | fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End, |
956 | ExpectedIndex: Mask.size() + HalfSize + LeftSize, ExpectedIndexStride: 1)) |
957 | V1 = OriV2; |
958 | else |
959 | return SDValue(); |
960 | |
961 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize, ExpectedIndex: HalfSize - LeftSize, |
962 | ExpectedIndexStride: 1) && |
963 | fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize + LeftSize, |
964 | ExpectedIndexStride: 1)) |
965 | V2 = OriV1; |
966 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize, |
967 | ExpectedIndex: Mask.size() + HalfSize - LeftSize, ExpectedIndexStride: 1) && |
968 | fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End, |
969 | ExpectedIndex: Mask.size() + HalfSize + LeftSize, ExpectedIndexStride: 1)) |
970 | V2 = OriV2; |
971 | else |
972 | return SDValue(); |
973 | |
974 | return DAG.getNode(Opcode: LoongArchISD::VILVH, DL, VT, N1: V2, N2: V1); |
975 | } |
976 | |
977 | /// Lower VECTOR_SHUFFLE into XVILVL (if possible). |
978 | static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask, |
979 | MVT VT, SDValue V1, SDValue V2, |
980 | SelectionDAG &DAG) { |
981 | |
982 | const auto &Begin = Mask.begin(); |
983 | const auto &End = Mask.end(); |
984 | unsigned HalfSize = Mask.size() / 2; |
985 | SDValue OriV1 = V1, OriV2 = V2; |
986 | |
987 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize, ExpectedIndex: 0, ExpectedIndexStride: 1) && |
988 | fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1)) |
989 | V1 = OriV1; |
990 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize, ExpectedIndex: Mask.size(), ExpectedIndexStride: 1) && |
991 | fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End, |
992 | ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 1)) |
993 | V1 = OriV2; |
994 | else |
995 | return SDValue(); |
996 | |
997 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize, ExpectedIndex: 0, ExpectedIndexStride: 1) && |
998 | fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1)) |
999 | V2 = OriV1; |
1000 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize, ExpectedIndex: Mask.size(), |
1001 | ExpectedIndexStride: 1) && |
1002 | fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End, |
1003 | ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 1)) |
1004 | V2 = OriV2; |
1005 | else |
1006 | return SDValue(); |
1007 | |
1008 | return DAG.getNode(Opcode: LoongArchISD::VILVL, DL, VT, N1: V2, N2: V1); |
1009 | } |
1010 | |
1011 | /// Lower VECTOR_SHUFFLE into XVPICKEV (if possible). |
1012 | static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask, |
1013 | MVT VT, SDValue V1, SDValue V2, |
1014 | SelectionDAG &DAG) { |
1015 | |
1016 | const auto &Begin = Mask.begin(); |
1017 | const auto &LeftMid = Mask.begin() + Mask.size() / 4; |
1018 | const auto &Mid = Mask.begin() + Mask.size() / 2; |
1019 | const auto &RightMid = Mask.end() - Mask.size() / 4; |
1020 | const auto &End = Mask.end(); |
1021 | unsigned HalfSize = Mask.size() / 2; |
1022 | SDValue OriV1 = V1, OriV2 = V2; |
1023 | |
1024 | if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: 0, ExpectedIndexStride: 2) && |
1025 | fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: HalfSize, ExpectedIndexStride: 2)) |
1026 | V1 = OriV1; |
1027 | else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2) && |
1028 | fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 2)) |
1029 | V1 = OriV2; |
1030 | else |
1031 | return SDValue(); |
1032 | |
1033 | if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: 0, ExpectedIndexStride: 2) && |
1034 | fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 2)) |
1035 | V2 = OriV1; |
1036 | else if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2) && |
1037 | fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 2)) |
    V2 = OriV2;
  else
    return SDValue();
1042 | |
1043 | return DAG.getNode(Opcode: LoongArchISD::VPICKEV, DL, VT, N1: V2, N2: V1); |
1044 | } |
1045 | |
1046 | /// Lower VECTOR_SHUFFLE into XVPICKOD (if possible). |
1047 | static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask, |
1048 | MVT VT, SDValue V1, SDValue V2, |
1049 | SelectionDAG &DAG) { |
1050 | |
1051 | const auto &Begin = Mask.begin(); |
1052 | const auto &LeftMid = Mask.begin() + Mask.size() / 4; |
1053 | const auto &Mid = Mask.begin() + Mask.size() / 2; |
1054 | const auto &RightMid = Mask.end() - Mask.size() / 4; |
1055 | const auto &End = Mask.end(); |
1056 | unsigned HalfSize = Mask.size() / 2; |
1057 | SDValue OriV1 = V1, OriV2 = V2; |
1058 | |
1059 | if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: 1, ExpectedIndexStride: 2) && |
1060 | fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: HalfSize + 1, ExpectedIndexStride: 2)) |
1061 | V1 = OriV1; |
1062 | else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2) && |
1063 | fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: Mask.size() + HalfSize + 1, |
1064 | ExpectedIndexStride: 2)) |
1065 | V1 = OriV2; |
1066 | else |
1067 | return SDValue(); |
1068 | |
1069 | if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: 1, ExpectedIndexStride: 2) && |
1070 | fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: HalfSize + 1, ExpectedIndexStride: 2)) |
1071 | V2 = OriV1; |
1072 | else if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2) && |
1073 | fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: Mask.size() + HalfSize + 1, |
1074 | ExpectedIndexStride: 2)) |
1075 | V2 = OriV2; |
1076 | else |
1077 | return SDValue(); |
1078 | |
1079 | return DAG.getNode(Opcode: LoongArchISD::VPICKOD, DL, VT, N1: V2, N2: V1); |
1080 | } |
1081 | |
1082 | /// Lower VECTOR_SHUFFLE into XVSHUF (if possible). |
1083 | static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask, |
1084 | MVT VT, SDValue V1, SDValue V2, |
1085 | SelectionDAG &DAG) { |
1086 | |
1087 | int MaskSize = Mask.size(); |
1088 | int HalfSize = Mask.size() / 2; |
1089 | const auto &Begin = Mask.begin(); |
1090 | const auto &Mid = Mask.begin() + HalfSize; |
1091 | const auto &End = Mask.end(); |
1092 | |
1093 | // VECTOR_SHUFFLE concatenates the vectors: |
1094 | // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15> |
1095 | // shuffling -> |
1096 | // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15> |
1097 | // |
1098 | // XVSHUF concatenates the vectors: |
1099 | // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7> |
1100 | // shuffling -> |
1101 | // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7> |
1102 | SmallVector<SDValue, 8> MaskAlloc; |
1103 | for (auto it = Begin; it < Mid; it++) { |
1104 | if (*it < 0) // UNDEF |
1105 | MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: 0, DL, VT: MVT::i64)); |
    else if ((*it >= 0 && *it < HalfSize) ||
             (*it >= MaskSize && *it < MaskSize + HalfSize)) {
1108 | int M = *it < HalfSize ? *it : *it - HalfSize; |
1109 | MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: M, DL, VT: MVT::i64)); |
1110 | } else |
1111 | return SDValue(); |
1112 | } |
1113 | assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!" ); |
1114 | |
1115 | for (auto it = Mid; it < End; it++) { |
1116 | if (*it < 0) // UNDEF |
1117 | MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: 0, DL, VT: MVT::i64)); |
1118 | else if ((*it >= HalfSize && *it < MaskSize) || |
1119 | (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) { |
1120 | int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize; |
1121 | MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: M, DL, VT: MVT::i64)); |
1122 | } else |
1123 | return SDValue(); |
1124 | } |
1125 | assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!" ); |
1126 | |
1127 | EVT MaskVecTy = VT.changeVectorElementTypeToInteger(); |
1128 | SDValue MaskVec = DAG.getBuildVector(VT: MaskVecTy, DL, Ops: MaskAlloc); |
1129 | return DAG.getNode(Opcode: LoongArchISD::VSHUF, DL, VT, N1: MaskVec, N2: V2, N3: V1); |
1130 | } |
1131 | |
1132 | /// Shuffle vectors by lane to generate more optimized instructions. |
1133 | /// 256-bit shuffles are always considered as 2-lane 128-bit shuffles. |
1134 | /// |
1135 | /// Therefore, except for the following four cases, other cases are regarded |
1136 | /// as cross-lane shuffles, where optimization is relatively limited. |
1137 | /// |
/// - Shuffle high, low lanes of the two input vectors
///     <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
/// - Shuffle low, high lanes of the two input vectors
///     <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
/// - Shuffle low, low lanes of the two input vectors
///     <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
/// - Shuffle high, high lanes of the two input vectors
///     <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
1146 | /// |
1147 | /// The first case is the closest to LoongArch instructions and the other |
1148 | /// cases need to be converted to it for processing. |
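/// The conversion is done by permuting the 128-bit halves of the inputs with
/// XVPERMI (on v4i64) and remapping the mask indices accordingly.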
1149 | /// |
1150 | /// This function may modify V1, V2 and Mask |
1151 | static void canonicalizeShuffleVectorByLane(const SDLoc &DL, |
1152 | MutableArrayRef<int> Mask, MVT VT, |
1153 | SDValue &V1, SDValue &V2, |
1154 | SelectionDAG &DAG) { |
1155 | |
1156 | enum HalfMaskType { HighLaneTy, LowLaneTy, None }; |
1157 | |
1158 | int MaskSize = Mask.size(); |
1159 | int HalfSize = Mask.size() / 2; |
1160 | |
1161 | HalfMaskType preMask = None, postMask = None; |
1162 | |
1163 | if (std::all_of(first: Mask.begin(), last: Mask.begin() + HalfSize, pred: [&](int M) { |
1164 | return M < 0 || (M >= 0 && M < HalfSize) || |
1165 | (M >= MaskSize && M < MaskSize + HalfSize); |
1166 | })) |
1167 | preMask = HighLaneTy; |
1168 | else if (std::all_of(first: Mask.begin(), last: Mask.begin() + HalfSize, pred: [&](int M) { |
1169 | return M < 0 || (M >= HalfSize && M < MaskSize) || |
1170 | (M >= MaskSize + HalfSize && M < MaskSize * 2); |
1171 | })) |
1172 | preMask = LowLaneTy; |
1173 | |
1174 | if (std::all_of(first: Mask.begin() + HalfSize, last: Mask.end(), pred: [&](int M) { |
1175 | return M < 0 || (M >= 0 && M < HalfSize) || |
1176 | (M >= MaskSize && M < MaskSize + HalfSize); |
1177 | })) |
1178 | postMask = HighLaneTy; |
1179 | else if (std::all_of(first: Mask.begin() + HalfSize, last: Mask.end(), pred: [&](int M) { |
1180 | return M < 0 || (M >= HalfSize && M < MaskSize) || |
1181 | (M >= MaskSize + HalfSize && M < MaskSize * 2); |
1182 | })) |
1183 | postMask = LowLaneTy; |
1184 | |
  // The pre-half of the mask is of high-lane type and the post-half of the
  // mask is of low-lane type, which is closest to the LoongArch instructions.
  //
  // Note: In the LoongArch architecture, the high lane of the mask corresponds
  // to the lower 128 bits of the vector register, and the low lane of the mask
  // corresponds to the higher 128 bits of the vector register.
1191 | if (preMask == HighLaneTy && postMask == LowLaneTy) { |
1192 | return; |
1193 | } |
1194 | if (preMask == LowLaneTy && postMask == HighLaneTy) { |
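    // XVPERMI on v4i64 with immediate 0b01001110 selects the 64-bit elements
    // <2, 3, 0, 1>, i.e. it swaps the two 128-bit halves of each register.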
1195 | V1 = DAG.getBitcast(VT: MVT::v4i64, V: V1); |
1196 | V1 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V1, |
1197 | N2: DAG.getConstant(Val: 0b01001110, DL, VT: MVT::i64)); |
1198 | V1 = DAG.getBitcast(VT, V: V1); |
1199 | |
1200 | if (!V2.isUndef()) { |
1201 | V2 = DAG.getBitcast(VT: MVT::v4i64, V: V2); |
1202 | V2 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V2, |
1203 | N2: DAG.getConstant(Val: 0b01001110, DL, VT: MVT::i64)); |
1204 | V2 = DAG.getBitcast(VT, V: V2); |
1205 | } |
1206 | |
1207 | for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) { |
1208 | *it = *it < 0 ? *it : *it - HalfSize; |
1209 | } |
1210 | for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) { |
1211 | *it = *it < 0 ? *it : *it + HalfSize; |
1212 | } |
1213 | } else if (preMask == LowLaneTy && postMask == LowLaneTy) { |
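    // Immediate 0b11101110 selects the 64-bit elements <2, 3, 2, 3>,
    // broadcasting the high 128-bit half of the register into both halves.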
1214 | V1 = DAG.getBitcast(VT: MVT::v4i64, V: V1); |
1215 | V1 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V1, |
1216 | N2: DAG.getConstant(Val: 0b11101110, DL, VT: MVT::i64)); |
1217 | V1 = DAG.getBitcast(VT, V: V1); |
1218 | |
1219 | if (!V2.isUndef()) { |
1220 | V2 = DAG.getBitcast(VT: MVT::v4i64, V: V2); |
1221 | V2 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V2, |
1222 | N2: DAG.getConstant(Val: 0b11101110, DL, VT: MVT::i64)); |
1223 | V2 = DAG.getBitcast(VT, V: V2); |
1224 | } |
1225 | |
1226 | for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) { |
1227 | *it = *it < 0 ? *it : *it - HalfSize; |
1228 | } |
1229 | } else if (preMask == HighLaneTy && postMask == HighLaneTy) { |
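// Both halves of the mask only reference the high lane, i.e. the lower
// 128 bits of each input register. Broadcast that half into both lanes
// (the XVPERMI immediate 0b01000100 selects elements <0, 1, 0, 1>) and
// rebase the second half of the mask.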
1230 | V1 = DAG.getBitcast(VT: MVT::v4i64, V: V1); |
1231 | V1 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V1, |
1232 | N2: DAG.getConstant(Val: 0b01000100, DL, VT: MVT::i64)); |
1233 | V1 = DAG.getBitcast(VT, V: V1); |
1234 | |
1235 | if (!V2.isUndef()) { |
1236 | V2 = DAG.getBitcast(VT: MVT::v4i64, V: V2); |
1237 | V2 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V2, |
1238 | N2: DAG.getConstant(Val: 0b01000100, DL, VT: MVT::i64)); |
1239 | V2 = DAG.getBitcast(VT, V: V2); |
1240 | } |
1241 | |
1242 | for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) { |
1243 | *it = *it < 0 ? *it : *it + HalfSize; |
1244 | } |
1245 | } else { // cross-lane |
1246 | return; |
1247 | } |
1248 | } |
1249 | |
1250 | /// Dispatching routine to lower various 256-bit LoongArch vector shuffles. |
1251 | /// |
1252 | /// This routine breaks down the specific type of 256-bit shuffle and |
1253 | /// dispatches to the lowering routines accordingly. |
1254 | static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT, |
1255 | SDValue V1, SDValue V2, SelectionDAG &DAG) { |
1256 | assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 || |
1257 | VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 || |
1258 | VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) && |
1259 | "Vector type is unsupported for lasx!" ); |
1260 | assert(V1.getSimpleValueType() == V2.getSimpleValueType() && |
1261 | "Two operands have different types!" ); |
1262 | assert(VT.getVectorNumElements() == Mask.size() && |
1263 | "Unexpected mask size for shuffle!" ); |
1264 | assert(Mask.size() % 2 == 0 && "Expected even mask size." ); |
1265 | assert(Mask.size() >= 4 && "Mask size is less than 4." ); |
1266 | |
1267 | // Canonicalize non-cross-lane shuffle vectors.
1268 | SmallVector<int> NewMask(Mask); |
1269 | canonicalizeShuffleVectorByLane(DL, Mask: NewMask, VT, V1, V2, DAG); |
1270 | |
1271 | SDValue Result; |
1272 | // TODO: Add more comparison patterns. |
1273 | if (V2.isUndef()) { |
1274 | if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask: NewMask, VT, V1, V2, DAG))) |
1275 | return Result; |
1276 | if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask: NewMask, VT, V1, V2, DAG))) |
1277 | return Result; |
1278 | |
1279 | // TODO: The commented-out assignment below may be enabled in the future to
1280 | // better match the pattern for instruction selection.
1281 | /* V2 = V1; */ |
1282 | } |
1283 | |
1284 | // For better performance, it is recommended not to change the order of the
1285 | // following pattern comparisons.
1286 | if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask: NewMask, VT, V1, V2, DAG))) |
1287 | return Result; |
1288 | if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask: NewMask, VT, V1, V2, DAG))) |
1289 | return Result; |
1290 | if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask: NewMask, VT, V1, V2, DAG))) |
1291 | return Result; |
1292 | if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask: NewMask, VT, V1, V2, DAG))) |
1293 | return Result; |
1294 | if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask: NewMask, VT, V1, V2, DAG))) |
1295 | return Result; |
1296 | if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask: NewMask, VT, V1, V2, DAG))) |
1297 | return Result; |
1298 | if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, Mask: NewMask, VT, V1, V2, DAG))) |
1299 | return Result; |
1300 | |
1301 | return SDValue(); |
1302 | } |
1303 | |
1304 | SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, |
1305 | SelectionDAG &DAG) const { |
1306 | ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Val&: Op); |
1307 | ArrayRef<int> OrigMask = SVOp->getMask(); |
1308 | SDValue V1 = Op.getOperand(i: 0); |
1309 | SDValue V2 = Op.getOperand(i: 1); |
1310 | MVT VT = Op.getSimpleValueType(); |
1311 | int NumElements = VT.getVectorNumElements(); |
1312 | SDLoc DL(Op); |
1313 | |
1314 | bool V1IsUndef = V1.isUndef(); |
1315 | bool V2IsUndef = V2.isUndef(); |
1316 | if (V1IsUndef && V2IsUndef) |
1317 | return DAG.getUNDEF(VT); |
1318 | |
1319 | // When we create a shuffle node we put the UNDEF node as the second operand,
1320 | // but in some cases the first operand may be transformed to UNDEF. |
1321 | // In this case we should just commute the node. |
1322 | if (V1IsUndef) |
1323 | return DAG.getCommutedVectorShuffle(SV: *SVOp); |
1324 | |
1325 | // Check for non-undef masks pointing at an undef vector and make the masks |
1326 | // undef as well. This makes it easier to match the shuffle based solely on |
1327 | // the mask. |
1328 | if (V2IsUndef && |
1329 | any_of(Range&: OrigMask, P: [NumElements](int M) { return M >= NumElements; })) { |
1330 | SmallVector<int, 8> NewMask(OrigMask); |
1331 | for (int &M : NewMask) |
1332 | if (M >= NumElements) |
1333 | M = -1; |
1334 | return DAG.getVectorShuffle(VT, dl: DL, N1: V1, N2: V2, Mask: NewMask); |
1335 | } |
1336 | |
1337 | // Check for illegal shuffle mask element index values. |
1338 | int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2); |
1339 | (void)MaskUpperLimit; |
1340 | assert(llvm::all_of(OrigMask, |
1341 | [&](int M) { return -1 <= M && M < MaskUpperLimit; }) && |
1342 | "Out of bounds shuffle index" ); |
1343 | |
1344 | // For each vector width, delegate to a specialized lowering routine. |
1345 | if (VT.is128BitVector()) |
1346 | return lower128BitShuffle(DL, Mask: OrigMask, VT, V1, V2, DAG); |
1347 | |
1348 | if (VT.is256BitVector()) |
1349 | return lower256BitShuffle(DL, Mask: OrigMask, VT, V1, V2, DAG); |
1350 | |
1351 | return SDValue(); |
1352 | } |
1353 | |
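// Return true if Op is an UNDEF value or a constant (integer or floating-point)
// node.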
1354 | static bool isConstantOrUndef(const SDValue Op) { |
1355 | if (Op->isUndef()) |
1356 | return true; |
1357 | if (isa<ConstantSDNode>(Val: Op)) |
1358 | return true; |
1359 | if (isa<ConstantFPSDNode>(Val: Op)) |
1360 | return true; |
1361 | return false; |
1362 | } |
1363 | |
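// Return true if any operand of the BUILD_VECTOR node is a constant or UNDEF.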
1364 | static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) { |
1365 | for (unsigned i = 0; i < Op->getNumOperands(); ++i) |
1366 | if (isConstantOrUndef(Op: Op->getOperand(Num: i))) |
1367 | return true; |
1368 | return false; |
1369 | } |
1370 | |
1371 | SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op, |
1372 | SelectionDAG &DAG) const { |
1373 | BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Val&: Op); |
1374 | EVT ResTy = Op->getValueType(ResNo: 0); |
1375 | SDLoc DL(Op); |
1376 | APInt SplatValue, SplatUndef; |
1377 | unsigned SplatBitSize; |
1378 | bool HasAnyUndefs; |
1379 | bool Is128Vec = ResTy.is128BitVector(); |
1380 | bool Is256Vec = ResTy.is256BitVector(); |
1381 | |
1382 | if ((!Subtarget.hasExtLSX() || !Is128Vec) && |
1383 | (!Subtarget.hasExtLASX() || !Is256Vec)) |
1384 | return SDValue(); |
1385 | |
1386 | if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, |
1387 | /*MinSplatBits=*/8) && |
1388 | SplatBitSize <= 64) { |
1389 | // We can only cope with 8, 16, 32, or 64-bit elements. |
1390 | if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 && |
1391 | SplatBitSize != 64) |
1392 | return SDValue(); |
1393 | |
1394 | EVT ViaVecTy; |
1395 | |
1396 | switch (SplatBitSize) { |
1397 | default: |
1398 | return SDValue(); |
1399 | case 8: |
1400 | ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8; |
1401 | break; |
1402 | case 16: |
1403 | ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16; |
1404 | break; |
1405 | case 32: |
1406 | ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32; |
1407 | break; |
1408 | case 64: |
1409 | ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64; |
1410 | break; |
1411 | } |
1412 | |
1413 | // SelectionDAG::getConstant will promote SplatValue appropriately. |
1414 | SDValue Result = DAG.getConstant(Val: SplatValue, DL, VT: ViaVecTy); |
1415 | |
1416 | // Bitcast to the type we originally wanted. |
1417 | if (ViaVecTy != ResTy) |
1418 | Result = DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(Node), VT: ResTy, Operand: Result); |
1419 | |
1420 | return Result; |
1421 | } |
1422 | |
1423 | if (DAG.isSplatValue(V: Op, /*AllowUndefs=*/false)) |
1424 | return Op; |
1425 | |
1426 | if (!isConstantOrUndefBUILD_VECTOR(Op: Node)) { |
1427 | // Use INSERT_VECTOR_ELT operations rather than expand to stores. |
1428 | // The resulting code is the same length as the expansion, but it doesn't |
1429 | // use memory operations. |
1430 | EVT ResTy = Node->getValueType(ResNo: 0); |
1431 | |
1432 | assert(ResTy.isVector()); |
1433 | |
1434 | unsigned NumElts = ResTy.getVectorNumElements(); |
1435 | SDValue Vector = DAG.getUNDEF(VT: ResTy); |
1436 | for (unsigned i = 0; i < NumElts; ++i) { |
1437 | Vector = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: ResTy, N1: Vector, |
1438 | N2: Node->getOperand(Num: i), |
1439 | N3: DAG.getConstant(Val: i, DL, VT: Subtarget.getGRLenVT())); |
1440 | } |
1441 | return Vector; |
1442 | } |
1443 | |
1444 | return SDValue(); |
1445 | } |
1446 | |
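// EXTRACT_VECTOR_ELT is kept as-is when the index is a constant and either the
// element type is 32/64 bits wide or the element lies in the low half of the
// vector; otherwise an empty SDValue is returned to use the default expansion.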
1447 | SDValue |
1448 | LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
1449 | SelectionDAG &DAG) const { |
1450 | EVT VecTy = Op->getOperand(Num: 0)->getValueType(ResNo: 0); |
1451 | SDValue Idx = Op->getOperand(Num: 1); |
1452 | EVT EltTy = VecTy.getVectorElementType(); |
1453 | unsigned NumElts = VecTy.getVectorNumElements(); |
1454 | |
1455 | if (isa<ConstantSDNode>(Val: Idx) && |
1456 | (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 || |
1457 | EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2)) |
1458 | return Op; |
1459 | |
1460 | return SDValue(); |
1461 | } |
1462 | |
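// INSERT_VECTOR_ELT with a constant index can be matched directly; a
// non-constant index falls back to the default expansion.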
1463 | SDValue |
1464 | LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, |
1465 | SelectionDAG &DAG) const { |
1466 | if (isa<ConstantSDNode>(Val: Op->getOperand(Num: 2))) |
1467 | return Op; |
1468 | return SDValue(); |
1469 | } |
1470 | |
1471 | SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op, |
1472 | SelectionDAG &DAG) const { |
1473 | SDLoc DL(Op); |
1474 | SyncScope::ID FenceSSID = |
1475 | static_cast<SyncScope::ID>(Op.getConstantOperandVal(i: 2)); |
1476 | |
1477 | // singlethread fences only synchronize with signal handlers on the same |
1478 | // thread and thus only need to preserve instruction order, not actually |
1479 | // enforce memory ordering. |
1480 | if (FenceSSID == SyncScope::SingleThread) |
1481 | // MEMBARRIER is a compiler barrier; it codegens to a no-op. |
1482 | return DAG.getNode(Opcode: ISD::MEMBARRIER, DL, VT: MVT::Other, Operand: Op.getOperand(i: 0)); |
1483 | |
1484 | return Op; |
1485 | } |
1486 | |
1487 | SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op, |
1488 | SelectionDAG &DAG) const { |
1489 | |
1490 | if (Subtarget.is64Bit() && Op.getOperand(i: 2).getValueType() == MVT::i32) { |
1491 | DAG.getContext()->emitError( |
1492 | ErrorStr: "On LA64, only 64-bit registers can be written." ); |
1493 | return Op.getOperand(i: 0); |
1494 | } |
1495 | |
1496 | if (!Subtarget.is64Bit() && Op.getOperand(i: 2).getValueType() == MVT::i64) { |
1497 | DAG.getContext()->emitError( |
1498 | ErrorStr: "On LA32, only 32-bit registers can be written." ); |
1499 | return Op.getOperand(i: 0); |
1500 | } |
1501 | |
1502 | return Op; |
1503 | } |
1504 | |
1505 | SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op, |
1506 | SelectionDAG &DAG) const { |
1507 | if (!isa<ConstantSDNode>(Val: Op.getOperand(i: 0))) { |
1508 | DAG.getContext()->emitError(ErrorStr: "argument to '__builtin_frame_address' must " |
1509 | "be a constant integer" ); |
1510 | return SDValue(); |
1511 | } |
1512 | |
1513 | MachineFunction &MF = DAG.getMachineFunction(); |
1514 | MF.getFrameInfo().setFrameAddressIsTaken(true); |
1515 | Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF); |
1516 | EVT VT = Op.getValueType(); |
1517 | SDLoc DL(Op); |
1518 | SDValue FrameAddr = DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: DL, Reg: FrameReg, VT); |
1519 | unsigned Depth = Op.getConstantOperandVal(i: 0); |
1520 | int GRLenInBytes = Subtarget.getGRLen() / 8; |
1521 | |
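// Walk up the frame chain: the loop below assumes the caller's frame pointer
// is spilled at offset -2 * GRLenInBytes from the current frame pointer.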
1522 | while (Depth--) { |
1523 | int Offset = -(GRLenInBytes * 2); |
1524 | SDValue Ptr = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: FrameAddr, |
1525 | N2: DAG.getIntPtrConstant(Val: Offset, DL)); |
1526 | FrameAddr = |
1527 | DAG.getLoad(VT, dl: DL, Chain: DAG.getEntryNode(), Ptr, PtrInfo: MachinePointerInfo()); |
1528 | } |
1529 | return FrameAddr; |
1530 | } |
1531 | |
1532 | SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op, |
1533 | SelectionDAG &DAG) const { |
1534 | if (verifyReturnAddressArgumentIsConstant(Op, DAG)) |
1535 | return SDValue(); |
1536 | |
1537 | // Currently we only support lowering the return address for the current frame.
1538 | if (Op.getConstantOperandVal(i: 0) != 0) { |
1539 | DAG.getContext()->emitError( |
1540 | ErrorStr: "return address can only be determined for the current frame" ); |
1541 | return SDValue(); |
1542 | } |
1543 | |
1544 | MachineFunction &MF = DAG.getMachineFunction(); |
1545 | MF.getFrameInfo().setReturnAddressIsTaken(true); |
1546 | MVT GRLenVT = Subtarget.getGRLenVT(); |
1547 | |
1548 | // Return the value of the return address register, marking it an implicit |
1549 | // live-in. |
1550 | Register Reg = MF.addLiveIn(PReg: Subtarget.getRegisterInfo()->getRARegister(), |
1551 | RC: getRegClassFor(VT: GRLenVT)); |
1552 | return DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: SDLoc(Op), Reg, VT: GRLenVT); |
1553 | } |
1554 | |
1555 | SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op, |
1556 | SelectionDAG &DAG) const { |
1557 | MachineFunction &MF = DAG.getMachineFunction(); |
1558 | auto Size = Subtarget.getGRLen() / 8; |
1559 | auto FI = MF.getFrameInfo().CreateFixedObject(Size, SPOffset: 0, IsImmutable: false); |
1560 | return DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout())); |
1561 | } |
1562 | |
1563 | SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op, |
1564 | SelectionDAG &DAG) const { |
1565 | MachineFunction &MF = DAG.getMachineFunction(); |
1566 | auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>(); |
1567 | |
1568 | SDLoc DL(Op); |
1569 | SDValue FI = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(), |
1570 | VT: getPointerTy(DL: MF.getDataLayout())); |
1571 | |
1572 | // vastart just stores the address of the VarArgsFrameIndex slot into the |
1573 | // memory location argument. |
1574 | const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue(); |
1575 | return DAG.getStore(Chain: Op.getOperand(i: 0), dl: DL, Val: FI, Ptr: Op.getOperand(i: 1), |
1576 | PtrInfo: MachinePointerInfo(SV)); |
1577 | } |
1578 | |
1579 | SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op, |
1580 | SelectionDAG &DAG) const { |
1581 | assert(Subtarget.is64Bit() && Subtarget.hasBasicF() && |
1582 | !Subtarget.hasBasicD() && "unexpected target features" ); |
1583 | |
1584 | SDLoc DL(Op); |
1585 | SDValue Op0 = Op.getOperand(i: 0); |
1586 | if (Op0->getOpcode() == ISD::AND) { |
1587 | auto *C = dyn_cast<ConstantSDNode>(Val: Op0.getOperand(i: 1)); |
1588 | if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF)) |
1589 | return Op; |
1590 | } |
1591 | |
1592 | if (Op0->getOpcode() == LoongArchISD::BSTRPICK && |
1593 | Op0.getConstantOperandVal(i: 1) < UINT64_C(0X1F) && |
1594 | Op0.getConstantOperandVal(i: 2) == UINT64_C(0)) |
1595 | return Op; |
1596 | |
1597 | if (Op0.getOpcode() == ISD::AssertZext && |
1598 | dyn_cast<VTSDNode>(Val: Op0.getOperand(i: 1))->getVT().bitsLT(VT: MVT::i32)) |
1599 | return Op; |
1600 | |
1601 | EVT OpVT = Op0.getValueType(); |
1602 | EVT RetVT = Op.getValueType(); |
1603 | RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT); |
1604 | MakeLibCallOptions CallOptions; |
1605 | CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT, Value: true); |
1606 | SDValue Chain = SDValue(); |
1607 | SDValue Result; |
1608 | std::tie(args&: Result, args&: Chain) = |
1609 | makeLibCall(DAG, LC, RetVT: Op.getValueType(), Ops: Op0, CallOptions, dl: DL, Chain); |
1610 | return Result; |
1611 | } |
1612 | |
1613 | SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op, |
1614 | SelectionDAG &DAG) const { |
1615 | assert(Subtarget.is64Bit() && Subtarget.hasBasicF() && |
1616 | !Subtarget.hasBasicD() && "unexpected target features" ); |
1617 | |
1618 | SDLoc DL(Op); |
1619 | SDValue Op0 = Op.getOperand(i: 0); |
1620 | |
1621 | if ((Op0.getOpcode() == ISD::AssertSext || |
1622 | Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) && |
1623 | dyn_cast<VTSDNode>(Val: Op0.getOperand(i: 1))->getVT().bitsLE(VT: MVT::i32)) |
1624 | return Op; |
1625 | |
1626 | EVT OpVT = Op0.getValueType(); |
1627 | EVT RetVT = Op.getValueType(); |
1628 | RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT); |
1629 | MakeLibCallOptions CallOptions; |
1630 | CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT, Value: true); |
1631 | SDValue Chain = SDValue(); |
1632 | SDValue Result; |
1633 | std::tie(args&: Result, args&: Chain) = |
1634 | makeLibCall(DAG, LC, RetVT: Op.getValueType(), Ops: Op0, CallOptions, dl: DL, Chain); |
1635 | return Result; |
1636 | } |
1637 | |
1638 | SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op, |
1639 | SelectionDAG &DAG) const { |
1640 | |
1641 | SDLoc DL(Op); |
1642 | SDValue Op0 = Op.getOperand(i: 0); |
1643 | |
1644 | if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 && |
1645 | Subtarget.is64Bit() && Subtarget.hasBasicF()) { |
1646 | SDValue NewOp0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op0); |
1647 | return DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64, DL, VT: MVT::f32, Operand: NewOp0); |
1648 | } |
1649 | return Op; |
1650 | } |
1651 | |
1652 | SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op, |
1653 | SelectionDAG &DAG) const { |
1654 | |
1655 | SDLoc DL(Op); |
1656 | |
1657 | if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() && |
1658 | !Subtarget.hasBasicD()) { |
1659 | SDValue Dst = |
1660 | DAG.getNode(Opcode: LoongArchISD::FTINT, DL, VT: MVT::f32, Operand: Op.getOperand(i: 0)); |
1661 | return DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Dst); |
1662 | } |
1663 | |
1664 | EVT FPTy = EVT::getFloatingPointVT(BitWidth: Op.getValueSizeInBits()); |
1665 | SDValue Trunc = DAG.getNode(Opcode: LoongArchISD::FTINT, DL, VT: FPTy, Operand: Op.getOperand(i: 0)); |
1666 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: Op.getValueType(), Operand: Trunc); |
1667 | } |
1668 | |
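// getTargetNode overloads: wrap the various address-carrying SDNodes (global
// addresses, block addresses, constant pools, jump tables) in their
// corresponding target-specific nodes.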
1669 | static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty, |
1670 | SelectionDAG &DAG, unsigned Flags) { |
1671 | return DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: Flags); |
1672 | } |
1673 | |
1674 | static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty, |
1675 | SelectionDAG &DAG, unsigned Flags) { |
1676 | return DAG.getTargetBlockAddress(BA: N->getBlockAddress(), VT: Ty, Offset: N->getOffset(), |
1677 | TargetFlags: Flags); |
1678 | } |
1679 | |
1680 | static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty, |
1681 | SelectionDAG &DAG, unsigned Flags) { |
1682 | return DAG.getTargetConstantPool(C: N->getConstVal(), VT: Ty, Align: N->getAlign(), |
1683 | Offset: N->getOffset(), TargetFlags: Flags); |
1684 | } |
1685 | |
1686 | static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty, |
1687 | SelectionDAG &DAG, unsigned Flags) { |
1688 | return DAG.getTargetJumpTable(JTI: N->getIndex(), VT: Ty, TargetFlags: Flags); |
1689 | } |
1690 | |
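// Return the address of symbol-like node N under code model M. DSO-local
// symbols use a PC-relative pseudo (PseudoLA_PCREL*); other symbols are loaded
// from the GOT (PseudoLA_GOT*), with the *_LARGE variants used for the large
// code model.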
1691 | template <class NodeTy> |
1692 | SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, |
1693 | CodeModel::Model M, |
1694 | bool IsLocal) const { |
1695 | SDLoc DL(N); |
1696 | EVT Ty = getPointerTy(DL: DAG.getDataLayout()); |
1697 | SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); |
1698 | SDValue Load; |
1699 | |
1700 | switch (M) { |
1701 | default: |
1702 | report_fatal_error(reason: "Unsupported code model" ); |
1703 | |
1704 | case CodeModel::Large: { |
1705 | assert(Subtarget.is64Bit() && "Large code model requires LA64" ); |
1706 | |
1707 | // This is not actually used, but is necessary for successfully matching |
1708 | // the PseudoLA_*_LARGE nodes. |
1709 | SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty); |
1710 | if (IsLocal) { |
1711 | // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that |
1712 | // eventually becomes the desired 5-insn code sequence. |
1713 | Load = SDValue(DAG.getMachineNode(Opcode: LoongArch::PseudoLA_PCREL_LARGE, dl: DL, VT: Ty, |
1714 | Op1: Tmp, Op2: Addr), |
1715 | 0); |
1716 | } else { |
1717 | // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that |
1718 | // eventually becomes the desired 5-insn code sequence. |
1719 | Load = SDValue( |
1720 | DAG.getMachineNode(Opcode: LoongArch::PseudoLA_GOT_LARGE, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), |
1721 | 0); |
1722 | } |
1723 | break; |
1724 | } |
1725 | |
1726 | case CodeModel::Small: |
1727 | case CodeModel::Medium: |
1728 | if (IsLocal) { |
1729 | // This generates the pattern (PseudoLA_PCREL sym), which expands to |
1730 | // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)). |
1731 | Load = SDValue( |
1732 | DAG.getMachineNode(Opcode: LoongArch::PseudoLA_PCREL, dl: DL, VT: Ty, Op1: Addr), 0); |
1733 | } else { |
1734 | // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d |
1735 | // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)). |
1736 | Load = |
1737 | SDValue(DAG.getMachineNode(Opcode: LoongArch::PseudoLA_GOT, dl: DL, VT: Ty, Op1: Addr), 0); |
1738 | } |
1739 | } |
1740 | |
1741 | if (!IsLocal) { |
1742 | // Mark the load instruction as invariant to enable hoisting in MachineLICM. |
1743 | MachineFunction &MF = DAG.getMachineFunction(); |
1744 | MachineMemOperand *MemOp = MF.getMachineMemOperand( |
1745 | PtrInfo: MachinePointerInfo::getGOT(MF), |
1746 | f: MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | |
1747 | MachineMemOperand::MOInvariant, |
1748 | MemTy: LLT(Ty.getSimpleVT()), base_alignment: Align(Ty.getFixedSizeInBits() / 8)); |
1749 | DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Load.getNode()), NewMemRefs: {MemOp}); |
1750 | } |
1751 | |
1752 | return Load; |
1753 | } |
1754 | |
1755 | SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op, |
1756 | SelectionDAG &DAG) const { |
1757 | return getAddr(N: cast<BlockAddressSDNode>(Val&: Op), DAG, |
1758 | M: DAG.getTarget().getCodeModel()); |
1759 | } |
1760 | |
1761 | SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op, |
1762 | SelectionDAG &DAG) const { |
1763 | return getAddr(N: cast<JumpTableSDNode>(Val&: Op), DAG, |
1764 | M: DAG.getTarget().getCodeModel()); |
1765 | } |
1766 | |
1767 | SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op, |
1768 | SelectionDAG &DAG) const { |
1769 | return getAddr(N: cast<ConstantPoolSDNode>(Val&: Op), DAG, |
1770 | M: DAG.getTarget().getCodeModel()); |
1771 | } |
1772 | |
1773 | SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op, |
1774 | SelectionDAG &DAG) const { |
1775 | GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Val&: Op); |
1776 | assert(N->getOffset() == 0 && "unexpected offset in global node" ); |
1777 | auto CM = DAG.getTarget().getCodeModel(); |
1778 | const GlobalValue *GV = N->getGlobal(); |
1779 | |
1780 | if (GV->isDSOLocal() && isa<GlobalVariable>(Val: GV)) { |
1781 | if (auto GCM = dyn_cast<GlobalVariable>(Val: GV)->getCodeModel()) |
1782 | CM = *GCM; |
1783 | } |
1784 | |
1785 | return getAddr(N, DAG, M: CM, IsLocal: GV->isDSOLocal()); |
1786 | } |
1787 | |
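// Lower a TLS address in the initial-exec or local-exec model: materialize the
// TLS offset with the given pseudo (via the GOT when UseGOT is set) and add the
// thread pointer ($tp, i.e. R2).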
1788 | SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, |
1789 | SelectionDAG &DAG, |
1790 | unsigned Opc, bool UseGOT, |
1791 | bool Large) const { |
1792 | SDLoc DL(N); |
1793 | EVT Ty = getPointerTy(DL: DAG.getDataLayout()); |
1794 | MVT GRLenVT = Subtarget.getGRLenVT(); |
1795 | |
1796 | // This is not actually used, but is necessary for successfully matching the |
1797 | // PseudoLA_*_LARGE nodes. |
1798 | SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty); |
1799 | SDValue Addr = DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: 0); |
1800 | SDValue Offset = Large |
1801 | ? SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), 0) |
1802 | : SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Addr), 0); |
1803 | if (UseGOT) { |
1804 | // Mark the load instruction as invariant to enable hoisting in MachineLICM. |
1805 | MachineFunction &MF = DAG.getMachineFunction(); |
1806 | MachineMemOperand *MemOp = MF.getMachineMemOperand( |
1807 | PtrInfo: MachinePointerInfo::getGOT(MF), |
1808 | f: MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | |
1809 | MachineMemOperand::MOInvariant, |
1810 | MemTy: LLT(Ty.getSimpleVT()), base_alignment: Align(Ty.getFixedSizeInBits() / 8)); |
1811 | DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Offset.getNode()), NewMemRefs: {MemOp}); |
1812 | } |
1813 | |
1814 | // Add the thread pointer. |
1815 | return DAG.getNode(Opcode: ISD::ADD, DL, VT: Ty, N1: Offset, |
1816 | N2: DAG.getRegister(Reg: LoongArch::R2, VT: GRLenVT)); |
1817 | } |
1818 | |
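// Lower a TLS address in the general-dynamic or local-dynamic model: compute
// the address of the GOT entry with the given pseudo and pass it to a call to
// __tls_get_addr.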
1819 | SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, |
1820 | SelectionDAG &DAG, |
1821 | unsigned Opc, |
1822 | bool Large) const { |
1823 | SDLoc DL(N); |
1824 | EVT Ty = getPointerTy(DL: DAG.getDataLayout()); |
1825 | IntegerType *CallTy = Type::getIntNTy(C&: *DAG.getContext(), N: Ty.getSizeInBits()); |
1826 | |
1827 | // This is not actually used, but is necessary for successfully matching the |
1828 | // PseudoLA_*_LARGE nodes. |
1829 | SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty); |
1830 | |
1831 | // Use a PC-relative addressing mode to access the dynamic GOT address. |
1832 | SDValue Addr = DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: 0); |
1833 | SDValue Load = Large ? SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), 0) |
1834 | : SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Addr), 0); |
1835 | |
1836 | // Prepare argument list to generate call. |
1837 | ArgListTy Args; |
1838 | ArgListEntry Entry; |
1839 | Entry.Node = Load; |
1840 | Entry.Ty = CallTy; |
1841 | Args.push_back(x: Entry); |
1842 | |
1843 | // Setup call to __tls_get_addr. |
1844 | TargetLowering::CallLoweringInfo CLI(DAG); |
1845 | CLI.setDebugLoc(DL) |
1846 | .setChain(DAG.getEntryNode()) |
1847 | .setLibCallee(CC: CallingConv::C, ResultType: CallTy, |
1848 | Target: DAG.getExternalSymbol(Sym: "__tls_get_addr" , VT: Ty), |
1849 | ArgsList: std::move(Args)); |
1850 | |
1851 | return LowerCallTo(CLI).first; |
1852 | } |
1853 | |
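// Lower a TLS address using the TLS descriptor (TLSDESC) mechanism, i.e. the
// PseudoLA_TLS_DESC_PC{,_LARGE} pseudos.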
1854 | SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N, |
1855 | SelectionDAG &DAG, unsigned Opc, |
1856 | bool Large) const { |
1857 | SDLoc DL(N); |
1858 | EVT Ty = getPointerTy(DL: DAG.getDataLayout()); |
1859 | const GlobalValue *GV = N->getGlobal(); |
1860 | |
1861 | // This is not actually used, but is necessary for successfully matching the |
1862 | // PseudoLA_*_LARGE nodes. |
1863 | SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty); |
1864 | |
1865 | // Use a PC-relative addressing mode to access the global dynamic GOT address. |
1866 | // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym). |
1867 | SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: 0); |
1868 | return Large ? SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), 0) |
1869 | : SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Addr), 0); |
1870 | } |
1871 | |
1872 | SDValue |
1873 | LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op, |
1874 | SelectionDAG &DAG) const { |
1875 | if (DAG.getMachineFunction().getFunction().getCallingConv() == |
1876 | CallingConv::GHC) |
1877 | report_fatal_error(reason: "In GHC calling convention TLS is not supported" ); |
1878 | |
1879 | bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large; |
1880 | assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64" ); |
1881 | |
1882 | GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Val&: Op); |
1883 | assert(N->getOffset() == 0 && "unexpected offset in global node" ); |
1884 | |
1885 | if (DAG.getTarget().useEmulatedTLS()) |
1886 | report_fatal_error(reason: "the emulated TLS is prohibited" , |
1887 | /*GenCrashDiag=*/gen_crash_diag: false); |
1888 | |
1889 | bool IsDesc = DAG.getTarget().useTLSDESC(); |
1890 | |
1891 | switch (getTargetMachine().getTLSModel(GV: N->getGlobal())) { |
1892 | case TLSModel::GeneralDynamic: |
1893 | // In this model, application code calls the dynamic linker function |
1894 | // __tls_get_addr to locate TLS offsets into the dynamic thread vector at |
1895 | // runtime. |
1896 | if (!IsDesc) |
1897 | return getDynamicTLSAddr(N, DAG, |
1898 | Opc: Large ? LoongArch::PseudoLA_TLS_GD_LARGE |
1899 | : LoongArch::PseudoLA_TLS_GD, |
1900 | Large); |
1901 | break; |
1902 | case TLSModel::LocalDynamic: |
1903 | // Same as GeneralDynamic, except for assembly modifiers and relocation |
1904 | // records. |
1905 | if (!IsDesc) |
1906 | return getDynamicTLSAddr(N, DAG, |
1907 | Opc: Large ? LoongArch::PseudoLA_TLS_LD_LARGE |
1908 | : LoongArch::PseudoLA_TLS_LD, |
1909 | Large); |
1910 | break; |
1911 | case TLSModel::InitialExec: |
1912 | // This model uses the GOT to resolve TLS offsets. |
1913 | return getStaticTLSAddr(N, DAG, |
1914 | Opc: Large ? LoongArch::PseudoLA_TLS_IE_LARGE |
1915 | : LoongArch::PseudoLA_TLS_IE, |
1916 | /*UseGOT=*/true, Large); |
1917 | case TLSModel::LocalExec: |
1918 | // This model is used when static linking as the TLS offsets are resolved |
1919 | // during program linking. |
1920 | // |
1921 | // This node doesn't need an extra argument for the large code model. |
1922 | return getStaticTLSAddr(N, DAG, Opc: LoongArch::PseudoLA_TLS_LE, |
1923 | /*UseGOT=*/false); |
1924 | } |
1925 | |
1926 | return getTLSDescAddr(N, DAG, |
1927 | Opc: Large ? LoongArch::PseudoLA_TLS_DESC_PC_LARGE |
1928 | : LoongArch::PseudoLA_TLS_DESC_PC, |
1929 | Large); |
1930 | } |
1931 | |
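// Check that the intrinsic operand at index ImmOp is an immediate that fits in
// N bits (signed when IsSigned). On failure, emit a diagnostic and return an
// UNDEF of the result type; on success, return an empty SDValue so lowering can
// continue.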
1932 | template <unsigned N> |
1933 | static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, |
1934 | SelectionDAG &DAG, bool IsSigned = false) { |
1935 | auto *CImm = cast<ConstantSDNode>(Val: Op->getOperand(Num: ImmOp)); |
1936 | // Check the ImmArg. |
1937 | if ((IsSigned && !isInt<N>(CImm->getSExtValue())) || |
1938 | (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) { |
1939 | DAG.getContext()->emitError(ErrorStr: Op->getOperationName(G: 0) + |
1940 | ": argument out of range." ); |
1941 | return DAG.getNode(Opcode: ISD::UNDEF, DL: SDLoc(Op), VT: Op.getValueType()); |
1942 | } |
1943 | return SDValue(); |
1944 | } |
1945 | |
1946 | SDValue |
1947 | LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, |
1948 | SelectionDAG &DAG) const { |
1949 | SDLoc DL(Op); |
1950 | switch (Op.getConstantOperandVal(i: 0)) { |
1951 | default: |
1952 | return SDValue(); // Don't custom lower most intrinsics. |
1953 | case Intrinsic::thread_pointer: { |
1954 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
1955 | return DAG.getRegister(Reg: LoongArch::R2, VT: PtrVT); |
1956 | } |
1957 | case Intrinsic::loongarch_lsx_vpickve2gr_d: |
1958 | case Intrinsic::loongarch_lsx_vpickve2gr_du: |
1959 | case Intrinsic::loongarch_lsx_vreplvei_d: |
1960 | case Intrinsic::loongarch_lasx_xvrepl128vei_d: |
1961 | return checkIntrinsicImmArg<1>(Op, ImmOp: 2, DAG); |
1962 | case Intrinsic::loongarch_lsx_vreplvei_w: |
1963 | case Intrinsic::loongarch_lasx_xvrepl128vei_w: |
1964 | case Intrinsic::loongarch_lasx_xvpickve2gr_d: |
1965 | case Intrinsic::loongarch_lasx_xvpickve2gr_du: |
1966 | case Intrinsic::loongarch_lasx_xvpickve_d: |
1967 | case Intrinsic::loongarch_lasx_xvpickve_d_f: |
1968 | return checkIntrinsicImmArg<2>(Op, ImmOp: 2, DAG); |
1969 | case Intrinsic::loongarch_lasx_xvinsve0_d: |
1970 | return checkIntrinsicImmArg<2>(Op, ImmOp: 3, DAG); |
1971 | case Intrinsic::loongarch_lsx_vsat_b: |
1972 | case Intrinsic::loongarch_lsx_vsat_bu: |
1973 | case Intrinsic::loongarch_lsx_vrotri_b: |
1974 | case Intrinsic::loongarch_lsx_vsllwil_h_b: |
1975 | case Intrinsic::loongarch_lsx_vsllwil_hu_bu: |
1976 | case Intrinsic::loongarch_lsx_vsrlri_b: |
1977 | case Intrinsic::loongarch_lsx_vsrari_b: |
1978 | case Intrinsic::loongarch_lsx_vreplvei_h: |
1979 | case Intrinsic::loongarch_lasx_xvsat_b: |
1980 | case Intrinsic::loongarch_lasx_xvsat_bu: |
1981 | case Intrinsic::loongarch_lasx_xvrotri_b: |
1982 | case Intrinsic::loongarch_lasx_xvsllwil_h_b: |
1983 | case Intrinsic::loongarch_lasx_xvsllwil_hu_bu: |
1984 | case Intrinsic::loongarch_lasx_xvsrlri_b: |
1985 | case Intrinsic::loongarch_lasx_xvsrari_b: |
1986 | case Intrinsic::loongarch_lasx_xvrepl128vei_h: |
1987 | case Intrinsic::loongarch_lasx_xvpickve_w: |
1988 | case Intrinsic::loongarch_lasx_xvpickve_w_f: |
1989 | return checkIntrinsicImmArg<3>(Op, ImmOp: 2, DAG); |
1990 | case Intrinsic::loongarch_lasx_xvinsve0_w: |
1991 | return checkIntrinsicImmArg<3>(Op, ImmOp: 3, DAG); |
1992 | case Intrinsic::loongarch_lsx_vsat_h: |
1993 | case Intrinsic::loongarch_lsx_vsat_hu: |
1994 | case Intrinsic::loongarch_lsx_vrotri_h: |
1995 | case Intrinsic::loongarch_lsx_vsllwil_w_h: |
1996 | case Intrinsic::loongarch_lsx_vsllwil_wu_hu: |
1997 | case Intrinsic::loongarch_lsx_vsrlri_h: |
1998 | case Intrinsic::loongarch_lsx_vsrari_h: |
1999 | case Intrinsic::loongarch_lsx_vreplvei_b: |
2000 | case Intrinsic::loongarch_lasx_xvsat_h: |
2001 | case Intrinsic::loongarch_lasx_xvsat_hu: |
2002 | case Intrinsic::loongarch_lasx_xvrotri_h: |
2003 | case Intrinsic::loongarch_lasx_xvsllwil_w_h: |
2004 | case Intrinsic::loongarch_lasx_xvsllwil_wu_hu: |
2005 | case Intrinsic::loongarch_lasx_xvsrlri_h: |
2006 | case Intrinsic::loongarch_lasx_xvsrari_h: |
2007 | case Intrinsic::loongarch_lasx_xvrepl128vei_b: |
2008 | return checkIntrinsicImmArg<4>(Op, ImmOp: 2, DAG); |
2009 | case Intrinsic::loongarch_lsx_vsrlni_b_h: |
2010 | case Intrinsic::loongarch_lsx_vsrani_b_h: |
2011 | case Intrinsic::loongarch_lsx_vsrlrni_b_h: |
2012 | case Intrinsic::loongarch_lsx_vsrarni_b_h: |
2013 | case Intrinsic::loongarch_lsx_vssrlni_b_h: |
2014 | case Intrinsic::loongarch_lsx_vssrani_b_h: |
2015 | case Intrinsic::loongarch_lsx_vssrlni_bu_h: |
2016 | case Intrinsic::loongarch_lsx_vssrani_bu_h: |
2017 | case Intrinsic::loongarch_lsx_vssrlrni_b_h: |
2018 | case Intrinsic::loongarch_lsx_vssrarni_b_h: |
2019 | case Intrinsic::loongarch_lsx_vssrlrni_bu_h: |
2020 | case Intrinsic::loongarch_lsx_vssrarni_bu_h: |
2021 | case Intrinsic::loongarch_lasx_xvsrlni_b_h: |
2022 | case Intrinsic::loongarch_lasx_xvsrani_b_h: |
2023 | case Intrinsic::loongarch_lasx_xvsrlrni_b_h: |
2024 | case Intrinsic::loongarch_lasx_xvsrarni_b_h: |
2025 | case Intrinsic::loongarch_lasx_xvssrlni_b_h: |
2026 | case Intrinsic::loongarch_lasx_xvssrani_b_h: |
2027 | case Intrinsic::loongarch_lasx_xvssrlni_bu_h: |
2028 | case Intrinsic::loongarch_lasx_xvssrani_bu_h: |
2029 | case Intrinsic::loongarch_lasx_xvssrlrni_b_h: |
2030 | case Intrinsic::loongarch_lasx_xvssrarni_b_h: |
2031 | case Intrinsic::loongarch_lasx_xvssrlrni_bu_h: |
2032 | case Intrinsic::loongarch_lasx_xvssrarni_bu_h: |
2033 | return checkIntrinsicImmArg<4>(Op, ImmOp: 3, DAG); |
2034 | case Intrinsic::loongarch_lsx_vsat_w: |
2035 | case Intrinsic::loongarch_lsx_vsat_wu: |
2036 | case Intrinsic::loongarch_lsx_vrotri_w: |
2037 | case Intrinsic::loongarch_lsx_vsllwil_d_w: |
2038 | case Intrinsic::loongarch_lsx_vsllwil_du_wu: |
2039 | case Intrinsic::loongarch_lsx_vsrlri_w: |
2040 | case Intrinsic::loongarch_lsx_vsrari_w: |
2041 | case Intrinsic::loongarch_lsx_vslei_bu: |
2042 | case Intrinsic::loongarch_lsx_vslei_hu: |
2043 | case Intrinsic::loongarch_lsx_vslei_wu: |
2044 | case Intrinsic::loongarch_lsx_vslei_du: |
2045 | case Intrinsic::loongarch_lsx_vslti_bu: |
2046 | case Intrinsic::loongarch_lsx_vslti_hu: |
2047 | case Intrinsic::loongarch_lsx_vslti_wu: |
2048 | case Intrinsic::loongarch_lsx_vslti_du: |
2049 | case Intrinsic::loongarch_lsx_vbsll_v: |
2050 | case Intrinsic::loongarch_lsx_vbsrl_v: |
2051 | case Intrinsic::loongarch_lasx_xvsat_w: |
2052 | case Intrinsic::loongarch_lasx_xvsat_wu: |
2053 | case Intrinsic::loongarch_lasx_xvrotri_w: |
2054 | case Intrinsic::loongarch_lasx_xvsllwil_d_w: |
2055 | case Intrinsic::loongarch_lasx_xvsllwil_du_wu: |
2056 | case Intrinsic::loongarch_lasx_xvsrlri_w: |
2057 | case Intrinsic::loongarch_lasx_xvsrari_w: |
2058 | case Intrinsic::loongarch_lasx_xvslei_bu: |
2059 | case Intrinsic::loongarch_lasx_xvslei_hu: |
2060 | case Intrinsic::loongarch_lasx_xvslei_wu: |
2061 | case Intrinsic::loongarch_lasx_xvslei_du: |
2062 | case Intrinsic::loongarch_lasx_xvslti_bu: |
2063 | case Intrinsic::loongarch_lasx_xvslti_hu: |
2064 | case Intrinsic::loongarch_lasx_xvslti_wu: |
2065 | case Intrinsic::loongarch_lasx_xvslti_du: |
2066 | case Intrinsic::loongarch_lasx_xvbsll_v: |
2067 | case Intrinsic::loongarch_lasx_xvbsrl_v: |
2068 | return checkIntrinsicImmArg<5>(Op, ImmOp: 2, DAG); |
2069 | case Intrinsic::loongarch_lsx_vseqi_b: |
2070 | case Intrinsic::loongarch_lsx_vseqi_h: |
2071 | case Intrinsic::loongarch_lsx_vseqi_w: |
2072 | case Intrinsic::loongarch_lsx_vseqi_d: |
2073 | case Intrinsic::loongarch_lsx_vslei_b: |
2074 | case Intrinsic::loongarch_lsx_vslei_h: |
2075 | case Intrinsic::loongarch_lsx_vslei_w: |
2076 | case Intrinsic::loongarch_lsx_vslei_d: |
2077 | case Intrinsic::loongarch_lsx_vslti_b: |
2078 | case Intrinsic::loongarch_lsx_vslti_h: |
2079 | case Intrinsic::loongarch_lsx_vslti_w: |
2080 | case Intrinsic::loongarch_lsx_vslti_d: |
2081 | case Intrinsic::loongarch_lasx_xvseqi_b: |
2082 | case Intrinsic::loongarch_lasx_xvseqi_h: |
2083 | case Intrinsic::loongarch_lasx_xvseqi_w: |
2084 | case Intrinsic::loongarch_lasx_xvseqi_d: |
2085 | case Intrinsic::loongarch_lasx_xvslei_b: |
2086 | case Intrinsic::loongarch_lasx_xvslei_h: |
2087 | case Intrinsic::loongarch_lasx_xvslei_w: |
2088 | case Intrinsic::loongarch_lasx_xvslei_d: |
2089 | case Intrinsic::loongarch_lasx_xvslti_b: |
2090 | case Intrinsic::loongarch_lasx_xvslti_h: |
2091 | case Intrinsic::loongarch_lasx_xvslti_w: |
2092 | case Intrinsic::loongarch_lasx_xvslti_d: |
2093 | return checkIntrinsicImmArg<5>(Op, ImmOp: 2, DAG, /*IsSigned=*/true); |
2094 | case Intrinsic::loongarch_lsx_vsrlni_h_w: |
2095 | case Intrinsic::loongarch_lsx_vsrani_h_w: |
2096 | case Intrinsic::loongarch_lsx_vsrlrni_h_w: |
2097 | case Intrinsic::loongarch_lsx_vsrarni_h_w: |
2098 | case Intrinsic::loongarch_lsx_vssrlni_h_w: |
2099 | case Intrinsic::loongarch_lsx_vssrani_h_w: |
2100 | case Intrinsic::loongarch_lsx_vssrlni_hu_w: |
2101 | case Intrinsic::loongarch_lsx_vssrani_hu_w: |
2102 | case Intrinsic::loongarch_lsx_vssrlrni_h_w: |
2103 | case Intrinsic::loongarch_lsx_vssrarni_h_w: |
2104 | case Intrinsic::loongarch_lsx_vssrlrni_hu_w: |
2105 | case Intrinsic::loongarch_lsx_vssrarni_hu_w: |
2106 | case Intrinsic::loongarch_lsx_vfrstpi_b: |
2107 | case Intrinsic::loongarch_lsx_vfrstpi_h: |
2108 | case Intrinsic::loongarch_lasx_xvsrlni_h_w: |
2109 | case Intrinsic::loongarch_lasx_xvsrani_h_w: |
2110 | case Intrinsic::loongarch_lasx_xvsrlrni_h_w: |
2111 | case Intrinsic::loongarch_lasx_xvsrarni_h_w: |
2112 | case Intrinsic::loongarch_lasx_xvssrlni_h_w: |
2113 | case Intrinsic::loongarch_lasx_xvssrani_h_w: |
2114 | case Intrinsic::loongarch_lasx_xvssrlni_hu_w: |
2115 | case Intrinsic::loongarch_lasx_xvssrani_hu_w: |
2116 | case Intrinsic::loongarch_lasx_xvssrlrni_h_w: |
2117 | case Intrinsic::loongarch_lasx_xvssrarni_h_w: |
2118 | case Intrinsic::loongarch_lasx_xvssrlrni_hu_w: |
2119 | case Intrinsic::loongarch_lasx_xvssrarni_hu_w: |
2120 | case Intrinsic::loongarch_lasx_xvfrstpi_b: |
2121 | case Intrinsic::loongarch_lasx_xvfrstpi_h: |
2122 | return checkIntrinsicImmArg<5>(Op, ImmOp: 3, DAG); |
2123 | case Intrinsic::loongarch_lsx_vsat_d: |
2124 | case Intrinsic::loongarch_lsx_vsat_du: |
2125 | case Intrinsic::loongarch_lsx_vrotri_d: |
2126 | case Intrinsic::loongarch_lsx_vsrlri_d: |
2127 | case Intrinsic::loongarch_lsx_vsrari_d: |
2128 | case Intrinsic::loongarch_lasx_xvsat_d: |
2129 | case Intrinsic::loongarch_lasx_xvsat_du: |
2130 | case Intrinsic::loongarch_lasx_xvrotri_d: |
2131 | case Intrinsic::loongarch_lasx_xvsrlri_d: |
2132 | case Intrinsic::loongarch_lasx_xvsrari_d: |
2133 | return checkIntrinsicImmArg<6>(Op, ImmOp: 2, DAG); |
2134 | case Intrinsic::loongarch_lsx_vsrlni_w_d: |
2135 | case Intrinsic::loongarch_lsx_vsrani_w_d: |
2136 | case Intrinsic::loongarch_lsx_vsrlrni_w_d: |
2137 | case Intrinsic::loongarch_lsx_vsrarni_w_d: |
2138 | case Intrinsic::loongarch_lsx_vssrlni_w_d: |
2139 | case Intrinsic::loongarch_lsx_vssrani_w_d: |
2140 | case Intrinsic::loongarch_lsx_vssrlni_wu_d: |
2141 | case Intrinsic::loongarch_lsx_vssrani_wu_d: |
2142 | case Intrinsic::loongarch_lsx_vssrlrni_w_d: |
2143 | case Intrinsic::loongarch_lsx_vssrarni_w_d: |
2144 | case Intrinsic::loongarch_lsx_vssrlrni_wu_d: |
2145 | case Intrinsic::loongarch_lsx_vssrarni_wu_d: |
2146 | case Intrinsic::loongarch_lasx_xvsrlni_w_d: |
2147 | case Intrinsic::loongarch_lasx_xvsrani_w_d: |
2148 | case Intrinsic::loongarch_lasx_xvsrlrni_w_d: |
2149 | case Intrinsic::loongarch_lasx_xvsrarni_w_d: |
2150 | case Intrinsic::loongarch_lasx_xvssrlni_w_d: |
2151 | case Intrinsic::loongarch_lasx_xvssrani_w_d: |
2152 | case Intrinsic::loongarch_lasx_xvssrlni_wu_d: |
2153 | case Intrinsic::loongarch_lasx_xvssrani_wu_d: |
2154 | case Intrinsic::loongarch_lasx_xvssrlrni_w_d: |
2155 | case Intrinsic::loongarch_lasx_xvssrarni_w_d: |
2156 | case Intrinsic::loongarch_lasx_xvssrlrni_wu_d: |
2157 | case Intrinsic::loongarch_lasx_xvssrarni_wu_d: |
2158 | return checkIntrinsicImmArg<6>(Op, ImmOp: 3, DAG); |
2159 | case Intrinsic::loongarch_lsx_vsrlni_d_q: |
2160 | case Intrinsic::loongarch_lsx_vsrani_d_q: |
2161 | case Intrinsic::loongarch_lsx_vsrlrni_d_q: |
2162 | case Intrinsic::loongarch_lsx_vsrarni_d_q: |
2163 | case Intrinsic::loongarch_lsx_vssrlni_d_q: |
2164 | case Intrinsic::loongarch_lsx_vssrani_d_q: |
2165 | case Intrinsic::loongarch_lsx_vssrlni_du_q: |
2166 | case Intrinsic::loongarch_lsx_vssrani_du_q: |
2167 | case Intrinsic::loongarch_lsx_vssrlrni_d_q: |
2168 | case Intrinsic::loongarch_lsx_vssrarni_d_q: |
2169 | case Intrinsic::loongarch_lsx_vssrlrni_du_q: |
2170 | case Intrinsic::loongarch_lsx_vssrarni_du_q: |
2171 | case Intrinsic::loongarch_lasx_xvsrlni_d_q: |
2172 | case Intrinsic::loongarch_lasx_xvsrani_d_q: |
2173 | case Intrinsic::loongarch_lasx_xvsrlrni_d_q: |
2174 | case Intrinsic::loongarch_lasx_xvsrarni_d_q: |
2175 | case Intrinsic::loongarch_lasx_xvssrlni_d_q: |
2176 | case Intrinsic::loongarch_lasx_xvssrani_d_q: |
2177 | case Intrinsic::loongarch_lasx_xvssrlni_du_q: |
2178 | case Intrinsic::loongarch_lasx_xvssrani_du_q: |
2179 | case Intrinsic::loongarch_lasx_xvssrlrni_d_q: |
2180 | case Intrinsic::loongarch_lasx_xvssrarni_d_q: |
2181 | case Intrinsic::loongarch_lasx_xvssrlrni_du_q: |
2182 | case Intrinsic::loongarch_lasx_xvssrarni_du_q: |
2183 | return checkIntrinsicImmArg<7>(Op, ImmOp: 3, DAG); |
2184 | case Intrinsic::loongarch_lsx_vnori_b: |
2185 | case Intrinsic::loongarch_lsx_vshuf4i_b: |
2186 | case Intrinsic::loongarch_lsx_vshuf4i_h: |
2187 | case Intrinsic::loongarch_lsx_vshuf4i_w: |
2188 | case Intrinsic::loongarch_lasx_xvnori_b: |
2189 | case Intrinsic::loongarch_lasx_xvshuf4i_b: |
2190 | case Intrinsic::loongarch_lasx_xvshuf4i_h: |
2191 | case Intrinsic::loongarch_lasx_xvshuf4i_w: |
2192 | case Intrinsic::loongarch_lasx_xvpermi_d: |
2193 | return checkIntrinsicImmArg<8>(Op, ImmOp: 2, DAG); |
2194 | case Intrinsic::loongarch_lsx_vshuf4i_d: |
2195 | case Intrinsic::loongarch_lsx_vpermi_w: |
2196 | case Intrinsic::loongarch_lsx_vbitseli_b: |
2197 | case Intrinsic::loongarch_lsx_vextrins_b: |
2198 | case Intrinsic::loongarch_lsx_vextrins_h: |
2199 | case Intrinsic::loongarch_lsx_vextrins_w: |
2200 | case Intrinsic::loongarch_lsx_vextrins_d: |
2201 | case Intrinsic::loongarch_lasx_xvshuf4i_d: |
2202 | case Intrinsic::loongarch_lasx_xvpermi_w: |
2203 | case Intrinsic::loongarch_lasx_xvpermi_q: |
2204 | case Intrinsic::loongarch_lasx_xvbitseli_b: |
2205 | case Intrinsic::loongarch_lasx_xvextrins_b: |
2206 | case Intrinsic::loongarch_lasx_xvextrins_h: |
2207 | case Intrinsic::loongarch_lasx_xvextrins_w: |
2208 | case Intrinsic::loongarch_lasx_xvextrins_d: |
2209 | return checkIntrinsicImmArg<8>(Op, ImmOp: 3, DAG); |
2210 | case Intrinsic::loongarch_lsx_vrepli_b: |
2211 | case Intrinsic::loongarch_lsx_vrepli_h: |
2212 | case Intrinsic::loongarch_lsx_vrepli_w: |
2213 | case Intrinsic::loongarch_lsx_vrepli_d: |
2214 | case Intrinsic::loongarch_lasx_xvrepli_b: |
2215 | case Intrinsic::loongarch_lasx_xvrepli_h: |
2216 | case Intrinsic::loongarch_lasx_xvrepli_w: |
2217 | case Intrinsic::loongarch_lasx_xvrepli_d: |
2218 | return checkIntrinsicImmArg<10>(Op, ImmOp: 1, DAG, /*IsSigned=*/true); |
2219 | case Intrinsic::loongarch_lsx_vldi: |
2220 | case Intrinsic::loongarch_lasx_xvldi: |
2221 | return checkIntrinsicImmArg<13>(Op, ImmOp: 1, DAG, /*IsSigned=*/true); |
2222 | } |
2223 | } |
2224 | |
2225 | // Helper function that emits an error message for intrinsics with a chain and
2226 | // returns the merge values of an UNDEF and the chain.
2227 | static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, |
2228 | StringRef ErrorMsg, |
2229 | SelectionDAG &DAG) { |
2230 | DAG.getContext()->emitError(ErrorStr: Op->getOperationName(G: 0) + ": " + ErrorMsg + "." ); |
2231 | return DAG.getMergeValues(Ops: {DAG.getUNDEF(VT: Op.getValueType()), Op.getOperand(i: 0)}, |
2232 | dl: SDLoc(Op)); |
2233 | } |
2234 | |
2235 | SDValue |
2236 | LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, |
2237 | SelectionDAG &DAG) const { |
2238 | SDLoc DL(Op); |
2239 | MVT GRLenVT = Subtarget.getGRLenVT(); |
2240 | EVT VT = Op.getValueType(); |
2241 | SDValue Chain = Op.getOperand(i: 0); |
2242 | const StringRef ErrorMsgOOR = "argument out of range" ; |
2243 | const StringRef ErrorMsgReqLA64 = "requires loongarch64" ; |
2244 | const StringRef ErrorMsgReqF = "requires basic 'f' target feature" ; |
2245 | |
2246 | switch (Op.getConstantOperandVal(i: 1)) { |
2247 | default: |
2248 | return Op; |
2249 | case Intrinsic::loongarch_crc_w_b_w: |
2250 | case Intrinsic::loongarch_crc_w_h_w: |
2251 | case Intrinsic::loongarch_crc_w_w_w: |
2252 | case Intrinsic::loongarch_crc_w_d_w: |
2253 | case Intrinsic::loongarch_crcc_w_b_w: |
2254 | case Intrinsic::loongarch_crcc_w_h_w: |
2255 | case Intrinsic::loongarch_crcc_w_w_w: |
2256 | case Intrinsic::loongarch_crcc_w_d_w: |
2257 | return emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG); |
2258 | case Intrinsic::loongarch_csrrd_w: |
2259 | case Intrinsic::loongarch_csrrd_d: { |
2260 | unsigned Imm = Op.getConstantOperandVal(i: 2); |
2261 | return !isUInt<14>(x: Imm) |
2262 | ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2263 | : DAG.getNode(Opcode: LoongArchISD::CSRRD, DL, ResultTys: {GRLenVT, MVT::Other}, |
2264 | Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
2265 | } |
2266 | case Intrinsic::loongarch_csrwr_w: |
2267 | case Intrinsic::loongarch_csrwr_d: { |
2268 | unsigned Imm = Op.getConstantOperandVal(i: 3); |
2269 | return !isUInt<14>(x: Imm) |
2270 | ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2271 | : DAG.getNode(Opcode: LoongArchISD::CSRWR, DL, ResultTys: {GRLenVT, MVT::Other}, |
2272 | Ops: {Chain, Op.getOperand(i: 2), |
2273 | DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
2274 | } |
2275 | case Intrinsic::loongarch_csrxchg_w: |
2276 | case Intrinsic::loongarch_csrxchg_d: { |
2277 | unsigned Imm = Op.getConstantOperandVal(i: 4); |
2278 | return !isUInt<14>(x: Imm) |
2279 | ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2280 | : DAG.getNode(Opcode: LoongArchISD::CSRXCHG, DL, ResultTys: {GRLenVT, MVT::Other}, |
2281 | Ops: {Chain, Op.getOperand(i: 2), Op.getOperand(i: 3), |
2282 | DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
2283 | } |
2284 | case Intrinsic::loongarch_iocsrrd_d: { |
2285 | return DAG.getNode( |
2286 | Opcode: LoongArchISD::IOCSRRD_D, DL, ResultTys: {GRLenVT, MVT::Other}, |
2287 | Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op.getOperand(i: 2))}); |
2288 | } |
2289 | #define IOCSRRD_CASE(NAME, NODE) \ |
2290 | case Intrinsic::loongarch_##NAME: { \ |
2291 | return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \ |
2292 | {Chain, Op.getOperand(2)}); \ |
2293 | } |
2294 | IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B); |
2295 | IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H); |
2296 | IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W); |
2297 | #undef IOCSRRD_CASE |
2298 | case Intrinsic::loongarch_cpucfg: { |
2299 | return DAG.getNode(Opcode: LoongArchISD::CPUCFG, DL, ResultTys: {GRLenVT, MVT::Other}, |
2300 | Ops: {Chain, Op.getOperand(i: 2)}); |
2301 | } |
2302 | case Intrinsic::loongarch_lddir_d: { |
2303 | unsigned Imm = Op.getConstantOperandVal(i: 3); |
2304 | return !isUInt<8>(x: Imm) |
2305 | ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2306 | : Op; |
2307 | } |
2308 | case Intrinsic::loongarch_movfcsr2gr: { |
2309 | if (!Subtarget.hasBasicF()) |
2310 | return emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgReqF, DAG); |
2311 | unsigned Imm = Op.getConstantOperandVal(i: 2); |
2312 | return !isUInt<2>(x: Imm) |
2313 | ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2314 | : DAG.getNode(Opcode: LoongArchISD::MOVFCSR2GR, DL, ResultTys: {VT, MVT::Other}, |
2315 | Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
2316 | } |
2317 | case Intrinsic::loongarch_lsx_vld: |
2318 | case Intrinsic::loongarch_lsx_vldrepl_b: |
2319 | case Intrinsic::loongarch_lasx_xvld: |
2320 | case Intrinsic::loongarch_lasx_xvldrepl_b: |
2321 | return !isInt<12>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue()) |
2322 | ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2323 | : SDValue(); |
2324 | case Intrinsic::loongarch_lsx_vldrepl_h: |
2325 | case Intrinsic::loongarch_lasx_xvldrepl_h: |
2326 | return !isShiftedInt<11, 1>( |
2327 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue()) |
2328 | ? emitIntrinsicWithChainErrorMessage( |
2329 | Op, ErrorMsg: "argument out of range or not a multiple of 2" , DAG) |
2330 | : SDValue(); |
2331 | case Intrinsic::loongarch_lsx_vldrepl_w: |
2332 | case Intrinsic::loongarch_lasx_xvldrepl_w: |
2333 | return !isShiftedInt<10, 2>( |
2334 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue()) |
2335 | ? emitIntrinsicWithChainErrorMessage( |
2336 | Op, ErrorMsg: "argument out of range or not a multiple of 4" , DAG) |
2337 | : SDValue(); |
2338 | case Intrinsic::loongarch_lsx_vldrepl_d: |
2339 | case Intrinsic::loongarch_lasx_xvldrepl_d: |
2340 | return !isShiftedInt<9, 3>( |
2341 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue()) |
2342 | ? emitIntrinsicWithChainErrorMessage( |
2343 | Op, ErrorMsg: "argument out of range or not a multiple of 8" , DAG) |
2344 | : SDValue(); |
2345 | } |
2346 | } |
2347 | |
2348 | // Helper function that emits an error message for intrinsics with a void return
2349 | // value and returns the chain.
2350 | static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, |
2351 | SelectionDAG &DAG) { |
2352 | |
2353 | DAG.getContext()->emitError(ErrorStr: Op->getOperationName(G: 0) + ": " + ErrorMsg + "." ); |
2354 | return Op.getOperand(i: 0); |
2355 | } |
2356 | |
2357 | SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, |
2358 | SelectionDAG &DAG) const { |
2359 | SDLoc DL(Op); |
2360 | MVT GRLenVT = Subtarget.getGRLenVT(); |
2361 | SDValue Chain = Op.getOperand(i: 0); |
2362 | uint64_t IntrinsicEnum = Op.getConstantOperandVal(i: 1); |
2363 | SDValue Op2 = Op.getOperand(i: 2); |
2364 | const StringRef ErrorMsgOOR = "argument out of range" ; |
2365 | const StringRef ErrorMsgReqLA64 = "requires loongarch64" ; |
2366 | const StringRef ErrorMsgReqLA32 = "requires loongarch32" ; |
2367 | const StringRef ErrorMsgReqF = "requires basic 'f' target feature" ; |
2368 | |
2369 | switch (IntrinsicEnum) { |
2370 | default: |
2371 | // TODO: Add more Intrinsics. |
2372 | return SDValue(); |
2373 | case Intrinsic::loongarch_cacop_d: |
2374 | case Intrinsic::loongarch_cacop_w: { |
2375 | if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit()) |
2376 | return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG); |
2377 | if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit()) |
2378 | return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA32, DAG); |
2379 | // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12) |
2380 | unsigned Imm1 = Op2->getAsZExtVal(); |
2381 | int Imm2 = cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue(); |
2382 | if (!isUInt<5>(x: Imm1) || !isInt<12>(x: Imm2)) |
2383 | return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG); |
2384 | return Op; |
2385 | } |
2386 | case Intrinsic::loongarch_dbar: { |
2387 | unsigned Imm = Op2->getAsZExtVal(); |
2388 | return !isUInt<15>(x: Imm) |
2389 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2390 | : DAG.getNode(Opcode: LoongArchISD::DBAR, DL, VT: MVT::Other, N1: Chain, |
2391 | N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT)); |
2392 | } |
2393 | case Intrinsic::loongarch_ibar: { |
2394 | unsigned Imm = Op2->getAsZExtVal(); |
2395 | return !isUInt<15>(x: Imm) |
2396 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2397 | : DAG.getNode(Opcode: LoongArchISD::IBAR, DL, VT: MVT::Other, N1: Chain, |
2398 | N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT)); |
2399 | } |
2400 | case Intrinsic::loongarch_break: { |
2401 | unsigned Imm = Op2->getAsZExtVal(); |
2402 | return !isUInt<15>(x: Imm) |
2403 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2404 | : DAG.getNode(Opcode: LoongArchISD::BREAK, DL, VT: MVT::Other, N1: Chain, |
2405 | N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT)); |
2406 | } |
2407 | case Intrinsic::loongarch_movgr2fcsr: { |
2408 | if (!Subtarget.hasBasicF()) |
2409 | return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqF, DAG); |
2410 | unsigned Imm = Op2->getAsZExtVal(); |
2411 | return !isUInt<2>(x: Imm) |
2412 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2413 | : DAG.getNode(Opcode: LoongArchISD::MOVGR2FCSR, DL, VT: MVT::Other, N1: Chain, |
2414 | N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT), |
2415 | N3: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: GRLenVT, |
2416 | Operand: Op.getOperand(i: 3))); |
2417 | } |
2418 | case Intrinsic::loongarch_syscall: { |
2419 | unsigned Imm = Op2->getAsZExtVal(); |
2420 | return !isUInt<15>(x: Imm) |
2421 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2422 | : DAG.getNode(Opcode: LoongArchISD::SYSCALL, DL, VT: MVT::Other, N1: Chain, |
2423 | N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT)); |
2424 | } |
2425 | #define IOCSRWR_CASE(NAME, NODE) \ |
2426 | case Intrinsic::loongarch_##NAME: { \ |
2427 | SDValue Op3 = Op.getOperand(3); \ |
2428 | return Subtarget.is64Bit() \ |
2429 | ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \ |
2430 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \ |
2431 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \ |
2432 | : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \ |
2433 | Op3); \ |
2434 | } |
2435 | IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B); |
2436 | IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H); |
2437 | IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W); |
2438 | #undef IOCSRWR_CASE |
2439 | case Intrinsic::loongarch_iocsrwr_d: { |
2440 | return !Subtarget.is64Bit() |
2441 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG) |
2442 | : DAG.getNode(Opcode: LoongArchISD::IOCSRWR_D, DL, VT: MVT::Other, N1: Chain, |
2443 | N2: Op2, |
2444 | N3: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, |
2445 | Operand: Op.getOperand(i: 3))); |
2446 | } |
2447 | #define ASRT_LE_GT_CASE(NAME) \ |
2448 | case Intrinsic::loongarch_##NAME: { \ |
2449 | return !Subtarget.is64Bit() \ |
2450 | ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \ |
2451 | : Op; \ |
2452 | } |
2453 | ASRT_LE_GT_CASE(asrtle_d) |
2454 | ASRT_LE_GT_CASE(asrtgt_d) |
2455 | #undef ASRT_LE_GT_CASE |
2456 | case Intrinsic::loongarch_ldpte_d: { |
2457 | unsigned Imm = Op.getConstantOperandVal(i: 3); |
2458 | return !Subtarget.is64Bit() |
2459 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG) |
2460 | : !isUInt<8>(x: Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2461 | : Op; |
2462 | } |
2463 | case Intrinsic::loongarch_lsx_vst: |
2464 | case Intrinsic::loongarch_lasx_xvst: |
2465 | return !isInt<12>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) |
2466 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2467 | : SDValue(); |
2468 | case Intrinsic::loongarch_lasx_xvstelm_b: |
2469 | return (!isInt<8>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
2470 | !isUInt<5>(x: Op.getConstantOperandVal(i: 5))) |
2471 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2472 | : SDValue(); |
2473 | case Intrinsic::loongarch_lsx_vstelm_b: |
2474 | return (!isInt<8>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
2475 | !isUInt<4>(x: Op.getConstantOperandVal(i: 5))) |
2476 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2477 | : SDValue(); |
2478 | case Intrinsic::loongarch_lasx_xvstelm_h: |
2479 | return (!isShiftedInt<8, 1>( |
2480 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
2481 | !isUInt<4>(x: Op.getConstantOperandVal(i: 5))) |
2482 | ? emitIntrinsicErrorMessage( |
2483 | Op, ErrorMsg: "argument out of range or not a multiple of 2" , DAG) |
2484 | : SDValue(); |
2485 | case Intrinsic::loongarch_lsx_vstelm_h: |
2486 | return (!isShiftedInt<8, 1>( |
2487 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
2488 | !isUInt<3>(x: Op.getConstantOperandVal(i: 5))) |
2489 | ? emitIntrinsicErrorMessage( |
2490 | Op, ErrorMsg: "argument out of range or not a multiple of 2" , DAG) |
2491 | : SDValue(); |
2492 | case Intrinsic::loongarch_lasx_xvstelm_w: |
2493 | return (!isShiftedInt<8, 2>( |
2494 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
2495 | !isUInt<3>(x: Op.getConstantOperandVal(i: 5))) |
2496 | ? emitIntrinsicErrorMessage( |
2497 | Op, ErrorMsg: "argument out of range or not a multiple of 4" , DAG) |
2498 | : SDValue(); |
2499 | case Intrinsic::loongarch_lsx_vstelm_w: |
2500 | return (!isShiftedInt<8, 2>( |
2501 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
2502 | !isUInt<2>(x: Op.getConstantOperandVal(i: 5))) |
2503 | ? emitIntrinsicErrorMessage( |
2504 | Op, ErrorMsg: "argument out of range or not a multiple of 4" , DAG) |
2505 | : SDValue(); |
2506 | case Intrinsic::loongarch_lasx_xvstelm_d: |
2507 | return (!isShiftedInt<8, 3>( |
2508 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
2509 | !isUInt<2>(x: Op.getConstantOperandVal(i: 5))) |
2510 | ? emitIntrinsicErrorMessage( |
2511 | Op, ErrorMsg: "argument out of range or not a multiple of 8" , DAG) |
2512 | : SDValue(); |
2513 | case Intrinsic::loongarch_lsx_vstelm_d: |
2514 | return (!isShiftedInt<8, 3>( |
2515 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
2516 | !isUInt<1>(x: Op.getConstantOperandVal(i: 5))) |
2517 | ? emitIntrinsicErrorMessage( |
2518 | Op, ErrorMsg: "argument out of range or not a multiple of 8" , DAG) |
2519 | : SDValue(); |
2520 | } |
2521 | } |
2522 | |
2523 | SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op, |
2524 | SelectionDAG &DAG) const { |
2525 | SDLoc DL(Op); |
2526 | SDValue Lo = Op.getOperand(i: 0); |
2527 | SDValue Hi = Op.getOperand(i: 1); |
2528 | SDValue Shamt = Op.getOperand(i: 2); |
2529 | EVT VT = Lo.getValueType(); |
2530 | |
2531 | // if Shamt-GRLen < 0: // Shamt < GRLen |
2532 | // Lo = Lo << Shamt |
2533 | // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt)) |
2534 | // else: |
2535 | // Lo = 0 |
2536 | // Hi = Lo << (Shamt-GRLen) |
2537 | |
2538 | SDValue Zero = DAG.getConstant(Val: 0, DL, VT); |
2539 | SDValue One = DAG.getConstant(Val: 1, DL, VT); |
2540 | SDValue MinusGRLen = DAG.getConstant(Val: -(int)Subtarget.getGRLen(), DL, VT); |
2541 | SDValue GRLenMinus1 = DAG.getConstant(Val: Subtarget.getGRLen() - 1, DL, VT); |
2542 | SDValue ShamtMinusGRLen = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shamt, N2: MinusGRLen); |
2543 | SDValue GRLenMinus1Shamt = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Shamt, N2: GRLenMinus1); |
2544 | |
2545 | SDValue LoTrue = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Lo, N2: Shamt); |
2546 | SDValue ShiftRight1Lo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo, N2: One); |
2547 | SDValue ShiftRightLo = |
2548 | DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: ShiftRight1Lo, N2: GRLenMinus1Shamt); |
2549 | SDValue ShiftLeftHi = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi, N2: Shamt); |
2550 | SDValue HiTrue = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShiftLeftHi, N2: ShiftRightLo); |
2551 | SDValue HiFalse = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Lo, N2: ShamtMinusGRLen); |
2552 | |
2553 | SDValue CC = DAG.getSetCC(DL, VT, LHS: ShamtMinusGRLen, RHS: Zero, Cond: ISD::SETLT); |
2554 | |
2555 | Lo = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: LoTrue, N3: Zero); |
2556 | Hi = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: HiTrue, N3: HiFalse); |
2557 | |
2558 | SDValue Parts[2] = {Lo, Hi}; |
2559 | return DAG.getMergeValues(Ops: Parts, dl: DL); |
2560 | } |
2561 | |
2562 | SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op, |
2563 | SelectionDAG &DAG, |
2564 | bool IsSRA) const { |
2565 | SDLoc DL(Op); |
2566 | SDValue Lo = Op.getOperand(i: 0); |
2567 | SDValue Hi = Op.getOperand(i: 1); |
2568 | SDValue Shamt = Op.getOperand(i: 2); |
2569 | EVT VT = Lo.getValueType(); |
2570 | |
2571 | // SRA expansion: |
2572 | // if Shamt-GRLen < 0: // Shamt < GRLen |
2573 | // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1)) |
2574 | // Hi = Hi >>s Shamt |
2575 | // else: |
2576 | // Lo = Hi >>s (Shamt-GRLen); |
2577 | // Hi = Hi >>s (GRLen-1) |
2578 | // |
2579 | // SRL expansion: |
2580 | // if Shamt-GRLen < 0: // Shamt < GRLen |
2581 | // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1)) |
2582 | // Hi = Hi >>u Shamt |
2583 | // else: |
2584 | // Lo = Hi >>u (Shamt-GRLen); |
2585 | // Hi = 0; |
2586 | |
2587 | unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL; |
2588 | |
2589 | SDValue Zero = DAG.getConstant(Val: 0, DL, VT); |
2590 | SDValue One = DAG.getConstant(Val: 1, DL, VT); |
2591 | SDValue MinusGRLen = DAG.getConstant(Val: -(int)Subtarget.getGRLen(), DL, VT); |
2592 | SDValue GRLenMinus1 = DAG.getConstant(Val: Subtarget.getGRLen() - 1, DL, VT); |
2593 | SDValue ShamtMinusGRLen = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shamt, N2: MinusGRLen); |
2594 | SDValue GRLenMinus1Shamt = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Shamt, N2: GRLenMinus1); |
2595 | |
2596 | SDValue ShiftRightLo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo, N2: Shamt); |
2597 | SDValue ShiftLeftHi1 = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi, N2: One); |
2598 | SDValue ShiftLeftHi = |
2599 | DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: ShiftLeftHi1, N2: GRLenMinus1Shamt); |
2600 | SDValue LoTrue = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShiftRightLo, N2: ShiftLeftHi); |
2601 | SDValue HiTrue = DAG.getNode(Opcode: ShiftRightOp, DL, VT, N1: Hi, N2: Shamt); |
2602 | SDValue LoFalse = DAG.getNode(Opcode: ShiftRightOp, DL, VT, N1: Hi, N2: ShamtMinusGRLen); |
2603 | SDValue HiFalse = |
2604 | IsSRA ? DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: Hi, N2: GRLenMinus1) : Zero; |
2605 | |
2606 | SDValue CC = DAG.getSetCC(DL, VT, LHS: ShamtMinusGRLen, RHS: Zero, Cond: ISD::SETLT); |
2607 | |
2608 | Lo = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: LoTrue, N3: LoFalse); |
2609 | Hi = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: HiTrue, N3: HiFalse); |
2610 | |
2611 | SDValue Parts[2] = {Lo, Hi}; |
2612 | return DAG.getMergeValues(Ops: Parts, dl: DL); |
2613 | } |
2614 | |
2615 | // Returns the opcode of the target-specific SDNode that implements the 32-bit |
2616 | // form of the given Opcode. |
2617 | static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) { |
2618 | switch (Opcode) { |
2619 | default: |
2620 | llvm_unreachable("Unexpected opcode" ); |
2621 | case ISD::UDIV: |
2622 | return LoongArchISD::DIV_WU; |
2623 | case ISD::UREM: |
2624 | return LoongArchISD::MOD_WU; |
2625 | case ISD::SHL: |
2626 | return LoongArchISD::SLL_W; |
2627 | case ISD::SRA: |
2628 | return LoongArchISD::SRA_W; |
2629 | case ISD::SRL: |
2630 | return LoongArchISD::SRL_W; |
2631 | case ISD::ROTL: |
2632 | case ISD::ROTR: |
2633 | return LoongArchISD::ROTR_W; |
2634 | case ISD::CTTZ: |
2635 | return LoongArchISD::CTZ_W; |
2636 | case ISD::CTLZ: |
2637 | return LoongArchISD::CLZ_W; |
2638 | } |
2639 | } |
2640 | |
2641 | // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG |
2642 | // node. Because i8/i16/i32 isn't a legal type for LA64, these operations would |
2643 | // otherwise be promoted to i64, making it difficult to select the |
// SLL_W/.../*W later on because the fact that the operation was originally of
2645 | // type i8/i16/i32 is lost. |
2646 | static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, |
2647 | unsigned ExtOpc = ISD::ANY_EXTEND) { |
2648 | SDLoc DL(N); |
2649 | LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(Opcode: N->getOpcode()); |
2650 | SDValue NewOp0, NewRes; |
2651 | |
2652 | switch (NumOp) { |
2653 | default: |
2654 | llvm_unreachable("Unexpected NumOp" ); |
2655 | case 1: { |
2656 | NewOp0 = DAG.getNode(Opcode: ExtOpc, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0)); |
2657 | NewRes = DAG.getNode(Opcode: WOpcode, DL, VT: MVT::i64, Operand: NewOp0); |
2658 | break; |
2659 | } |
2660 | case 2: { |
2661 | NewOp0 = DAG.getNode(Opcode: ExtOpc, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0)); |
2662 | SDValue NewOp1 = DAG.getNode(Opcode: ExtOpc, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1)); |
2663 | if (N->getOpcode() == ISD::ROTL) { |
2664 | SDValue TmpOp = DAG.getConstant(Val: 32, DL, VT: MVT::i64); |
2665 | NewOp1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: MVT::i64, N1: TmpOp, N2: NewOp1); |
2666 | } |
2667 | NewRes = DAG.getNode(Opcode: WOpcode, DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1); |
2668 | break; |
2669 | } |
  // TODO: Handle more NumOp.
2671 | } |
2672 | |
2673 | // ReplaceNodeResults requires we maintain the same type for the return |
2674 | // value. |
2675 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: NewRes); |
2676 | } |
2677 | |
// Converts the given 32-bit operation to an i64 operation with sign-extension
// semantics in order to reduce the number of sign-extension instructions.
2680 | static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) { |
2681 | SDLoc DL(N); |
2682 | SDValue NewOp0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0)); |
2683 | SDValue NewOp1 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1)); |
2684 | SDValue NewWOp = DAG.getNode(Opcode: N->getOpcode(), DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1); |
2685 | SDValue NewRes = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: MVT::i64, N1: NewWOp, |
2686 | N2: DAG.getValueType(MVT::i32)); |
2687 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: NewRes); |
2688 | } |
2689 | |
// Helper function that emits an error message for intrinsics with or without a
// chain and returns UNDEF (plus the incoming chain, if any) as the results.
2692 | static void emitErrorAndReplaceIntrinsicResults( |
2693 | SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG, |
2694 | StringRef ErrorMsg, bool WithChain = true) { |
2695 | DAG.getContext()->emitError(ErrorStr: N->getOperationName(G: 0) + ": " + ErrorMsg + "." ); |
2696 | Results.push_back(Elt: DAG.getUNDEF(VT: N->getValueType(ResNo: 0))); |
2697 | if (!WithChain) |
2698 | return; |
2699 | Results.push_back(Elt: N->getOperand(Num: 0)); |
2700 | } |
2701 | |
2702 | template <unsigned N> |
2703 | static void |
2704 | replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results, |
2705 | SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, |
2706 | unsigned ResOp) { |
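  // Extract element 'Imm' from the vector operand with a VPICK_{SEXT,ZEXT}_ELT
  // node (after checking that Imm fits in uimm<N>) and truncate the picked
  // GRLen-sized value back to the intrinsic's original result type.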
2707 | const StringRef ErrorMsgOOR = "argument out of range" ; |
2708 | unsigned Imm = Node->getConstantOperandVal(Num: 2); |
2709 | if (!isUInt<N>(Imm)) { |
2710 | emitErrorAndReplaceIntrinsicResults(N: Node, Results, DAG, ErrorMsg: ErrorMsgOOR, |
2711 | /*WithChain=*/false); |
2712 | return; |
2713 | } |
2714 | SDLoc DL(Node); |
2715 | SDValue Vec = Node->getOperand(Num: 1); |
2716 | |
2717 | SDValue PickElt = |
2718 | DAG.getNode(Opcode: ResOp, DL, VT: Subtarget.getGRLenVT(), N1: Vec, |
2719 | N2: DAG.getConstant(Val: Imm, DL, VT: Subtarget.getGRLenVT()), |
2720 | N3: DAG.getValueType(Vec.getValueType().getVectorElementType())); |
2721 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: Node->getValueType(ResNo: 0), |
2722 | Operand: PickElt.getValue(R: 0))); |
2723 | } |
2724 | |
2725 | static void replaceVecCondBranchResults(SDNode *N, |
2726 | SmallVectorImpl<SDValue> &Results, |
2727 | SelectionDAG &DAG, |
2728 | const LoongArchSubtarget &Subtarget, |
2729 | unsigned ResOp) { |
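  // Lower an LSX/LASX vector branch-condition intrinsic (bz/bnz family) to the
  // corresponding VALL_*/VANY_* node and truncate the GRLen-sized result back
  // to the intrinsic's original result type.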
2730 | SDLoc DL(N); |
2731 | SDValue Vec = N->getOperand(Num: 1); |
2732 | |
2733 | SDValue CB = DAG.getNode(Opcode: ResOp, DL, VT: Subtarget.getGRLenVT(), Operand: Vec); |
2734 | Results.push_back( |
2735 | Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: CB.getValue(R: 0))); |
2736 | } |
2737 | |
2738 | static void |
2739 | replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results, |
2740 | SelectionDAG &DAG, |
2741 | const LoongArchSubtarget &Subtarget) { |
2742 | switch (N->getConstantOperandVal(Num: 0)) { |
2743 | default: |
2744 | llvm_unreachable("Unexpected Intrinsic." ); |
2745 | case Intrinsic::loongarch_lsx_vpickve2gr_b: |
2746 | replaceVPICKVE2GRResults<4>(Node: N, Results, DAG, Subtarget, |
2747 | ResOp: LoongArchISD::VPICK_SEXT_ELT); |
2748 | break; |
2749 | case Intrinsic::loongarch_lsx_vpickve2gr_h: |
2750 | case Intrinsic::loongarch_lasx_xvpickve2gr_w: |
2751 | replaceVPICKVE2GRResults<3>(Node: N, Results, DAG, Subtarget, |
2752 | ResOp: LoongArchISD::VPICK_SEXT_ELT); |
2753 | break; |
2754 | case Intrinsic::loongarch_lsx_vpickve2gr_w: |
2755 | replaceVPICKVE2GRResults<2>(Node: N, Results, DAG, Subtarget, |
2756 | ResOp: LoongArchISD::VPICK_SEXT_ELT); |
2757 | break; |
2758 | case Intrinsic::loongarch_lsx_vpickve2gr_bu: |
2759 | replaceVPICKVE2GRResults<4>(Node: N, Results, DAG, Subtarget, |
2760 | ResOp: LoongArchISD::VPICK_ZEXT_ELT); |
2761 | break; |
2762 | case Intrinsic::loongarch_lsx_vpickve2gr_hu: |
2763 | case Intrinsic::loongarch_lasx_xvpickve2gr_wu: |
2764 | replaceVPICKVE2GRResults<3>(Node: N, Results, DAG, Subtarget, |
2765 | ResOp: LoongArchISD::VPICK_ZEXT_ELT); |
2766 | break; |
2767 | case Intrinsic::loongarch_lsx_vpickve2gr_wu: |
2768 | replaceVPICKVE2GRResults<2>(Node: N, Results, DAG, Subtarget, |
2769 | ResOp: LoongArchISD::VPICK_ZEXT_ELT); |
2770 | break; |
2771 | case Intrinsic::loongarch_lsx_bz_b: |
2772 | case Intrinsic::loongarch_lsx_bz_h: |
2773 | case Intrinsic::loongarch_lsx_bz_w: |
2774 | case Intrinsic::loongarch_lsx_bz_d: |
2775 | case Intrinsic::loongarch_lasx_xbz_b: |
2776 | case Intrinsic::loongarch_lasx_xbz_h: |
2777 | case Intrinsic::loongarch_lasx_xbz_w: |
2778 | case Intrinsic::loongarch_lasx_xbz_d: |
2779 | replaceVecCondBranchResults(N, Results, DAG, Subtarget, |
2780 | ResOp: LoongArchISD::VALL_ZERO); |
2781 | break; |
2782 | case Intrinsic::loongarch_lsx_bz_v: |
2783 | case Intrinsic::loongarch_lasx_xbz_v: |
2784 | replaceVecCondBranchResults(N, Results, DAG, Subtarget, |
2785 | ResOp: LoongArchISD::VANY_ZERO); |
2786 | break; |
2787 | case Intrinsic::loongarch_lsx_bnz_b: |
2788 | case Intrinsic::loongarch_lsx_bnz_h: |
2789 | case Intrinsic::loongarch_lsx_bnz_w: |
2790 | case Intrinsic::loongarch_lsx_bnz_d: |
2791 | case Intrinsic::loongarch_lasx_xbnz_b: |
2792 | case Intrinsic::loongarch_lasx_xbnz_h: |
2793 | case Intrinsic::loongarch_lasx_xbnz_w: |
2794 | case Intrinsic::loongarch_lasx_xbnz_d: |
2795 | replaceVecCondBranchResults(N, Results, DAG, Subtarget, |
2796 | ResOp: LoongArchISD::VALL_NONZERO); |
2797 | break; |
2798 | case Intrinsic::loongarch_lsx_bnz_v: |
2799 | case Intrinsic::loongarch_lasx_xbnz_v: |
2800 | replaceVecCondBranchResults(N, Results, DAG, Subtarget, |
2801 | ResOp: LoongArchISD::VANY_NONZERO); |
2802 | break; |
2803 | } |
2804 | } |
2805 | |
2806 | void LoongArchTargetLowering::ReplaceNodeResults( |
2807 | SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { |
2808 | SDLoc DL(N); |
2809 | EVT VT = N->getValueType(ResNo: 0); |
2810 | switch (N->getOpcode()) { |
2811 | default: |
2812 | llvm_unreachable("Don't know how to legalize this operation" ); |
2813 | case ISD::ADD: |
2814 | case ISD::SUB: |
2815 | assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && |
2816 | "Unexpected custom legalisation" ); |
2817 | Results.push_back(Elt: customLegalizeToWOpWithSExt(N, DAG)); |
2818 | break; |
2819 | case ISD::UDIV: |
2820 | case ISD::UREM: |
2821 | assert(VT == MVT::i32 && Subtarget.is64Bit() && |
2822 | "Unexpected custom legalisation" ); |
2823 | Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 2, ExtOpc: ISD::SIGN_EXTEND)); |
2824 | break; |
2825 | case ISD::SHL: |
2826 | case ISD::SRA: |
2827 | case ISD::SRL: |
2828 | assert(VT == MVT::i32 && Subtarget.is64Bit() && |
2829 | "Unexpected custom legalisation" ); |
2830 | if (N->getOperand(Num: 1).getOpcode() != ISD::Constant) { |
2831 | Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 2)); |
2832 | break; |
2833 | } |
2834 | break; |
2835 | case ISD::ROTL: |
2836 | case ISD::ROTR: |
2837 | assert(VT == MVT::i32 && Subtarget.is64Bit() && |
2838 | "Unexpected custom legalisation" ); |
2839 | Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 2)); |
2840 | break; |
2841 | case ISD::FP_TO_SINT: { |
2842 | assert(VT == MVT::i32 && Subtarget.is64Bit() && |
2843 | "Unexpected custom legalisation" ); |
2844 | SDValue Src = N->getOperand(Num: 0); |
2845 | EVT FVT = EVT::getFloatingPointVT(BitWidth: N->getValueSizeInBits(ResNo: 0)); |
2846 | if (getTypeAction(Context&: *DAG.getContext(), VT: Src.getValueType()) != |
2847 | TargetLowering::TypeSoftenFloat) { |
2848 | SDValue Dst = DAG.getNode(Opcode: LoongArchISD::FTINT, DL, VT: FVT, Operand: Src); |
2849 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Dst)); |
2850 | return; |
2851 | } |
2852 | // If the FP type needs to be softened, emit a library call using the 'si' |
2853 | // version. If we left it to default legalization we'd end up with 'di'. |
2854 | RTLIB::Libcall LC; |
2855 | LC = RTLIB::getFPTOSINT(OpVT: Src.getValueType(), RetVT: VT); |
2856 | MakeLibCallOptions CallOptions; |
2857 | EVT OpVT = Src.getValueType(); |
2858 | CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT: VT, Value: true); |
2859 | SDValue Chain = SDValue(); |
2860 | SDValue Result; |
2861 | std::tie(args&: Result, args&: Chain) = |
2862 | makeLibCall(DAG, LC, RetVT: VT, Ops: Src, CallOptions, dl: DL, Chain); |
2863 | Results.push_back(Elt: Result); |
2864 | break; |
2865 | } |
2866 | case ISD::BITCAST: { |
2867 | SDValue Src = N->getOperand(Num: 0); |
2868 | EVT SrcVT = Src.getValueType(); |
2869 | if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() && |
2870 | Subtarget.hasBasicF()) { |
2871 | SDValue Dst = |
2872 | DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Src); |
2873 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Dst)); |
2874 | } |
2875 | break; |
2876 | } |
2877 | case ISD::FP_TO_UINT: { |
2878 | assert(VT == MVT::i32 && Subtarget.is64Bit() && |
2879 | "Unexpected custom legalisation" ); |
2880 | auto &TLI = DAG.getTargetLoweringInfo(); |
2881 | SDValue Tmp1, Tmp2; |
2882 | TLI.expandFP_TO_UINT(N, Result&: Tmp1, Chain&: Tmp2, DAG); |
2883 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Tmp1)); |
2884 | break; |
2885 | } |
2886 | case ISD::BSWAP: { |
2887 | SDValue Src = N->getOperand(Num: 0); |
2888 | assert((VT == MVT::i16 || VT == MVT::i32) && |
2889 | "Unexpected custom legalization" ); |
2890 | MVT GRLenVT = Subtarget.getGRLenVT(); |
2891 | SDValue NewSrc = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: GRLenVT, Operand: Src); |
2892 | SDValue Tmp; |
2893 | switch (VT.getSizeInBits()) { |
2894 | default: |
2895 | llvm_unreachable("Unexpected operand width" ); |
2896 | case 16: |
2897 | Tmp = DAG.getNode(Opcode: LoongArchISD::REVB_2H, DL, VT: GRLenVT, Operand: NewSrc); |
2898 | break; |
2899 | case 32: |
      // Only LA64 will get here due to the size mismatch between VT and
      // GRLenVT; the LA32 lowering is defined directly in LoongArchInstrInfo.
2902 | Tmp = DAG.getNode(Opcode: LoongArchISD::REVB_2W, DL, VT: GRLenVT, Operand: NewSrc); |
2903 | break; |
2904 | } |
2905 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Tmp)); |
2906 | break; |
2907 | } |
2908 | case ISD::BITREVERSE: { |
2909 | SDValue Src = N->getOperand(Num: 0); |
2910 | assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) && |
2911 | "Unexpected custom legalization" ); |
2912 | MVT GRLenVT = Subtarget.getGRLenVT(); |
2913 | SDValue NewSrc = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: GRLenVT, Operand: Src); |
2914 | SDValue Tmp; |
2915 | switch (VT.getSizeInBits()) { |
2916 | default: |
2917 | llvm_unreachable("Unexpected operand width" ); |
2918 | case 8: |
2919 | Tmp = DAG.getNode(Opcode: LoongArchISD::BITREV_4B, DL, VT: GRLenVT, Operand: NewSrc); |
2920 | break; |
2921 | case 32: |
2922 | Tmp = DAG.getNode(Opcode: LoongArchISD::BITREV_W, DL, VT: GRLenVT, Operand: NewSrc); |
2923 | break; |
2924 | } |
2925 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Tmp)); |
2926 | break; |
2927 | } |
2928 | case ISD::CTLZ: |
2929 | case ISD::CTTZ: { |
2930 | assert(VT == MVT::i32 && Subtarget.is64Bit() && |
2931 | "Unexpected custom legalisation" ); |
2932 | Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 1)); |
2933 | break; |
2934 | } |
2935 | case ISD::INTRINSIC_W_CHAIN: { |
2936 | SDValue Chain = N->getOperand(Num: 0); |
2937 | SDValue Op2 = N->getOperand(Num: 2); |
2938 | MVT GRLenVT = Subtarget.getGRLenVT(); |
2939 | const StringRef ErrorMsgOOR = "argument out of range" ; |
2940 | const StringRef ErrorMsgReqLA64 = "requires loongarch64" ; |
2941 | const StringRef ErrorMsgReqF = "requires basic 'f' target feature" ; |
2942 | |
2943 | switch (N->getConstantOperandVal(Num: 1)) { |
2944 | default: |
2945 | llvm_unreachable("Unexpected Intrinsic." ); |
2946 | case Intrinsic::loongarch_movfcsr2gr: { |
2947 | if (!Subtarget.hasBasicF()) { |
2948 | emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgReqF); |
2949 | return; |
2950 | } |
2951 | unsigned Imm = Op2->getAsZExtVal(); |
2952 | if (!isUInt<2>(x: Imm)) { |
2953 | emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR); |
2954 | return; |
2955 | } |
2956 | SDValue MOVFCSR2GRResults = DAG.getNode( |
2957 | Opcode: LoongArchISD::MOVFCSR2GR, DL: SDLoc(N), ResultTys: {MVT::i64, MVT::Other}, |
2958 | Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
2959 | Results.push_back( |
2960 | Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: MOVFCSR2GRResults.getValue(R: 0))); |
2961 | Results.push_back(Elt: MOVFCSR2GRResults.getValue(R: 1)); |
2962 | break; |
2963 | } |
2964 | #define CRC_CASE_EXT_BINARYOP(NAME, NODE) \ |
2965 | case Intrinsic::loongarch_##NAME: { \ |
2966 | SDValue NODE = DAG.getNode( \ |
2967 | LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \ |
2968 | {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \ |
2969 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \ |
2970 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \ |
2971 | Results.push_back(NODE.getValue(1)); \ |
2972 | break; \ |
2973 | } |
2974 | CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W) |
2975 | CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W) |
2976 | CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W) |
2977 | CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W) |
2978 | CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W) |
2979 | CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W) |
2980 | #undef CRC_CASE_EXT_BINARYOP |
2981 | |
2982 | #define CRC_CASE_EXT_UNARYOP(NAME, NODE) \ |
2983 | case Intrinsic::loongarch_##NAME: { \ |
2984 | SDValue NODE = DAG.getNode( \ |
2985 | LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \ |
2986 | {Chain, Op2, \ |
2987 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \ |
2988 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \ |
2989 | Results.push_back(NODE.getValue(1)); \ |
2990 | break; \ |
2991 | } |
2992 | CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W) |
2993 | CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W) |
2994 | #undef CRC_CASE_EXT_UNARYOP |
2995 | #define CSR_CASE(ID) \ |
2996 | case Intrinsic::loongarch_##ID: { \ |
2997 | if (!Subtarget.is64Bit()) \ |
2998 | emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \ |
2999 | break; \ |
3000 | } |
3001 | CSR_CASE(csrrd_d); |
3002 | CSR_CASE(csrwr_d); |
3003 | CSR_CASE(csrxchg_d); |
3004 | CSR_CASE(iocsrrd_d); |
3005 | #undef CSR_CASE |
3006 | case Intrinsic::loongarch_csrrd_w: { |
3007 | unsigned Imm = Op2->getAsZExtVal(); |
3008 | if (!isUInt<14>(x: Imm)) { |
3009 | emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR); |
3010 | return; |
3011 | } |
3012 | SDValue CSRRDResults = |
3013 | DAG.getNode(Opcode: LoongArchISD::CSRRD, DL, ResultTys: {GRLenVT, MVT::Other}, |
3014 | Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
3015 | Results.push_back( |
3016 | Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRRDResults.getValue(R: 0))); |
3017 | Results.push_back(Elt: CSRRDResults.getValue(R: 1)); |
3018 | break; |
3019 | } |
3020 | case Intrinsic::loongarch_csrwr_w: { |
3021 | unsigned Imm = N->getConstantOperandVal(Num: 3); |
3022 | if (!isUInt<14>(x: Imm)) { |
3023 | emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR); |
3024 | return; |
3025 | } |
3026 | SDValue CSRWRResults = |
3027 | DAG.getNode(Opcode: LoongArchISD::CSRWR, DL, ResultTys: {GRLenVT, MVT::Other}, |
3028 | Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op2), |
3029 | DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
3030 | Results.push_back( |
3031 | Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRWRResults.getValue(R: 0))); |
3032 | Results.push_back(Elt: CSRWRResults.getValue(R: 1)); |
3033 | break; |
3034 | } |
3035 | case Intrinsic::loongarch_csrxchg_w: { |
3036 | unsigned Imm = N->getConstantOperandVal(Num: 4); |
3037 | if (!isUInt<14>(x: Imm)) { |
3038 | emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR); |
3039 | return; |
3040 | } |
3041 | SDValue CSRXCHGResults = DAG.getNode( |
3042 | Opcode: LoongArchISD::CSRXCHG, DL, ResultTys: {GRLenVT, MVT::Other}, |
3043 | Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op2), |
3044 | DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 3)), |
3045 | DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
3046 | Results.push_back( |
3047 | Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRXCHGResults.getValue(R: 0))); |
3048 | Results.push_back(Elt: CSRXCHGResults.getValue(R: 1)); |
3049 | break; |
3050 | } |
3051 | #define IOCSRRD_CASE(NAME, NODE) \ |
3052 | case Intrinsic::loongarch_##NAME: { \ |
3053 | SDValue IOCSRRDResults = \ |
3054 | DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \ |
3055 | {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \ |
3056 | Results.push_back( \ |
3057 | DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \ |
3058 | Results.push_back(IOCSRRDResults.getValue(1)); \ |
3059 | break; \ |
3060 | } |
3061 | IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B); |
3062 | IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H); |
3063 | IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W); |
3064 | #undef IOCSRRD_CASE |
3065 | case Intrinsic::loongarch_cpucfg: { |
3066 | SDValue CPUCFGResults = |
3067 | DAG.getNode(Opcode: LoongArchISD::CPUCFG, DL, ResultTys: {GRLenVT, MVT::Other}, |
3068 | Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op2)}); |
3069 | Results.push_back( |
3070 | Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CPUCFGResults.getValue(R: 0))); |
3071 | Results.push_back(Elt: CPUCFGResults.getValue(R: 1)); |
3072 | break; |
3073 | } |
3074 | case Intrinsic::loongarch_lddir_d: { |
3075 | if (!Subtarget.is64Bit()) { |
3076 | emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgReqLA64); |
3077 | return; |
3078 | } |
3079 | break; |
3080 | } |
3081 | } |
3082 | break; |
3083 | } |
3084 | case ISD::READ_REGISTER: { |
3085 | if (Subtarget.is64Bit()) |
3086 | DAG.getContext()->emitError( |
3087 | ErrorStr: "On LA64, only 64-bit registers can be read." ); |
3088 | else |
3089 | DAG.getContext()->emitError( |
3090 | ErrorStr: "On LA32, only 32-bit registers can be read." ); |
3091 | Results.push_back(Elt: DAG.getUNDEF(VT)); |
3092 | Results.push_back(Elt: N->getOperand(Num: 0)); |
3093 | break; |
3094 | } |
3095 | case ISD::INTRINSIC_WO_CHAIN: { |
3096 | replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget); |
3097 | break; |
3098 | } |
3099 | } |
3100 | } |
3101 | |
3102 | static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, |
3103 | TargetLowering::DAGCombinerInfo &DCI, |
3104 | const LoongArchSubtarget &Subtarget) { |
3105 | if (DCI.isBeforeLegalizeOps()) |
3106 | return SDValue(); |
3107 | |
3108 | SDValue FirstOperand = N->getOperand(Num: 0); |
3109 | SDValue SecondOperand = N->getOperand(Num: 1); |
3110 | unsigned FirstOperandOpc = FirstOperand.getOpcode(); |
3111 | EVT ValTy = N->getValueType(ResNo: 0); |
3112 | SDLoc DL(N); |
3113 | uint64_t lsb, msb; |
3114 | unsigned SMIdx, SMLen; |
3115 | ConstantSDNode *CN; |
3116 | SDValue NewOperand; |
3117 | MVT GRLenVT = Subtarget.getGRLenVT(); |
3118 | |
3119 | // Op's second operand must be a shifted mask. |
3120 | if (!(CN = dyn_cast<ConstantSDNode>(Val&: SecondOperand)) || |
3121 | !isShiftedMask_64(Value: CN->getZExtValue(), MaskIdx&: SMIdx, MaskLen&: SMLen)) |
3122 | return SDValue(); |
3123 | |
3124 | if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) { |
3125 | // Pattern match BSTRPICK. |
3126 | // $dst = and ((sra or srl) $src , lsb), (2**len - 1) |
3127 | // => BSTRPICK $dst, $src, msb, lsb |
3128 | // where msb = lsb + len - 1 |
3129 | |
3130 | // The second operand of the shift must be an immediate. |
3131 | if (!(CN = dyn_cast<ConstantSDNode>(Val: FirstOperand.getOperand(i: 1)))) |
3132 | return SDValue(); |
3133 | |
3134 | lsb = CN->getZExtValue(); |
3135 | |
3136 | // Return if the shifted mask does not start at bit 0 or the sum of its |
3137 | // length and lsb exceeds the word's size. |
3138 | if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits()) |
3139 | return SDValue(); |
3140 | |
3141 | NewOperand = FirstOperand.getOperand(i: 0); |
3142 | } else { |
3143 | // Pattern match BSTRPICK. |
3144 | // $dst = and $src, (2**len- 1) , if len > 12 |
3145 | // => BSTRPICK $dst, $src, msb, lsb |
3146 | // where lsb = 0 and msb = len - 1 |
3147 | |
3148 | // If the mask is <= 0xfff, andi can be used instead. |
3149 | if (CN->getZExtValue() <= 0xfff) |
3150 | return SDValue(); |
3151 | |
    // Return if the mask's MSB exceeds the word's size.
3153 | if (SMIdx + SMLen > ValTy.getSizeInBits()) |
3154 | return SDValue(); |
3155 | |
3156 | if (SMIdx > 0) { |
      // Omit if the constant has more than 2 uses. This is a conservative
      // decision. Whether it is a win depends on the HW microarchitecture.
      // However, it should always be better for 1 and 2 uses.
3160 | if (CN->use_size() > 2) |
3161 | return SDValue(); |
3162 | // Return if the constant can be composed by a single LU12I.W. |
3163 | if ((CN->getZExtValue() & 0xfff) == 0) |
3164 | return SDValue(); |
      // Return if the constant can be composed by a single ADDI with
      // the zero register.
3167 | if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0) |
3168 | return SDValue(); |
3169 | } |
3170 | |
3171 | lsb = SMIdx; |
3172 | NewOperand = FirstOperand; |
3173 | } |
3174 | |
3175 | msb = lsb + SMLen - 1; |
3176 | SDValue NR0 = DAG.getNode(Opcode: LoongArchISD::BSTRPICK, DL, VT: ValTy, N1: NewOperand, |
3177 | N2: DAG.getConstant(Val: msb, DL, VT: GRLenVT), |
3178 | N3: DAG.getConstant(Val: lsb, DL, VT: GRLenVT)); |
3179 | if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0) |
3180 | return NR0; |
3181 | // Try to optimize to |
3182 | // bstrpick $Rd, $Rs, msb, lsb |
3183 | // slli $Rd, $Rd, lsb |
3184 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: ValTy, N1: NR0, |
3185 | N2: DAG.getConstant(Val: lsb, DL, VT: GRLenVT)); |
3186 | } |
3187 | |
3188 | static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, |
3189 | TargetLowering::DAGCombinerInfo &DCI, |
3190 | const LoongArchSubtarget &Subtarget) { |
3191 | if (DCI.isBeforeLegalizeOps()) |
3192 | return SDValue(); |
3193 | |
3194 | // $dst = srl (and $src, Mask), Shamt |
3195 | // => |
3196 | // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt |
3197 | // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1 |
3198 | // |
3199 | |
3200 | SDValue FirstOperand = N->getOperand(Num: 0); |
3201 | ConstantSDNode *CN; |
3202 | EVT ValTy = N->getValueType(ResNo: 0); |
3203 | SDLoc DL(N); |
3204 | MVT GRLenVT = Subtarget.getGRLenVT(); |
3205 | unsigned MaskIdx, MaskLen; |
3206 | uint64_t Shamt; |
3207 | |
3208 | // The first operand must be an AND and the second operand of the AND must be |
3209 | // a shifted mask. |
3210 | if (FirstOperand.getOpcode() != ISD::AND || |
3211 | !(CN = dyn_cast<ConstantSDNode>(Val: FirstOperand.getOperand(i: 1))) || |
3212 | !isShiftedMask_64(Value: CN->getZExtValue(), MaskIdx, MaskLen)) |
3213 | return SDValue(); |
3214 | |
3215 | // The second operand (shift amount) must be an immediate. |
3216 | if (!(CN = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1)))) |
3217 | return SDValue(); |
3218 | |
3219 | Shamt = CN->getZExtValue(); |
3220 | if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1) |
3221 | return DAG.getNode(Opcode: LoongArchISD::BSTRPICK, DL, VT: ValTy, |
3222 | N1: FirstOperand->getOperand(Num: 0), |
3223 | N2: DAG.getConstant(Val: MaskIdx + MaskLen - 1, DL, VT: GRLenVT), |
3224 | N3: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT)); |
3225 | |
3226 | return SDValue(); |
3227 | } |
3228 | |
3229 | static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, |
3230 | TargetLowering::DAGCombinerInfo &DCI, |
3231 | const LoongArchSubtarget &Subtarget) { |
3232 | MVT GRLenVT = Subtarget.getGRLenVT(); |
3233 | EVT ValTy = N->getValueType(ResNo: 0); |
3234 | SDValue N0 = N->getOperand(Num: 0), N1 = N->getOperand(Num: 1); |
3235 | ConstantSDNode *CN0, *CN1; |
3236 | SDLoc DL(N); |
3237 | unsigned ValBits = ValTy.getSizeInBits(); |
3238 | unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1; |
3239 | unsigned Shamt; |
3240 | bool SwapAndRetried = false; |
3241 | |
3242 | if (DCI.isBeforeLegalizeOps()) |
3243 | return SDValue(); |
3244 | |
3245 | if (ValBits != 32 && ValBits != 64) |
3246 | return SDValue(); |
3247 | |
3248 | Retry: |
3249 | // 1st pattern to match BSTRINS: |
3250 | // R = or (and X, mask0), (and (shl Y, lsb), mask1) |
3251 | // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1 |
3252 | // => |
3253 | // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1) |
3254 | if (N0.getOpcode() == ISD::AND && |
3255 | (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) && |
3256 | isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) && |
3257 | N1.getOpcode() == ISD::AND && N1.getOperand(i: 0).getOpcode() == ISD::SHL && |
3258 | (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) && |
3259 | isShiftedMask_64(Value: CN1->getZExtValue(), MaskIdx&: MaskIdx1, MaskLen&: MaskLen1) && |
3260 | MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 && |
3261 | (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) && |
3262 | (Shamt = CN1->getZExtValue()) == MaskIdx0 && |
3263 | (MaskIdx0 + MaskLen0 <= ValBits)) { |
3264 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n" ); |
3265 | return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0), |
3266 | N2: N1.getOperand(i: 0).getOperand(i: 0), |
3267 | N3: DAG.getConstant(Val: (MaskIdx0 + MaskLen0 - 1), DL, VT: GRLenVT), |
3268 | N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT)); |
3269 | } |
3270 | |
3271 | // 2nd pattern to match BSTRINS: |
3272 | // R = or (and X, mask0), (shl (and Y, mask1), lsb) |
3273 | // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb) |
3274 | // => |
3275 | // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1) |
3276 | if (N0.getOpcode() == ISD::AND && |
3277 | (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) && |
3278 | isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) && |
3279 | N1.getOpcode() == ISD::SHL && N1.getOperand(i: 0).getOpcode() == ISD::AND && |
3280 | (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) && |
3281 | (Shamt = CN1->getZExtValue()) == MaskIdx0 && |
3282 | (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) && |
3283 | isShiftedMask_64(Value: CN1->getZExtValue(), MaskIdx&: MaskIdx1, MaskLen&: MaskLen1) && |
3284 | MaskLen0 == MaskLen1 && MaskIdx1 == 0 && |
3285 | (MaskIdx0 + MaskLen0 <= ValBits)) { |
3286 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n" ); |
3287 | return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0), |
3288 | N2: N1.getOperand(i: 0).getOperand(i: 0), |
3289 | N3: DAG.getConstant(Val: (MaskIdx0 + MaskLen0 - 1), DL, VT: GRLenVT), |
3290 | N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT)); |
3291 | } |
3292 | |
3293 | // 3rd pattern to match BSTRINS: |
3294 | // R = or (and X, mask0), (and Y, mask1) |
3295 | // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0 |
3296 | // => |
3297 | // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb |
3298 | // where msb = lsb + size - 1 |
3299 | if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND && |
3300 | (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) && |
3301 | isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) && |
3302 | (MaskIdx0 + MaskLen0 <= 64) && |
3303 | (CN1 = dyn_cast<ConstantSDNode>(Val: N1->getOperand(Num: 1))) && |
3304 | (CN1->getSExtValue() & CN0->getSExtValue()) == 0) { |
3305 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n" ); |
3306 | return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0), |
3307 | N2: DAG.getNode(Opcode: ISD::SRL, DL, VT: N1->getValueType(ResNo: 0), N1, |
3308 | N2: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT)), |
3309 | N3: DAG.getConstant(Val: ValBits == 32 |
3310 | ? (MaskIdx0 + (MaskLen0 & 31) - 1) |
3311 | : (MaskIdx0 + MaskLen0 - 1), |
3312 | DL, VT: GRLenVT), |
3313 | N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT)); |
3314 | } |
3315 | |
3316 | // 4th pattern to match BSTRINS: |
3317 | // R = or (and X, mask), (shl Y, shamt) |
3318 | // where mask = (2**shamt - 1) |
3319 | // => |
3320 | // R = BSTRINS X, Y, ValBits - 1, shamt |
3321 | // where ValBits = 32 or 64 |
3322 | if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL && |
3323 | (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) && |
3324 | isShiftedMask_64(Value: CN0->getZExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) && |
3325 | MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) && |
3326 | (Shamt = CN1->getZExtValue()) == MaskLen0 && |
3327 | (MaskIdx0 + MaskLen0 <= ValBits)) { |
3328 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n" ); |
3329 | return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0), |
3330 | N2: N1.getOperand(i: 0), |
3331 | N3: DAG.getConstant(Val: (ValBits - 1), DL, VT: GRLenVT), |
3332 | N4: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT)); |
3333 | } |
3334 | |
3335 | // 5th pattern to match BSTRINS: |
3336 | // R = or (and X, mask), const |
3337 | // where ~mask = (2**size - 1) << lsb, mask & const = 0 |
3338 | // => |
3339 | // R = BSTRINS X, (const >> lsb), msb, lsb |
3340 | // where msb = lsb + size - 1 |
3341 | if (N0.getOpcode() == ISD::AND && |
3342 | (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) && |
3343 | isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) && |
3344 | (CN1 = dyn_cast<ConstantSDNode>(Val&: N1)) && |
3345 | (CN1->getSExtValue() & CN0->getSExtValue()) == 0) { |
3346 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n" ); |
3347 | return DAG.getNode( |
3348 | Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0), |
3349 | N2: DAG.getConstant(Val: CN1->getSExtValue() >> MaskIdx0, DL, VT: ValTy), |
3350 | N3: DAG.getConstant(Val: ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1) |
3351 | : (MaskIdx0 + MaskLen0 - 1), |
3352 | DL, VT: GRLenVT), |
3353 | N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT)); |
3354 | } |
3355 | |
3356 | // 6th pattern. |
3357 | // a = b | ((c & mask) << shamt), where all positions in b to be overwritten |
3358 | // by the incoming bits are known to be zero. |
3359 | // => |
3360 | // a = BSTRINS b, c, shamt + MaskLen - 1, shamt |
3361 | // |
3362 | // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th |
3363 | // pattern is more common than the 1st. So we put the 1st before the 6th in |
3364 | // order to match as many nodes as possible. |
3365 | ConstantSDNode *CNMask, *CNShamt; |
3366 | unsigned MaskIdx, MaskLen; |
3367 | if (N1.getOpcode() == ISD::SHL && N1.getOperand(i: 0).getOpcode() == ISD::AND && |
3368 | (CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) && |
3369 | isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen) && |
3370 | MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) && |
3371 | CNShamt->getZExtValue() + MaskLen <= ValBits) { |
3372 | Shamt = CNShamt->getZExtValue(); |
3373 | APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt); |
3374 | if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) { |
3375 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n" ); |
3376 | return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0, |
3377 | N2: N1.getOperand(i: 0).getOperand(i: 0), |
3378 | N3: DAG.getConstant(Val: Shamt + MaskLen - 1, DL, VT: GRLenVT), |
3379 | N4: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT)); |
3380 | } |
3381 | } |
3382 | |
3383 | // 7th pattern. |
3384 | // a = b | ((c << shamt) & shifted_mask), where all positions in b to be |
3385 | // overwritten by the incoming bits are known to be zero. |
3386 | // => |
3387 | // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx |
3388 | // |
3389 | // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd |
3390 | // before the 7th in order to match as many nodes as possible. |
3391 | if (N1.getOpcode() == ISD::AND && |
3392 | (CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) && |
3393 | isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen) && |
3394 | N1.getOperand(i: 0).getOpcode() == ISD::SHL && |
3395 | (CNShamt = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) && |
3396 | CNShamt->getZExtValue() == MaskIdx) { |
3397 | APInt ShMask(ValBits, CNMask->getZExtValue()); |
3398 | if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) { |
3399 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n" ); |
3400 | return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0, |
3401 | N2: N1.getOperand(i: 0).getOperand(i: 0), |
3402 | N3: DAG.getConstant(Val: MaskIdx + MaskLen - 1, DL, VT: GRLenVT), |
3403 | N4: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT)); |
3404 | } |
3405 | } |
3406 | |
3407 | // (or a, b) and (or b, a) are equivalent, so swap the operands and retry. |
3408 | if (!SwapAndRetried) { |
3409 | std::swap(a&: N0, b&: N1); |
3410 | SwapAndRetried = true; |
3411 | goto Retry; |
3412 | } |
3413 | |
3414 | SwapAndRetried = false; |
3415 | Retry2: |
3416 | // 8th pattern. |
3417 | // a = b | (c & shifted_mask), where all positions in b to be overwritten by |
3418 | // the incoming bits are known to be zero. |
3419 | // => |
3420 | // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx |
3421 | // |
// Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
// we put it here in order to match as many nodes as possible or generate fewer
// instructions.
3425 | if (N1.getOpcode() == ISD::AND && |
3426 | (CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) && |
3427 | isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen)) { |
3428 | APInt ShMask(ValBits, CNMask->getZExtValue()); |
3429 | if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) { |
3430 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n" ); |
3431 | return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0, |
3432 | N2: DAG.getNode(Opcode: ISD::SRL, DL, VT: N1->getValueType(ResNo: 0), |
3433 | N1: N1->getOperand(Num: 0), |
3434 | N2: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT)), |
3435 | N3: DAG.getConstant(Val: MaskIdx + MaskLen - 1, DL, VT: GRLenVT), |
3436 | N4: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT)); |
3437 | } |
3438 | } |
3439 | // Swap N0/N1 and retry. |
3440 | if (!SwapAndRetried) { |
3441 | std::swap(a&: N0, b&: N1); |
3442 | SwapAndRetried = true; |
3443 | goto Retry2; |
3444 | } |
3445 | |
3446 | return SDValue(); |
3447 | } |
3448 | |
3449 | static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) { |
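  // Returns true if V is an i8/i16-sized load or an AssertSext/AssertZext of
  // an i8/i16 value, and reports the corresponding extension type via ExtType.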
3450 | ExtType = ISD::NON_EXTLOAD; |
3451 | |
3452 | switch (V.getNode()->getOpcode()) { |
3453 | case ISD::LOAD: { |
3454 | LoadSDNode *LoadNode = cast<LoadSDNode>(Val: V.getNode()); |
3455 | if ((LoadNode->getMemoryVT() == MVT::i8) || |
3456 | (LoadNode->getMemoryVT() == MVT::i16)) { |
3457 | ExtType = LoadNode->getExtensionType(); |
3458 | return true; |
3459 | } |
3460 | return false; |
3461 | } |
3462 | case ISD::AssertSext: { |
3463 | VTSDNode *TypeNode = cast<VTSDNode>(Val: V.getNode()->getOperand(Num: 1)); |
3464 | if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) { |
3465 | ExtType = ISD::SEXTLOAD; |
3466 | return true; |
3467 | } |
3468 | return false; |
3469 | } |
3470 | case ISD::AssertZext: { |
3471 | VTSDNode *TypeNode = cast<VTSDNode>(Val: V.getNode()->getOperand(Num: 1)); |
3472 | if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) { |
3473 | ExtType = ISD::ZEXTLOAD; |
3474 | return true; |
3475 | } |
3476 | return false; |
3477 | } |
3478 | default: |
3479 | return false; |
3480 | } |
3481 | |
3482 | return false; |
3483 | } |
3484 | |
3485 | // Eliminate redundant truncation and zero-extension nodes. |
3486 | // * Case 1: |
3487 | // +------------+ +------------+ +------------+ |
3488 | // | Input1 | | Input2 | | CC | |
3489 | // +------------+ +------------+ +------------+ |
3490 | // | | | |
3491 | // V V +----+ |
3492 | // +------------+ +------------+ | |
3493 | // | TRUNCATE | | TRUNCATE | | |
3494 | // +------------+ +------------+ | |
3495 | // | | | |
3496 | // V V | |
3497 | // +------------+ +------------+ | |
3498 | // | ZERO_EXT | | ZERO_EXT | | |
3499 | // +------------+ +------------+ | |
3500 | // | | | |
3501 | // | +-------------+ | |
3502 | // V V | | |
3503 | // +----------------+ | | |
3504 | // | AND | | | |
3505 | // +----------------+ | | |
3506 | // | | | |
3507 | // +---------------+ | | |
3508 | // | | | |
3509 | // V V V |
3510 | // +-------------+ |
3511 | // | CMP | |
3512 | // +-------------+ |
3513 | // * Case 2: |
3514 | // +------------+ +------------+ +-------------+ +------------+ +------------+ |
3515 | // | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC | |
3516 | // +------------+ +------------+ +-------------+ +------------+ +------------+ |
3517 | // | | | | | |
3518 | // V | | | | |
3519 | // +------------+ | | | | |
3520 | // | XOR |<---------------------+ | | |
3521 | // +------------+ | | | |
3522 | // | | | | |
3523 | // V V +---------------+ | |
3524 | // +------------+ +------------+ | | |
3525 | // | TRUNCATE | | TRUNCATE | | +-------------------------+ |
3526 | // +------------+ +------------+ | | |
3527 | // | | | | |
3528 | // V V | | |
3529 | // +------------+ +------------+ | | |
3530 | // | ZERO_EXT | | ZERO_EXT | | | |
3531 | // +------------+ +------------+ | | |
3532 | // | | | | |
3533 | // V V | | |
3534 | // +----------------+ | | |
3535 | // | AND | | | |
3536 | // +----------------+ | | |
3537 | // | | | |
3538 | // +---------------+ | | |
3539 | // | | | |
3540 | // V V V |
3541 | // +-------------+ |
3542 | // | CMP | |
3543 | // +-------------+ |
3544 | static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG, |
3545 | TargetLowering::DAGCombinerInfo &DCI, |
3546 | const LoongArchSubtarget &Subtarget) { |
3547 | ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get(); |
3548 | |
3549 | SDNode *AndNode = N->getOperand(Num: 0).getNode(); |
3550 | if (AndNode->getOpcode() != ISD::AND) |
3551 | return SDValue(); |
3552 | |
3553 | SDValue AndInputValue2 = AndNode->getOperand(Num: 1); |
3554 | if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND) |
3555 | return SDValue(); |
3556 | |
3557 | SDValue CmpInputValue = N->getOperand(Num: 1); |
3558 | SDValue AndInputValue1 = AndNode->getOperand(Num: 0); |
3559 | if (AndInputValue1.getOpcode() == ISD::XOR) { |
3560 | if (CC != ISD::SETEQ && CC != ISD::SETNE) |
3561 | return SDValue(); |
3562 | ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val: AndInputValue1.getOperand(i: 1)); |
3563 | if (!CN || CN->getSExtValue() != -1) |
3564 | return SDValue(); |
3565 | CN = dyn_cast<ConstantSDNode>(Val&: CmpInputValue); |
3566 | if (!CN || CN->getSExtValue() != 0) |
3567 | return SDValue(); |
3568 | AndInputValue1 = AndInputValue1.getOperand(i: 0); |
3569 | if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND) |
3570 | return SDValue(); |
3571 | } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) { |
3572 | if (AndInputValue2 != CmpInputValue) |
3573 | return SDValue(); |
3574 | } else { |
3575 | return SDValue(); |
3576 | } |
3577 | |
3578 | SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(Num: 0); |
3579 | if (TruncValue1.getOpcode() != ISD::TRUNCATE) |
3580 | return SDValue(); |
3581 | |
3582 | SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(Num: 0); |
3583 | if (TruncValue2.getOpcode() != ISD::TRUNCATE) |
3584 | return SDValue(); |
3585 | |
3586 | SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(Num: 0); |
3587 | SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(Num: 0); |
3588 | ISD::LoadExtType ExtType1; |
3589 | ISD::LoadExtType ExtType2; |
3590 | |
3591 | if (!checkValueWidth(V: TruncInputValue1, ExtType&: ExtType1) || |
3592 | !checkValueWidth(V: TruncInputValue2, ExtType&: ExtType2)) |
3593 | return SDValue(); |
3594 | |
3595 | if (TruncInputValue1->getValueType(ResNo: 0) != TruncInputValue2->getValueType(ResNo: 0) || |
3596 | AndNode->getValueType(ResNo: 0) != TruncInputValue1->getValueType(ResNo: 0)) |
3597 | return SDValue(); |
3598 | |
3599 | if ((ExtType2 != ISD::ZEXTLOAD) && |
3600 | ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD))) |
3601 | return SDValue(); |
3602 | |
  // These truncation and zero-extension nodes are not necessary; remove them.
3604 | SDValue NewAnd = DAG.getNode(Opcode: ISD::AND, DL: SDLoc(N), VT: AndNode->getValueType(ResNo: 0), |
3605 | N1: TruncInputValue1, N2: TruncInputValue2); |
3606 | SDValue NewSetCC = |
3607 | DAG.getSetCC(DL: SDLoc(N), VT: N->getValueType(ResNo: 0), LHS: NewAnd, RHS: TruncInputValue2, Cond: CC); |
3608 | DAG.ReplaceAllUsesWith(From: N, To: NewSetCC.getNode()); |
3609 | return SDValue(N, 0); |
3610 | } |
3611 | |
3612 | // Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b. |
3613 | static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, |
3614 | TargetLowering::DAGCombinerInfo &DCI, |
3615 | const LoongArchSubtarget &Subtarget) { |
3616 | if (DCI.isBeforeLegalizeOps()) |
3617 | return SDValue(); |
3618 | |
3619 | SDValue Src = N->getOperand(Num: 0); |
3620 | if (Src.getOpcode() != LoongArchISD::REVB_2W) |
3621 | return SDValue(); |
3622 | |
3623 | return DAG.getNode(Opcode: LoongArchISD::BITREV_4B, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), |
3624 | Operand: Src.getOperand(i: 0)); |
3625 | } |
3626 | |
3627 | template <unsigned N> |
3628 | static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, |
3629 | SelectionDAG &DAG, |
3630 | const LoongArchSubtarget &Subtarget, |
3631 | bool IsSigned = false) { |
3632 | SDLoc DL(Node); |
3633 | auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: ImmOp)); |
3634 | // Check the ImmArg. |
3635 | if ((IsSigned && !isInt<N>(CImm->getSExtValue())) || |
3636 | (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) { |
3637 | DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) + |
3638 | ": argument out of range." ); |
3639 | return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: Subtarget.getGRLenVT()); |
3640 | } |
3641 | return DAG.getConstant(Val: CImm->getZExtValue(), DL, VT: Subtarget.getGRLenVT()); |
3642 | } |
3643 | |
3644 | template <unsigned N> |
3645 | static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, |
3646 | SelectionDAG &DAG, bool IsSigned = false) { |
3647 | SDLoc DL(Node); |
3648 | EVT ResTy = Node->getValueType(ResNo: 0); |
3649 | auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: ImmOp)); |
3650 | |
3651 | // Check the ImmArg. |
3652 | if ((IsSigned && !isInt<N>(CImm->getSExtValue())) || |
3653 | (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) { |
3654 | DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) + |
3655 | ": argument out of range." ); |
3656 | return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy); |
3657 | } |
3658 | return DAG.getConstant( |
3659 | Val: APInt(ResTy.getScalarType().getSizeInBits(), |
3660 | IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned), |
3661 | DL, VT: ResTy); |
3662 | } |
3663 | |
3664 | static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) { |
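  // Mask each element of the bit-index vector (operand 2) down to the range
  // [0, EltSizeInBits - 1], i.e. compute (Vec & (EltSizeInBits - 1)).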
3665 | SDLoc DL(Node); |
3666 | EVT ResTy = Node->getValueType(ResNo: 0); |
3667 | SDValue Vec = Node->getOperand(Num: 2); |
3668 | SDValue Mask = DAG.getConstant(Val: Vec.getScalarValueSizeInBits() - 1, DL, VT: ResTy); |
3669 | return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Vec, N2: Mask); |
3670 | } |
3671 | |
3672 | static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) { |
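  // Lower a vector bit-clear operation to generic nodes:
  //   Res = Op1 & ~(1 << (Op2 & (EltBits - 1)))
  // clearing, in each element of Op1, the bit selected by the corresponding
  // element of Op2.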
3673 | SDLoc DL(Node); |
3674 | EVT ResTy = Node->getValueType(ResNo: 0); |
3675 | SDValue One = DAG.getConstant(Val: 1, DL, VT: ResTy); |
3676 | SDValue Bit = |
3677 | DAG.getNode(Opcode: ISD::SHL, DL, VT: ResTy, N1: One, N2: truncateVecElts(Node, DAG)); |
3678 | |
3679 | return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Node->getOperand(Num: 1), |
3680 | N2: DAG.getNOT(DL, Val: Bit, VT: ResTy)); |
3681 | } |
3682 | |
3683 | template <unsigned N> |
3684 | static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) { |
3685 | SDLoc DL(Node); |
3686 | EVT ResTy = Node->getValueType(ResNo: 0); |
3687 | auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2)); |
3688 | // Check the unsigned ImmArg. |
3689 | if (!isUInt<N>(CImm->getZExtValue())) { |
3690 | DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) + |
3691 | ": argument out of range." ); |
3692 | return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy); |
3693 | } |
3694 | |
3695 | APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); |
3696 | SDValue Mask = DAG.getConstant(Val: ~BitImm, DL, VT: ResTy); |
3697 | |
3698 | return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Node->getOperand(Num: 1), N2: Mask); |
3699 | } |
3700 | |
3701 | template <unsigned N> |
3702 | static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) { |
3703 | SDLoc DL(Node); |
3704 | EVT ResTy = Node->getValueType(ResNo: 0); |
3705 | auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2)); |
3706 | // Check the unsigned ImmArg. |
3707 | if (!isUInt<N>(CImm->getZExtValue())) { |
3708 | DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) + |
3709 | ": argument out of range." ); |
3710 | return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy); |
3711 | } |
3712 | |
3713 | APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); |
3714 | SDValue BitImm = DAG.getConstant(Val: Imm, DL, VT: ResTy); |
3715 | return DAG.getNode(Opcode: ISD::OR, DL, VT: ResTy, N1: Node->getOperand(Num: 1), N2: BitImm); |
3716 | } |
3717 | |
3718 | template <unsigned N> |
3719 | static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) { |
3720 | SDLoc DL(Node); |
3721 | EVT ResTy = Node->getValueType(ResNo: 0); |
3722 | auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2)); |
3723 | // Check the unsigned ImmArg. |
3724 | if (!isUInt<N>(CImm->getZExtValue())) { |
3725 | DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) + |
3726 | ": argument out of range." ); |
3727 | return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy); |
3728 | } |
3729 | |
3730 | APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); |
3731 | SDValue BitImm = DAG.getConstant(Val: Imm, DL, VT: ResTy); |
3732 | return DAG.getNode(Opcode: ISD::XOR, DL, VT: ResTy, N1: Node->getOperand(Num: 1), N2: BitImm); |
3733 | } |
3734 | |
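// Combine side-effect-free LoongArch SIMD intrinsics into generic DAG nodes
// so the target-independent combines and legalization apply. For example,
// @llvm.loongarch.lsx.vadd.w becomes a plain ISD::ADD on v4i32, and the
// immediate forms first go through lowerVectorSplatImm to range-check and
// splat their immediate operand.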
3735 | static SDValue |
3736 | performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, |
3737 | TargetLowering::DAGCombinerInfo &DCI, |
3738 | const LoongArchSubtarget &Subtarget) { |
3739 | SDLoc DL(N); |
3740 | switch (N->getConstantOperandVal(Num: 0)) { |
3741 | default: |
3742 | break; |
3743 | case Intrinsic::loongarch_lsx_vadd_b: |
3744 | case Intrinsic::loongarch_lsx_vadd_h: |
3745 | case Intrinsic::loongarch_lsx_vadd_w: |
3746 | case Intrinsic::loongarch_lsx_vadd_d: |
3747 | case Intrinsic::loongarch_lasx_xvadd_b: |
3748 | case Intrinsic::loongarch_lasx_xvadd_h: |
3749 | case Intrinsic::loongarch_lasx_xvadd_w: |
3750 | case Intrinsic::loongarch_lasx_xvadd_d: |
3751 | return DAG.getNode(Opcode: ISD::ADD, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3752 | N2: N->getOperand(Num: 2)); |
3753 | case Intrinsic::loongarch_lsx_vaddi_bu: |
3754 | case Intrinsic::loongarch_lsx_vaddi_hu: |
3755 | case Intrinsic::loongarch_lsx_vaddi_wu: |
3756 | case Intrinsic::loongarch_lsx_vaddi_du: |
3757 | case Intrinsic::loongarch_lasx_xvaddi_bu: |
3758 | case Intrinsic::loongarch_lasx_xvaddi_hu: |
3759 | case Intrinsic::loongarch_lasx_xvaddi_wu: |
3760 | case Intrinsic::loongarch_lasx_xvaddi_du: |
3761 | return DAG.getNode(Opcode: ISD::ADD, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3762 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
3763 | case Intrinsic::loongarch_lsx_vsub_b: |
3764 | case Intrinsic::loongarch_lsx_vsub_h: |
3765 | case Intrinsic::loongarch_lsx_vsub_w: |
3766 | case Intrinsic::loongarch_lsx_vsub_d: |
3767 | case Intrinsic::loongarch_lasx_xvsub_b: |
3768 | case Intrinsic::loongarch_lasx_xvsub_h: |
3769 | case Intrinsic::loongarch_lasx_xvsub_w: |
3770 | case Intrinsic::loongarch_lasx_xvsub_d: |
3771 | return DAG.getNode(Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3772 | N2: N->getOperand(Num: 2)); |
3773 | case Intrinsic::loongarch_lsx_vsubi_bu: |
3774 | case Intrinsic::loongarch_lsx_vsubi_hu: |
3775 | case Intrinsic::loongarch_lsx_vsubi_wu: |
3776 | case Intrinsic::loongarch_lsx_vsubi_du: |
3777 | case Intrinsic::loongarch_lasx_xvsubi_bu: |
3778 | case Intrinsic::loongarch_lasx_xvsubi_hu: |
3779 | case Intrinsic::loongarch_lasx_xvsubi_wu: |
3780 | case Intrinsic::loongarch_lasx_xvsubi_du: |
3781 | return DAG.getNode(Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3782 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
3783 | case Intrinsic::loongarch_lsx_vneg_b: |
3784 | case Intrinsic::loongarch_lsx_vneg_h: |
3785 | case Intrinsic::loongarch_lsx_vneg_w: |
3786 | case Intrinsic::loongarch_lsx_vneg_d: |
3787 | case Intrinsic::loongarch_lasx_xvneg_b: |
3788 | case Intrinsic::loongarch_lasx_xvneg_h: |
3789 | case Intrinsic::loongarch_lasx_xvneg_w: |
3790 | case Intrinsic::loongarch_lasx_xvneg_d: |
3791 | return DAG.getNode( |
3792 | Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: 0), |
3793 | N1: DAG.getConstant( |
3794 | Val: APInt(N->getValueType(ResNo: 0).getScalarType().getSizeInBits(), 0, |
3795 | /*isSigned=*/true), |
3796 | DL: SDLoc(N), VT: N->getValueType(ResNo: 0)), |
3797 | N2: N->getOperand(Num: 1)); |
3798 | case Intrinsic::loongarch_lsx_vmax_b: |
3799 | case Intrinsic::loongarch_lsx_vmax_h: |
3800 | case Intrinsic::loongarch_lsx_vmax_w: |
3801 | case Intrinsic::loongarch_lsx_vmax_d: |
3802 | case Intrinsic::loongarch_lasx_xvmax_b: |
3803 | case Intrinsic::loongarch_lasx_xvmax_h: |
3804 | case Intrinsic::loongarch_lasx_xvmax_w: |
3805 | case Intrinsic::loongarch_lasx_xvmax_d: |
3806 | return DAG.getNode(Opcode: ISD::SMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3807 | N2: N->getOperand(Num: 2)); |
3808 | case Intrinsic::loongarch_lsx_vmax_bu: |
3809 | case Intrinsic::loongarch_lsx_vmax_hu: |
3810 | case Intrinsic::loongarch_lsx_vmax_wu: |
3811 | case Intrinsic::loongarch_lsx_vmax_du: |
3812 | case Intrinsic::loongarch_lasx_xvmax_bu: |
3813 | case Intrinsic::loongarch_lasx_xvmax_hu: |
3814 | case Intrinsic::loongarch_lasx_xvmax_wu: |
3815 | case Intrinsic::loongarch_lasx_xvmax_du: |
3816 | return DAG.getNode(Opcode: ISD::UMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3817 | N2: N->getOperand(Num: 2)); |
3818 | case Intrinsic::loongarch_lsx_vmaxi_b: |
3819 | case Intrinsic::loongarch_lsx_vmaxi_h: |
3820 | case Intrinsic::loongarch_lsx_vmaxi_w: |
3821 | case Intrinsic::loongarch_lsx_vmaxi_d: |
3822 | case Intrinsic::loongarch_lasx_xvmaxi_b: |
3823 | case Intrinsic::loongarch_lasx_xvmaxi_h: |
3824 | case Intrinsic::loongarch_lasx_xvmaxi_w: |
3825 | case Intrinsic::loongarch_lasx_xvmaxi_d: |
3826 | return DAG.getNode(Opcode: ISD::SMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3827 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG, /*IsSigned=*/true)); |
3828 | case Intrinsic::loongarch_lsx_vmaxi_bu: |
3829 | case Intrinsic::loongarch_lsx_vmaxi_hu: |
3830 | case Intrinsic::loongarch_lsx_vmaxi_wu: |
3831 | case Intrinsic::loongarch_lsx_vmaxi_du: |
3832 | case Intrinsic::loongarch_lasx_xvmaxi_bu: |
3833 | case Intrinsic::loongarch_lasx_xvmaxi_hu: |
3834 | case Intrinsic::loongarch_lasx_xvmaxi_wu: |
3835 | case Intrinsic::loongarch_lasx_xvmaxi_du: |
3836 | return DAG.getNode(Opcode: ISD::UMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3837 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
3838 | case Intrinsic::loongarch_lsx_vmin_b: |
3839 | case Intrinsic::loongarch_lsx_vmin_h: |
3840 | case Intrinsic::loongarch_lsx_vmin_w: |
3841 | case Intrinsic::loongarch_lsx_vmin_d: |
3842 | case Intrinsic::loongarch_lasx_xvmin_b: |
3843 | case Intrinsic::loongarch_lasx_xvmin_h: |
3844 | case Intrinsic::loongarch_lasx_xvmin_w: |
3845 | case Intrinsic::loongarch_lasx_xvmin_d: |
3846 | return DAG.getNode(Opcode: ISD::SMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3847 | N2: N->getOperand(Num: 2)); |
3848 | case Intrinsic::loongarch_lsx_vmin_bu: |
3849 | case Intrinsic::loongarch_lsx_vmin_hu: |
3850 | case Intrinsic::loongarch_lsx_vmin_wu: |
3851 | case Intrinsic::loongarch_lsx_vmin_du: |
3852 | case Intrinsic::loongarch_lasx_xvmin_bu: |
3853 | case Intrinsic::loongarch_lasx_xvmin_hu: |
3854 | case Intrinsic::loongarch_lasx_xvmin_wu: |
3855 | case Intrinsic::loongarch_lasx_xvmin_du: |
3856 | return DAG.getNode(Opcode: ISD::UMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3857 | N2: N->getOperand(Num: 2)); |
3858 | case Intrinsic::loongarch_lsx_vmini_b: |
3859 | case Intrinsic::loongarch_lsx_vmini_h: |
3860 | case Intrinsic::loongarch_lsx_vmini_w: |
3861 | case Intrinsic::loongarch_lsx_vmini_d: |
3862 | case Intrinsic::loongarch_lasx_xvmini_b: |
3863 | case Intrinsic::loongarch_lasx_xvmini_h: |
3864 | case Intrinsic::loongarch_lasx_xvmini_w: |
3865 | case Intrinsic::loongarch_lasx_xvmini_d: |
3866 | return DAG.getNode(Opcode: ISD::SMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3867 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG, /*IsSigned=*/true)); |
3868 | case Intrinsic::loongarch_lsx_vmini_bu: |
3869 | case Intrinsic::loongarch_lsx_vmini_hu: |
3870 | case Intrinsic::loongarch_lsx_vmini_wu: |
3871 | case Intrinsic::loongarch_lsx_vmini_du: |
3872 | case Intrinsic::loongarch_lasx_xvmini_bu: |
3873 | case Intrinsic::loongarch_lasx_xvmini_hu: |
3874 | case Intrinsic::loongarch_lasx_xvmini_wu: |
3875 | case Intrinsic::loongarch_lasx_xvmini_du: |
3876 | return DAG.getNode(Opcode: ISD::UMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3877 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
3878 | case Intrinsic::loongarch_lsx_vmul_b: |
3879 | case Intrinsic::loongarch_lsx_vmul_h: |
3880 | case Intrinsic::loongarch_lsx_vmul_w: |
3881 | case Intrinsic::loongarch_lsx_vmul_d: |
3882 | case Intrinsic::loongarch_lasx_xvmul_b: |
3883 | case Intrinsic::loongarch_lasx_xvmul_h: |
3884 | case Intrinsic::loongarch_lasx_xvmul_w: |
3885 | case Intrinsic::loongarch_lasx_xvmul_d: |
3886 | return DAG.getNode(Opcode: ISD::MUL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3887 | N2: N->getOperand(Num: 2)); |
3888 | case Intrinsic::loongarch_lsx_vmadd_b: |
3889 | case Intrinsic::loongarch_lsx_vmadd_h: |
3890 | case Intrinsic::loongarch_lsx_vmadd_w: |
3891 | case Intrinsic::loongarch_lsx_vmadd_d: |
3892 | case Intrinsic::loongarch_lasx_xvmadd_b: |
3893 | case Intrinsic::loongarch_lasx_xvmadd_h: |
3894 | case Intrinsic::loongarch_lasx_xvmadd_w: |
3895 | case Intrinsic::loongarch_lasx_xvmadd_d: { |
3896 | EVT ResTy = N->getValueType(ResNo: 0); |
3897 | return DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 1), |
3898 | N2: DAG.getNode(Opcode: ISD::MUL, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 2), |
3899 | N2: N->getOperand(Num: 3))); |
3900 | } |
3901 | case Intrinsic::loongarch_lsx_vmsub_b: |
3902 | case Intrinsic::loongarch_lsx_vmsub_h: |
3903 | case Intrinsic::loongarch_lsx_vmsub_w: |
3904 | case Intrinsic::loongarch_lsx_vmsub_d: |
3905 | case Intrinsic::loongarch_lasx_xvmsub_b: |
3906 | case Intrinsic::loongarch_lasx_xvmsub_h: |
3907 | case Intrinsic::loongarch_lasx_xvmsub_w: |
3908 | case Intrinsic::loongarch_lasx_xvmsub_d: { |
3909 | EVT ResTy = N->getValueType(ResNo: 0); |
3910 | return DAG.getNode(Opcode: ISD::SUB, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 1), |
3911 | N2: DAG.getNode(Opcode: ISD::MUL, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 2), |
3912 | N2: N->getOperand(Num: 3))); |
3913 | } |
3914 | case Intrinsic::loongarch_lsx_vdiv_b: |
3915 | case Intrinsic::loongarch_lsx_vdiv_h: |
3916 | case Intrinsic::loongarch_lsx_vdiv_w: |
3917 | case Intrinsic::loongarch_lsx_vdiv_d: |
3918 | case Intrinsic::loongarch_lasx_xvdiv_b: |
3919 | case Intrinsic::loongarch_lasx_xvdiv_h: |
3920 | case Intrinsic::loongarch_lasx_xvdiv_w: |
3921 | case Intrinsic::loongarch_lasx_xvdiv_d: |
3922 | return DAG.getNode(Opcode: ISD::SDIV, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3923 | N2: N->getOperand(Num: 2)); |
3924 | case Intrinsic::loongarch_lsx_vdiv_bu: |
3925 | case Intrinsic::loongarch_lsx_vdiv_hu: |
3926 | case Intrinsic::loongarch_lsx_vdiv_wu: |
3927 | case Intrinsic::loongarch_lsx_vdiv_du: |
3928 | case Intrinsic::loongarch_lasx_xvdiv_bu: |
3929 | case Intrinsic::loongarch_lasx_xvdiv_hu: |
3930 | case Intrinsic::loongarch_lasx_xvdiv_wu: |
3931 | case Intrinsic::loongarch_lasx_xvdiv_du: |
3932 | return DAG.getNode(Opcode: ISD::UDIV, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3933 | N2: N->getOperand(Num: 2)); |
3934 | case Intrinsic::loongarch_lsx_vmod_b: |
3935 | case Intrinsic::loongarch_lsx_vmod_h: |
3936 | case Intrinsic::loongarch_lsx_vmod_w: |
3937 | case Intrinsic::loongarch_lsx_vmod_d: |
3938 | case Intrinsic::loongarch_lasx_xvmod_b: |
3939 | case Intrinsic::loongarch_lasx_xvmod_h: |
3940 | case Intrinsic::loongarch_lasx_xvmod_w: |
3941 | case Intrinsic::loongarch_lasx_xvmod_d: |
3942 | return DAG.getNode(Opcode: ISD::SREM, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3943 | N2: N->getOperand(Num: 2)); |
3944 | case Intrinsic::loongarch_lsx_vmod_bu: |
3945 | case Intrinsic::loongarch_lsx_vmod_hu: |
3946 | case Intrinsic::loongarch_lsx_vmod_wu: |
3947 | case Intrinsic::loongarch_lsx_vmod_du: |
3948 | case Intrinsic::loongarch_lasx_xvmod_bu: |
3949 | case Intrinsic::loongarch_lasx_xvmod_hu: |
3950 | case Intrinsic::loongarch_lasx_xvmod_wu: |
3951 | case Intrinsic::loongarch_lasx_xvmod_du: |
3952 | return DAG.getNode(Opcode: ISD::UREM, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3953 | N2: N->getOperand(Num: 2)); |
3954 | case Intrinsic::loongarch_lsx_vand_v: |
3955 | case Intrinsic::loongarch_lasx_xvand_v: |
3956 | return DAG.getNode(Opcode: ISD::AND, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3957 | N2: N->getOperand(Num: 2)); |
3958 | case Intrinsic::loongarch_lsx_vor_v: |
3959 | case Intrinsic::loongarch_lasx_xvor_v: |
3960 | return DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3961 | N2: N->getOperand(Num: 2)); |
3962 | case Intrinsic::loongarch_lsx_vxor_v: |
3963 | case Intrinsic::loongarch_lasx_xvxor_v: |
3964 | return DAG.getNode(Opcode: ISD::XOR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3965 | N2: N->getOperand(Num: 2)); |
3966 | case Intrinsic::loongarch_lsx_vnor_v: |
3967 | case Intrinsic::loongarch_lasx_xvnor_v: { |
3968 | SDValue Res = DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3969 | N2: N->getOperand(Num: 2)); |
3970 | return DAG.getNOT(DL, Val: Res, VT: Res->getValueType(ResNo: 0)); |
3971 | } |
3972 | case Intrinsic::loongarch_lsx_vandi_b: |
3973 | case Intrinsic::loongarch_lasx_xvandi_b: |
3974 | return DAG.getNode(Opcode: ISD::AND, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3975 | N2: lowerVectorSplatImm<8>(Node: N, ImmOp: 2, DAG)); |
3976 | case Intrinsic::loongarch_lsx_vori_b: |
3977 | case Intrinsic::loongarch_lasx_xvori_b: |
3978 | return DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3979 | N2: lowerVectorSplatImm<8>(Node: N, ImmOp: 2, DAG)); |
3980 | case Intrinsic::loongarch_lsx_vxori_b: |
3981 | case Intrinsic::loongarch_lasx_xvxori_b: |
3982 | return DAG.getNode(Opcode: ISD::XOR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3983 | N2: lowerVectorSplatImm<8>(Node: N, ImmOp: 2, DAG)); |
3984 | case Intrinsic::loongarch_lsx_vsll_b: |
3985 | case Intrinsic::loongarch_lsx_vsll_h: |
3986 | case Intrinsic::loongarch_lsx_vsll_w: |
3987 | case Intrinsic::loongarch_lsx_vsll_d: |
3988 | case Intrinsic::loongarch_lasx_xvsll_b: |
3989 | case Intrinsic::loongarch_lasx_xvsll_h: |
3990 | case Intrinsic::loongarch_lasx_xvsll_w: |
3991 | case Intrinsic::loongarch_lasx_xvsll_d: |
3992 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3993 | N2: truncateVecElts(Node: N, DAG)); |
3994 | case Intrinsic::loongarch_lsx_vslli_b: |
3995 | case Intrinsic::loongarch_lasx_xvslli_b: |
3996 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3997 | N2: lowerVectorSplatImm<3>(Node: N, ImmOp: 2, DAG)); |
3998 | case Intrinsic::loongarch_lsx_vslli_h: |
3999 | case Intrinsic::loongarch_lasx_xvslli_h: |
4000 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4001 | N2: lowerVectorSplatImm<4>(Node: N, ImmOp: 2, DAG)); |
4002 | case Intrinsic::loongarch_lsx_vslli_w: |
4003 | case Intrinsic::loongarch_lasx_xvslli_w: |
4004 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4005 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
4006 | case Intrinsic::loongarch_lsx_vslli_d: |
4007 | case Intrinsic::loongarch_lasx_xvslli_d: |
4008 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4009 | N2: lowerVectorSplatImm<6>(Node: N, ImmOp: 2, DAG)); |
4010 | case Intrinsic::loongarch_lsx_vsrl_b: |
4011 | case Intrinsic::loongarch_lsx_vsrl_h: |
4012 | case Intrinsic::loongarch_lsx_vsrl_w: |
4013 | case Intrinsic::loongarch_lsx_vsrl_d: |
4014 | case Intrinsic::loongarch_lasx_xvsrl_b: |
4015 | case Intrinsic::loongarch_lasx_xvsrl_h: |
4016 | case Intrinsic::loongarch_lasx_xvsrl_w: |
4017 | case Intrinsic::loongarch_lasx_xvsrl_d: |
4018 | return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4019 | N2: truncateVecElts(Node: N, DAG)); |
4020 | case Intrinsic::loongarch_lsx_vsrli_b: |
4021 | case Intrinsic::loongarch_lasx_xvsrli_b: |
4022 | return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4023 | N2: lowerVectorSplatImm<3>(Node: N, ImmOp: 2, DAG)); |
4024 | case Intrinsic::loongarch_lsx_vsrli_h: |
4025 | case Intrinsic::loongarch_lasx_xvsrli_h: |
4026 | return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4027 | N2: lowerVectorSplatImm<4>(Node: N, ImmOp: 2, DAG)); |
4028 | case Intrinsic::loongarch_lsx_vsrli_w: |
4029 | case Intrinsic::loongarch_lasx_xvsrli_w: |
4030 | return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4031 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
4032 | case Intrinsic::loongarch_lsx_vsrli_d: |
4033 | case Intrinsic::loongarch_lasx_xvsrli_d: |
4034 | return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4035 | N2: lowerVectorSplatImm<6>(Node: N, ImmOp: 2, DAG)); |
4036 | case Intrinsic::loongarch_lsx_vsra_b: |
4037 | case Intrinsic::loongarch_lsx_vsra_h: |
4038 | case Intrinsic::loongarch_lsx_vsra_w: |
4039 | case Intrinsic::loongarch_lsx_vsra_d: |
4040 | case Intrinsic::loongarch_lasx_xvsra_b: |
4041 | case Intrinsic::loongarch_lasx_xvsra_h: |
4042 | case Intrinsic::loongarch_lasx_xvsra_w: |
4043 | case Intrinsic::loongarch_lasx_xvsra_d: |
4044 | return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4045 | N2: truncateVecElts(Node: N, DAG)); |
4046 | case Intrinsic::loongarch_lsx_vsrai_b: |
4047 | case Intrinsic::loongarch_lasx_xvsrai_b: |
4048 | return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4049 | N2: lowerVectorSplatImm<3>(Node: N, ImmOp: 2, DAG)); |
4050 | case Intrinsic::loongarch_lsx_vsrai_h: |
4051 | case Intrinsic::loongarch_lasx_xvsrai_h: |
4052 | return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4053 | N2: lowerVectorSplatImm<4>(Node: N, ImmOp: 2, DAG)); |
4054 | case Intrinsic::loongarch_lsx_vsrai_w: |
4055 | case Intrinsic::loongarch_lasx_xvsrai_w: |
4056 | return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4057 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
4058 | case Intrinsic::loongarch_lsx_vsrai_d: |
4059 | case Intrinsic::loongarch_lasx_xvsrai_d: |
4060 | return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4061 | N2: lowerVectorSplatImm<6>(Node: N, ImmOp: 2, DAG)); |
4062 | case Intrinsic::loongarch_lsx_vclz_b: |
4063 | case Intrinsic::loongarch_lsx_vclz_h: |
4064 | case Intrinsic::loongarch_lsx_vclz_w: |
4065 | case Intrinsic::loongarch_lsx_vclz_d: |
4066 | case Intrinsic::loongarch_lasx_xvclz_b: |
4067 | case Intrinsic::loongarch_lasx_xvclz_h: |
4068 | case Intrinsic::loongarch_lasx_xvclz_w: |
4069 | case Intrinsic::loongarch_lasx_xvclz_d: |
4070 | return DAG.getNode(Opcode: ISD::CTLZ, DL, VT: N->getValueType(ResNo: 0), Operand: N->getOperand(Num: 1)); |
4071 | case Intrinsic::loongarch_lsx_vpcnt_b: |
4072 | case Intrinsic::loongarch_lsx_vpcnt_h: |
4073 | case Intrinsic::loongarch_lsx_vpcnt_w: |
4074 | case Intrinsic::loongarch_lsx_vpcnt_d: |
4075 | case Intrinsic::loongarch_lasx_xvpcnt_b: |
4076 | case Intrinsic::loongarch_lasx_xvpcnt_h: |
4077 | case Intrinsic::loongarch_lasx_xvpcnt_w: |
4078 | case Intrinsic::loongarch_lasx_xvpcnt_d: |
4079 | return DAG.getNode(Opcode: ISD::CTPOP, DL, VT: N->getValueType(ResNo: 0), Operand: N->getOperand(Num: 1)); |
4080 | case Intrinsic::loongarch_lsx_vbitclr_b: |
4081 | case Intrinsic::loongarch_lsx_vbitclr_h: |
4082 | case Intrinsic::loongarch_lsx_vbitclr_w: |
4083 | case Intrinsic::loongarch_lsx_vbitclr_d: |
4084 | case Intrinsic::loongarch_lasx_xvbitclr_b: |
4085 | case Intrinsic::loongarch_lasx_xvbitclr_h: |
4086 | case Intrinsic::loongarch_lasx_xvbitclr_w: |
4087 | case Intrinsic::loongarch_lasx_xvbitclr_d: |
4088 | return lowerVectorBitClear(Node: N, DAG); |
4089 | case Intrinsic::loongarch_lsx_vbitclri_b: |
4090 | case Intrinsic::loongarch_lasx_xvbitclri_b: |
4091 | return lowerVectorBitClearImm<3>(Node: N, DAG); |
4092 | case Intrinsic::loongarch_lsx_vbitclri_h: |
4093 | case Intrinsic::loongarch_lasx_xvbitclri_h: |
4094 | return lowerVectorBitClearImm<4>(Node: N, DAG); |
4095 | case Intrinsic::loongarch_lsx_vbitclri_w: |
4096 | case Intrinsic::loongarch_lasx_xvbitclri_w: |
4097 | return lowerVectorBitClearImm<5>(Node: N, DAG); |
4098 | case Intrinsic::loongarch_lsx_vbitclri_d: |
4099 | case Intrinsic::loongarch_lasx_xvbitclri_d: |
4100 | return lowerVectorBitClearImm<6>(Node: N, DAG); |
4101 | case Intrinsic::loongarch_lsx_vbitset_b: |
4102 | case Intrinsic::loongarch_lsx_vbitset_h: |
4103 | case Intrinsic::loongarch_lsx_vbitset_w: |
4104 | case Intrinsic::loongarch_lsx_vbitset_d: |
4105 | case Intrinsic::loongarch_lasx_xvbitset_b: |
4106 | case Intrinsic::loongarch_lasx_xvbitset_h: |
4107 | case Intrinsic::loongarch_lasx_xvbitset_w: |
4108 | case Intrinsic::loongarch_lasx_xvbitset_d: { |
4109 | EVT VecTy = N->getValueType(ResNo: 0); |
4110 | SDValue One = DAG.getConstant(Val: 1, DL, VT: VecTy); |
4111 | return DAG.getNode( |
4112 | Opcode: ISD::OR, DL, VT: VecTy, N1: N->getOperand(Num: 1), |
4113 | N2: DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: One, N2: truncateVecElts(Node: N, DAG))); |
4114 | } |
4115 | case Intrinsic::loongarch_lsx_vbitseti_b: |
4116 | case Intrinsic::loongarch_lasx_xvbitseti_b: |
4117 | return lowerVectorBitSetImm<3>(Node: N, DAG); |
4118 | case Intrinsic::loongarch_lsx_vbitseti_h: |
4119 | case Intrinsic::loongarch_lasx_xvbitseti_h: |
4120 | return lowerVectorBitSetImm<4>(Node: N, DAG); |
4121 | case Intrinsic::loongarch_lsx_vbitseti_w: |
4122 | case Intrinsic::loongarch_lasx_xvbitseti_w: |
4123 | return lowerVectorBitSetImm<5>(Node: N, DAG); |
4124 | case Intrinsic::loongarch_lsx_vbitseti_d: |
4125 | case Intrinsic::loongarch_lasx_xvbitseti_d: |
4126 | return lowerVectorBitSetImm<6>(Node: N, DAG); |
4127 | case Intrinsic::loongarch_lsx_vbitrev_b: |
4128 | case Intrinsic::loongarch_lsx_vbitrev_h: |
4129 | case Intrinsic::loongarch_lsx_vbitrev_w: |
4130 | case Intrinsic::loongarch_lsx_vbitrev_d: |
4131 | case Intrinsic::loongarch_lasx_xvbitrev_b: |
4132 | case Intrinsic::loongarch_lasx_xvbitrev_h: |
4133 | case Intrinsic::loongarch_lasx_xvbitrev_w: |
4134 | case Intrinsic::loongarch_lasx_xvbitrev_d: { |
4135 | EVT VecTy = N->getValueType(ResNo: 0); |
4136 | SDValue One = DAG.getConstant(Val: 1, DL, VT: VecTy); |
4137 | return DAG.getNode( |
4138 | Opcode: ISD::XOR, DL, VT: VecTy, N1: N->getOperand(Num: 1), |
4139 | N2: DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: One, N2: truncateVecElts(Node: N, DAG))); |
4140 | } |
4141 | case Intrinsic::loongarch_lsx_vbitrevi_b: |
4142 | case Intrinsic::loongarch_lasx_xvbitrevi_b: |
4143 | return lowerVectorBitRevImm<3>(Node: N, DAG); |
4144 | case Intrinsic::loongarch_lsx_vbitrevi_h: |
4145 | case Intrinsic::loongarch_lasx_xvbitrevi_h: |
4146 | return lowerVectorBitRevImm<4>(Node: N, DAG); |
4147 | case Intrinsic::loongarch_lsx_vbitrevi_w: |
4148 | case Intrinsic::loongarch_lasx_xvbitrevi_w: |
4149 | return lowerVectorBitRevImm<5>(Node: N, DAG); |
4150 | case Intrinsic::loongarch_lsx_vbitrevi_d: |
4151 | case Intrinsic::loongarch_lasx_xvbitrevi_d: |
4152 | return lowerVectorBitRevImm<6>(Node: N, DAG); |
4153 | case Intrinsic::loongarch_lsx_vfadd_s: |
4154 | case Intrinsic::loongarch_lsx_vfadd_d: |
4155 | case Intrinsic::loongarch_lasx_xvfadd_s: |
4156 | case Intrinsic::loongarch_lasx_xvfadd_d: |
4157 | return DAG.getNode(Opcode: ISD::FADD, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4158 | N2: N->getOperand(Num: 2)); |
4159 | case Intrinsic::loongarch_lsx_vfsub_s: |
4160 | case Intrinsic::loongarch_lsx_vfsub_d: |
4161 | case Intrinsic::loongarch_lasx_xvfsub_s: |
4162 | case Intrinsic::loongarch_lasx_xvfsub_d: |
4163 | return DAG.getNode(Opcode: ISD::FSUB, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4164 | N2: N->getOperand(Num: 2)); |
4165 | case Intrinsic::loongarch_lsx_vfmul_s: |
4166 | case Intrinsic::loongarch_lsx_vfmul_d: |
4167 | case Intrinsic::loongarch_lasx_xvfmul_s: |
4168 | case Intrinsic::loongarch_lasx_xvfmul_d: |
4169 | return DAG.getNode(Opcode: ISD::FMUL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4170 | N2: N->getOperand(Num: 2)); |
4171 | case Intrinsic::loongarch_lsx_vfdiv_s: |
4172 | case Intrinsic::loongarch_lsx_vfdiv_d: |
4173 | case Intrinsic::loongarch_lasx_xvfdiv_s: |
4174 | case Intrinsic::loongarch_lasx_xvfdiv_d: |
4175 | return DAG.getNode(Opcode: ISD::FDIV, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4176 | N2: N->getOperand(Num: 2)); |
4177 | case Intrinsic::loongarch_lsx_vfmadd_s: |
4178 | case Intrinsic::loongarch_lsx_vfmadd_d: |
4179 | case Intrinsic::loongarch_lasx_xvfmadd_s: |
4180 | case Intrinsic::loongarch_lasx_xvfmadd_d: |
4181 | return DAG.getNode(Opcode: ISD::FMA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4182 | N2: N->getOperand(Num: 2), N3: N->getOperand(Num: 3)); |
4183 | case Intrinsic::loongarch_lsx_vinsgr2vr_b: |
4184 | return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), |
4185 | N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2), |
4186 | N3: legalizeIntrinsicImmArg<4>(Node: N, ImmOp: 3, DAG, Subtarget)); |
4187 | case Intrinsic::loongarch_lsx_vinsgr2vr_h: |
4188 | case Intrinsic::loongarch_lasx_xvinsgr2vr_w: |
4189 | return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), |
4190 | N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2), |
4191 | N3: legalizeIntrinsicImmArg<3>(Node: N, ImmOp: 3, DAG, Subtarget)); |
4192 | case Intrinsic::loongarch_lsx_vinsgr2vr_w: |
4193 | case Intrinsic::loongarch_lasx_xvinsgr2vr_d: |
4194 | return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), |
4195 | N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2), |
4196 | N3: legalizeIntrinsicImmArg<2>(Node: N, ImmOp: 3, DAG, Subtarget)); |
4197 | case Intrinsic::loongarch_lsx_vinsgr2vr_d: |
4198 | return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), |
4199 | N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2), |
4200 | N3: legalizeIntrinsicImmArg<1>(Node: N, ImmOp: 3, DAG, Subtarget)); |
4201 | case Intrinsic::loongarch_lsx_vreplgr2vr_b: |
4202 | case Intrinsic::loongarch_lsx_vreplgr2vr_h: |
4203 | case Intrinsic::loongarch_lsx_vreplgr2vr_w: |
4204 | case Intrinsic::loongarch_lsx_vreplgr2vr_d: |
4205 | case Intrinsic::loongarch_lasx_xvreplgr2vr_b: |
4206 | case Intrinsic::loongarch_lasx_xvreplgr2vr_h: |
4207 | case Intrinsic::loongarch_lasx_xvreplgr2vr_w: |
4208 | case Intrinsic::loongarch_lasx_xvreplgr2vr_d: { |
4209 | EVT ResTy = N->getValueType(ResNo: 0); |
4210 | SmallVector<SDValue> Ops(ResTy.getVectorNumElements(), N->getOperand(Num: 1)); |
4211 | return DAG.getBuildVector(VT: ResTy, DL, Ops); |
4212 | } |
4213 | case Intrinsic::loongarch_lsx_vreplve_b: |
4214 | case Intrinsic::loongarch_lsx_vreplve_h: |
4215 | case Intrinsic::loongarch_lsx_vreplve_w: |
4216 | case Intrinsic::loongarch_lsx_vreplve_d: |
4217 | case Intrinsic::loongarch_lasx_xvreplve_b: |
4218 | case Intrinsic::loongarch_lasx_xvreplve_h: |
4219 | case Intrinsic::loongarch_lasx_xvreplve_w: |
4220 | case Intrinsic::loongarch_lasx_xvreplve_d: |
4221 | return DAG.getNode(Opcode: LoongArchISD::VREPLVE, DL, VT: N->getValueType(ResNo: 0), |
4222 | N1: N->getOperand(Num: 1), |
4223 | N2: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getGRLenVT(), |
4224 | Operand: N->getOperand(Num: 2))); |
4225 | } |
4226 | return SDValue(); |
4227 | } |
4228 | |
4229 | SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, |
4230 | DAGCombinerInfo &DCI) const { |
4231 | SelectionDAG &DAG = DCI.DAG; |
4232 | switch (N->getOpcode()) { |
4233 | default: |
4234 | break; |
4235 | case ISD::AND: |
4236 | return performANDCombine(N, DAG, DCI, Subtarget); |
4237 | case ISD::OR: |
4238 | return performORCombine(N, DAG, DCI, Subtarget); |
4239 | case ISD::SETCC: |
4240 | return performSETCCCombine(N, DAG, DCI, Subtarget); |
4241 | case ISD::SRL: |
4242 | return performSRLCombine(N, DAG, DCI, Subtarget); |
4243 | case LoongArchISD::BITREV_W: |
4244 | return performBITREV_WCombine(N, DAG, DCI, Subtarget); |
4245 | case ISD::INTRINSIC_WO_CHAIN: |
4246 | return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget); |
4247 | } |
4248 | return SDValue(); |
4249 | } |
4250 | |
4251 | static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI, |
4252 | MachineBasicBlock *MBB) { |
4253 | if (!ZeroDivCheck) |
4254 | return MBB; |
4255 | |
4256 | // Build instructions: |
4257 | // MBB: |
4258 | // div(or mod) $dst, $dividend, $divisor |
4259 | // bnez $divisor, SinkMBB |
4260 | // BreakMBB: |
4261 | // break 7 // BRK_DIVZERO |
4262 | // SinkMBB: |
4263 | // fallthrough |
4264 | const BasicBlock *LLVM_BB = MBB->getBasicBlock(); |
4265 | MachineFunction::iterator It = ++MBB->getIterator(); |
4266 | MachineFunction *MF = MBB->getParent(); |
4267 | auto BreakMBB = MF->CreateMachineBasicBlock(BB: LLVM_BB); |
4268 | auto SinkMBB = MF->CreateMachineBasicBlock(BB: LLVM_BB); |
4269 | MF->insert(MBBI: It, MBB: BreakMBB); |
4270 | MF->insert(MBBI: It, MBB: SinkMBB); |
4271 | |
4272 | // Transfer the remainder of MBB and its successor edges to SinkMBB. |
4273 | SinkMBB->splice(Where: SinkMBB->end(), Other: MBB, From: std::next(x: MI.getIterator()), To: MBB->end()); |
4274 | SinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB); |
4275 | |
4276 | const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo(); |
4277 | DebugLoc DL = MI.getDebugLoc(); |
4278 | MachineOperand &Divisor = MI.getOperand(i: 2); |
4279 | Register DivisorReg = Divisor.getReg(); |
4280 | |
4281 | // MBB: |
4282 | BuildMI(BB: MBB, MIMD: DL, MCID: TII.get(Opcode: LoongArch::BNEZ)) |
4283 | .addReg(RegNo: DivisorReg, flags: getKillRegState(B: Divisor.isKill())) |
4284 | .addMBB(MBB: SinkMBB); |
4285 | MBB->addSuccessor(Succ: BreakMBB); |
4286 | MBB->addSuccessor(Succ: SinkMBB); |
4287 | |
4288 | // BreakMBB: |
// See the Linux header file arch/loongarch/include/uapi/asm/break.h for the
// definition of BRK_DIVZERO.
4291 | BuildMI(BB: BreakMBB, MIMD: DL, MCID: TII.get(Opcode: LoongArch::BREAK)).addImm(Val: 7 /*BRK_DIVZERO*/); |
4292 | BreakMBB->addSuccessor(Succ: SinkMBB); |
4293 | |
4294 | // Clear Divisor's kill flag. |
4295 | Divisor.setIsKill(false); |
4296 | |
4297 | return SinkMBB; |
4298 | } |
4299 | |
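// Expand the PseudoVBZ/PseudoVBNZ (and PseudoXVBZ/PseudoXVBNZ) pseudos into a
// vset* write to a condition-flag register, a bcnez branch and a small
// diamond that materializes 0/1 into a GPR. Roughly:
//   vseteqz.v $fcc, $vj
//   bcnez     $fcc, TrueBB
// FalseBB: addi.w $r1, $zero, 0 ; b SinkBB
// TrueBB:  addi.w $r2, $zero, 1
// SinkBB:  $dst = PHI [$r1, FalseBB], [$r2, TrueBB]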
4300 | static MachineBasicBlock * |
4301 | emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, |
4302 | const LoongArchSubtarget &Subtarget) { |
4303 | unsigned CondOpc; |
4304 | switch (MI.getOpcode()) { |
4305 | default: |
4306 | llvm_unreachable("Unexpected opcode" ); |
4307 | case LoongArch::PseudoVBZ: |
4308 | CondOpc = LoongArch::VSETEQZ_V; |
4309 | break; |
4310 | case LoongArch::PseudoVBZ_B: |
4311 | CondOpc = LoongArch::VSETANYEQZ_B; |
4312 | break; |
4313 | case LoongArch::PseudoVBZ_H: |
4314 | CondOpc = LoongArch::VSETANYEQZ_H; |
4315 | break; |
4316 | case LoongArch::PseudoVBZ_W: |
4317 | CondOpc = LoongArch::VSETANYEQZ_W; |
4318 | break; |
4319 | case LoongArch::PseudoVBZ_D: |
4320 | CondOpc = LoongArch::VSETANYEQZ_D; |
4321 | break; |
4322 | case LoongArch::PseudoVBNZ: |
4323 | CondOpc = LoongArch::VSETNEZ_V; |
4324 | break; |
4325 | case LoongArch::PseudoVBNZ_B: |
4326 | CondOpc = LoongArch::VSETALLNEZ_B; |
4327 | break; |
4328 | case LoongArch::PseudoVBNZ_H: |
4329 | CondOpc = LoongArch::VSETALLNEZ_H; |
4330 | break; |
4331 | case LoongArch::PseudoVBNZ_W: |
4332 | CondOpc = LoongArch::VSETALLNEZ_W; |
4333 | break; |
4334 | case LoongArch::PseudoVBNZ_D: |
4335 | CondOpc = LoongArch::VSETALLNEZ_D; |
4336 | break; |
4337 | case LoongArch::PseudoXVBZ: |
4338 | CondOpc = LoongArch::XVSETEQZ_V; |
4339 | break; |
4340 | case LoongArch::PseudoXVBZ_B: |
4341 | CondOpc = LoongArch::XVSETANYEQZ_B; |
4342 | break; |
4343 | case LoongArch::PseudoXVBZ_H: |
4344 | CondOpc = LoongArch::XVSETANYEQZ_H; |
4345 | break; |
4346 | case LoongArch::PseudoXVBZ_W: |
4347 | CondOpc = LoongArch::XVSETANYEQZ_W; |
4348 | break; |
4349 | case LoongArch::PseudoXVBZ_D: |
4350 | CondOpc = LoongArch::XVSETANYEQZ_D; |
4351 | break; |
4352 | case LoongArch::PseudoXVBNZ: |
4353 | CondOpc = LoongArch::XVSETNEZ_V; |
4354 | break; |
4355 | case LoongArch::PseudoXVBNZ_B: |
4356 | CondOpc = LoongArch::XVSETALLNEZ_B; |
4357 | break; |
4358 | case LoongArch::PseudoXVBNZ_H: |
4359 | CondOpc = LoongArch::XVSETALLNEZ_H; |
4360 | break; |
4361 | case LoongArch::PseudoXVBNZ_W: |
4362 | CondOpc = LoongArch::XVSETALLNEZ_W; |
4363 | break; |
4364 | case LoongArch::PseudoXVBNZ_D: |
4365 | CondOpc = LoongArch::XVSETALLNEZ_D; |
4366 | break; |
4367 | } |
4368 | |
4369 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
4370 | const BasicBlock *LLVM_BB = BB->getBasicBlock(); |
4371 | DebugLoc DL = MI.getDebugLoc(); |
4372 | MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); |
4373 | MachineFunction::iterator It = ++BB->getIterator(); |
4374 | |
4375 | MachineFunction *F = BB->getParent(); |
4376 | MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(BB: LLVM_BB); |
4377 | MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(BB: LLVM_BB); |
4378 | MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(BB: LLVM_BB); |
4379 | |
4380 | F->insert(MBBI: It, MBB: FalseBB); |
4381 | F->insert(MBBI: It, MBB: TrueBB); |
4382 | F->insert(MBBI: It, MBB: SinkBB); |
4383 | |
// Transfer the remainder of BB and its successor edges to SinkBB.
4385 | SinkBB->splice(Where: SinkBB->end(), Other: BB, From: std::next(x: MI.getIterator()), To: BB->end()); |
4386 | SinkBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB); |
4387 | |
// Insert the real instruction into BB.
4389 | Register FCC = MRI.createVirtualRegister(RegClass: &LoongArch::CFRRegClass); |
4390 | BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: CondOpc), DestReg: FCC).addReg(RegNo: MI.getOperand(i: 1).getReg()); |
4391 | |
4392 | // Insert branch. |
4393 | BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::BCNEZ)).addReg(RegNo: FCC).addMBB(MBB: TrueBB); |
4394 | BB->addSuccessor(Succ: FalseBB); |
4395 | BB->addSuccessor(Succ: TrueBB); |
4396 | |
4397 | // FalseBB. |
4398 | Register RD1 = MRI.createVirtualRegister(RegClass: &LoongArch::GPRRegClass); |
4399 | BuildMI(BB: FalseBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::ADDI_W), DestReg: RD1) |
4400 | .addReg(RegNo: LoongArch::R0) |
4401 | .addImm(Val: 0); |
4402 | BuildMI(BB: FalseBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::PseudoBR)).addMBB(MBB: SinkBB); |
4403 | FalseBB->addSuccessor(Succ: SinkBB); |
4404 | |
4405 | // TrueBB. |
4406 | Register RD2 = MRI.createVirtualRegister(RegClass: &LoongArch::GPRRegClass); |
4407 | BuildMI(BB: TrueBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::ADDI_W), DestReg: RD2) |
4408 | .addReg(RegNo: LoongArch::R0) |
4409 | .addImm(Val: 1); |
4410 | TrueBB->addSuccessor(Succ: SinkBB); |
4411 | |
4412 | // SinkBB: merge the results. |
4413 | BuildMI(BB&: *SinkBB, I: SinkBB->begin(), MIMD: DL, MCID: TII->get(Opcode: LoongArch::PHI), |
4414 | DestReg: MI.getOperand(i: 0).getReg()) |
4415 | .addReg(RegNo: RD1) |
4416 | .addMBB(MBB: FalseBB) |
4417 | .addReg(RegNo: RD2) |
4418 | .addMBB(MBB: TrueBB); |
4419 | |
4420 | // The pseudo instruction is gone now. |
4421 | MI.eraseFromParent(); |
4422 | return SinkBB; |
4423 | } |
4424 | |
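// Lower PseudoXVINSGR2VR_{B,H}: LASX has no xvinsgr2vr.{b,h}, so the element
// is inserted via the 128-bit halves. Roughly: if Idx addresses the high
// half, shuffle that half down with xvpermi.q, insert with the 128-bit
// vinsgr2vr on the sub_128 subregister, and (for the high half) merge the
// updated half back into the destination with another xvpermi.q.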
4425 | static MachineBasicBlock * |
4426 | emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, |
4427 | const LoongArchSubtarget &Subtarget) { |
4428 | unsigned InsOp; |
4429 | unsigned HalfSize; |
4430 | switch (MI.getOpcode()) { |
4431 | default: |
4432 | llvm_unreachable("Unexpected opcode" ); |
4433 | case LoongArch::PseudoXVINSGR2VR_B: |
4434 | HalfSize = 16; |
4435 | InsOp = LoongArch::VINSGR2VR_B; |
4436 | break; |
4437 | case LoongArch::PseudoXVINSGR2VR_H: |
4438 | HalfSize = 8; |
4439 | InsOp = LoongArch::VINSGR2VR_H; |
4440 | break; |
4441 | } |
4442 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
4443 | const TargetRegisterClass *RC = &LoongArch::LASX256RegClass; |
4444 | const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass; |
4445 | DebugLoc DL = MI.getDebugLoc(); |
4446 | MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); |
4447 | // XDst = vector_insert XSrc, Elt, Idx |
4448 | Register XDst = MI.getOperand(i: 0).getReg(); |
4449 | Register XSrc = MI.getOperand(i: 1).getReg(); |
4450 | Register Elt = MI.getOperand(i: 2).getReg(); |
4451 | unsigned Idx = MI.getOperand(i: 3).getImm(); |
4452 | |
4453 | Register ScratchReg1 = XSrc; |
4454 | if (Idx >= HalfSize) { |
4455 | ScratchReg1 = MRI.createVirtualRegister(RegClass: RC); |
4456 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::XVPERMI_Q), DestReg: ScratchReg1) |
4457 | .addReg(RegNo: XSrc) |
4458 | .addReg(RegNo: XSrc) |
4459 | .addImm(Val: 1); |
4460 | } |
4461 | |
4462 | Register ScratchSubReg1 = MRI.createVirtualRegister(RegClass: SubRC); |
4463 | Register ScratchSubReg2 = MRI.createVirtualRegister(RegClass: SubRC); |
4464 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::COPY), DestReg: ScratchSubReg1) |
4465 | .addReg(RegNo: ScratchReg1, flags: 0, SubReg: LoongArch::sub_128); |
4466 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: InsOp), DestReg: ScratchSubReg2) |
4467 | .addReg(RegNo: ScratchSubReg1) |
4468 | .addReg(RegNo: Elt) |
4469 | .addImm(Val: Idx >= HalfSize ? Idx - HalfSize : Idx); |
4470 | |
4471 | Register ScratchReg2 = XDst; |
4472 | if (Idx >= HalfSize) |
4473 | ScratchReg2 = MRI.createVirtualRegister(RegClass: RC); |
4474 | |
4475 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::SUBREG_TO_REG), DestReg: ScratchReg2) |
4476 | .addImm(Val: 0) |
4477 | .addReg(RegNo: ScratchSubReg2) |
4478 | .addImm(Val: LoongArch::sub_128); |
4479 | |
4480 | if (Idx >= HalfSize) |
4481 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::XVPERMI_Q), DestReg: XDst) |
4482 | .addReg(RegNo: XSrc) |
4483 | .addReg(RegNo: ScratchReg2) |
4484 | .addImm(Val: 2); |
4485 | |
4486 | MI.eraseFromParent(); |
4487 | return BB; |
4488 | } |
4489 | |
4490 | MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( |
4491 | MachineInstr &MI, MachineBasicBlock *BB) const { |
4492 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
4493 | DebugLoc DL = MI.getDebugLoc(); |
4494 | |
4495 | switch (MI.getOpcode()) { |
4496 | default: |
4497 | llvm_unreachable("Unexpected instr type to insert" ); |
4498 | case LoongArch::DIV_W: |
4499 | case LoongArch::DIV_WU: |
4500 | case LoongArch::MOD_W: |
4501 | case LoongArch::MOD_WU: |
4502 | case LoongArch::DIV_D: |
4503 | case LoongArch::DIV_DU: |
4504 | case LoongArch::MOD_D: |
4505 | case LoongArch::MOD_DU: |
4506 | return insertDivByZeroTrap(MI, MBB: BB); |
4508 | case LoongArch::WRFCSR: { |
4509 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::MOVGR2FCSR), |
4510 | DestReg: LoongArch::FCSR0 + MI.getOperand(i: 0).getImm()) |
4511 | .addReg(RegNo: MI.getOperand(i: 1).getReg()); |
4512 | MI.eraseFromParent(); |
4513 | return BB; |
4514 | } |
4515 | case LoongArch::RDFCSR: { |
4516 | MachineInstr *ReadFCSR = |
4517 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::MOVFCSR2GR), |
4518 | DestReg: MI.getOperand(i: 0).getReg()) |
4519 | .addReg(RegNo: LoongArch::FCSR0 + MI.getOperand(i: 1).getImm()); |
4520 | ReadFCSR->getOperand(i: 1).setIsUndef(); |
4521 | MI.eraseFromParent(); |
4522 | return BB; |
4523 | } |
4524 | case LoongArch::PseudoVBZ: |
4525 | case LoongArch::PseudoVBZ_B: |
4526 | case LoongArch::PseudoVBZ_H: |
4527 | case LoongArch::PseudoVBZ_W: |
4528 | case LoongArch::PseudoVBZ_D: |
4529 | case LoongArch::PseudoVBNZ: |
4530 | case LoongArch::PseudoVBNZ_B: |
4531 | case LoongArch::PseudoVBNZ_H: |
4532 | case LoongArch::PseudoVBNZ_W: |
4533 | case LoongArch::PseudoVBNZ_D: |
4534 | case LoongArch::PseudoXVBZ: |
4535 | case LoongArch::PseudoXVBZ_B: |
4536 | case LoongArch::PseudoXVBZ_H: |
4537 | case LoongArch::PseudoXVBZ_W: |
4538 | case LoongArch::PseudoXVBZ_D: |
4539 | case LoongArch::PseudoXVBNZ: |
4540 | case LoongArch::PseudoXVBNZ_B: |
4541 | case LoongArch::PseudoXVBNZ_H: |
4542 | case LoongArch::PseudoXVBNZ_W: |
4543 | case LoongArch::PseudoXVBNZ_D: |
4544 | return emitVecCondBranchPseudo(MI, BB, Subtarget); |
4545 | case LoongArch::PseudoXVINSGR2VR_B: |
4546 | case LoongArch::PseudoXVINSGR2VR_H: |
4547 | return emitPseudoXVINSGR2VR(MI, BB, Subtarget); |
4548 | } |
4549 | } |
4550 | |
4551 | bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses( |
4552 | EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, |
4553 | unsigned *Fast) const { |
4554 | if (!Subtarget.hasUAL()) |
4555 | return false; |
4556 | |
4557 | // TODO: set reasonable speed number. |
4558 | if (Fast) |
4559 | *Fast = 1; |
4560 | return true; |
4561 | } |
4562 | |
4563 | const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { |
4564 | switch ((LoongArchISD::NodeType)Opcode) { |
4565 | case LoongArchISD::FIRST_NUMBER: |
4566 | break; |
4567 | |
4568 | #define NODE_NAME_CASE(node) \ |
4569 | case LoongArchISD::node: \ |
4570 | return "LoongArchISD::" #node; |
4571 | |
4572 | // TODO: Add more target-dependent nodes later. |
4573 | NODE_NAME_CASE(CALL) |
4574 | NODE_NAME_CASE(CALL_MEDIUM) |
4575 | NODE_NAME_CASE(CALL_LARGE) |
4576 | NODE_NAME_CASE(RET) |
4577 | NODE_NAME_CASE(TAIL) |
4578 | NODE_NAME_CASE(TAIL_MEDIUM) |
4579 | NODE_NAME_CASE(TAIL_LARGE) |
4580 | NODE_NAME_CASE(SLL_W) |
4581 | NODE_NAME_CASE(SRA_W) |
4582 | NODE_NAME_CASE(SRL_W) |
4583 | NODE_NAME_CASE(BSTRINS) |
4584 | NODE_NAME_CASE(BSTRPICK) |
4585 | NODE_NAME_CASE(MOVGR2FR_W_LA64) |
4586 | NODE_NAME_CASE(MOVFR2GR_S_LA64) |
4587 | NODE_NAME_CASE(FTINT) |
4588 | NODE_NAME_CASE(REVB_2H) |
4589 | NODE_NAME_CASE(REVB_2W) |
4590 | NODE_NAME_CASE(BITREV_4B) |
4591 | NODE_NAME_CASE(BITREV_W) |
4592 | NODE_NAME_CASE(ROTR_W) |
4593 | NODE_NAME_CASE(ROTL_W) |
4594 | NODE_NAME_CASE(DIV_WU) |
4595 | NODE_NAME_CASE(MOD_WU) |
4596 | NODE_NAME_CASE(CLZ_W) |
4597 | NODE_NAME_CASE(CTZ_W) |
4598 | NODE_NAME_CASE(DBAR) |
4599 | NODE_NAME_CASE(IBAR) |
4600 | NODE_NAME_CASE(BREAK) |
4601 | NODE_NAME_CASE(SYSCALL) |
4602 | NODE_NAME_CASE(CRC_W_B_W) |
4603 | NODE_NAME_CASE(CRC_W_H_W) |
4604 | NODE_NAME_CASE(CRC_W_W_W) |
4605 | NODE_NAME_CASE(CRC_W_D_W) |
4606 | NODE_NAME_CASE(CRCC_W_B_W) |
4607 | NODE_NAME_CASE(CRCC_W_H_W) |
4608 | NODE_NAME_CASE(CRCC_W_W_W) |
4609 | NODE_NAME_CASE(CRCC_W_D_W) |
4610 | NODE_NAME_CASE(CSRRD) |
4611 | NODE_NAME_CASE(CSRWR) |
4612 | NODE_NAME_CASE(CSRXCHG) |
4613 | NODE_NAME_CASE(IOCSRRD_B) |
4614 | NODE_NAME_CASE(IOCSRRD_H) |
4615 | NODE_NAME_CASE(IOCSRRD_W) |
4616 | NODE_NAME_CASE(IOCSRRD_D) |
4617 | NODE_NAME_CASE(IOCSRWR_B) |
4618 | NODE_NAME_CASE(IOCSRWR_H) |
4619 | NODE_NAME_CASE(IOCSRWR_W) |
4620 | NODE_NAME_CASE(IOCSRWR_D) |
4621 | NODE_NAME_CASE(CPUCFG) |
4622 | NODE_NAME_CASE(MOVGR2FCSR) |
4623 | NODE_NAME_CASE(MOVFCSR2GR) |
4624 | NODE_NAME_CASE(CACOP_D) |
4625 | NODE_NAME_CASE(CACOP_W) |
4626 | NODE_NAME_CASE(VSHUF) |
4627 | NODE_NAME_CASE(VPICKEV) |
4628 | NODE_NAME_CASE(VPICKOD) |
4629 | NODE_NAME_CASE(VPACKEV) |
4630 | NODE_NAME_CASE(VPACKOD) |
4631 | NODE_NAME_CASE(VILVL) |
4632 | NODE_NAME_CASE(VILVH) |
4633 | NODE_NAME_CASE(VSHUF4I) |
4634 | NODE_NAME_CASE(VREPLVEI) |
4635 | NODE_NAME_CASE(XVPERMI) |
4636 | NODE_NAME_CASE(VPICK_SEXT_ELT) |
4637 | NODE_NAME_CASE(VPICK_ZEXT_ELT) |
4638 | NODE_NAME_CASE(VREPLVE) |
4639 | NODE_NAME_CASE(VALL_ZERO) |
4640 | NODE_NAME_CASE(VANY_ZERO) |
4641 | NODE_NAME_CASE(VALL_NONZERO) |
4642 | NODE_NAME_CASE(VANY_NONZERO) |
4643 | } |
4644 | #undef NODE_NAME_CASE |
4645 | return nullptr; |
4646 | } |
4647 | |
4648 | //===----------------------------------------------------------------------===// |
4649 | // Calling Convention Implementation |
4650 | //===----------------------------------------------------------------------===// |
4651 | |
// Eight general-purpose registers a0-a7 are used for passing integer
// arguments, with a0-a1 reused to return values. Generally, the GPRs are used
// to pass fixed-point arguments, and floating-point arguments when no FPR is
// available or with the soft-float ABI.
4656 | const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6, |
4657 | LoongArch::R7, LoongArch::R8, LoongArch::R9, |
4658 | LoongArch::R10, LoongArch::R11}; |
// Eight floating-point registers fa0-fa7 are used for passing floating-point
// arguments, and fa0-fa1 are also used to return values.
4661 | const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2, |
4662 | LoongArch::F3, LoongArch::F4, LoongArch::F5, |
4663 | LoongArch::F6, LoongArch::F7}; |
4664 | // FPR32 and FPR64 alias each other. |
4665 | const MCPhysReg ArgFPR64s[] = { |
4666 | LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64, |
4667 | LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64}; |
4668 | |
4669 | const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2, |
4670 | LoongArch::VR3, LoongArch::VR4, LoongArch::VR5, |
4671 | LoongArch::VR6, LoongArch::VR7}; |
4672 | |
4673 | const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2, |
4674 | LoongArch::XR3, LoongArch::XR4, LoongArch::XR5, |
4675 | LoongArch::XR6, LoongArch::XR7}; |
4676 | |
4677 | // Pass a 2*GRLen argument that has been split into two GRLen values through |
4678 | // registers or the stack as necessary. |
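// For example (ILP32 sketch): an i64 argument is split into two i32 halves.
// The first half takes the next free argument GPR if any (otherwise both
// halves go on the stack, the first slot aligned to the argument's original
// alignment), and the second half takes the following GPR if available or a
// GRLen-aligned stack slot otherwise.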
4679 | static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, |
4680 | CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, |
4681 | unsigned ValNo2, MVT ValVT2, MVT LocVT2, |
4682 | ISD::ArgFlagsTy ArgFlags2) { |
4683 | unsigned GRLenInBytes = GRLen / 8; |
4684 | if (Register Reg = State.AllocateReg(Regs: ArgGPRs)) { |
4685 | // At least one half can be passed via register. |
4686 | State.addLoc(V: CCValAssign::getReg(ValNo: VA1.getValNo(), ValVT: VA1.getValVT(), RegNo: Reg, |
4687 | LocVT: VA1.getLocVT(), HTP: CCValAssign::Full)); |
4688 | } else { |
4689 | // Both halves must be passed on the stack, with proper alignment. |
4690 | Align StackAlign = |
4691 | std::max(a: Align(GRLenInBytes), b: ArgFlags1.getNonZeroOrigAlign()); |
4692 | State.addLoc( |
4693 | V: CCValAssign::getMem(ValNo: VA1.getValNo(), ValVT: VA1.getValVT(), |
4694 | Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: StackAlign), |
4695 | LocVT: VA1.getLocVT(), HTP: CCValAssign::Full)); |
4696 | State.addLoc(V: CCValAssign::getMem( |
4697 | ValNo: ValNo2, ValVT: ValVT2, Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: Align(GRLenInBytes)), |
4698 | LocVT: LocVT2, HTP: CCValAssign::Full)); |
4699 | return false; |
4700 | } |
4701 | if (Register Reg = State.AllocateReg(Regs: ArgGPRs)) { |
4702 | // The second half can also be passed via register. |
4703 | State.addLoc( |
4704 | V: CCValAssign::getReg(ValNo: ValNo2, ValVT: ValVT2, RegNo: Reg, LocVT: LocVT2, HTP: CCValAssign::Full)); |
4705 | } else { |
4706 | // The second half is passed via the stack, without additional alignment. |
4707 | State.addLoc(V: CCValAssign::getMem( |
4708 | ValNo: ValNo2, ValVT: ValVT2, Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: Align(GRLenInBytes)), |
4709 | LocVT: LocVT2, HTP: CCValAssign::Full)); |
4710 | } |
4711 | return false; |
4712 | } |
4713 | |
4714 | // Implements the LoongArch calling convention. Returns true upon failure. |
4715 | static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, |
4716 | unsigned ValNo, MVT ValVT, |
4717 | CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, |
4718 | CCState &State, bool IsFixed, bool IsRet, |
4719 | Type *OrigTy) { |
4720 | unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits(); |
assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen" );
4722 | MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64; |
4723 | MVT LocVT = ValVT; |
4724 | |
4725 | // Any return value split into more than two values can't be returned |
4726 | // directly. |
4727 | if (IsRet && ValNo > 1) |
4728 | return true; |
4729 | |
// Pass floating-point values in GPRs if the argument is variadic, if no FPR
// is available, or under a soft-float ABI.
4731 | bool UseGPRForFloat = true; |
4732 | |
4733 | switch (ABI) { |
4734 | default: |
4735 | llvm_unreachable("Unexpected ABI" ); |
4736 | break; |
4737 | case LoongArchABI::ABI_ILP32F: |
4738 | case LoongArchABI::ABI_LP64F: |
4739 | case LoongArchABI::ABI_ILP32D: |
4740 | case LoongArchABI::ABI_LP64D: |
4741 | UseGPRForFloat = !IsFixed; |
4742 | break; |
4743 | case LoongArchABI::ABI_ILP32S: |
4744 | case LoongArchABI::ABI_LP64S: |
4745 | break; |
4746 | } |
4747 | |
4748 | // FPR32 and FPR64 alias each other. |
4749 | if (State.getFirstUnallocated(Regs: ArgFPR32s) == std::size(ArgFPR32s)) |
4750 | UseGPRForFloat = true; |
4751 | |
4752 | if (UseGPRForFloat && ValVT == MVT::f32) { |
4753 | LocVT = GRLenVT; |
4754 | LocInfo = CCValAssign::BCvt; |
4755 | } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) { |
4756 | LocVT = MVT::i64; |
4757 | LocInfo = CCValAssign::BCvt; |
4758 | } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) { |
4759 | // TODO: Handle passing f64 on LA32 with D feature. |
4760 | report_fatal_error(reason: "Passing f64 with GPR on LA32 is undefined" ); |
4761 | } |
4762 | |
4763 | // If this is a variadic argument, the LoongArch calling convention requires |
4764 | // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8 |
4765 | // byte alignment. An aligned register should be used regardless of whether |
4766 | // the original argument was split during legalisation or not. The argument |
4767 | // will not be passed by registers if the original type is larger than |
4768 | // 2*GRLen, so the register alignment rule does not apply. |
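// For example, on LA32 a variadic i64 (8-byte size and alignment) whose first
// half would otherwise land in an odd-numbered register such as a3 causes a3
// to be skipped, so the pair is passed in a4/a5 instead.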
4769 | unsigned TwoGRLenInBytes = (2 * GRLen) / 8; |
4770 | if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes && |
4771 | DL.getTypeAllocSize(Ty: OrigTy) == TwoGRLenInBytes) { |
4772 | unsigned RegIdx = State.getFirstUnallocated(Regs: ArgGPRs); |
4773 | // Skip 'odd' register if necessary. |
4774 | if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1) |
4775 | State.AllocateReg(Regs: ArgGPRs); |
4776 | } |
4777 | |
4778 | SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs(); |
4779 | SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags = |
4780 | State.getPendingArgFlags(); |
4781 | |
4782 | assert(PendingLocs.size() == PendingArgFlags.size() && |
4783 | "PendingLocs and PendingArgFlags out of sync" ); |
4784 | |
4785 | // Split arguments might be passed indirectly, so keep track of the pending |
4786 | // values. |
4787 | if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) { |
4788 | LocVT = GRLenVT; |
4789 | LocInfo = CCValAssign::Indirect; |
4790 | PendingLocs.push_back( |
4791 | Elt: CCValAssign::getPending(ValNo, ValVT, LocVT, HTP: LocInfo)); |
4792 | PendingArgFlags.push_back(Elt: ArgFlags); |
4793 | if (!ArgFlags.isSplitEnd()) { |
4794 | return false; |
4795 | } |
4796 | } |
4797 | |
4798 | // If the split argument only had two elements, it should be passed directly |
4799 | // in registers or on the stack. |
4800 | if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() && |
4801 | PendingLocs.size() <= 2) { |
4802 | assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()" ); |
4803 | // Apply the normal calling convention rules to the first half of the |
4804 | // split argument. |
4805 | CCValAssign VA = PendingLocs[0]; |
4806 | ISD::ArgFlagsTy AF = PendingArgFlags[0]; |
4807 | PendingLocs.clear(); |
4808 | PendingArgFlags.clear(); |
4809 | return CC_LoongArchAssign2GRLen(GRLen, State, VA1: VA, ArgFlags1: AF, ValNo2: ValNo, ValVT2: ValVT, LocVT2: LocVT, |
4810 | ArgFlags2: ArgFlags); |
4811 | } |
4812 | |
4813 | // Allocate to a register if possible, or else a stack slot. |
4814 | Register Reg; |
4815 | unsigned StoreSizeBytes = GRLen / 8; |
4816 | Align StackAlign = Align(GRLen / 8); |
4817 | |
4818 | if (ValVT == MVT::f32 && !UseGPRForFloat) |
4819 | Reg = State.AllocateReg(Regs: ArgFPR32s); |
4820 | else if (ValVT == MVT::f64 && !UseGPRForFloat) |
4821 | Reg = State.AllocateReg(Regs: ArgFPR64s); |
4822 | else if (ValVT.is128BitVector()) |
4823 | Reg = State.AllocateReg(Regs: ArgVRs); |
4824 | else if (ValVT.is256BitVector()) |
4825 | Reg = State.AllocateReg(Regs: ArgXRs); |
4826 | else |
4827 | Reg = State.AllocateReg(Regs: ArgGPRs); |
4828 | |
4829 | unsigned StackOffset = |
4830 | Reg ? 0 : State.AllocateStack(Size: StoreSizeBytes, Alignment: StackAlign); |
4831 | |
4832 | // If we reach this point and PendingLocs is non-empty, we must be at the |
4833 | // end of a split argument that must be passed indirectly. |
4834 | if (!PendingLocs.empty()) { |
4835 | assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()" ); |
4836 | assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()" ); |
4837 | for (auto &It : PendingLocs) { |
4838 | if (Reg) |
4839 | It.convertToReg(RegNo: Reg); |
4840 | else |
4841 | It.convertToMem(Offset: StackOffset); |
4842 | State.addLoc(V: It); |
4843 | } |
4844 | PendingLocs.clear(); |
4845 | PendingArgFlags.clear(); |
4846 | return false; |
4847 | } |
assert((!UseGPRForFloat || LocVT == GRLenVT) &&
"Expected a GRLenVT at this stage" );
4850 | |
4851 | if (Reg) { |
4852 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
4853 | return false; |
4854 | } |
4855 | |
4856 | // When a floating-point value is passed on the stack, no bit-cast is needed. |
4857 | if (ValVT.isFloatingPoint()) { |
4858 | LocVT = ValVT; |
4859 | LocInfo = CCValAssign::Full; |
4860 | } |
4861 | |
4862 | State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo)); |
4863 | return false; |
4864 | } |
4865 | |
4866 | void LoongArchTargetLowering::analyzeInputArgs( |
4867 | MachineFunction &MF, CCState &CCInfo, |
4868 | const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet, |
4869 | LoongArchCCAssignFn Fn) const { |
4870 | FunctionType *FType = MF.getFunction().getFunctionType(); |
4871 | for (unsigned i = 0, e = Ins.size(); i != e; ++i) { |
4872 | MVT ArgVT = Ins[i].VT; |
4873 | Type *ArgTy = nullptr; |
4874 | if (IsRet) |
4875 | ArgTy = FType->getReturnType(); |
4876 | else if (Ins[i].isOrigArg()) |
4877 | ArgTy = FType->getParamType(i: Ins[i].getOrigArgIndex()); |
4878 | LoongArchABI::ABI ABI = |
4879 | MF.getSubtarget<LoongArchSubtarget>().getTargetABI(); |
4880 | if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags, |
4881 | CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) { |
4882 | LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT |
4883 | << '\n'); |
4884 | llvm_unreachable("" ); |
4885 | } |
4886 | } |
4887 | } |
4888 | |
4889 | void LoongArchTargetLowering::analyzeOutputArgs( |
4890 | MachineFunction &MF, CCState &CCInfo, |
4891 | const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet, |
4892 | CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const { |
4893 | for (unsigned i = 0, e = Outs.size(); i != e; ++i) { |
4894 | MVT ArgVT = Outs[i].VT; |
4895 | Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; |
4896 | LoongArchABI::ABI ABI = |
4897 | MF.getSubtarget<LoongArchSubtarget>().getTargetABI(); |
4898 | if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags, |
4899 | CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) { |
4900 | LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT |
4901 | << "\n" ); |
4902 | llvm_unreachable("" ); |
4903 | } |
4904 | } |
4905 | } |
4906 | |
4907 | // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect |
4908 | // values. |
4909 | static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, |
4910 | const CCValAssign &VA, const SDLoc &DL) { |
4911 | switch (VA.getLocInfo()) { |
4912 | default: |
4913 | llvm_unreachable("Unexpected CCValAssign::LocInfo" ); |
4914 | case CCValAssign::Full: |
4915 | case CCValAssign::Indirect: |
4916 | break; |
4917 | case CCValAssign::BCvt: |
4918 | if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) |
4919 | Val = DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64, DL, VT: MVT::f32, Operand: Val); |
4920 | else |
4921 | Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: VA.getValVT(), Operand: Val); |
4922 | break; |
4923 | } |
4924 | return Val; |
4925 | } |
4926 | |
4927 | static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, |
4928 | const CCValAssign &VA, const SDLoc &DL, |
4929 | const ISD::InputArg &In, |
4930 | const LoongArchTargetLowering &TLI) { |
4931 | MachineFunction &MF = DAG.getMachineFunction(); |
4932 | MachineRegisterInfo &RegInfo = MF.getRegInfo(); |
4933 | EVT LocVT = VA.getLocVT(); |
4934 | SDValue Val; |
4935 | const TargetRegisterClass *RC = TLI.getRegClassFor(VT: LocVT.getSimpleVT()); |
4936 | Register VReg = RegInfo.createVirtualRegister(RegClass: RC); |
4937 | RegInfo.addLiveIn(Reg: VA.getLocReg(), vreg: VReg); |
4938 | Val = DAG.getCopyFromReg(Chain, dl: DL, Reg: VReg, VT: LocVT); |
4939 | |
4940 | // If input is sign extended from 32 bits, note it for the OptW pass. |
4941 | if (In.isOrigArg()) { |
4942 | Argument *OrigArg = MF.getFunction().getArg(i: In.getOrigArgIndex()); |
4943 | if (OrigArg->getType()->isIntegerTy()) { |
4944 | unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth(); |
4945 | // An input zero extended from i31 can also be considered sign extended. |
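// (Bit 31 is then known to be zero, so the value is also properly
// sign-extended from 32 bits.)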
4946 | if ((BitWidth <= 32 && In.Flags.isSExt()) || |
4947 | (BitWidth < 32 && In.Flags.isZExt())) { |
4948 | LoongArchMachineFunctionInfo *LAFI = |
4949 | MF.getInfo<LoongArchMachineFunctionInfo>(); |
4950 | LAFI->addSExt32Register(Reg: VReg); |
4951 | } |
4952 | } |
4953 | } |
4954 | |
4955 | return convertLocVTToValVT(DAG, Val, VA, DL); |
4956 | } |
4957 | |
4958 | // The caller is responsible for loading the full value if the argument is |
4959 | // passed with CCValAssign::Indirect. |
4960 | static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, |
4961 | const CCValAssign &VA, const SDLoc &DL) { |
4962 | MachineFunction &MF = DAG.getMachineFunction(); |
4963 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
4964 | EVT ValVT = VA.getValVT(); |
4965 | int FI = MFI.CreateFixedObject(Size: ValVT.getStoreSize(), SPOffset: VA.getLocMemOffset(), |
4966 | /*IsImmutable=*/true); |
4967 | SDValue FIN = DAG.getFrameIndex( |
4968 | FI, VT: MVT::getIntegerVT(BitWidth: DAG.getDataLayout().getPointerSizeInBits(AS: 0))); |
4969 | |
4970 | ISD::LoadExtType ExtType; |
4971 | switch (VA.getLocInfo()) { |
4972 | default: |
4973 | llvm_unreachable("Unexpected CCValAssign::LocInfo" ); |
4974 | case CCValAssign::Full: |
4975 | case CCValAssign::Indirect: |
4976 | case CCValAssign::BCvt: |
4977 | ExtType = ISD::NON_EXTLOAD; |
4978 | break; |
4979 | } |
4980 | return DAG.getExtLoad( |
4981 | ExtType, dl: DL, VT: VA.getLocVT(), Chain, Ptr: FIN, |
4982 | PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI), MemVT: ValVT); |
4983 | } |
4984 | |
4985 | static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, |
4986 | const CCValAssign &VA, const SDLoc &DL) { |
4987 | EVT LocVT = VA.getLocVT(); |
4988 | |
4989 | switch (VA.getLocInfo()) { |
4990 | default: |
4991 | llvm_unreachable("Unexpected CCValAssign::LocInfo" ); |
4992 | case CCValAssign::Full: |
4993 | break; |
4994 | case CCValAssign::BCvt: |
4995 | if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) |
4996 | Val = DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Val); |
4997 | else |
4998 | Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: LocVT, Operand: Val); |
4999 | break; |
5000 | } |
5001 | return Val; |
5002 | } |
5003 | |
5004 | static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, |
5005 | CCValAssign::LocInfo LocInfo, |
5006 | ISD::ArgFlagsTy ArgFlags, CCState &State) { |
5007 | if (LocVT == MVT::i32 || LocVT == MVT::i64) { |
5008 | // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim |
5009 | // s0 s1 s2 s3 s4 s5 s6 s7 s8 |
5010 | static const MCPhysReg GPRList[] = { |
5011 | LoongArch::R23, LoongArch::R24, LoongArch::R25, |
5012 | LoongArch::R26, LoongArch::R27, LoongArch::R28, |
5013 | LoongArch::R29, LoongArch::R30, LoongArch::R31}; |
5014 | if (unsigned Reg = State.AllocateReg(Regs: GPRList)) { |
5015 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
5016 | return false; |
5017 | } |
5018 | } |
5019 | |
5020 | if (LocVT == MVT::f32) { |
5021 | // Pass in STG registers: F1, F2, F3, F4 |
5022 | // fs0,fs1,fs2,fs3 |
5023 | static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25, |
5024 | LoongArch::F26, LoongArch::F27}; |
5025 | if (unsigned Reg = State.AllocateReg(Regs: FPR32List)) { |
5026 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
5027 | return false; |
5028 | } |
5029 | } |
5030 | |
5031 | if (LocVT == MVT::f64) { |
5032 | // Pass in STG registers: D1, D2, D3, D4 |
5033 | // fs4,fs5,fs6,fs7 |
5034 | static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64, |
5035 | LoongArch::F30_64, LoongArch::F31_64}; |
5036 | if (unsigned Reg = State.AllocateReg(Regs: FPR64List)) { |
5037 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
5038 | return false; |
5039 | } |
5040 | } |
5041 | |
5042 | report_fatal_error(reason: "No registers left in GHC calling convention" ); |
5043 | return true; |
5044 | } |
5045 | |
5046 | // Transform physical registers into virtual registers. |
5047 | SDValue LoongArchTargetLowering::LowerFormalArguments( |
5048 | SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, |
5049 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, |
5050 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { |
5051 | |
5052 | MachineFunction &MF = DAG.getMachineFunction(); |
5053 | |
5054 | switch (CallConv) { |
5055 | default: |
5056 | llvm_unreachable("Unsupported calling convention" ); |
5057 | case CallingConv::C: |
5058 | case CallingConv::Fast: |
5059 | break; |
5060 | case CallingConv::GHC: |
5061 | if (!MF.getSubtarget().hasFeature(Feature: LoongArch::FeatureBasicF) || |
5062 | !MF.getSubtarget().hasFeature(Feature: LoongArch::FeatureBasicD)) |
5063 | report_fatal_error( |
5064 | reason: "GHC calling convention requires the F and D extensions" ); |
5065 | } |
5066 | |
5067 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
5068 | MVT GRLenVT = Subtarget.getGRLenVT(); |
5069 | unsigned GRLenInBytes = Subtarget.getGRLen() / 8; |
// Used with varargs to accumulate store chains.
5071 | std::vector<SDValue> OutChains; |
5072 | |
5073 | // Assign locations to all of the incoming arguments. |
5074 | SmallVector<CCValAssign> ArgLocs; |
5075 | CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); |
5076 | |
5077 | if (CallConv == CallingConv::GHC) |
5078 | CCInfo.AnalyzeFormalArguments(Ins, Fn: CC_LoongArch_GHC); |
5079 | else |
5080 | analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, Fn: CC_LoongArch); |
5081 | |
5082 | for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { |
5083 | CCValAssign &VA = ArgLocs[i]; |
5084 | SDValue ArgValue; |
5085 | if (VA.isRegLoc()) |
5086 | ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, In: Ins[i], TLI: *this); |
5087 | else |
5088 | ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL); |
5089 | if (VA.getLocInfo() == CCValAssign::Indirect) { |
5090 | // If the original argument was split and passed by reference, we need to |
5091 | // load all parts of it here (using the same address). |
5092 | InVals.push_back(Elt: DAG.getLoad(VT: VA.getValVT(), dl: DL, Chain, Ptr: ArgValue, |
5093 | PtrInfo: MachinePointerInfo())); |
5094 | unsigned ArgIndex = Ins[i].OrigArgIndex; |
5095 | unsigned ArgPartOffset = Ins[i].PartOffset; |
5096 | assert(ArgPartOffset == 0); |
5097 | while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) { |
5098 | CCValAssign &PartVA = ArgLocs[i + 1]; |
5099 | unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset; |
5100 | SDValue Offset = DAG.getIntPtrConstant(Val: PartOffset, DL); |
5101 | SDValue Address = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: ArgValue, N2: Offset); |
5102 | InVals.push_back(Elt: DAG.getLoad(VT: PartVA.getValVT(), dl: DL, Chain, Ptr: Address, |
5103 | PtrInfo: MachinePointerInfo())); |
5104 | ++i; |
5105 | } |
5106 | continue; |
5107 | } |
5108 | InVals.push_back(Elt: ArgValue); |
5109 | } |
5110 | |
5111 | if (IsVarArg) { |
5112 | ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs); |
5113 | unsigned Idx = CCInfo.getFirstUnallocated(Regs: ArgRegs); |
5114 | const TargetRegisterClass *RC = &LoongArch::GPRRegClass; |
5115 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
5116 | MachineRegisterInfo &RegInfo = MF.getRegInfo(); |
5117 | auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>(); |
5118 | |
5119 | // Offset of the first variable argument from stack pointer, and size of |
5120 | // the vararg save area. For now, the varargs save area is either zero or |
5121 | // large enough to hold a0-a7. |
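// For example, on LA64 with two named GPR arguments (a0/a1 consumed),
// a2-a7 are saved below the incoming stack pointer: VarArgsSaveSize is 48
// and VaArgOffset is -48, with a2 stored at -48, a3 at -40, ..., a7 at -8.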
5122 | int VaArgOffset, VarArgsSaveSize; |
5123 | |
5124 | // If all registers are allocated, then all varargs must be passed on the |
5125 | // stack and we don't need to save any argregs. |
5126 | if (ArgRegs.size() == Idx) { |
5127 | VaArgOffset = CCInfo.getStackSize(); |
5128 | VarArgsSaveSize = 0; |
5129 | } else { |
5130 | VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx); |
5131 | VaArgOffset = -VarArgsSaveSize; |
5132 | } |
5133 | |
5134 | // Record the frame index of the first variable argument |
// which is needed for lowering VASTART.
5136 | int FI = MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset, IsImmutable: true); |
5137 | LoongArchFI->setVarArgsFrameIndex(FI); |
5138 | |
5139 | // If saving an odd number of registers then create an extra stack slot to |
5140 | // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures |
// offsets to even-numbered registers remain 2*GRLen-aligned.
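// For example, with Idx == 3 on LA64, a3-a7 (40 bytes) are saved and one
// extra 8-byte slot is added below them so the save area stays
// 2*GRLen-aligned.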
5142 | if (Idx % 2) { |
5143 | MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset - (int)GRLenInBytes, |
5144 | IsImmutable: true); |
5145 | VarArgsSaveSize += GRLenInBytes; |
5146 | } |
5147 | |
5148 | // Copy the integer registers that may have been used for passing varargs |
5149 | // to the vararg save area. |
5150 | for (unsigned I = Idx; I < ArgRegs.size(); |
5151 | ++I, VaArgOffset += GRLenInBytes) { |
5152 | const Register Reg = RegInfo.createVirtualRegister(RegClass: RC); |
5153 | RegInfo.addLiveIn(Reg: ArgRegs[I], vreg: Reg); |
5154 | SDValue ArgValue = DAG.getCopyFromReg(Chain, dl: DL, Reg, VT: GRLenVT); |
5155 | FI = MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset, IsImmutable: true); |
5156 | SDValue PtrOff = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout())); |
5157 | SDValue Store = DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: PtrOff, |
5158 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)); |
5159 | cast<StoreSDNode>(Val: Store.getNode()) |
5160 | ->getMemOperand() |
5161 | ->setValue((Value *)nullptr); |
5162 | OutChains.push_back(x: Store); |
5163 | } |
5164 | LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize); |
5165 | } |
5166 | |
5167 | // All stores are grouped in one node to allow the matching between |
5168 | // the size of Ins and InVals. This only happens for vararg functions. |
5169 | if (!OutChains.empty()) { |
5170 | OutChains.push_back(x: Chain); |
5171 | Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains); |
5172 | } |
5173 | |
5174 | return Chain; |
5175 | } |
5176 | |
5177 | bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { |
5178 | return CI->isTailCall(); |
5179 | } |
5180 | |
// Check if the return value is used only as a return value, as otherwise
5182 | // we can't perform a tail-call. |
5183 | bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N, |
5184 | SDValue &Chain) const { |
5185 | if (N->getNumValues() != 1) |
5186 | return false; |
5187 | if (!N->hasNUsesOfValue(NUses: 1, Value: 0)) |
5188 | return false; |
5189 | |
5190 | SDNode *Copy = *N->use_begin(); |
5191 | if (Copy->getOpcode() != ISD::CopyToReg) |
5192 | return false; |
5193 | |
5194 | // If the ISD::CopyToReg has a glue operand, we conservatively assume it |
5195 | // isn't safe to perform a tail call. |
5196 | if (Copy->getGluedNode()) |
5197 | return false; |
5198 | |
5199 | // The copy must be used by a LoongArchISD::RET, and nothing else. |
5200 | bool HasRet = false; |
5201 | for (SDNode *Node : Copy->uses()) { |
5202 | if (Node->getOpcode() != LoongArchISD::RET) |
5203 | return false; |
5204 | HasRet = true; |
5205 | } |
5206 | |
5207 | if (!HasRet) |
5208 | return false; |
5209 | |
5210 | Chain = Copy->getOperand(Num: 0); |
5211 | return true; |
5212 | } |
5213 | |
5214 | // Check whether the call is eligible for tail call optimization. |
5215 | bool LoongArchTargetLowering::isEligibleForTailCallOptimization( |
5216 | CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, |
5217 | const SmallVectorImpl<CCValAssign> &ArgLocs) const { |
5218 | |
5219 | auto CalleeCC = CLI.CallConv; |
5220 | auto &Outs = CLI.Outs; |
5221 | auto &Caller = MF.getFunction(); |
5222 | auto CallerCC = Caller.getCallingConv(); |
5223 | |
5224 | // Do not tail call opt if the stack is used to pass parameters. |
5225 | if (CCInfo.getStackSize() != 0) |
5226 | return false; |
5227 | |
5228 | // Do not tail call opt if any parameters need to be passed indirectly. |
5229 | for (auto &VA : ArgLocs) |
5230 | if (VA.getLocInfo() == CCValAssign::Indirect) |
5231 | return false; |
5232 | |
5233 | // Do not tail call opt if either caller or callee uses struct return |
5234 | // semantics. |
5235 | auto IsCallerStructRet = Caller.hasStructRetAttr(); |
5236 | auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); |
5237 | if (IsCallerStructRet || IsCalleeStructRet) |
5238 | return false; |
5239 | |
5240 | // Do not tail call opt if either the callee or caller has a byval argument. |
5241 | for (auto &Arg : Outs) |
5242 | if (Arg.Flags.isByVal()) |
5243 | return false; |
5244 | |
5245 | // The callee has to preserve all registers the caller needs to preserve. |
5246 | const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
5247 | const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); |
5248 | if (CalleeCC != CallerCC) { |
5249 | const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); |
5250 | if (!TRI->regmaskSubsetEqual(mask0: CallerPreserved, mask1: CalleePreserved)) |
5251 | return false; |
5252 | } |
5253 | return true; |
5254 | } |
5255 | |
5256 | static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) { |
5257 | return DAG.getDataLayout().getPrefTypeAlign( |
5258 | Ty: VT.getTypeForEVT(Context&: *DAG.getContext())); |
5259 | } |
5260 | |
5261 | // Lower a call to a callseq_start + CALL + callseq_end chain, and add input |
5262 | // and output parameter nodes. |
5263 | SDValue |
5264 | LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI, |
5265 | SmallVectorImpl<SDValue> &InVals) const { |
5266 | SelectionDAG &DAG = CLI.DAG; |
5267 | SDLoc &DL = CLI.DL; |
5268 | SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; |
5269 | SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; |
5270 | SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; |
5271 | SDValue Chain = CLI.Chain; |
5272 | SDValue Callee = CLI.Callee; |
5273 | CallingConv::ID CallConv = CLI.CallConv; |
5274 | bool IsVarArg = CLI.IsVarArg; |
5275 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
5276 | MVT GRLenVT = Subtarget.getGRLenVT(); |
5277 | bool &IsTailCall = CLI.IsTailCall; |
5278 | |
5279 | MachineFunction &MF = DAG.getMachineFunction(); |
5280 | |
5281 | // Analyze the operands of the call, assigning locations to each operand. |
5282 | SmallVector<CCValAssign> ArgLocs; |
5283 | CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); |
5284 | |
5285 | if (CallConv == CallingConv::GHC) |
5286 | ArgCCInfo.AnalyzeCallOperands(Outs, Fn: CC_LoongArch_GHC); |
5287 | else |
5288 | analyzeOutputArgs(MF, CCInfo&: ArgCCInfo, Outs, /*IsRet=*/false, CLI: &CLI, Fn: CC_LoongArch); |
5289 | |
5290 | // Check if it's really possible to do a tail call. |
5291 | if (IsTailCall) |
5292 | IsTailCall = isEligibleForTailCallOptimization(CCInfo&: ArgCCInfo, CLI, MF, ArgLocs); |
5293 | |
5294 | if (IsTailCall) |
5295 | ++NumTailCalls; |
5296 | else if (CLI.CB && CLI.CB->isMustTailCall()) |
5297 | report_fatal_error(reason: "failed to perform tail call elimination on a call " |
5298 | "site marked musttail" ); |
5299 | |
5300 | // Get a count of how many bytes are to be pushed on the stack. |
5301 | unsigned NumBytes = ArgCCInfo.getStackSize(); |
5302 | |
5303 | // Create local copies for byval args. |
5304 | SmallVector<SDValue> ByValArgs; |
5305 | for (unsigned i = 0, e = Outs.size(); i != e; ++i) { |
5306 | ISD::ArgFlagsTy Flags = Outs[i].Flags; |
5307 | if (!Flags.isByVal()) |
5308 | continue; |
5309 | |
5310 | SDValue Arg = OutVals[i]; |
5311 | unsigned Size = Flags.getByValSize(); |
5312 | Align Alignment = Flags.getNonZeroByValAlign(); |
5313 | |
5314 | int FI = |
5315 | MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/isSpillSlot: false); |
5316 | SDValue FIPtr = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout())); |
5317 | SDValue SizeNode = DAG.getConstant(Val: Size, DL, VT: GRLenVT); |
5318 | |
5319 | Chain = DAG.getMemcpy(Chain, dl: DL, Dst: FIPtr, Src: Arg, Size: SizeNode, Alignment, |
5320 | /*IsVolatile=*/isVol: false, |
5321 | /*AlwaysInline=*/false, /*CI=*/nullptr, OverrideTailCall: std::nullopt, |
5322 | DstPtrInfo: MachinePointerInfo(), SrcPtrInfo: MachinePointerInfo()); |
5323 | ByValArgs.push_back(Elt: FIPtr); |
5324 | } |
5325 | |
5326 | if (!IsTailCall) |
5327 | Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: 0, DL: CLI.DL); |
5328 | |
5329 | // Copy argument values to their designated locations. |
5330 | SmallVector<std::pair<Register, SDValue>> RegsToPass; |
5331 | SmallVector<SDValue> MemOpChains; |
5332 | SDValue StackPtr; |
5333 | for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { |
5334 | CCValAssign &VA = ArgLocs[i]; |
5335 | SDValue ArgValue = OutVals[i]; |
5336 | ISD::ArgFlagsTy Flags = Outs[i].Flags; |
5337 | |
5338 | // Promote the value if needed. |
5339 | // For now, only handle fully promoted and indirect arguments. |
5340 | if (VA.getLocInfo() == CCValAssign::Indirect) { |
5341 | // Store the argument in a stack slot and pass its address. |
5342 | Align StackAlign = |
5343 | std::max(a: getPrefTypeAlign(VT: Outs[i].ArgVT, DAG), |
5344 | b: getPrefTypeAlign(VT: ArgValue.getValueType(), DAG)); |
5345 | TypeSize StoredSize = ArgValue.getValueType().getStoreSize(); |
5346 | // If the original argument was split and passed by reference, we need to |
5347 | // store the required parts of it here (and pass just one address). |
5348 | unsigned ArgIndex = Outs[i].OrigArgIndex; |
5349 | unsigned ArgPartOffset = Outs[i].PartOffset; |
5350 | assert(ArgPartOffset == 0); |
5351 | // Calculate the total size to store. We don't have access to what we're |
5352 | // actually storing other than performing the loop and collecting the |
5353 | // info. |
5354 | SmallVector<std::pair<SDValue, SDValue>> Parts; |
5355 | while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) { |
5356 | SDValue PartValue = OutVals[i + 1]; |
5357 | unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset; |
5358 | SDValue Offset = DAG.getIntPtrConstant(Val: PartOffset, DL); |
5359 | EVT PartVT = PartValue.getValueType(); |
5360 | |
5361 | StoredSize += PartVT.getStoreSize(); |
5362 | StackAlign = std::max(a: StackAlign, b: getPrefTypeAlign(VT: PartVT, DAG)); |
5363 | Parts.push_back(Elt: std::make_pair(x&: PartValue, y&: Offset)); |
5364 | ++i; |
5365 | } |
5366 | SDValue SpillSlot = DAG.CreateStackTemporary(Bytes: StoredSize, Alignment: StackAlign); |
5367 | int FI = cast<FrameIndexSDNode>(Val&: SpillSlot)->getIndex(); |
5368 | MemOpChains.push_back( |
5369 | Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: SpillSlot, |
5370 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI))); |
5371 | for (const auto &Part : Parts) { |
5372 | SDValue PartValue = Part.first; |
5373 | SDValue PartOffset = Part.second; |
5374 | SDValue Address = |
5375 | DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: SpillSlot, N2: PartOffset); |
5376 | MemOpChains.push_back( |
5377 | Elt: DAG.getStore(Chain, dl: DL, Val: PartValue, Ptr: Address, |
5378 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI))); |
5379 | } |
5380 | ArgValue = SpillSlot; |
5381 | } else { |
5382 | ArgValue = convertValVTToLocVT(DAG, Val: ArgValue, VA, DL); |
5383 | } |
5384 | |
5385 | // Use local copy if it is a byval arg. |
5386 | if (Flags.isByVal()) |
5387 | ArgValue = ByValArgs[j++]; |
5388 | |
5389 | if (VA.isRegLoc()) { |
5390 | // Queue up the argument copies and emit them at the end. |
5391 | RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: ArgValue)); |
5392 | } else { |
5393 | assert(VA.isMemLoc() && "Argument not register or memory" ); |
5394 | assert(!IsTailCall && "Tail call not allowed if stack is used " |
5395 | "for passing parameters" ); |
5396 | |
5397 | // Work out the address of the stack slot. |
5398 | if (!StackPtr.getNode()) |
5399 | StackPtr = DAG.getCopyFromReg(Chain, dl: DL, Reg: LoongArch::R3, VT: PtrVT); |
5400 | SDValue Address = |
5401 | DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr, |
5402 | N2: DAG.getIntPtrConstant(Val: VA.getLocMemOffset(), DL)); |
5403 | |
5404 | // Emit the store. |
5405 | MemOpChains.push_back( |
5406 | Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: Address, PtrInfo: MachinePointerInfo())); |
5407 | } |
5408 | } |
5409 | |
5410 | // Join the stores, which are independent of one another. |
5411 | if (!MemOpChains.empty()) |
5412 | Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: MemOpChains); |
5413 | |
5414 | SDValue Glue; |
5415 | |
5416 | // Build a sequence of copy-to-reg nodes, chained and glued together. |
5417 | for (auto &Reg : RegsToPass) { |
5418 | Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: Reg.first, N: Reg.second, Glue); |
5419 | Glue = Chain.getValue(R: 1); |
5420 | } |
5421 | |
5422 | // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a |
5423 | // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't |
// split it, and the direct call can then be matched by PseudoCALL.
5425 | if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Val&: Callee)) { |
5426 | const GlobalValue *GV = S->getGlobal(); |
5427 | unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV) |
5428 | ? LoongArchII::MO_CALL |
5429 | : LoongArchII::MO_CALL_PLT; |
5430 | Callee = DAG.getTargetGlobalAddress(GV: S->getGlobal(), DL, VT: PtrVT, offset: 0, TargetFlags: OpFlags); |
5431 | } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Val&: Callee)) { |
5432 | unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV: nullptr) |
5433 | ? LoongArchII::MO_CALL |
5434 | : LoongArchII::MO_CALL_PLT; |
5435 | Callee = DAG.getTargetExternalSymbol(Sym: S->getSymbol(), VT: PtrVT, TargetFlags: OpFlags); |
5436 | } |
5437 | |
5438 | // The first call operand is the chain and the second is the target address. |
5439 | SmallVector<SDValue> Ops; |
5440 | Ops.push_back(Elt: Chain); |
5441 | Ops.push_back(Elt: Callee); |
5442 | |
5443 | // Add argument registers to the end of the list so that they are |
5444 | // known live into the call. |
5445 | for (auto &Reg : RegsToPass) |
5446 | Ops.push_back(Elt: DAG.getRegister(Reg: Reg.first, VT: Reg.second.getValueType())); |
5447 | |
5448 | if (!IsTailCall) { |
5449 | // Add a register mask operand representing the call-preserved registers. |
5450 | const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
5451 | const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); |
5452 | assert(Mask && "Missing call preserved mask for calling convention" ); |
5453 | Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask)); |
5454 | } |
5455 | |
5456 | // Glue the call to the argument copies, if any. |
5457 | if (Glue.getNode()) |
5458 | Ops.push_back(Elt: Glue); |
5459 | |
5460 | // Emit the call. |
5461 | SDVTList NodeTys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue); |
5462 | unsigned Op; |
5463 | switch (DAG.getTarget().getCodeModel()) { |
5464 | default: |
5465 | report_fatal_error(reason: "Unsupported code model" ); |
5466 | case CodeModel::Small: |
5467 | Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL; |
5468 | break; |
5469 | case CodeModel::Medium: |
5470 | assert(Subtarget.is64Bit() && "Medium code model requires LA64" ); |
5471 | Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM; |
5472 | break; |
5473 | case CodeModel::Large: |
5474 | assert(Subtarget.is64Bit() && "Large code model requires LA64" ); |
5475 | Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE; |
5476 | break; |
5477 | } |
5478 | |
5479 | if (IsTailCall) { |
5480 | MF.getFrameInfo().setHasTailCall(); |
5481 | SDValue Ret = DAG.getNode(Opcode: Op, DL, VTList: NodeTys, Ops); |
5482 | DAG.addNoMergeSiteInfo(Node: Ret.getNode(), NoMerge: CLI.NoMerge); |
5483 | return Ret; |
5484 | } |
5485 | |
5486 | Chain = DAG.getNode(Opcode: Op, DL, VTList: NodeTys, Ops); |
5487 | DAG.addNoMergeSiteInfo(Node: Chain.getNode(), NoMerge: CLI.NoMerge); |
5488 | Glue = Chain.getValue(R: 1); |
5489 | |
5490 | // Mark the end of the call, which is glued to the call itself. |
5491 | Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytes, Size2: 0, Glue, DL); |
5492 | Glue = Chain.getValue(R: 1); |
5493 | |
5494 | // Assign locations to each value returned by this call. |
5495 | SmallVector<CCValAssign> RVLocs; |
5496 | CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); |
5497 | analyzeInputArgs(MF, CCInfo&: RetCCInfo, Ins, /*IsRet=*/true, Fn: CC_LoongArch); |
5498 | |
5499 | // Copy all of the result registers out of their specified physreg. |
5500 | for (auto &VA : RVLocs) { |
5501 | // Copy the value out. |
5502 | SDValue RetValue = |
5503 | DAG.getCopyFromReg(Chain, dl: DL, Reg: VA.getLocReg(), VT: VA.getLocVT(), Glue); |
5504 | // Glue the RetValue to the end of the call sequence. |
5505 | Chain = RetValue.getValue(R: 1); |
5506 | Glue = RetValue.getValue(R: 2); |
5507 | |
5508 | RetValue = convertLocVTToValVT(DAG, Val: RetValue, VA, DL); |
5509 | |
5510 | InVals.push_back(Elt: RetValue); |
5511 | } |
5512 | |
5513 | return Chain; |
5514 | } |
5515 | |
5516 | bool LoongArchTargetLowering::CanLowerReturn( |
5517 | CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, |
5518 | const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { |
5519 | SmallVector<CCValAssign> RVLocs; |
5520 | CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); |
5521 | |
5522 | for (unsigned i = 0, e = Outs.size(); i != e; ++i) { |
5523 | LoongArchABI::ABI ABI = |
5524 | MF.getSubtarget<LoongArchSubtarget>().getTargetABI(); |
5525 | if (CC_LoongArch(DL: MF.getDataLayout(), ABI, ValNo: i, ValVT: Outs[i].VT, LocInfo: CCValAssign::Full, |
5526 | ArgFlags: Outs[i].Flags, State&: CCInfo, /*IsFixed=*/true, /*IsRet=*/true, |
5527 | OrigTy: nullptr)) |
5528 | return false; |
5529 | } |
5530 | return true; |
5531 | } |
5532 | |
5533 | SDValue LoongArchTargetLowering::LowerReturn( |
5534 | SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, |
5535 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
5536 | const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, |
5537 | SelectionDAG &DAG) const { |
5538 | // Stores the assignment of the return value to a location. |
5539 | SmallVector<CCValAssign> RVLocs; |
5540 | |
5541 | // Info about the registers and stack slot. |
5542 | CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, |
5543 | *DAG.getContext()); |
5544 | |
5545 | analyzeOutputArgs(MF&: DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true, |
5546 | CLI: nullptr, Fn: CC_LoongArch); |
5547 | if (CallConv == CallingConv::GHC && !RVLocs.empty()) |
5548 | report_fatal_error(reason: "GHC functions return void only" ); |
5549 | SDValue Glue; |
5550 | SmallVector<SDValue, 4> RetOps(1, Chain); |
5551 | |
5552 | // Copy the result values into the output registers. |
5553 | for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) { |
5554 | CCValAssign &VA = RVLocs[i]; |
5555 | assert(VA.isRegLoc() && "Can only return in registers!" ); |
5556 | |
5557 | // Handle a 'normal' return. |
5558 | SDValue Val = convertValVTToLocVT(DAG, Val: OutVals[i], VA, DL); |
5559 | Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: VA.getLocReg(), N: Val, Glue); |
5560 | |
5561 | // Guarantee that all emitted copies are stuck together. |
5562 | Glue = Chain.getValue(R: 1); |
5563 | RetOps.push_back(Elt: DAG.getRegister(Reg: VA.getLocReg(), VT: VA.getLocVT())); |
5564 | } |
5565 | |
5566 | RetOps[0] = Chain; // Update chain. |
5567 | |
5568 | // Add the glue node if we have it. |
5569 | if (Glue.getNode()) |
5570 | RetOps.push_back(Elt: Glue); |
5571 | |
5572 | return DAG.getNode(Opcode: LoongArchISD::RET, DL, VT: MVT::Other, Ops: RetOps); |
5573 | } |
5574 | |
5575 | bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, |
5576 | bool ForCodeSize) const { |
5577 | // TODO: Maybe need more checks here after vector extension is supported. |
5578 | if (VT == MVT::f32 && !Subtarget.hasBasicF()) |
5579 | return false; |
5580 | if (VT == MVT::f64 && !Subtarget.hasBasicD()) |
5581 | return false; |
5582 | return (Imm.isZero() || Imm.isExactlyValue(V: +1.0)); |
5583 | } |
5584 | |
5585 | bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const { |
5586 | return true; |
5587 | } |
5588 | |
5589 | bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const { |
5590 | return true; |
5591 | } |
5592 | |
5593 | bool LoongArchTargetLowering::shouldInsertFencesForAtomic( |
5594 | const Instruction *I) const { |
5595 | if (!Subtarget.is64Bit()) |
5596 | return isa<LoadInst>(Val: I) || isa<StoreInst>(Val: I); |
5597 | |
5598 | if (isa<LoadInst>(Val: I)) |
5599 | return true; |
5600 | |
5601 | // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not |
// require fences because we can use amswap_db.[w/d].
5603 | if (isa<StoreInst>(Val: I)) { |
5604 | unsigned Size = I->getOperand(i: 0)->getType()->getIntegerBitWidth(); |
5605 | return (Size == 8 || Size == 16); |
5606 | } |
5607 | |
5608 | return false; |
5609 | } |
5610 | |
5611 | EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL, |
5612 | LLVMContext &Context, |
5613 | EVT VT) const { |
5614 | if (!VT.isVector()) |
5615 | return getPointerTy(DL); |
5616 | return VT.changeVectorElementTypeToInteger(); |
5617 | } |
5618 | |
5619 | bool LoongArchTargetLowering::hasAndNot(SDValue Y) const { |
5620 | // TODO: Support vectors. |
5621 | return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Val: Y); |
5622 | } |
5623 | |
5624 | bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, |
5625 | const CallInst &I, |
5626 | MachineFunction &MF, |
5627 | unsigned Intrinsic) const { |
5628 | switch (Intrinsic) { |
5629 | default: |
5630 | return false; |
5631 | case Intrinsic::loongarch_masked_atomicrmw_xchg_i32: |
5632 | case Intrinsic::loongarch_masked_atomicrmw_add_i32: |
5633 | case Intrinsic::loongarch_masked_atomicrmw_sub_i32: |
5634 | case Intrinsic::loongarch_masked_atomicrmw_nand_i32: |
5635 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
5636 | Info.memVT = MVT::i32; |
5637 | Info.ptrVal = I.getArgOperand(i: 0); |
5638 | Info.offset = 0; |
5639 | Info.align = Align(4); |
5640 | Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore | |
5641 | MachineMemOperand::MOVolatile; |
5642 | return true; |
5643 | // TODO: Add more Intrinsics later. |
5644 | } |
5645 | } |
5646 | |
5647 | TargetLowering::AtomicExpansionKind |
5648 | LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { |
5649 | // TODO: Add more AtomicRMWInst that needs to be extended. |
5650 | |
5651 | // Since floating-point operation requires a non-trivial set of data |
5652 | // operations, use CmpXChg to expand. |
5653 | if (AI->isFloatingPointOperation() || |
5654 | AI->getOperation() == AtomicRMWInst::UIncWrap || |
5655 | AI->getOperation() == AtomicRMWInst::UDecWrap) |
5656 | return AtomicExpansionKind::CmpXChg; |
5657 | |
5658 | unsigned Size = AI->getType()->getPrimitiveSizeInBits(); |
5659 | if (Size == 8 || Size == 16) |
5660 | return AtomicExpansionKind::MaskedIntrinsic; |
5661 | return AtomicExpansionKind::None; |
5662 | } |
5663 | |
5664 | static Intrinsic::ID |
5665 | getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, |
5666 | AtomicRMWInst::BinOp BinOp) { |
5667 | if (GRLen == 64) { |
5668 | switch (BinOp) { |
5669 | default: |
5670 | llvm_unreachable("Unexpected AtomicRMW BinOp" ); |
5671 | case AtomicRMWInst::Xchg: |
5672 | return Intrinsic::loongarch_masked_atomicrmw_xchg_i64; |
5673 | case AtomicRMWInst::Add: |
5674 | return Intrinsic::loongarch_masked_atomicrmw_add_i64; |
5675 | case AtomicRMWInst::Sub: |
5676 | return Intrinsic::loongarch_masked_atomicrmw_sub_i64; |
5677 | case AtomicRMWInst::Nand: |
5678 | return Intrinsic::loongarch_masked_atomicrmw_nand_i64; |
5679 | case AtomicRMWInst::UMax: |
5680 | return Intrinsic::loongarch_masked_atomicrmw_umax_i64; |
5681 | case AtomicRMWInst::UMin: |
5682 | return Intrinsic::loongarch_masked_atomicrmw_umin_i64; |
5683 | case AtomicRMWInst::Max: |
5684 | return Intrinsic::loongarch_masked_atomicrmw_max_i64; |
5685 | case AtomicRMWInst::Min: |
5686 | return Intrinsic::loongarch_masked_atomicrmw_min_i64; |
5687 | // TODO: support other AtomicRMWInst. |
5688 | } |
5689 | } |
5690 | |
5691 | if (GRLen == 32) { |
5692 | switch (BinOp) { |
5693 | default: |
5694 | llvm_unreachable("Unexpected AtomicRMW BinOp" ); |
5695 | case AtomicRMWInst::Xchg: |
5696 | return Intrinsic::loongarch_masked_atomicrmw_xchg_i32; |
5697 | case AtomicRMWInst::Add: |
5698 | return Intrinsic::loongarch_masked_atomicrmw_add_i32; |
5699 | case AtomicRMWInst::Sub: |
5700 | return Intrinsic::loongarch_masked_atomicrmw_sub_i32; |
5701 | case AtomicRMWInst::Nand: |
5702 | return Intrinsic::loongarch_masked_atomicrmw_nand_i32; |
5703 | // TODO: support other AtomicRMWInst. |
5704 | } |
5705 | } |
5706 | |
5707 | llvm_unreachable("Unexpected GRLen\n" ); |
5708 | } |
5709 | |
5710 | TargetLowering::AtomicExpansionKind |
5711 | LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR( |
5712 | AtomicCmpXchgInst *CI) const { |
5713 | unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits(); |
5714 | if (Size == 8 || Size == 16) |
5715 | return AtomicExpansionKind::MaskedIntrinsic; |
5716 | return AtomicExpansionKind::None; |
5717 | } |
5718 | |
5719 | Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( |
5720 | IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, |
5721 | Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { |
5722 | AtomicOrdering FailOrd = CI->getFailureOrdering(); |
5723 | Value *FailureOrdering = |
5724 | Builder.getIntN(N: Subtarget.getGRLen(), C: static_cast<uint64_t>(FailOrd)); |
5725 | |
5726 | // TODO: Support cmpxchg on LA32. |
5727 | Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64; |
5728 | CmpVal = Builder.CreateSExt(V: CmpVal, DestTy: Builder.getInt64Ty()); |
5729 | NewVal = Builder.CreateSExt(V: NewVal, DestTy: Builder.getInt64Ty()); |
5730 | Mask = Builder.CreateSExt(V: Mask, DestTy: Builder.getInt64Ty()); |
5731 | Type *Tys[] = {AlignedAddr->getType()}; |
5732 | Function *MaskedCmpXchg = |
5733 | Intrinsic::getDeclaration(M: CI->getModule(), id: CmpXchgIntrID, Tys); |
5734 | Value *Result = Builder.CreateCall( |
5735 | Callee: MaskedCmpXchg, Args: {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering}); |
5736 | Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty()); |
5737 | return Result; |
5738 | } |
5739 | |
5740 | Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic( |
5741 | IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, |
5742 | Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const { |
5743 | // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace |
5744 | // the atomic instruction with an AtomicRMWInst::And/Or with appropriate |
5745 | // mask, as this produces better code than the LL/SC loop emitted by |
5746 | // int_loongarch_masked_atomicrmw_xchg. |
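// For example, exchanging in 0 only needs to clear the masked lane, which
// the And with the inverted mask achieves without an LL/SC loop; likewise
// exchanging in -1 only needs to set it, which the Or with the mask achieves.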
5747 | if (AI->getOperation() == AtomicRMWInst::Xchg && |
5748 | isa<ConstantInt>(Val: AI->getValOperand())) { |
5749 | ConstantInt *CVal = cast<ConstantInt>(Val: AI->getValOperand()); |
5750 | if (CVal->isZero()) |
5751 | return Builder.CreateAtomicRMW(Op: AtomicRMWInst::And, Ptr: AlignedAddr, |
5752 | Val: Builder.CreateNot(V: Mask, Name: "Inv_Mask" ), |
5753 | Align: AI->getAlign(), Ordering: Ord); |
5754 | if (CVal->isMinusOne()) |
5755 | return Builder.CreateAtomicRMW(Op: AtomicRMWInst::Or, Ptr: AlignedAddr, Val: Mask, |
5756 | Align: AI->getAlign(), Ordering: Ord); |
5757 | } |
5758 | |
5759 | unsigned GRLen = Subtarget.getGRLen(); |
5760 | Value *Ordering = |
5761 | Builder.getIntN(N: GRLen, C: static_cast<uint64_t>(AI->getOrdering())); |
5762 | Type *Tys[] = {AlignedAddr->getType()}; |
5763 | Function *LlwOpScwLoop = Intrinsic::getDeclaration( |
5764 | M: AI->getModule(), |
5765 | id: getIntrinsicForMaskedAtomicRMWBinOp(GRLen, BinOp: AI->getOperation()), Tys); |
5766 | |
5767 | if (GRLen == 64) { |
5768 | Incr = Builder.CreateSExt(V: Incr, DestTy: Builder.getInt64Ty()); |
5769 | Mask = Builder.CreateSExt(V: Mask, DestTy: Builder.getInt64Ty()); |
5770 | ShiftAmt = Builder.CreateSExt(V: ShiftAmt, DestTy: Builder.getInt64Ty()); |
5771 | } |
5772 | |
5773 | Value *Result; |
5774 | |
5775 | // Must pass the shift amount needed to sign extend the loaded value prior |
5776 | // to performing a signed comparison for min/max. ShiftAmt is the number of |
5777 | // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which |
5778 | // is the number of bits to left+right shift the value in order to |
5779 | // sign-extend. |
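// For example, an i8 field whose low bit sits at bit 16 of the aligned word
// on LA64 gets SextShamt = 64 - 8 - 16 = 40: shifting left and then
// arithmetically right by 40 sign-extends the byte before the comparison.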
5780 | if (AI->getOperation() == AtomicRMWInst::Min || |
5781 | AI->getOperation() == AtomicRMWInst::Max) { |
5782 | const DataLayout &DL = AI->getDataLayout(); |
5783 | unsigned ValWidth = |
5784 | DL.getTypeStoreSizeInBits(Ty: AI->getValOperand()->getType()); |
5785 | Value *SextShamt = |
5786 | Builder.CreateSub(LHS: Builder.getIntN(N: GRLen, C: GRLen - ValWidth), RHS: ShiftAmt); |
5787 | Result = Builder.CreateCall(Callee: LlwOpScwLoop, |
5788 | Args: {AlignedAddr, Incr, Mask, SextShamt, Ordering}); |
5789 | } else { |
5790 | Result = |
5791 | Builder.CreateCall(Callee: LlwOpScwLoop, Args: {AlignedAddr, Incr, Mask, Ordering}); |
5792 | } |
5793 | |
5794 | if (GRLen == 64) |
5795 | Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty()); |
5796 | return Result; |
5797 | } |
5798 | |
5799 | bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd( |
5800 | const MachineFunction &MF, EVT VT) const { |
5801 | VT = VT.getScalarType(); |
5802 | |
5803 | if (!VT.isSimple()) |
5804 | return false; |
5805 | |
5806 | switch (VT.getSimpleVT().SimpleTy) { |
5807 | case MVT::f32: |
5808 | case MVT::f64: |
5809 | return true; |
5810 | default: |
5811 | break; |
5812 | } |
5813 | |
5814 | return false; |
5815 | } |
5816 | |
5817 | Register LoongArchTargetLowering::getExceptionPointerRegister( |
5818 | const Constant *PersonalityFn) const { |
5819 | return LoongArch::R4; |
5820 | } |
5821 | |
5822 | Register LoongArchTargetLowering::getExceptionSelectorRegister( |
5823 | const Constant *PersonalityFn) const { |
5824 | return LoongArch::R5; |
5825 | } |
5826 | |
5827 | //===----------------------------------------------------------------------===// |
5828 | // LoongArch Inline Assembly Support |
5829 | //===----------------------------------------------------------------------===// |
5830 | |
5831 | LoongArchTargetLowering::ConstraintType |
5832 | LoongArchTargetLowering::getConstraintType(StringRef Constraint) const { |
5833 | // LoongArch specific constraints in GCC: config/loongarch/constraints.md |
5834 | // |
5835 | // 'f': A floating-point register (if available). |
5836 | // 'k': A memory operand whose address is formed by a base register and |
5837 | // (optionally scaled) index register. |
5838 | // 'l': A signed 16-bit constant. |
5839 | // 'm': A memory operand whose address is formed by a base register and |
5840 | // offset that is suitable for use in instructions with the same |
5841 | // addressing mode as st.w and ld.w. |
5842 | // 'I': A signed 12-bit constant (for arithmetic instructions). |
5843 | // 'J': Integer zero. |
5844 | // 'K': An unsigned 12-bit constant (for logic instructions). |
5845 | // "ZB": An address that is held in a general-purpose register. The offset is |
5846 | // zero. |
5847 | // "ZC": A memory operand whose address is formed by a base register and |
5848 | // offset that is suitable for use in instructions with the same |
5849 | // addressing mode as ll.w and sc.w. |
5850 | if (Constraint.size() == 1) { |
5851 | switch (Constraint[0]) { |
5852 | default: |
5853 | break; |
5854 | case 'f': |
5855 | return C_RegisterClass; |
5856 | case 'l': |
5857 | case 'I': |
5858 | case 'J': |
5859 | case 'K': |
5860 | return C_Immediate; |
5861 | case 'k': |
5862 | return C_Memory; |
5863 | } |
5864 | } |
5865 | |
5866 | if (Constraint == "ZC" || Constraint == "ZB" ) |
5867 | return C_Memory; |
5868 | |
5869 | // 'm' is handled here. |
5870 | return TargetLowering::getConstraintType(Constraint); |
5871 | } |
5872 | |
5873 | InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint( |
5874 | StringRef ConstraintCode) const { |
5875 | return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode) |
5876 | .Case(S: "k" , Value: InlineAsm::ConstraintCode::k) |
5877 | .Case(S: "ZB" , Value: InlineAsm::ConstraintCode::ZB) |
5878 | .Case(S: "ZC" , Value: InlineAsm::ConstraintCode::ZC) |
5879 | .Default(Value: TargetLowering::getInlineAsmMemConstraint(ConstraintCode)); |
5880 | } |
5881 | |
5882 | std::pair<unsigned, const TargetRegisterClass *> |
5883 | LoongArchTargetLowering::getRegForInlineAsmConstraint( |
5884 | const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { |
5885 | // First, see if this is a constraint that directly corresponds to a LoongArch |
5886 | // register class. |
5887 | if (Constraint.size() == 1) { |
5888 | switch (Constraint[0]) { |
5889 | case 'r': |
5890 | // TODO: Support fixed vectors up to GRLen? |
5891 | if (VT.isVector()) |
5892 | break; |
5893 | return std::make_pair(x: 0U, y: &LoongArch::GPRRegClass); |
5894 | case 'f': |
5895 | if (Subtarget.hasBasicF() && VT == MVT::f32) |
5896 | return std::make_pair(x: 0U, y: &LoongArch::FPR32RegClass); |
5897 | if (Subtarget.hasBasicD() && VT == MVT::f64) |
5898 | return std::make_pair(x: 0U, y: &LoongArch::FPR64RegClass); |
5899 | if (Subtarget.hasExtLSX() && |
5900 | TRI->isTypeLegalForClass(RC: LoongArch::LSX128RegClass, T: VT)) |
5901 | return std::make_pair(x: 0U, y: &LoongArch::LSX128RegClass); |
5902 | if (Subtarget.hasExtLASX() && |
5903 | TRI->isTypeLegalForClass(RC: LoongArch::LASX256RegClass, T: VT)) |
5904 | return std::make_pair(x: 0U, y: &LoongArch::LASX256RegClass); |
5905 | break; |
5906 | default: |
5907 | break; |
5908 | } |
5909 | } |
5910 | |
5911 | // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen |
5912 | // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm |
5913 | // constraints while the official register name is prefixed with a '$'. So we |
5914 | // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.) |
// before it is parsed. And TargetLowering::getRegForInlineAsmConstraint is
5916 | // case insensitive, so no need to convert the constraint to upper case here. |
5917 | // |
5918 | // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly |
5919 | // decode the usage of register name aliases into their official names. And |
5920 | // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use |
5921 | // official register names. |
5922 | if (Constraint.starts_with(Prefix: "{$r" ) || Constraint.starts_with(Prefix: "{$f" ) || |
5923 | Constraint.starts_with(Prefix: "{$vr" ) || Constraint.starts_with(Prefix: "{$xr" )) { |
5924 | bool IsFP = Constraint[2] == 'f'; |
5925 | std::pair<StringRef, StringRef> Temp = Constraint.split(Separator: '$'); |
5926 | std::pair<unsigned, const TargetRegisterClass *> R; |
5927 | R = TargetLowering::getRegForInlineAsmConstraint( |
5928 | TRI, Constraint: join_items(Separator: "" , Items&: Temp.first, Items&: Temp.second), VT); |
5929 | // Match those names to the widest floating point register type available. |
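// For example, {$f0} first matches F0 and is then widened to F0_64 when
// basic D is available and the requested type is f64 (or unspecified).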
5930 | if (IsFP) { |
5931 | unsigned RegNo = R.first; |
5932 | if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) { |
5933 | if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) { |
5934 | unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64; |
5935 | return std::make_pair(x&: DReg, y: &LoongArch::FPR64RegClass); |
5936 | } |
5937 | } |
5938 | } |
5939 | return R; |
5940 | } |
5941 | |
5942 | return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); |
5943 | } |
5944 | |
5945 | void LoongArchTargetLowering::LowerAsmOperandForConstraint( |
5946 | SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops, |
5947 | SelectionDAG &DAG) const { |
5948 | // Currently only support length 1 constraints. |
5949 | if (Constraint.size() == 1) { |
5950 | switch (Constraint[0]) { |
5951 | case 'l': |
5952 | // Validate & create a 16-bit signed immediate operand. |
5953 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) { |
5954 | uint64_t CVal = C->getSExtValue(); |
5955 | if (isInt<16>(x: CVal)) |
5956 | Ops.push_back( |
5957 | x: DAG.getTargetConstant(Val: CVal, DL: SDLoc(Op), VT: Subtarget.getGRLenVT())); |
5958 | } |
5959 | return; |
5960 | case 'I': |
5961 | // Validate & create a 12-bit signed immediate operand. |
5962 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) { |
5963 | uint64_t CVal = C->getSExtValue(); |
5964 | if (isInt<12>(x: CVal)) |
5965 | Ops.push_back( |
5966 | x: DAG.getTargetConstant(Val: CVal, DL: SDLoc(Op), VT: Subtarget.getGRLenVT())); |
5967 | } |
5968 | return; |
5969 | case 'J': |
5970 | // Validate & create an integer zero operand. |
5971 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) |
5972 | if (C->getZExtValue() == 0) |
5973 | Ops.push_back( |
5974 | x: DAG.getTargetConstant(Val: 0, DL: SDLoc(Op), VT: Subtarget.getGRLenVT())); |
5975 | return; |
5976 | case 'K': |
5977 | // Validate & create a 12-bit unsigned immediate operand. |
5978 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) { |
5979 | uint64_t CVal = C->getZExtValue(); |
5980 | if (isUInt<12>(x: CVal)) |
5981 | Ops.push_back( |
5982 | x: DAG.getTargetConstant(Val: CVal, DL: SDLoc(Op), VT: Subtarget.getGRLenVT())); |
5983 | } |
5984 | return; |
5985 | default: |
5986 | break; |
5987 | } |
5988 | } |
5989 | TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); |
5990 | } |
5991 | |
5992 | #define GET_REGISTER_MATCHER |
5993 | #include "LoongArchGenAsmMatcher.inc" |
5994 | |
5995 | Register |
5996 | LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT, |
5997 | const MachineFunction &MF) const { |
5998 | std::pair<StringRef, StringRef> Name = StringRef(RegName).split(Separator: '$'); |
5999 | std::string NewRegName = Name.second.str(); |
6000 | Register Reg = MatchRegisterAltName(Name: NewRegName); |
6001 | if (Reg == LoongArch::NoRegister) |
6002 | Reg = MatchRegisterName(Name: NewRegName); |
6003 | if (Reg == LoongArch::NoRegister) |
6004 | report_fatal_error( |
6005 | reason: Twine("Invalid register name \"" + StringRef(RegName) + "\"." )); |
6006 | BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF); |
6007 | if (!ReservedRegs.test(Idx: Reg)) |
6008 | report_fatal_error(reason: Twine("Trying to obtain non-reserved register \"" + |
6009 | StringRef(RegName) + "\"." )); |
6010 | return Reg; |
6011 | } |
6012 | |
6013 | bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context, |
6014 | EVT VT, SDValue C) const { |
6015 | // TODO: Support vectors. |
6016 | if (!VT.isScalarInteger()) |
6017 | return false; |
6018 | |
6019 | // Omit the optimization if the data size exceeds GRLen. |
6020 | if (VT.getSizeInBits() > Subtarget.getGRLen()) |
6021 | return false; |
6022 | |
6023 | if (auto *ConstNode = dyn_cast<ConstantSDNode>(Val: C.getNode())) { |
6024 | const APInt &Imm = ConstNode->getAPIntValue(); |
6025 | // Break MUL into (SLLI + ADD/SUB) or ALSL. |
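// For example, x * 17 becomes (SLLI x, 4) + x (or a single ALSL) and
// x * 15 becomes (SLLI x, 4) - x.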
6026 | if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() || |
6027 | (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2()) |
6028 | return true; |
6029 | // Break MUL into (ALSL x, (SLLI x, imm0), imm1). |
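// For example, x * 10 becomes (ALSL x, (SLLI x, 3), 1), i.e.
// (x << 1) + (x << 3), since 10 - 2 is a power of two.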
6030 | if (ConstNode->hasOneUse() && |
6031 | ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() || |
6032 | (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2())) |
6033 | return true; |
6034 | // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)), |
// in which the immediate has two set bits. Or break (MUL x, imm)
// into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
// equals (1 << s0) - (1 << s1).
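// For example, x * 4160 (0x1040) becomes (SLLI x, 12) + (SLLI x, 6).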
6038 | if (ConstNode->hasOneUse() && !(Imm.sge(RHS: -2048) && Imm.sle(RHS: 4095))) { |
6039 | unsigned Shifts = Imm.countr_zero(); |
6040 | // Reject immediates which can be composed via a single LUI. |
6041 | if (Shifts >= 12) |
6042 | return false; |
// Reject multiplications that can be optimized to
6044 | // (SLLI (ALSL x, x, 1/2/3/4), s). |
6045 | APInt ImmPop = Imm.ashr(ShiftAmt: Shifts); |
6046 | if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17) |
6047 | return false; |
6048 | // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`, |
// since it needs one more instruction than the other 3 cases.
6050 | APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true); |
6051 | if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() || |
6052 | (ImmSmall - Imm).isPowerOf2()) |
6053 | return true; |
6054 | } |
6055 | } |
6056 | |
6057 | return false; |
6058 | } |
6059 | |
6060 | bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL, |
6061 | const AddrMode &AM, |
6062 | Type *Ty, unsigned AS, |
6063 | Instruction *I) const { |
6064 | // LoongArch has four basic addressing modes: |
6065 | // 1. reg |
6066 | // 2. reg + 12-bit signed offset |
6067 | // 3. reg + 14-bit signed offset left-shifted by 2 |
6068 | // 4. reg1 + reg2 |
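// These correspond to, e.g., ld.w/st.w (reg + si12), ldptr.w/stptr.w
// (reg + si14 << 2) and ldx.w/stx.w (reg + reg).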
// TODO: Add more checks once the vector extension is supported.
6070 | |
6071 | // No global is ever allowed as a base. |
6072 | if (AM.BaseGV) |
6073 | return false; |
6074 | |
6075 | // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2 |
6076 | // with `UAL` feature. |
6077 | if (!isInt<12>(x: AM.BaseOffs) && |
6078 | !(isShiftedInt<14, 2>(x: AM.BaseOffs) && Subtarget.hasUAL())) |
6079 | return false; |
6080 | |
6081 | switch (AM.Scale) { |
6082 | case 0: |
6083 | // "r+i" or just "i", depending on HasBaseReg. |
6084 | break; |
6085 | case 1: |
6086 | // "r+r+i" is not allowed. |
6087 | if (AM.HasBaseReg && AM.BaseOffs) |
6088 | return false; |
6089 | // Otherwise we have "r+r" or "r+i". |
6090 | break; |
6091 | case 2: |
6092 | // "2*r+r" or "2*r+i" is not allowed. |
6093 | if (AM.HasBaseReg || AM.BaseOffs) |
6094 | return false; |
6095 | // Allow "2*r" as "r+r". |
6096 | break; |
6097 | default: |
6098 | return false; |
6099 | } |
6100 | |
6101 | return true; |
6102 | } |
6103 | |
6104 | bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const { |
6105 | return isInt<12>(x: Imm); |
6106 | } |
6107 | |
6108 | bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const { |
6109 | return isInt<12>(x: Imm); |
6110 | } |
6111 | |
6112 | bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { |
6113 | // Zexts are free if they can be combined with a load. |
6114 | // Don't advertise i32->i64 zextload as being free for LA64. It interacts |
6115 | // poorly with type legalization of compares preferring sext. |
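// For example, an i8 or i16 zero-extending load can be selected as a single
// ld.bu/ld.hu, so the separate zext costs nothing.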
6116 | if (auto *LD = dyn_cast<LoadSDNode>(Val)) { |
6117 | EVT MemVT = LD->getMemoryVT(); |
6118 | if ((MemVT == MVT::i8 || MemVT == MVT::i16) && |
6119 | (LD->getExtensionType() == ISD::NON_EXTLOAD || |
6120 | LD->getExtensionType() == ISD::ZEXTLOAD)) |
6121 | return true; |
6122 | } |
6123 | |
6124 | return TargetLowering::isZExtFree(Val, VT2); |
6125 | } |
6126 | |
6127 | bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, |
6128 | EVT DstVT) const { |
6129 | return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64; |
6130 | } |
6131 | |
6132 | bool LoongArchTargetLowering::signExtendConstant(const ConstantInt *CI) const { |
6133 | return Subtarget.is64Bit() && CI->getType()->isIntegerTy(Bitwidth: 32); |
6134 | } |
6135 | |
6136 | bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const { |
6137 | // TODO: Support vectors. |
6138 | if (Y.getValueType().isVector()) |
6139 | return false; |
6140 | |
6141 | return !isa<ConstantSDNode>(Val: Y); |
6142 | } |
6143 | |
6144 | ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const { |
6145 | // TODO: LAMCAS will use amcas{_DB,}.[bhwd] which does not require extension. |
6146 | return ISD::SIGN_EXTEND; |
6147 | } |
6148 | |
6149 | bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall( |
6150 | EVT Type, bool IsSigned) const { |
6151 | if (Subtarget.is64Bit() && Type == MVT::i32) |
6152 | return true; |
6153 | |
6154 | return IsSigned; |
6155 | } |
6156 | |
6157 | bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const { |
6158 | // Return false to suppress the unnecessary extensions if the LibCall |
6159 | // arguments or return value is a float narrower than GRLEN on a soft FP ABI. |
6160 | if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() && |
6161 | Type.getSizeInBits() < Subtarget.getGRLen())) |
6162 | return false; |
6163 | return true; |
6164 | } |
6165 | |