LoongArchISelLowering.cpp source code [llvm_projects/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp]

1	//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file defines the interfaces that LoongArch uses to lower LLVM code into
10	// a selection DAG.
11	//
12	//===----------------------------------------------------------------------===//
13
14	#include "LoongArchISelLowering.h"
15	#include "LoongArch.h"
16	#include "LoongArchMachineFunctionInfo.h"
17	#include "LoongArchRegisterInfo.h"
18	#include "LoongArchSelectionDAGInfo.h"
19	#include "LoongArchSubtarget.h"
20	#include "MCTargetDesc/LoongArchBaseInfo.h"
21	#include "MCTargetDesc/LoongArchMCTargetDesc.h"
22	#include "MCTargetDesc/LoongArchMatInt.h"
23	#include "llvm/ADT/SmallSet.h"
24	#include "llvm/ADT/Statistic.h"
25	#include "llvm/ADT/StringExtras.h"
26	#include "llvm/CodeGen/ISDOpcodes.h"
27	#include "llvm/CodeGen/MachineInstrBuilder.h"
28	#include "llvm/CodeGen/RuntimeLibcallUtil.h"
29	#include "llvm/CodeGen/SelectionDAGNodes.h"
30	#include "llvm/IR/IRBuilder.h"
31	#include "llvm/IR/IntrinsicInst.h"
32	#include "llvm/IR/IntrinsicsLoongArch.h"
33	#include "llvm/Support/CodeGen.h"
34	#include "llvm/Support/Debug.h"
35	#include "llvm/Support/ErrorHandling.h"
36	#include "llvm/Support/KnownBits.h"
37	#include "llvm/Support/MathExtras.h"
38	#include <llvm/Analysis/VectorUtils.h>
39
40	using namespace llvm;
41
42	#define DEBUG_TYPE "loongarch-isel-lowering"
43
44	STATISTIC(NumTailCalls, "Number of tail calls");
45
/// Instruction budgets selectable through -loongarch-materialize-float-imm.
/// Each enumerator's numeric value equals the instruction count it names, so
/// the option value can be compared directly against an instruction count.
/// NoMaterializeFPImm (0) selects the constant pool.
enum MaterializeFPImm {
  NoMaterializeFPImm = 0,
  MaterializeFPImm2Ins = 2,
  MaterializeFPImm3Ins = 3,
  MaterializeFPImm4Ins = 4,
  MaterializeFPImm5Ins = 5,
  MaterializeFPImm6Ins = 6
};
54
55	static cl::opt<MaterializeFPImm> MaterializeFPImmInsNum(
56	"loongarch-materialize-float-imm", cl::Hidden,
57	cl::desc ("Maximum number of instructions used (including code sequence "
58	"to generate the value and moving the value to FPR) when "
59	"materializing floating-point immediates (default = 3)"),
60	cl::init(Val: MaterializeFPImm3Ins),
61	cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"),
62	clEnumValN(MaterializeFPImm2Ins, "2",
63	"Materialize FP immediate within 2 instructions"),
64	clEnumValN(MaterializeFPImm3Ins, "3",
65	"Materialize FP immediate within 3 instructions"),
66	clEnumValN(MaterializeFPImm4Ins, "4",
67	"Materialize FP immediate within 4 instructions"),
68	clEnumValN(MaterializeFPImm5Ins, "5",
69	"Materialize FP immediate within 5 instructions"),
70	clEnumValN(MaterializeFPImm6Ins, "6",
71	"Materialize FP immediate within 6 instructions "
72	"(behaves same as 5 on loongarch64)")));
73
74	static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
75	cl::desc ("Trap on integer division by zero."),
76	cl::init(Val: false));
77
78	LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
79	const LoongArchSubtarget &STI)
80	: TargetLowering (TM, STI), Subtarget(STI) {
81
82	MVT GRLenVT = Subtarget.getGRLenVT();
83
84	// Set up the register classes.
85
86	addRegisterClass(VT: GRLenVT, RC: &LoongArch::GPRRegClass);
87	if (Subtarget.hasBasicF())
88	addRegisterClass(VT: MVT::f32, RC: &LoongArch::FPR32RegClass);
89	if (Subtarget.hasBasicD())
90	addRegisterClass(VT: MVT::f64, RC: &LoongArch::FPR64RegClass);
91
92	static const MVT::SimpleValueType LSXVTs[] = {
93	MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
94	static const MVT::SimpleValueType LASXVTs[] = {
95	MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
96
97	if (Subtarget.hasExtLSX())
98	for (MVT VT : LSXVTs)
99	addRegisterClass(VT, RC: &LoongArch::LSX128RegClass);
100
101	if (Subtarget.hasExtLASX())
102	for (MVT VT : LASXVTs)
103	addRegisterClass(VT, RC: &LoongArch::LASX256RegClass);
104
105	// Set operations for LA32 and LA64.
106
107	setLoadExtAction(ExtTypes: {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, ValVT: GRLenVT,
108	MemVT: MVT::i1, Action: Promote);
109
110	setOperationAction(Op: ISD::SHL_PARTS, VT: GRLenVT, Action: Custom);
111	setOperationAction(Op: ISD::SRA_PARTS, VT: GRLenVT, Action: Custom);
112	setOperationAction(Op: ISD::SRL_PARTS, VT: GRLenVT, Action: Custom);
113	setOperationAction(Op: ISD::FP_TO_SINT, VT: GRLenVT, Action: Custom);
114	setOperationAction(Op: ISD::ROTL, VT: GRLenVT, Action: Expand);
115	setOperationAction(Op: ISD::CTPOP, VT: GRLenVT, Action: Expand);
116
117	setOperationAction(Ops: {ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
118	ISD::JumpTable, ISD::GlobalTLSAddress},
119	VT: GRLenVT, Action: Custom);
120
121	setOperationAction(Op: ISD::EH_DWARF_CFA, VT: GRLenVT, Action: Custom);
122
123	setOperationAction(Op: ISD::DYNAMIC_STACKALLOC, VT: GRLenVT, Action: Expand);
124	setOperationAction(Ops: {ISD::STACKSAVE, ISD::STACKRESTORE}, VT: MVT::Other, Action: Expand);
125	setOperationAction(Op: ISD::VASTART, VT: MVT::Other, Action: Custom);
126	setOperationAction(Ops: {ISD::VAARG, ISD::VACOPY, ISD::VAEND}, VT: MVT::Other, Action: Expand);
127
128	setOperationAction(Op: ISD::DEBUGTRAP, VT: MVT::Other, Action: Legal);
129	setOperationAction(Op: ISD::TRAP, VT: MVT::Other, Action: Legal);
130
131	setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::Other, Action: Custom);
132	setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::Other, Action: Custom);
133	setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::Other, Action: Custom);
134
135	setOperationAction(Op: ISD::PREFETCH, VT: MVT::Other, Action: Custom);
136
137	// BITREV/REVB requires the 32S feature.
138	if (STI.has32S()) {
139	// Expand bitreverse.i16 with native-width bitrev and shift for now, before
140	// we get to know which of sll and revb.2h is faster.
141	setOperationAction(Op: ISD::BITREVERSE, VT: MVT::i8, Action: Custom);
142	setOperationAction(Op: ISD::BITREVERSE, VT: GRLenVT, Action: Legal);
143
144	// LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
145	// the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
146	// and i32 could still be byte-swapped relatively cheaply.
147	setOperationAction(Op: ISD::BSWAP, VT: MVT::i16, Action: Custom);
148	} else {
149	setOperationAction(Op: ISD::BSWAP, VT: GRLenVT, Action: Expand);
150	setOperationAction(Op: ISD::CTTZ, VT: GRLenVT, Action: Expand);
151	setOperationAction(Op: ISD::CTLZ, VT: GRLenVT, Action: Expand);
152	setOperationAction(Op: ISD::ROTR, VT: GRLenVT, Action: Expand);
153	setOperationAction(Op: ISD::SELECT, VT: GRLenVT, Action: Custom);
154	setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::i8, Action: Expand);
155	setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::i16, Action: Expand);
156	}
157
158	setOperationAction(Op: ISD::BR_JT, VT: MVT::Other, Action: Expand);
159	setOperationAction(Op: ISD::BR_CC, VT: GRLenVT, Action: Expand);
160	setOperationAction(Op: ISD::BRCOND, VT: MVT::Other, Action: Custom);
161	setOperationAction(Op: ISD::SELECT_CC, VT: GRLenVT, Action: Expand);
162	setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::i1, Action: Expand);
163	setOperationAction(Ops: {ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT: GRLenVT, Action: Expand);
164
165	setOperationAction(Op: ISD::FP_TO_UINT, VT: GRLenVT, Action: Custom);
166	setOperationAction(Op: ISD::UINT_TO_FP, VT: GRLenVT, Action: Expand);
167
168	// Set operations for LA64 only.
169
170	if (Subtarget.is64Bit()) {
171	setOperationAction(Op: ISD::ADD, VT: MVT::i32, Action: Custom);
172	setOperationAction(Op: ISD::SUB, VT: MVT::i32, Action: Custom);
173	setOperationAction(Op: ISD::SHL, VT: MVT::i32, Action: Custom);
174	setOperationAction(Op: ISD::SRA, VT: MVT::i32, Action: Custom);
175	setOperationAction(Op: ISD::SRL, VT: MVT::i32, Action: Custom);
176	setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::i32, Action: Custom);
177	setOperationAction(Op: ISD::BITCAST, VT: MVT::i32, Action: Custom);
178	setOperationAction(Op: ISD::ROTR, VT: MVT::i32, Action: Custom);
179	setOperationAction(Op: ISD::ROTL, VT: MVT::i32, Action: Custom);
180	setOperationAction(Op: ISD::CTTZ, VT: MVT::i32, Action: Custom);
181	setOperationAction(Op: ISD::CTLZ, VT: MVT::i32, Action: Custom);
182	setOperationAction(Op: ISD::EH_DWARF_CFA, VT: MVT::i32, Action: Custom);
183	setOperationAction(Op: ISD::READ_REGISTER, VT: MVT::i32, Action: Custom);
184	setOperationAction(Op: ISD::WRITE_REGISTER, VT: MVT::i32, Action: Custom);
185	setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::i32, Action: Custom);
186	setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::i32, Action: Custom);
187	setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::i32, Action: Custom);
188
189	setOperationAction(Op: ISD::BITREVERSE, VT: MVT::i32, Action: Custom);
190	setOperationAction(Op: ISD::BSWAP, VT: MVT::i32, Action: Custom);
191	setOperationAction(Ops: {ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, VT: MVT::i32,
192	Action: Custom);
193	setOperationAction(Op: ISD::LROUND, VT: MVT::i32, Action: Custom);
194	}
195
196	// Set operations for LA32 only.
197
198	if (!Subtarget.is64Bit()) {
199	setOperationAction(Op: ISD::READ_REGISTER, VT: MVT::i64, Action: Custom);
200	setOperationAction(Op: ISD::WRITE_REGISTER, VT: MVT::i64, Action: Custom);
201	setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::i64, Action: Custom);
202	setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::i64, Action: Custom);
203	setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::i64, Action: Custom);
204	if (Subtarget.hasBasicD())
205	setOperationAction(Op: ISD::BITCAST, VT: MVT::i64, Action: Custom);
206	}
207
208	setOperationAction(Op: ISD::ATOMIC_FENCE, VT: MVT::Other, Action: Custom);
209
210	static const ISD::CondCode FPCCToExpand[] = {
211	ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
212	ISD::SETGE, ISD::SETNE, ISD::SETGT};
213
214	// Set operations for 'F' feature.
215
216	if (Subtarget.hasBasicF()) {
217	setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f32, MemVT: MVT::f16, Action: Expand);
218	setTruncStoreAction(ValVT: MVT::f32, MemVT: MVT::f16, Action: Expand);
219	setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f32, MemVT: MVT::bf16, Action: Expand);
220	setTruncStoreAction(ValVT: MVT::f32, MemVT: MVT::bf16, Action: Expand);
221	setCondCodeAction(CCs: FPCCToExpand, VT: MVT::f32, Action: Expand);
222
223	setOperationAction(Op: ISD::ConstantFP, VT: MVT::f32, Action: Custom);
224	setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f32, Action: Expand);
225	setOperationAction(Op: ISD::BR_CC, VT: MVT::f32, Action: Expand);
226	setOperationAction(Op: ISD::FMA, VT: MVT::f32, Action: Legal);
227	setOperationAction(Op: ISD::FMINNUM_IEEE, VT: MVT::f32, Action: Legal);
228	setOperationAction(Op: ISD::FMINNUM, VT: MVT::f32, Action: Legal);
229	setOperationAction(Op: ISD::FMAXNUM_IEEE, VT: MVT::f32, Action: Legal);
230	setOperationAction(Op: ISD::FMAXNUM, VT: MVT::f32, Action: Legal);
231	setOperationAction(Op: ISD::FCANONICALIZE, VT: MVT::f32, Action: Legal);
232	setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::f32, Action: Legal);
233	setOperationAction(Op: ISD::STRICT_FSETCC, VT: MVT::f32, Action: Legal);
234	setOperationAction(Op: ISD::IS_FPCLASS, VT: MVT::f32, Action: Legal);
235	setOperationAction(Op: ISD::FSIN, VT: MVT::f32, Action: Expand);
236	setOperationAction(Op: ISD::FCOS, VT: MVT::f32, Action: Expand);
237	setOperationAction(Op: ISD::FSINCOS, VT: MVT::f32, Action: Expand);
238	setOperationAction(Op: ISD::FPOW, VT: MVT::f32, Action: Expand);
239	setOperationAction(Op: ISD::FREM, VT: MVT::f32, Action: LibCall);
240	setOperationAction(Op: ISD::FP16_TO_FP, VT: MVT::f32,
241	Action: Subtarget.isSoftFPABI() ? LibCall : Custom);
242	setOperationAction(Op: ISD::FP_TO_FP16, VT: MVT::f32,
243	Action: Subtarget.isSoftFPABI() ? LibCall : Custom);
244	setOperationAction(Op: ISD::BF16_TO_FP, VT: MVT::f32, Action: Custom);
245	setOperationAction(Op: ISD::FP_TO_BF16, VT: MVT::f32,
246	Action: Subtarget.isSoftFPABI() ? LibCall : Custom);
247
248	if (Subtarget.is64Bit())
249	setOperationAction(Op: ISD::FRINT, VT: MVT::f32, Action: Legal);
250
251	if (!Subtarget.hasBasicD()) {
252	setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i32, Action: Custom);
253	if (Subtarget.is64Bit()) {
254	setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i64, Action: Custom);
255	setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i64, Action: Custom);
256	}
257	}
258	}
259
260	// Set operations for 'D' feature.
261
262	if (Subtarget.hasBasicD()) {
263	setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f64, MemVT: MVT::f16, Action: Expand);
264	setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f64, MemVT: MVT::f32, Action: Expand);
265	setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f64, MemVT: MVT::bf16, Action: Expand);
266	setTruncStoreAction(ValVT: MVT::f64, MemVT: MVT::bf16, Action: Expand);
267	setTruncStoreAction(ValVT: MVT::f64, MemVT: MVT::f16, Action: Expand);
268	setTruncStoreAction(ValVT: MVT::f64, MemVT: MVT::f32, Action: Expand);
269	setCondCodeAction(CCs: FPCCToExpand, VT: MVT::f64, Action: Expand);
270
271	setOperationAction(Op: ISD::ConstantFP, VT: MVT::f64, Action: Custom);
272	setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f64, Action: Expand);
273	setOperationAction(Op: ISD::BR_CC, VT: MVT::f64, Action: Expand);
274	setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::f64, Action: Legal);
275	setOperationAction(Op: ISD::STRICT_FSETCC, VT: MVT::f64, Action: Legal);
276	setOperationAction(Op: ISD::FMA, VT: MVT::f64, Action: Legal);
277	setOperationAction(Op: ISD::FMINNUM_IEEE, VT: MVT::f64, Action: Legal);
278	setOperationAction(Op: ISD::FMINNUM, VT: MVT::f64, Action: Legal);
279	setOperationAction(Op: ISD::FMAXNUM_IEEE, VT: MVT::f64, Action: Legal);
280	setOperationAction(Op: ISD::FCANONICALIZE, VT: MVT::f64, Action: Legal);
281	setOperationAction(Op: ISD::FMAXNUM, VT: MVT::f64, Action: Legal);
282	setOperationAction(Op: ISD::IS_FPCLASS, VT: MVT::f64, Action: Legal);
283	setOperationAction(Op: ISD::FSIN, VT: MVT::f64, Action: Expand);
284	setOperationAction(Op: ISD::FCOS, VT: MVT::f64, Action: Expand);
285	setOperationAction(Op: ISD::FSINCOS, VT: MVT::f64, Action: Expand);
286	setOperationAction(Op: ISD::FPOW, VT: MVT::f64, Action: Expand);
287	setOperationAction(Op: ISD::FREM, VT: MVT::f64, Action: LibCall);
288	setOperationAction(Op: ISD::FP16_TO_FP, VT: MVT::f64, Action: Expand);
289	setOperationAction(Op: ISD::FP_TO_FP16, VT: MVT::f64,
290	Action: Subtarget.isSoftFPABI() ? LibCall : Custom);
291	setOperationAction(Op: ISD::BF16_TO_FP, VT: MVT::f64, Action: Custom);
292	setOperationAction(Op: ISD::FP_TO_BF16, VT: MVT::f64,
293	Action: Subtarget.isSoftFPABI() ? LibCall : Custom);
294
295	if (Subtarget.is64Bit())
296	setOperationAction(Op: ISD::FRINT, VT: MVT::f64, Action: Legal);
297	}
298
299	// Set operations for 'LSX' feature.
300
301	if (Subtarget.hasExtLSX()) {
302	for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
303	// Expand all truncating stores and extending loads.
304	for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
305	setTruncStoreAction(ValVT: VT, MemVT: InnerVT, Action: Expand);
306	setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand);
307	setLoadExtAction(ExtType: ISD::ZEXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand);
308	setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand);
309	}
310	// By default everything must be expanded. Then we will selectively turn
311	// on ones that can be effectively codegen'd.
312	for (unsigned Op = `0`; Op < ISD::BUILTIN_OP_END; ++Op)
313	setOperationAction(Op, VT, Action: Expand);
314	}
315
316	for (MVT VT : LSXVTs) {
317	setOperationAction(Ops: {ISD::LOAD, ISD::STORE}, VT, Action: Legal);
318	setOperationAction(Op: ISD::BITCAST, VT, Action: Legal);
319	setOperationAction(Op: ISD::UNDEF, VT, Action: Legal);
320
321	setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT, Action: Custom);
322	setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT, Action: Legal);
323	setOperationAction(Op: ISD::BUILD_VECTOR, VT, Action: Custom);
324
325	setOperationAction(Op: ISD::SETCC, VT, Action: Legal);
326	setOperationAction(Op: ISD::VSELECT, VT, Action: Legal);
327	setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT, Action: Custom);
328	setOperationAction(Op: ISD::EXTRACT_SUBVECTOR, VT, Action: Legal);
329	}
330	for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
331	setOperationAction(Ops: {ISD::ADD, ISD::SUB}, VT, Action: Legal);
332	setOperationAction(Ops: {ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
333	Action: Legal);
334	setOperationAction(Ops: {ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
335	VT, Action: Legal);
336	setOperationAction(Ops: {ISD::AND, ISD::OR, ISD::XOR}, VT, Action: Legal);
337	setOperationAction(Ops: {ISD::SHL, ISD::SRA, ISD::SRL}, VT, Action: Legal);
338	setOperationAction(Ops: {ISD::CTPOP, ISD::CTLZ}, VT, Action: Legal);
339	setOperationAction(Ops: {ISD::MULHS, ISD::MULHU}, VT, Action: Legal);
340	setCondCodeAction(
341	CCs: {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
342	Action: Expand);
343	setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT, Action: Custom);
344	setOperationAction(Op: ISD::ABS, VT, Action: Legal);
345	setOperationAction(Op: ISD::ABDS, VT, Action: Legal);
346	setOperationAction(Op: ISD::ABDU, VT, Action: Legal);
347	setOperationAction(Op: ISD::SADDSAT, VT, Action: Legal);
348	setOperationAction(Op: ISD::SSUBSAT, VT, Action: Legal);
349	setOperationAction(Op: ISD::UADDSAT, VT, Action: Legal);
350	setOperationAction(Op: ISD::USUBSAT, VT, Action: Legal);
351	setOperationAction(Op: ISD::ROTL, VT, Action: Custom);
352	setOperationAction(Op: ISD::ROTR, VT, Action: Custom);
353	}
354	for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
355	setOperationAction(Op: ISD::BITREVERSE, VT, Action: Custom);
356	for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
357	setOperationAction(Op: ISD::BSWAP, VT, Action: Legal);
358	for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
359	setOperationAction(Ops: {ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Action: Legal);
360	setOperationAction(Ops: {ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Action: Legal);
361	}
362	for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
363	setOperationAction(Ops: {ISD::FADD, ISD::FSUB}, VT, Action: Legal);
364	setOperationAction(Ops: {ISD::FMUL, ISD::FDIV}, VT, Action: Legal);
365	setOperationAction(Op: ISD::FMA, VT, Action: Legal);
366	setOperationAction(Op: ISD::FSQRT, VT, Action: Legal);
367	setOperationAction(Op: ISD::FNEG, VT, Action: Legal);
368	setCondCodeAction(CCs: {ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
369	ISD::SETUGE, ISD::SETUGT},
370	VT, Action: Expand);
371	setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT, Action: Legal);
372	setOperationAction(Op: ISD::FCEIL, VT, Action: Legal);
373	setOperationAction(Op: ISD::FFLOOR, VT, Action: Legal);
374	setOperationAction(Op: ISD::FTRUNC, VT, Action: Legal);
375	setOperationAction(Op: ISD::FROUNDEVEN, VT, Action: Legal);
376	setOperationAction(Op: ISD::FMINNUM, VT, Action: Legal);
377	setOperationAction(Op: ISD::FMAXNUM, VT, Action: Legal);
378	}
379	setOperationAction(Op: ISD::CTPOP, VT: GRLenVT, Action: Legal);
380	setOperationAction(Ops: ISD::FCEIL, VTs: {MVT::f32, MVT::f64}, Action: Legal);
381	setOperationAction(Ops: ISD::FFLOOR, VTs: {MVT::f32, MVT::f64}, Action: Legal);
382	setOperationAction(Ops: ISD::FTRUNC, VTs: {MVT::f32, MVT::f64}, Action: Legal);
383	setOperationAction(Ops: ISD::FROUNDEVEN, VTs: {MVT::f32, MVT::f64}, Action: Legal);
384
385	for (MVT VT :
386	{MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
387	MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
388	setOperationAction(Op: ISD::TRUNCATE, VT, Action: Custom);
389	setOperationAction(Op: ISD::VECREDUCE_ADD, VT, Action: Custom);
390	setOperationAction(Op: ISD::VECREDUCE_AND, VT, Action: Custom);
391	setOperationAction(Op: ISD::VECREDUCE_OR, VT, Action: Custom);
392	setOperationAction(Op: ISD::VECREDUCE_XOR, VT, Action: Custom);
393	setOperationAction(Op: ISD::VECREDUCE_SMAX, VT, Action: Custom);
394	setOperationAction(Op: ISD::VECREDUCE_SMIN, VT, Action: Custom);
395	setOperationAction(Op: ISD::VECREDUCE_UMAX, VT, Action: Custom);
396	setOperationAction(Op: ISD::VECREDUCE_UMIN, VT, Action: Custom);
397	}
398	}
399
400	// Set operations for 'LASX' feature.
401
402	if (Subtarget.hasExtLASX()) {
403	for (MVT VT : LASXVTs) {
404	setOperationAction(Ops: {ISD::LOAD, ISD::STORE}, VT, Action: Legal);
405	setOperationAction(Op: ISD::BITCAST, VT, Action: Legal);
406	setOperationAction(Op: ISD::UNDEF, VT, Action: Legal);
407
408	setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT, Action: Custom);
409	setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT, Action: Custom);
410	setOperationAction(Op: ISD::BUILD_VECTOR, VT, Action: Custom);
411	setOperationAction(Op: ISD::CONCAT_VECTORS, VT, Action: Custom);
412	setOperationAction(Op: ISD::INSERT_SUBVECTOR, VT, Action: Legal);
413
414	setOperationAction(Op: ISD::SETCC, VT, Action: Custom);
415	setOperationAction(Op: ISD::VSELECT, VT, Action: Legal);
416	setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT, Action: Custom);
417	}
418	for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
419	setOperationAction(Ops: {ISD::ADD, ISD::SUB}, VT, Action: Legal);
420	setOperationAction(Ops: {ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
421	Action: Legal);
422	setOperationAction(Ops: {ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
423	VT, Action: Legal);
424	setOperationAction(Ops: {ISD::AND, ISD::OR, ISD::XOR}, VT, Action: Legal);
425	setOperationAction(Ops: {ISD::SHL, ISD::SRA, ISD::SRL}, VT, Action: Legal);
426	setOperationAction(Ops: {ISD::CTPOP, ISD::CTLZ}, VT, Action: Legal);
427	setOperationAction(Ops: {ISD::MULHS, ISD::MULHU}, VT, Action: Legal);
428	setCondCodeAction(
429	CCs: {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
430	Action: Expand);
431	setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT, Action: Custom);
432	setOperationAction(Op: ISD::ABS, VT, Action: Legal);
433	setOperationAction(Op: ISD::ABDS, VT, Action: Legal);
434	setOperationAction(Op: ISD::ABDU, VT, Action: Legal);
435	setOperationAction(Op: ISD::SADDSAT, VT, Action: Legal);
436	setOperationAction(Op: ISD::SSUBSAT, VT, Action: Legal);
437	setOperationAction(Op: ISD::UADDSAT, VT, Action: Legal);
438	setOperationAction(Op: ISD::USUBSAT, VT, Action: Legal);
439	setOperationAction(Op: ISD::VECREDUCE_ADD, VT, Action: Custom);
440	setOperationAction(Op: ISD::ROTL, VT, Action: Custom);
441	setOperationAction(Op: ISD::ROTR, VT, Action: Custom);
442	}
443	for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
444	setOperationAction(Op: ISD::BITREVERSE, VT, Action: Custom);
445	for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
446	setOperationAction(Op: ISD::BSWAP, VT, Action: Legal);
447	for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
448	setOperationAction(Ops: {ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Action: Legal);
449	setOperationAction(Ops: {ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Action: Legal);
450	}
451	for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
452	setOperationAction(Ops: {ISD::FADD, ISD::FSUB}, VT, Action: Legal);
453	setOperationAction(Ops: {ISD::FMUL, ISD::FDIV}, VT, Action: Legal);
454	setOperationAction(Op: ISD::FMA, VT, Action: Legal);
455	setOperationAction(Op: ISD::FSQRT, VT, Action: Legal);
456	setOperationAction(Op: ISD::FNEG, VT, Action: Legal);
457	setCondCodeAction(CCs: {ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
458	ISD::SETUGE, ISD::SETUGT},
459	VT, Action: Expand);
460	setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT, Action: Legal);
461	setOperationAction(Op: ISD::FCEIL, VT, Action: Legal);
462	setOperationAction(Op: ISD::FFLOOR, VT, Action: Legal);
463	setOperationAction(Op: ISD::FTRUNC, VT, Action: Legal);
464	setOperationAction(Op: ISD::FROUNDEVEN, VT, Action: Legal);
465	setOperationAction(Op: ISD::FMINNUM, VT, Action: Legal);
466	setOperationAction(Op: ISD::FMAXNUM, VT, Action: Legal);
467	}
468	}
469
470	// Set DAG combine for LA32 and LA64.
471	if (Subtarget.hasBasicF()) {
472	setTargetDAGCombine(ISD::SINT_TO_FP);
473	}
474
475	setTargetDAGCombine(ISD::AND);
476	setTargetDAGCombine(ISD::OR);
477	setTargetDAGCombine(ISD::SRL);
478	setTargetDAGCombine(ISD::SETCC);
479
480	// Set DAG combine for 'LSX' feature.
481
482	if (Subtarget.hasExtLSX()) {
483	setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
484	setTargetDAGCombine(ISD::BITCAST);
485	}
486
487	// Set DAG combine for 'LASX' feature.
488	if (Subtarget.hasExtLASX()) {
489	setTargetDAGCombine(ISD::ANY_EXTEND);
490	setTargetDAGCombine(ISD::ZERO_EXTEND);
491	setTargetDAGCombine(ISD::SIGN_EXTEND);
492	}
493
494	// Compute derived properties from the register classes.
495	computeRegisterProperties(TRI: Subtarget.getRegisterInfo());
496
497	setStackPointerRegisterToSaveRestore(LoongArch::R3);
498
499	setBooleanContents(ZeroOrOneBooleanContent);
500	setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
501
502	setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
503
504	setMinCmpXchgSizeInBits(`32`);
505
506	// Function alignments.
507	setMinFunctionAlignment(Align (`4`));
508	// Set preferred alignments.
509	setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
510	setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
511	setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
512
513	// cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
514	if (Subtarget.hasLAMCAS())
515	setMinCmpXchgSizeInBits(`8`);
516
517	if (Subtarget.hasSCQ()) {
518	setMaxAtomicSizeInBitsSupported(`128`);
519	setOperationAction(Op: ISD::ATOMIC_CMP_SWAP, VT: MVT::i128, Action: Custom);
520	}
521
522	// Disable strict node mutation.
523	IsStrictFPEnabled = true;
524	}
525
526	bool LoongArchTargetLowering::isOffsetFoldingLegal(
527	const GlobalAddressSDNode GA) const* {
528	// In order to maximise the opportunity for common subexpression elimination,
529	// keep a separate ADD node for the global address offset instead of folding
530	// it in the global address node. Later peephole optimisations may choose to
531	// fold it back in when profitable.
532	return false;
533	}
534
535	SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
536	SelectionDAG &DAG) const {
537	switch (Op.getOpcode()) {
538	case ISD::ATOMIC_FENCE:
539	return lowerATOMIC_FENCE(Op, DAG);
540	case ISD::EH_DWARF_CFA:
541	return lowerEH_DWARF_CFA(Op, DAG);
542	case ISD::GlobalAddress:
543	return lowerGlobalAddress(Op, DAG);
544	case ISD::GlobalTLSAddress:
545	return lowerGlobalTLSAddress(Op, DAG);
546	case ISD::INTRINSIC_WO_CHAIN:
547	return lowerINTRINSIC_WO_CHAIN(Op, DAG);
548	case ISD::INTRINSIC_W_CHAIN:
549	return lowerINTRINSIC_W_CHAIN(Op, DAG);
550	case ISD::INTRINSIC_VOID:
551	return lowerINTRINSIC_VOID(Op, DAG);
552	case ISD::BlockAddress:
553	return lowerBlockAddress(Op, DAG);
554	case ISD::JumpTable:
555	return lowerJumpTable(Op, DAG);
556	case ISD::SHL_PARTS:
557	return lowerShiftLeftParts(Op, DAG);
558	case ISD::SRA_PARTS:
559	return lowerShiftRightParts(Op, DAG, IsSRA: true);
560	case ISD::SRL_PARTS:
561	return lowerShiftRightParts(Op, DAG, IsSRA: false);
562	case ISD::ConstantPool:
563	return lowerConstantPool(Op, DAG);
564	case ISD::FP_TO_SINT:
565	return lowerFP_TO_SINT(Op, DAG);
566	case ISD::BITCAST:
567	return lowerBITCAST(Op, DAG);
568	case ISD::UINT_TO_FP:
569	return lowerUINT_TO_FP(Op, DAG);
570	case ISD::SINT_TO_FP:
571	return lowerSINT_TO_FP(Op, DAG);
572	case ISD::VASTART:
573	return lowerVASTART(Op, DAG);
574	case ISD::FRAMEADDR:
575	return lowerFRAMEADDR(Op, DAG);
576	case ISD::RETURNADDR:
577	return lowerRETURNADDR(Op, DAG);
578	case ISD::WRITE_REGISTER:
579	return lowerWRITE_REGISTER(Op, DAG);
580	case ISD::INSERT_VECTOR_ELT:
581	return lowerINSERT_VECTOR_ELT(Op, DAG);
582	case ISD::EXTRACT_VECTOR_ELT:
583	return lowerEXTRACT_VECTOR_ELT(Op, DAG);
584	case ISD::BUILD_VECTOR:
585	return lowerBUILD_VECTOR(Op, DAG);
586	case ISD::CONCAT_VECTORS:
587	return lowerCONCAT_VECTORS(Op, DAG);
588	case ISD::VECTOR_SHUFFLE:
589	return lowerVECTOR_SHUFFLE(Op, DAG);
590	case ISD::BITREVERSE:
591	return lowerBITREVERSE(Op, DAG);
592	case ISD::SCALAR_TO_VECTOR:
593	return lowerSCALAR_TO_VECTOR(Op, DAG);
594	case ISD::PREFETCH:
595	return lowerPREFETCH(Op, DAG);
596	case ISD::SELECT:
597	return lowerSELECT(Op, DAG);
598	case ISD::BRCOND:
599	return lowerBRCOND(Op, DAG);
600	case ISD::FP_TO_FP16:
601	return lowerFP_TO_FP16(Op, DAG);
602	case ISD::FP16_TO_FP:
603	return lowerFP16_TO_FP(Op, DAG);
604	case ISD::FP_TO_BF16:
605	return lowerFP_TO_BF16(Op, DAG);
606	case ISD::BF16_TO_FP:
607	return lowerBF16_TO_FP(Op, DAG);
608	case ISD::VECREDUCE_ADD:
609	return lowerVECREDUCE_ADD(Op, DAG);
610	case ISD::ROTL:
611	case ISD::ROTR:
612	return lowerRotate(Op, DAG);
613	case ISD::VECREDUCE_AND:
614	case ISD::VECREDUCE_OR:
615	case ISD::VECREDUCE_XOR:
616	case ISD::VECREDUCE_SMAX:
617	case ISD::VECREDUCE_SMIN:
618	case ISD::VECREDUCE_UMAX:
619	case ISD::VECREDUCE_UMIN:
620	return lowerVECREDUCE(Op, DAG);
621	case ISD::ConstantFP:
622	return lowerConstantFP(Op, DAG);
623	case ISD::SETCC:
624	return lowerSETCC(Op, DAG);
625	}
626	return SDValue ();
627	}
628
629	// Helper to attempt to return a cheaper, bit-inverted version of \p V.
630	static SDValue isNOT(SDValue V, SelectionDAG &DAG) {
631	// TODO: don't always ignore oneuse constraints.
632	V = peekThroughBitcasts(V);
633	EVT VT = V.getValueType();
634
635	// Match not(xor X, -1) -> X.
636	if (V.getOpcode() == ISD::XOR &&
637	(ISD::isBuildVectorAllOnes(N: V.getOperand(i: `1`).getNode()) \|\|
638	isAllOnesConstant(V: V.getOperand(i: `1`))))
639	return V.getOperand(i: `0`);
640
641	// Match not(extract_subvector(not(X)) -> extract_subvector(X).
642	if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
643	(isNullConstant(V: V.getOperand(i: `1`)) \|\| V.getOperand(i: `0`).hasOneUse())) {
644	if (SDValue Not = isNOT(V: V.getOperand(i: `0`), DAG)) {
645	Not = DAG.getBitcast(VT: V.getOperand(i: `0`).getValueType(), V: Not);
646	return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL: SDLoc (Not), VT, N1: Not,
647	N2: V.getOperand(i: `1`));
648	}
649	}
650
651	// Match not(SplatVector(not(X)) -> SplatVector(X).
652	if (V.getOpcode() == ISD::BUILD_VECTOR) {
653	if (SDValue SplatValue =
654	cast<BuildVectorSDNode>(Val: V.getNode())->getSplatValue()) {
655	if (!V ->isOnlyUserOf(N: SplatValue.getNode()))
656	return SDValue ();
657
658	if (SDValue Not = isNOT(V: SplatValue, DAG)) {
659	Not = DAG.getBitcast(VT: V.getOperand(i: `0`).getValueType(), V: Not);
660	return DAG.getSplat(VT, DL: SDLoc (Not), Op: Not);
661	}
662	}
663	}
664
665	// Match not(or(not(X),not(Y))) -> and(X, Y).
666	if (V.getOpcode() == ISD::OR && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
667	V.getOperand(i: `0`).hasOneUse() && V.getOperand(i: `1`).hasOneUse()) {
668	// TODO: Handle cases with single NOT operand -> VANDN
669	if (SDValue Op1 = isNOT(V: V.getOperand(i: `1`), DAG))
670	if (SDValue Op0 = isNOT(V: V.getOperand(i: `0`), DAG))
671	return DAG.getNode(Opcode: ISD::AND, DL: SDLoc (V), VT, N1: DAG.getBitcast(VT, V: Op0),
672	N2: DAG.getBitcast(VT, V: Op1));
673	}
674
675	// TODO: Add more matching patterns. Such as,
676	// not(concat_vectors(not(X), not(Y))) -> concat_vectors(X, Y).
677	// not(slt(C, X)) -> slt(X - 1, C)
678
679	return SDValue ();
680	}
681
682	SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op,
683	SelectionDAG &DAG) const {
684	EVT VT = Op.getValueType();
685	ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Val&: Op);
686	const APFloat &FPVal = CFP->getValueAPF();
687	SDLoc DL(CFP);
688
689	assert((VT == MVT::f32 && Subtarget.hasBasicF()) \|\|
690	(VT == MVT::f64 && Subtarget.hasBasicD()));
691
692	// If value is 0.0 or -0.0, just ignore it.
693	if (FPVal.isZero())
694	return SDValue ();
695
696	// If lsx enabled, use cheaper 'vldi' instruction if possible.
697	if (isFPImmVLDILegal(Imm: FPVal, VT))
698	return SDValue ();
699
700	// Construct as integer, and move to float register.
701	APInt INTVal = FPVal.bitcastToAPInt();
702
703	// If more than MaterializeFPImmInsNum instructions will be used to
704	// generate the INTVal and move it to float register, fallback to
705	// use floating point load from the constant pool.
706	auto Seq = LoongArchMatInt::generateInstSeq(Val: INTVal.getSExtValue());
707	int InsNum = Seq.size() + ((VT == MVT::f64 && !Subtarget.is64Bit()) ? `2` : `1`);
708	if (InsNum > MaterializeFPImmInsNum && !FPVal.isExactlyValue(V: +`1.0`))
709	return SDValue ();
710
711	switch (VT.getSimpleVT().SimpleTy) {
712	default:
713	llvm_unreachable("Unexpected floating point type!");
714	break;
715	case MVT::f32: {
716	SDValue NewVal = DAG.getConstant(Val: INTVal, DL, VT: MVT::i32);
717	if (Subtarget.is64Bit())
718	NewVal = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: NewVal);
719	return DAG.getNode(Opcode: Subtarget.is64Bit() ? LoongArchISD::MOVGR2FR_W_LA64
720	: LoongArchISD::MOVGR2FR_W,
721	DL, VT, Operand: NewVal);
722	}
723	case MVT::f64: {
724	if (Subtarget.is64Bit()) {
725	SDValue NewVal = DAG.getConstant(Val: INTVal, DL, VT: MVT::i64);
726	return DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_D, DL, VT, Operand: NewVal);
727	}
728	SDValue Lo = DAG.getConstant(Val: INTVal.trunc(width: `32`), DL, VT: MVT::i32);
729	SDValue Hi = DAG.getConstant(Val: INTVal.lshr(shiftAmt: `32`).trunc(width: `32`), DL, VT: MVT::i32);
730	return DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_D_LO_HI, DL, VT, N1: Lo, N2: Hi);
731	}
732	}
733
734	return SDValue ();
735	}
736
737	// Ensure SETCC result and operand have the same bit width; isel does not
738	// support mismatched widths.
739	SDValue LoongArchTargetLowering::lowerSETCC(SDValue Op,
740	SelectionDAG &DAG) const {
741	SDLoc DL(Op);
742	EVT ResultVT = Op.getValueType();
743	EVT OperandVT = Op.getOperand(i: `0`).getValueType();
744
745	EVT SetCCResultVT =
746	getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: OperandVT);
747
748	if (ResultVT == SetCCResultVT)
749	return Op;
750
751	assert(Op.getOperand(`0`).getValueType() == Op.getOperand(`1`).getValueType() &&
752	"SETCC operands must have the same type!");
753
754	SDValue SetCCNode =
755	DAG.getNode(Opcode: ISD::SETCC, DL, VT: SetCCResultVT, N1: Op.getOperand(i: `0`),
756	N2: Op.getOperand(i: `1`), N3: Op.getOperand(i: `2`));
757
758	if (ResultVT.bitsGT(VT: SetCCResultVT))
759	SetCCNode = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: ResultVT, Operand: SetCCNode);
760	else if (ResultVT.bitsLT(VT: SetCCResultVT))
761	SetCCNode = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: ResultVT, Operand: SetCCNode);
762
763	return SetCCNode;
764	}
765
766	// Lower vecreduce_add using vhaddw instructions.
767	// For Example:
768	// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
769	// can be lowered to:
770	// VHADDW_D_W vr0, vr0, vr0
771	// VHADDW_Q_D vr0, vr0, vr0
772	// VPICKVE2GR_D a0, vr0, 0
773	// ADDI_W a0, a0, 0
774	SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
775	SelectionDAG &DAG) const {
776
777	SDLoc DL(Op);
778	MVT OpVT = Op.getSimpleValueType();
779	SDValue Val = Op.getOperand(i: `0`);
780
781	unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
782	unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
783	unsigned ResBits = OpVT.getScalarSizeInBits();
784
785	unsigned LegalVecSize = `128`;
786	bool isLASX256Vector =
787	Subtarget.hasExtLASX() && Val.getValueSizeInBits() == `256`;
788
789	// Ensure operand type legal or enable it legal.
790	while (!isTypeLegal(VT: Val.getSimpleValueType())) {
791	Val = DAG.WidenVector(N: Val, DL);
792	}
793
794	// NumEles is designed for iterations count, v4i32 for LSX
795	// and v8i32 for LASX should have the same count.
796	if (isLASX256Vector) {
797	NumEles /= `2`;
798	LegalVecSize = `256`;
799	}
800
801	for (unsigned i = `1`; i < NumEles; i = `2`, EleBits = `2`) {
802	MVT IntTy = MVT::getIntegerVT(BitWidth: EleBits);
803	MVT VecTy = MVT::getVectorVT(VT: IntTy, NumElements: LegalVecSize / EleBits);
804	Val = DAG.getNode(Opcode: LoongArchISD::VHADDW, DL, VT: VecTy, N1: Val, N2: Val);
805	}
806
807	if (isLASX256Vector) {
808	SDValue Tmp = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: Val,
809	N2: DAG.getConstant(Val: `2`, DL, VT: Subtarget.getGRLenVT()));
810	Val = DAG.getNode(Opcode: ISD::ADD, DL, VT: MVT::v4i64, N1: Tmp, N2: Val);
811	}
812
813	Val = DAG.getBitcast(VT: MVT::getVectorVT(VT: OpVT, NumElements: LegalVecSize / ResBits), V: Val);
814	return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: OpVT, N1: Val,
815	N2: DAG.getConstant(Val: `0`, DL, VT: Subtarget.getGRLenVT()));
816	}
817
818	// Lower vecreduce_and/or/xor/[s/u]max/[s/u]min.
819	// For Example:
820	// call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
821	// can be lowered to:
822	// VBSRL_V vr1, vr0, 8
823	// VMAX_W vr0, vr1, vr0
824	// VBSRL_V vr1, vr0, 4
825	// VMAX_W vr0, vr1, vr0
826	// VPICKVE2GR_W a0, vr0, 0
827	// For 256 bit vector, it is illegal and will be spilt into
828	// two 128 bit vector by default then processed by this.
829	SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
830	SelectionDAG &DAG) const {
831	SDLoc DL(Op);
832
833	MVT OpVT = Op.getSimpleValueType();
834	SDValue Val = Op.getOperand(i: `0`);
835
836	unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
837	unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
838
839	// Ensure operand type legal or enable it legal.
840	while (!isTypeLegal(VT: Val.getSimpleValueType())) {
841	Val = DAG.WidenVector(N: Val, DL);
842	}
843
844	unsigned Opcode = ISD::getVecReduceBaseOpcode(VecReduceOpcode: Op.getOpcode());
845	MVT VecTy = Val.getSimpleValueType();
846	MVT GRLenVT = Subtarget.getGRLenVT();
847
848	for (int i = NumEles; i > `1`; i /= `2`) {
849	SDValue ShiftAmt = DAG.getConstant(Val: i * EleBits / `16`, DL, VT: GRLenVT);
850	SDValue Tmp = DAG.getNode(Opcode: LoongArchISD::VBSRL, DL, VT: VecTy, N1: Val, N2: ShiftAmt);
851	Val = DAG.getNode(Opcode, DL, VT: VecTy, N1: Tmp, N2: Val);
852	}
853
854	return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: OpVT, N1: Val,
855	N2: DAG.getConstant(Val: `0`, DL, VT: GRLenVT));
856	}
857
858	SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
859	SelectionDAG &DAG) const {
860	unsigned IsData = Op.getConstantOperandVal(i: `4`);
861
862	// We don't support non-data prefetch.
863	// Just preserve the chain.
864	if (!IsData)
865	return Op.getOperand(i: `0`);
866
867	return Op;
868	}
869
870	SDValue LoongArchTargetLowering::lowerRotate(SDValue Op,
871	SelectionDAG &DAG) const {
872	MVT VT = Op.getSimpleValueType();
873	assert(VT.isVector() && "Unexpected type");
874
875	SDLoc DL(Op);
876	SDValue R = Op.getOperand(i: `0`);
877	SDValue Amt = Op.getOperand(i: `1`);
878	unsigned Opcode = Op.getOpcode();
879	unsigned EltSizeInBits = VT.getScalarSizeInBits();
880
881	auto checkCstSplat = [](SDValue V, APInt &CstSplatValue) {
882	if (V.getOpcode() != ISD::BUILD_VECTOR)
883	return false;
884	if (SDValue SplatValue =
885	cast<BuildVectorSDNode>(Val: V.getNode())->getSplatValue()) {
886	if (auto *C = dyn_cast<ConstantSDNode>(Val&: SplatValue)) {
887	CstSplatValue = C->getAPIntValue();
888	return true;
889	}
890	}
891	return false;
892	};
893
894	// Check for constant splat rotation amount.
895	APInt CstSplatValue;
896	bool IsCstSplat = checkCstSplat (Amt, CstSplatValue);
897	bool isROTL = Opcode == ISD::ROTL;
898
899	// Check for splat rotate by zero.
900	if (IsCstSplat && CstSplatValue.urem(RHS: EltSizeInBits) == `0`)
901	return R;
902
903	// LoongArch targets always prefer ISD::ROTR.
904	if (isROTL) {
905	SDValue Zero = DAG.getConstant(Val: `0`, DL, VT);
906	return DAG.getNode(Opcode: ISD::ROTR, DL, VT, N1: R,
907	N2: DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Zero, N2: Amt));
908	}
909
910	// Rotate by a immediate.
911	if (IsCstSplat) {
912	// ISD::ROTR: Attemp to rotate by a positive immediate.
913	SDValue Bits = DAG.getConstant(Val: EltSizeInBits, DL, VT);
914	if (SDValue Urem =
915	DAG.FoldConstantArithmetic(Opcode: ISD::UREM, DL, VT, Ops: {Amt, Bits}))
916	return DAG.getNode(Opcode, DL, VT, N1: R, N2: Urem);
917	}
918
919	return Op;
920	}
921
922	// Return true if Val is equal to (setcc LHS, RHS, CC).
923	// Return false if Val is the inverse of (setcc LHS, RHS, CC).
924	// Otherwise, return std::nullopt.
925	static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
926	ISD::CondCode CC, SDValue Val) {
927	assert(Val->getOpcode() == ISD::SETCC);
928	SDValue LHS2 = Val.getOperand(i: `0`);
929	SDValue RHS2 = Val.getOperand(i: `1`);
930	ISD::CondCode CC2 = cast<CondCodeSDNode>(Val: Val.getOperand(i: `2`))->get();
931
932	if (LHS == LHS2 && RHS == RHS2) {
933	if (CC == CC2)
934	return true;
935	if (CC == ISD::getSetCCInverse(Operation: CC2, Type: LHS2.getValueType()))
936	return false;
937	} else if (LHS == RHS2 && RHS == LHS2) {
938	CC2 = ISD::getSetCCSwappedOperands(Operation: CC2);
939	if (CC == CC2)
940	return true;
941	if (CC == ISD::getSetCCInverse(Operation: CC2, Type: LHS2.getValueType()))
942	return false;
943	}
944
945	return std::nullopt;
946	}
947
948	static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
949	const LoongArchSubtarget &Subtarget) {
950	SDValue CondV = N->getOperand(Num: `0`);
951	SDValue TrueV = N->getOperand(Num: `1`);
952	SDValue FalseV = N->getOperand(Num: `2`);
953	MVT VT = N->getSimpleValueType(ResNo: `0`);
954	SDLoc DL(N);
955
956	// (select c, -1, y) -> -c \| y
957	if (isAllOnesConstant(V: TrueV)) {
958	SDValue Neg = DAG.getNegative(Val: CondV, DL, VT);
959	return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Neg, N2: DAG.getFreeze(V: FalseV));
960	}
961	// (select c, y, -1) -> (c-1) \| y
962	if (isAllOnesConstant(V: FalseV)) {
963	SDValue Neg =
964	DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: CondV, N2: DAG.getAllOnesConstant(DL, VT));
965	return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Neg, N2: DAG.getFreeze(V: TrueV));
966	}
967
968	// (select c, 0, y) -> (c-1) & y
969	if (isNullConstant(V: TrueV)) {
970	SDValue Neg =
971	DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: CondV, N2: DAG.getAllOnesConstant(DL, VT));
972	return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Neg, N2: DAG.getFreeze(V: FalseV));
973	}
974	// (select c, y, 0) -> -c & y
975	if (isNullConstant(V: FalseV)) {
976	SDValue Neg = DAG.getNegative(Val: CondV, DL, VT);
977	return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Neg, N2: DAG.getFreeze(V: TrueV));
978	}
979
980	// select c, ~x, x --> xor -c, x
981	if (isa<ConstantSDNode>(Val: TrueV) && isa<ConstantSDNode>(Val: FalseV)) {
982	const APInt &TrueVal = TrueV ->getAsAPIntVal();
983	const APInt &FalseVal = FalseV ->getAsAPIntVal();
984	if (~TrueVal == FalseVal) {
985	SDValue Neg = DAG.getNegative(Val: CondV, DL, VT);
986	return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Neg, N2: FalseV);
987	}
988	}
989
990	// Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
991	// when both truev and falsev are also setcc.
992	if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
993	FalseV.getOpcode() == ISD::SETCC) {
994	SDValue LHS = CondV.getOperand(i: `0`);
995	SDValue RHS = CondV.getOperand(i: `1`);
996	ISD::CondCode CC = cast<CondCodeSDNode>(Val: CondV.getOperand(i: `2`))->get();
997
998	// (select x, x, y) -> x \| y
999	// (select !x, x, y) -> x & y
1000	if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, Val: TrueV)) {
1001	return DAG.getNode(Opcode: *MatchResult ? ISD::OR : ISD::AND, DL, VT, N1: TrueV,
1002	N2: DAG.getFreeze(V: FalseV));
1003	}
1004	// (select x, y, x) -> x & y
1005	// (select !x, y, x) -> x \| y
1006	if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, Val: FalseV)) {
1007	return DAG.getNode(Opcode: *MatchResult ? ISD::AND : ISD::OR, DL, VT,
1008	N1: DAG.getFreeze(V: TrueV), N2: FalseV);
1009	}
1010	}
1011
1012	return SDValue ();
1013	}
1014
1015	// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
1016	// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
1017	// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
1018	// being `0` or `-1`. In such cases we can replace `select` with `and`.
1019	// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
1020	// than `c0`?
1021	static SDValue
1022	foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
1023	const LoongArchSubtarget &Subtarget) {
1024	unsigned SelOpNo = `0`;
1025	SDValue Sel = BO->getOperand(Num: `0`);
1026	if (Sel.getOpcode() != ISD::SELECT \|\| !Sel.hasOneUse()) {
1027	SelOpNo = `1`;
1028	Sel = BO->getOperand(Num: `1`);
1029	}
1030
1031	if (Sel.getOpcode() != ISD::SELECT \|\| !Sel.hasOneUse())
1032	return SDValue ();
1033
1034	unsigned ConstSelOpNo = `1`;
1035	unsigned OtherSelOpNo = `2`;
1036	if (!isa<ConstantSDNode>(Val: Sel ->getOperand(Num: ConstSelOpNo))) {
1037	ConstSelOpNo = `2`;
1038	OtherSelOpNo = `1`;
1039	}
1040	SDValue ConstSelOp = Sel ->getOperand(Num: ConstSelOpNo);
1041	ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(Val&: ConstSelOp);
1042	if (!ConstSelOpNode \|\| ConstSelOpNode->isOpaque())
1043	return SDValue ();
1044
1045	SDValue ConstBinOp = BO->getOperand(Num: SelOpNo ^ `1`);
1046	ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(Val&: ConstBinOp);
1047	if (!ConstBinOpNode \|\| ConstBinOpNode->isOpaque())
1048	return SDValue ();
1049
1050	SDLoc DL(Sel);
1051	EVT VT = BO->getValueType(ResNo: `0`);
1052
1053	SDValue NewConstOps[`2`] = {ConstSelOp, ConstBinOp};
1054	if (SelOpNo == `1`)
1055	std::swap(a&: NewConstOps[`0`], b&: NewConstOps[`1`]);
1056
1057	SDValue NewConstOp =
1058	DAG.FoldConstantArithmetic(Opcode: BO->getOpcode(), DL, VT, Ops: NewConstOps);
1059	if (!NewConstOp)
1060	return SDValue ();
1061
1062	const APInt &NewConstAPInt = NewConstOp ->getAsAPIntVal();
1063	if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
1064	return SDValue ();
1065
1066	SDValue OtherSelOp = Sel ->getOperand(Num: OtherSelOpNo);
1067	SDValue NewNonConstOps[`2`] = {OtherSelOp, ConstBinOp};
1068	if (SelOpNo == `1`)
1069	std::swap(a&: NewNonConstOps[`0`], b&: NewNonConstOps[`1`]);
1070	SDValue NewNonConstOp = DAG.getNode(Opcode: BO->getOpcode(), DL, VT, Ops: NewNonConstOps);
1071
1072	SDValue NewT = (ConstSelOpNo == `1`) ? NewConstOp : NewNonConstOp;
1073	SDValue NewF = (ConstSelOpNo == `1`) ? NewNonConstOp : NewConstOp;
1074	return DAG.getSelect(DL, VT, Cond: Sel.getOperand(i: `0`), LHS: NewT, RHS: NewF);
1075	}
1076
1077	// Changes the condition code and swaps operands if necessary, so the SetCC
1078	// operation matches one of the comparisons supported directly by branches
1079	// in the LoongArch ISA. May adjust compares to favor compare with 0 over
1080	// compare with 1/-1.
1081	static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
1082	ISD::CondCode &CC, SelectionDAG &DAG) {
1083	// If this is a single bit test that can't be handled by ANDI, shift the
1084	// bit to be tested to the MSB and perform a signed compare with 0.
1085	if (isIntEqualitySetCC(Code: CC) && isNullConstant(V: RHS) &&
1086	LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
1087	isa<ConstantSDNode>(Val: LHS.getOperand(i: `1`))) {
1088	uint64_t Mask = LHS.getConstantOperandVal(i: `1`);
1089	if ((isPowerOf2_64(Value: Mask) \|\| isMask_64(Value: Mask)) && !isInt<`12`>(x: Mask)) {
1090	unsigned ShAmt = `0`;
1091	if (isPowerOf2_64(Value: Mask)) {
1092	CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
1093	ShAmt = LHS.getValueSizeInBits() - `1` - Log2_64(Value: Mask);
1094	} else {
1095	ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Value: Mask);
1096	}
1097
1098	LHS = LHS.getOperand(i: `0`);
1099	if (ShAmt != `0`)
1100	LHS = DAG.getNode(Opcode: ISD::SHL, DL, VT: LHS.getValueType(), N1: LHS,
1101	N2: DAG.getConstant(Val: ShAmt, DL, VT: LHS.getValueType()));
1102	return;
1103	}
1104	}
1105
1106	if (auto *RHSC = dyn_cast<ConstantSDNode>(Val&: RHS)) {
1107	int64_t C = RHSC->getSExtValue();
1108	switch (CC) {
1109	default:
1110	break;
1111	case ISD::SETGT:
1112	// Convert X > -1 to X >= 0.
1113	if (C == -`1`) {
1114	RHS = DAG.getConstant(Val: `0`, DL, VT: RHS.getValueType());
1115	CC = ISD::SETGE;
1116	return;
1117	}
1118	break;
1119	case ISD::SETLT:
1120	// Convert X < 1 to 0 >= X.
1121	if (C == `1`) {
1122	RHS = LHS;
1123	LHS = DAG.getConstant(Val: `0`, DL, VT: RHS.getValueType());
1124	CC = ISD::SETGE;
1125	return;
1126	}
1127	break;
1128	}
1129	}
1130
1131	switch (CC) {
1132	default:
1133	break;
1134	case ISD::SETGT:
1135	case ISD::SETLE:
1136	case ISD::SETUGT:
1137	case ISD::SETULE:
1138	CC = ISD::getSetCCSwappedOperands(Operation: CC);
1139	std::swap(a&: LHS, b&: RHS);
1140	break;
1141	}
1142	}
1143
1144	SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
1145	SelectionDAG &DAG) const {
1146	SDValue CondV = Op.getOperand(i: `0`);
1147	SDValue TrueV = Op.getOperand(i: `1`);
1148	SDValue FalseV = Op.getOperand(i: `2`);
1149	SDLoc DL(Op);
1150	MVT VT = Op.getSimpleValueType();
1151	MVT GRLenVT = Subtarget.getGRLenVT();
1152
1153	if (SDValue V = combineSelectToBinOp(N: Op.getNode(), DAG, Subtarget))
1154	return V;
1155
1156	if (Op.hasOneUse()) {
1157	unsigned UseOpc = Op ->user_begin()->getOpcode();
1158	if (isBinOp(Opcode: UseOpc) && DAG.isSafeToSpeculativelyExecute(Opcode: UseOpc)) {
1159	SDNode BinOp = Op ->user_begin();
1160	if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(BO: *Op ->user_begin(),
1161	DAG, Subtarget)) {
1162	DAG.ReplaceAllUsesWith(From: BinOp, To: &NewSel);
1163	// Opcode check is necessary because foldBinOpIntoSelectIfProfitable
1164	// may return a constant node and cause crash in lowerSELECT.
1165	if (NewSel.getOpcode() == ISD::SELECT)
1166	return lowerSELECT(Op: NewSel, DAG);
1167	return NewSel;
1168	}
1169	}
1170	}
1171
1172	// If the condition is not an integer SETCC which operates on GRLenVT, we need
1173	// to emit a LoongArchISD::SELECT_CC comparing the condition to zero. i.e.:
1174	// (select condv, truev, falsev)
1175	// -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
1176	if (CondV.getOpcode() != ISD::SETCC \|\|
1177	CondV.getOperand(i: `0`).getSimpleValueType() != GRLenVT) {
1178	SDValue Zero = DAG.getConstant(Val: `0`, DL, VT: GRLenVT);
1179	SDValue SetNE = DAG.getCondCode(Cond: ISD::SETNE);
1180
1181	SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
1182
1183	return DAG.getNode(Opcode: LoongArchISD::SELECT_CC, DL, VT, Ops);
1184	}
1185
1186	// If the CondV is the output of a SETCC node which operates on GRLenVT
1187	// inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
1188	// to take advantage of the integer compare+branch instructions. i.e.: (select
1189	// (setcc lhs, rhs, cc), truev, falsev)
1190	// -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
1191	SDValue LHS = CondV.getOperand(i: `0`);
1192	SDValue RHS = CondV.getOperand(i: `1`);
1193	ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: CondV.getOperand(i: `2`))->get();
1194
1195	// Special case for a select of 2 constants that have a difference of 1.
1196	// Normally this is done by DAGCombine, but if the select is introduced by
1197	// type legalization or op legalization, we miss it. Restricting to SETLT
1198	// case for now because that is what signed saturating add/sub need.
1199	// FIXME: We don't need the condition to be SETLT or even a SETCC,
1200	// but we would probably want to swap the true/false values if the condition
1201	// is SETGE/SETLE to avoid an XORI.
1202	if (isa<ConstantSDNode>(Val: TrueV) && isa<ConstantSDNode>(Val: FalseV) &&
1203	CCVal == ISD::SETLT) {
1204	const APInt &TrueVal = TrueV ->getAsAPIntVal();
1205	const APInt &FalseVal = FalseV ->getAsAPIntVal();
1206	if (TrueVal - `1` == FalseVal)
1207	return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: CondV, N2: FalseV);
1208	if (TrueVal + `1` == FalseVal)
1209	return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: FalseV, N2: CondV);
1210	}
1211
1212	translateSetCCForBranch(DL, LHS, RHS, CC&: CCVal, DAG);
1213	// 1 < x ? x : 1 -> 0 < x ? x : 1
1214	if (isOneConstant(V: LHS) && (CCVal == ISD::SETLT \|\| CCVal == ISD::SETULT) &&
1215	RHS == TrueV && LHS == FalseV) {
1216	LHS = DAG.getConstant(Val: `0`, DL, VT);
1217	// 0 <u x is the same as x != 0.
1218	if (CCVal == ISD::SETULT) {
1219	std::swap(a&: LHS, b&: RHS);
1220	CCVal = ISD::SETNE;
1221	}
1222	}
1223
1224	// x <s -1 ? x : -1 -> x <s 0 ? x : -1
1225	if (isAllOnesConstant(V: RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
1226	RHS == FalseV) {
1227	RHS = DAG.getConstant(Val: `0`, DL, VT);
1228	}
1229
1230	SDValue TargetCC = DAG.getCondCode(Cond: CCVal);
1231
1232	if (isa<ConstantSDNode>(Val: TrueV) && !isa<ConstantSDNode>(Val: FalseV)) {
1233	// (select (setcc lhs, rhs, CC), constant, falsev)
1234	// -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
1235	std::swap(a&: TrueV, b&: FalseV);
1236	TargetCC = DAG.getCondCode(Cond: ISD::getSetCCInverse(Operation: CCVal, Type: LHS.getValueType()));
1237	}
1238
1239	SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
1240	return DAG.getNode(Opcode: LoongArchISD::SELECT_CC, DL, VT, Ops);
1241	}
1242
1243	SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op,
1244	SelectionDAG &DAG) const {
1245	SDValue CondV = Op.getOperand(i: `1`);
1246	SDLoc DL(Op);
1247	MVT GRLenVT = Subtarget.getGRLenVT();
1248
1249	if (CondV.getOpcode() == ISD::SETCC) {
1250	if (CondV.getOperand(i: `0`).getValueType() == GRLenVT) {
1251	SDValue LHS = CondV.getOperand(i: `0`);
1252	SDValue RHS = CondV.getOperand(i: `1`);
1253	ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: CondV.getOperand(i: `2`))->get();
1254
1255	translateSetCCForBranch(DL, LHS, RHS, CC&: CCVal, DAG);
1256
1257	SDValue TargetCC = DAG.getCondCode(Cond: CCVal);
1258	return DAG.getNode(Opcode: LoongArchISD::BR_CC, DL, VT: Op.getValueType(),
1259	N1: Op.getOperand(i: `0`), N2: LHS, N3: RHS, N4: TargetCC,
1260	N5: Op.getOperand(i: `2`));
1261	} else if (CondV.getOperand(i: `0`).getValueType().isFloatingPoint()) {
1262	return DAG.getNode(Opcode: LoongArchISD::BRCOND, DL, VT: Op.getValueType(),
1263	N1: Op.getOperand(i: `0`), N2: CondV, N3: Op.getOperand(i: `2`));
1264	}
1265	}
1266
1267	return DAG.getNode(Opcode: LoongArchISD::BR_CC, DL, VT: Op.getValueType(),
1268	N1: Op.getOperand(i: `0`), N2: CondV, N3: DAG.getConstant(Val: `0`, DL, VT: GRLenVT),
1269	N4: DAG.getCondCode(Cond: ISD::SETNE), N5: Op.getOperand(i: `2`));
1270	}
1271
1272	SDValue
1273	LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
1274	SelectionDAG &DAG) const {
1275	SDLoc DL(Op);
1276	MVT OpVT = Op.getSimpleValueType();
1277
1278	SDValue Vector = DAG.getUNDEF(VT: OpVT);
1279	SDValue Val = Op.getOperand(i: `0`);
1280	SDValue Idx = DAG.getConstant(Val: `0`, DL, VT: Subtarget.getGRLenVT());
1281
1282	return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: OpVT, N1: Vector, N2: Val, N3: Idx);
1283	}
1284
1285	SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
1286	SelectionDAG &DAG) const {
1287	EVT ResTy = Op ->getValueType(ResNo: `0`);
1288	SDValue Src = Op ->getOperand(Num: `0`);
1289	SDLoc DL(Op);
1290
1291	// LoongArchISD::BITREV_8B is not supported on LA32.
1292	if (!Subtarget.is64Bit() && (ResTy == MVT::v16i8 \|\| ResTy == MVT::v32i8))
1293	return SDValue ();
1294
1295	EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
1296	unsigned int OrigEltNum = ResTy.getVectorNumElements();
1297	unsigned int NewEltNum = NewVT.getVectorNumElements();
1298
1299	SDValue NewSrc = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: NewVT, Operand: Src);
1300
1301	SmallVector<SDValue, `8`> Ops;
1302	for (unsigned int i = `0`; i < NewEltNum; i++) {
1303	SDValue Op = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: MVT::i64, N1: NewSrc,
1304	N2: DAG.getConstant(Val: i, DL, VT: Subtarget.getGRLenVT()));
1305	unsigned RevOp = (ResTy == MVT::v16i8 \|\| ResTy == MVT::v32i8)
1306	? (unsigned)LoongArchISD::BITREV_8B
1307	: (unsigned)ISD::BITREVERSE;
1308	Ops.push_back(Elt: DAG.getNode(Opcode: RevOp, DL, VT: MVT::i64, Operand: Op));
1309	}
1310	SDValue Res =
1311	DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ResTy, Operand: DAG.getBuildVector(VT: NewVT, DL, Ops));
1312
1313	switch (ResTy.getSimpleVT().SimpleTy) {
1314	default:
1315	return SDValue ();
1316	case MVT::v16i8:
1317	case MVT::v32i8:
1318	return Res;
1319	case MVT::v8i16:
1320	case MVT::v16i16:
1321	case MVT::v4i32:
1322	case MVT::v8i32: {
1323	SmallVector<int, `32`> Mask;
1324	for (unsigned int i = `0`; i < NewEltNum; i++)
1325	for (int j = OrigEltNum / NewEltNum - `1`; j >= `0`; j--)
1326	Mask.push_back(Elt: j + (OrigEltNum / NewEltNum) * i);
1327	return DAG.getVectorShuffle(VT: ResTy, dl: DL, N1: Res, N2: DAG.getUNDEF(VT: ResTy), Mask);
1328	}
1329	}
1330	}
1331
1332	// Widen element type to get a new mask value (if possible).
1333	// For example:
1334	// shufflevector <4 x i32> %a, <4 x i32> %b,
1335	// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
1336	// is equivalent to:
1337	// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
1338	// can be lowered to:
1339	// VPACKOD_D vr0, vr0, vr1
1340	static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1341	SDValue V1, SDValue V2, SelectionDAG &DAG) {
1342	unsigned EltBits = VT.getScalarSizeInBits();
1343
1344	if (EltBits > `32` \|\| EltBits == `1`)
1345	return SDValue ();
1346
1347	SmallVector<int, `8`> NewMask;
1348	if (widenShuffleMaskElts(M: Mask, NewMask)) {
1349	MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(BitWidth: EltBits * `2`)
1350	: MVT::getIntegerVT(BitWidth: EltBits * `2`);
1351	MVT NewVT = MVT::getVectorVT(VT: NewEltVT, NumElements: VT.getVectorNumElements() / `2`);
1352	if (DAG.getTargetLoweringInfo().isTypeLegal(VT: NewVT)) {
1353	SDValue NewV1 = DAG.getBitcast(VT: NewVT, V: V1);
1354	SDValue NewV2 = DAG.getBitcast(VT: NewVT, V: V2);
1355	return DAG.getBitcast(
1356	VT, V: DAG.getVectorShuffle(VT: NewVT, dl: DL, N1: NewV1, N2: NewV2, Mask: NewMask));
1357	}
1358	}
1359
1360	return SDValue ();
1361	}
1362
1363	/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
1364	/// instruction.
1365	// The funciton matches elements from one of the input vector shuffled to the
1366	// left or right with zeroable elements 'shifted in'. It handles both the
1367	// strictly bit-wise element shifts and the byte shfit across an entire 128-bit
1368	// lane.
1369	// Mostly copied from X86.
1370	static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
1371	unsigned ScalarSizeInBits, ArrayRef<int> Mask,
1372	int MaskOffset, const APInt &Zeroable) {
1373	int Size = Mask.size();
1374	unsigned SizeInBits = Size * ScalarSizeInBits;
1375
1376	auto CheckZeros = [&](int Shift, int Scale, bool Left) {
1377	for (int i = `0`; i < Size; i += Scale)
1378	for (int j = `0`; j < Shift; ++j)
1379	if (!Zeroable [i + j + (Left ? `0` : (Scale - Shift))])
1380	return false;
1381
1382	return true;
1383	};
1384
1385	auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
1386	int Step = `1`) {
1387	for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
1388	if (!(Mask [i] == -`1` \|\| Mask [i] == Low))
1389	return false;
1390	return true;
1391	};
1392
1393	auto MatchShift = [&](int Shift, int Scale, bool Left) {
1394	for (int i = `0`; i != Size; i += Scale) {
1395	unsigned Pos = Left ? i + Shift : i;
1396	unsigned Low = Left ? i : i + Shift;
1397	unsigned Len = Scale - Shift;
1398	if (!isSequentialOrUndefInRange (Pos, Len, Low + MaskOffset))
1399	return -`1`;
1400	}
1401
1402	int ShiftEltBits = ScalarSizeInBits * Scale;
1403	bool ByteShift = ShiftEltBits > `64`;
1404	Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
1405	: (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
1406	int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? `8` : `1`);
1407
1408	// Normalize the scale for byte shifts to still produce an i64 element
1409	// type.
1410	Scale = ByteShift ? Scale / `2` : Scale;
1411
1412	// We need to round trip through the appropriate type for the shift.
1413	MVT ShiftSVT = MVT::getIntegerVT(BitWidth: ScalarSizeInBits * Scale);
1414	ShiftVT = ByteShift ? MVT::getVectorVT(VT: MVT::i8, NumElements: SizeInBits / `8`)
1415	: MVT::getVectorVT(VT: ShiftSVT, NumElements: Size / Scale);
1416	return (int)ShiftAmt;
1417	};
1418
1419	unsigned MaxWidth = `128`;
1420	for (int Scale = `2`; Scale * ScalarSizeInBits <= MaxWidth; Scale *= `2`)
1421	for (int Shift = `1`; Shift != Scale; ++Shift)
1422	for (bool Left : {true, false})
1423	if (CheckZeros (Shift, Scale, Left)) {
1424	int ShiftAmt = MatchShift (Shift, Scale, Left);
1425	if (`0` < ShiftAmt)
1426	return ShiftAmt;
1427	}
1428
1429	// no match
1430	return -`1`;
1431	}
1432
1433	/// Lower VECTOR_SHUFFLE as shift (if possible).
1434	///
1435	/// For example:
1436	/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1437	/// <4 x i32> <i32 4, i32 0, i32 1, i32 2>
1438	/// is lowered to:
1439	/// (VBSLL_V $v0, $v0, 4)
1440	///
1441	/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1442	/// <4 x i32> <i32 4, i32 0, i32 4, i32 2>
1443	/// is lowered to:
1444	/// (VSLLI_D $v0, $v0, 32)
1445	static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef<int> Mask,
1446	MVT VT, SDValue V1, SDValue V2,
1447	SelectionDAG &DAG,
1448	const LoongArchSubtarget &Subtarget,
1449	const APInt &Zeroable) {
1450	int Size = Mask.size();
1451	assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
1452
1453	MVT ShiftVT;
1454	SDValue V = V1;
1455	unsigned Opcode;
1456
1457	// Try to match shuffle against V1 shift.
1458	int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, ScalarSizeInBits: VT.getScalarSizeInBits(),
1459	Mask, MaskOffset: `0`, Zeroable);
1460
1461	// If V1 failed, try to match shuffle against V2 shift.
1462	if (ShiftAmt < `0`) {
1463	ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, ScalarSizeInBits: VT.getScalarSizeInBits(),
1464	Mask, MaskOffset: Size, Zeroable);
1465	V = V2;
1466	}
1467
1468	if (ShiftAmt < `0`)
1469	return SDValue ();
1470
1471	assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
1472	"Illegal integer vector type");
1473	V = DAG.getBitcast(VT: ShiftVT, V);
1474	V = DAG.getNode(Opcode, DL, VT: ShiftVT, N1: V,
1475	N2: DAG.getConstant(Val: ShiftAmt, DL, VT: Subtarget.getGRLenVT()));
1476	return DAG.getBitcast(VT, V);
1477	}
1478
1479	/// Determine whether a range fits a regular pattern of values.
1480	/// This function accounts for the possibility of jumping over the End iterator.
1481	template <typename ValType>
1482	static bool
1483	fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
1484	unsigned CheckStride,
1485	typename SmallVectorImpl<ValType>::const_iterator End,
1486	ValType ExpectedIndex, unsigned ExpectedIndexStride) {
1487	auto &I = Begin;
1488
1489	while (I != End) {
1490	if (I != -`1` && I != ExpectedIndex)
1491	return false;
1492	ExpectedIndex += ExpectedIndexStride;
1493
1494	// Incrementing past End is undefined behaviour so we must increment one
1495	// step at a time and check for End at each step.
1496	for (unsigned n = `0`; n < CheckStride && I != End; ++n, ++I)
1497	; // Empty loop body.
1498	}
1499	return true;
1500	}
1501
1502	/// Compute whether each element of a shuffle is zeroable.
1503	///
1504	/// A "zeroable" vector shuffle element is one which can be lowered to zero.
1505	static void computeZeroableShuffleElements(ArrayRef<int> Mask, SDValue V1,
1506	SDValue V2, APInt &KnownUndef,
1507	APInt &KnownZero) {
1508	int Size = Mask.size();
1509	KnownUndef = KnownZero = APInt::getZero(numBits: Size);
1510
1511	V1 = peekThroughBitcasts(V: V1);
1512	V2 = peekThroughBitcasts(V: V2);
1513
1514	bool V1IsZero = ISD::isBuildVectorAllZeros(N: V1.getNode());
1515	bool V2IsZero = ISD::isBuildVectorAllZeros(N: V2.getNode());
1516
1517	int VectorSizeInBits = V1.getValueSizeInBits();
1518	int ScalarSizeInBits = VectorSizeInBits / Size;
1519	assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
1520	(void)ScalarSizeInBits;
1521
1522	for (int i = `0`; i < Size; ++i) {
1523	int M = Mask [i];
1524	if (M < `0`) {
1525	KnownUndef.setBit(i);
1526	continue;
1527	}
1528	if ((M >= `0` && M < Size && V1IsZero) \|\| (M >= Size && V2IsZero)) {
1529	KnownZero.setBit(i);
1530	continue;
1531	}
1532	}
1533	}
1534
1535	/// Test whether a shuffle mask is equivalent within each sub-lane.
1536	///
1537	/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
1538	/// non-trivial to compute in the face of undef lanes. The representation is
1539	/// suitable for use with existing 128-bit shuffles as entries from the second
1540	/// vector have been remapped to [LaneSize, 2LaneSize).*
1541	static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
1542	ArrayRef<int> Mask,
1543	SmallVectorImpl<int> &RepeatedMask) {
1544	auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
1545	RepeatedMask.assign(NumElts: LaneSize, Elt: -`1`);
1546	int Size = Mask.size();
1547	for (int i = `0`; i < Size; ++i) {
1548	assert(Mask[i] == -`1` \|\| Mask[i] >= `0`);
1549	if (Mask [i] < `0`)
1550	continue;
1551	if ((Mask [i] % Size) / LaneSize != i / LaneSize)
1552	// This entry crosses lanes, so there is no way to model this shuffle.
1553	return false;
1554
1555	// Ok, handle the in-lane shuffles by detecting if and when they repeat.
1556	// Adjust second vector indices to start at LaneSize instead of Size.
1557	int LocalM =
1558	Mask [i] < Size ? Mask [i] % LaneSize : Mask [i] % LaneSize + LaneSize;
1559	if (RepeatedMask [i % LaneSize] < `0`)
1560	// This is the first non-undef entry in this slot of a 128-bit lane.
1561	RepeatedMask [i % LaneSize] = LocalM;
1562	else if (RepeatedMask [i % LaneSize] != LocalM)
1563	// Found a mismatch with the repeated mask.
1564	return false;
1565	}
1566	return true;
1567	}
1568
1569	/// Attempts to match vector shuffle as byte rotation.
1570	static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
1571	ArrayRef<int> Mask) {
1572
1573	SDValue Lo, Hi;
1574	SmallVector<int, `16`> RepeatedMask;
1575
1576	if (!isRepeatedShuffleMask(LaneSizeInBits: `128`, VT, Mask, RepeatedMask))
1577	return -`1`;
1578
1579	int NumElts = RepeatedMask.size();
1580	int Rotation = `0`;
1581	int Scale = `16` / NumElts;
1582
1583	for (int i = `0`; i < NumElts; ++i) {
1584	int M = RepeatedMask [i];
1585	assert((M == -`1` \|\| (`0` <= M && M < (`2` * NumElts))) &&
1586	"Unexpected mask index.");
1587	if (M < `0`)
1588	continue;
1589
1590	// Determine where a rotated vector would have started.
1591	int StartIdx = i - (M % NumElts);
1592	if (StartIdx == `0`)
1593	return -`1`;
1594
1595	// If we found the tail of a vector the rotation must be the missing
1596	// front. If we found the head of a vector, it must be how much of the
1597	// head.
1598	int CandidateRotation = StartIdx < `0` ? -StartIdx : NumElts - StartIdx;
1599
1600	if (Rotation == `0`)
1601	Rotation = CandidateRotation;
1602	else if (Rotation != CandidateRotation)
1603	return -`1`;
1604
1605	// Compute which value this mask is pointing at.
1606	SDValue MaskV = M < NumElts ? V1 : V2;
1607
1608	// Compute which of the two target values this index should be assigned
1609	// to. This reflects whether the high elements are remaining or the low
1610	// elements are remaining.
1611	SDValue &TargetV = StartIdx < `0` ? Hi : Lo;
1612
1613	// Either set up this value if we've not encountered it before, or check
1614	// that it remains consistent.
1615	if (!TargetV)
1616	TargetV = MaskV;
1617	else if (TargetV != MaskV)
1618	return -`1`;
1619	}
1620
1621	// Check that we successfully analyzed the mask, and normalize the results.
1622	assert(Rotation != `0` && "Failed to locate a viable rotation!");
1623	assert((Lo \|\| Hi) && "Failed to find a rotated input vector!");
1624	if (!Lo)
1625	Lo = Hi;
1626	else if (!Hi)
1627	Hi = Lo;
1628
1629	V1 = Lo;
1630	V2 = Hi;
1631
1632	return Rotation * Scale;
1633	}
1634
1635	/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
1636	///
1637	/// For example:
1638	/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
1639	/// <2 x i32> <i32 3, i32 0>
1640	/// is lowered to:
1641	/// (VBSRL_V $v1, $v1, 8)
1642	/// (VBSLL_V $v0, $v0, 8)
1643	/// (VOR_V $v0, $V0, $v1)
1644	static SDValue
1645	lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1646	SDValue V1, SDValue V2, SelectionDAG &DAG,
1647	const LoongArchSubtarget &Subtarget) {
1648
1649	SDValue Lo = V1, Hi = V2;
1650	int ByteRotation = matchShuffleAsByteRotate(VT, V1&: Lo, V2&: Hi, Mask);
1651	if (ByteRotation <= `0`)
1652	return SDValue ();
1653
1654	MVT ByteVT = MVT::getVectorVT(VT: MVT::i8, NumElements: VT.getSizeInBits() / `8`);
1655	Lo = DAG.getBitcast(VT: ByteVT, V: Lo);
1656	Hi = DAG.getBitcast(VT: ByteVT, V: Hi);
1657
1658	int LoByteShift = `16` - ByteRotation;
1659	int HiByteShift = ByteRotation;
1660	MVT GRLenVT = Subtarget.getGRLenVT();
1661
1662	SDValue LoShift = DAG.getNode(Opcode: LoongArchISD::VBSLL, DL, VT: ByteVT, N1: Lo,
1663	N2: DAG.getConstant(Val: LoByteShift, DL, VT: GRLenVT));
1664	SDValue HiShift = DAG.getNode(Opcode: LoongArchISD::VBSRL, DL, VT: ByteVT, N1: Hi,
1665	N2: DAG.getConstant(Val: HiByteShift, DL, VT: GRLenVT));
1666	return DAG.getBitcast(VT, V: DAG.getNode(Opcode: ISD::OR, DL, VT: ByteVT, N1: LoShift, N2: HiShift));
1667	}
1668
1669	/// Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
1670	///
1671	/// For example:
1672	/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1673	/// <4 x i32> <i32 0, i32 4, i32 1, i32 4>
1674	/// %3 = bitcast <4 x i32> %2 to <2 x i64>
1675	/// is lowered to:
1676	/// (VREPLI $v1, 0)
1677	/// (VILVL $v0, $v1, $v0)
1678	static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL,
1679	ArrayRef<int> Mask, MVT VT,
1680	SDValue V1, SDValue V2,
1681	SelectionDAG &DAG,
1682	const APInt &Zeroable) {
1683	int Bits = VT.getSizeInBits();
1684	int EltBits = VT.getScalarSizeInBits();
1685	int NumElements = VT.getVectorNumElements();
1686
1687	if (Zeroable.isAllOnes())
1688	return DAG.getConstant(Val: `0`, DL, VT);
1689
1690	// Define a helper function to check a particular ext-scale and lower to it if
1691	// valid.
1692	auto Lower = [&](int Scale) -> SDValue {
1693	SDValue InputV;
1694	bool AnyExt = true;
1695	int Offset = `0`;
1696	for (int i = `0`; i < NumElements; i++) {
1697	int M = Mask [i];
1698	if (M < `0`)
1699	continue;
1700	if (i % Scale != `0`) {
1701	// Each of the extended elements need to be zeroable.
1702	if (!Zeroable [i])
1703	return SDValue ();
1704
1705	AnyExt = false;
1706	continue;
1707	}
1708
1709	// Each of the base elements needs to be consecutive indices into the
1710	// same input vector.
1711	SDValue V = M < NumElements ? V1 : V2;
1712	M = M % NumElements;
1713	if (!InputV) {
1714	InputV = V;
1715	Offset = M - (i / Scale);
1716
1717	// These offset can't be handled
1718	if (Offset % (NumElements / Scale))
1719	return SDValue ();
1720	} else if (InputV != V)
1721	return SDValue ();
1722
1723	if (M != (Offset + (i / Scale)))
1724	return SDValue (); // Non-consecutive strided elements.
1725	}
1726
1727	// If we fail to find an input, we have a zero-shuffle which should always
1728	// have already been handled.
1729	if (!InputV)
1730	return SDValue ();
1731
1732	do {
1733	unsigned VilVLoHi = LoongArchISD::VILVL;
1734	if (Offset >= (NumElements / `2`)) {
1735	VilVLoHi = LoongArchISD::VILVH;
1736	Offset -= (NumElements / `2`);
1737	}
1738
1739	MVT InputVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: EltBits), NumElements);
1740	SDValue Ext =
1741	AnyExt ? DAG.getFreeze(V: InputV) : DAG.getConstant(Val: `0`, DL, VT: InputVT);
1742	InputV = DAG.getBitcast(VT: InputVT, V: InputV);
1743	InputV = DAG.getNode(Opcode: VilVLoHi, DL, VT: InputVT, N1: Ext, N2: InputV);
1744	Scale /= `2`;
1745	EltBits *= `2`;
1746	NumElements /= `2`;
1747	} while (Scale > `1`);
1748	return DAG.getBitcast(VT, V: InputV);
1749	};
1750
1751	// Each iteration, try extending the elements half as much, but into twice as
1752	// many elements.
1753	for (int NumExtElements = Bits / `64`; NumExtElements < NumElements;
1754	NumExtElements *= `2`) {
1755	if (SDValue V = Lower (NumElements / NumExtElements))
1756	return V;
1757	}
1758	return SDValue ();
1759	}
1760
1761	/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
1762	///
1763	/// VREPLVEI performs vector broadcast based on an element specified by an
1764	/// integer immediate, with its mask being similar to:
1765	/// <x, x, x, ...>
1766	/// where x is any valid index.
1767	///
1768	/// When undef's appear in the mask they are treated as if they were whatever
1769	/// value is necessary in order to fit the above form.
1770	static SDValue
1771	lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1772	SDValue V1, SelectionDAG &DAG,
1773	const LoongArchSubtarget &Subtarget) {
1774	int SplatIndex = -`1`;
1775	for (const auto &M : Mask) {
1776	if (M != -`1`) {
1777	SplatIndex = M;
1778	break;
1779	}
1780	}
1781
1782	if (SplatIndex == -`1`)
1783	return DAG.getUNDEF(VT);
1784
1785	assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1786	if (fitsRegularPattern<int>(Begin: Mask.begin(), CheckStride: `1`, End: Mask.end(), ExpectedIndex: SplatIndex, ExpectedIndexStride: `0`)) {
1787	return DAG.getNode(Opcode: LoongArchISD::VREPLVEI, DL, VT, N1: V1,
1788	N2: DAG.getConstant(Val: SplatIndex, DL, VT: Subtarget.getGRLenVT()));
1789	}
1790
1791	return SDValue ();
1792	}
1793
1794	/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
1795	///
1796	/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
1797	/// elements according to a <4 x i2> constant (encoded as an integer immediate).
1798	///
1799	/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
1800	/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
1801	/// When undef's appear they are treated as if they were whatever value is
1802	/// necessary in order to fit the above forms.
1803	///
1804	/// For example:
1805	/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
1806	/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
1807	/// i32 7, i32 6, i32 5, i32 4>
1808	/// is lowered to:
1809	/// (VSHUF4I_H $v0, $v1, 27)
1810	/// where the 27 comes from:
1811	/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
1812	static SDValue
1813	lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1814	SDValue V1, SDValue V2, SelectionDAG &DAG,
1815	const LoongArchSubtarget &Subtarget) {
1816
1817	unsigned SubVecSize = `4`;
1818	if (VT == MVT::v2f64 \|\| VT == MVT::v2i64)
1819	SubVecSize = `2`;
1820
1821	int SubMask[`4`] = {-`1`, -`1`, -`1`, -`1`};
1822	for (unsigned i = `0`; i < SubVecSize; ++i) {
1823	for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
1824	int M = Mask [j];
1825
1826	// Convert from vector index to 4-element subvector index
1827	// If an index refers to an element outside of the subvector then give up
1828	if (M != -`1`) {
1829	M -= `4` * (j / SubVecSize);
1830	if (M < `0` \|\| M >= `4`)
1831	return SDValue ();
1832	}
1833
1834	// If the mask has an undef, replace it with the current index.
1835	// Note that it might still be undef if the current index is also undef
1836	if (SubMask[i] == -`1`)
1837	SubMask[i] = M;
1838	// Check that non-undef values are the same as in the mask. If they
1839	// aren't then give up
1840	else if (M != -`1` && M != SubMask[i])
1841	return SDValue ();
1842	}
1843	}
1844
1845	// Calculate the immediate. Replace any remaining undefs with zero
1846	int Imm = `0`;
1847	for (int i = SubVecSize - `1`; i >= `0`; --i) {
1848	int M = SubMask[i];
1849
1850	if (M == -`1`)
1851	M = `0`;
1852
1853	Imm <<= `2`;
1854	Imm \|= M & `0x3`;
1855	}
1856
1857	MVT GRLenVT = Subtarget.getGRLenVT();
1858
1859	// Return vshuf4i.d
1860	if (VT == MVT::v2f64 \|\| VT == MVT::v2i64)
1861	return DAG.getNode(Opcode: LoongArchISD::VSHUF4I_D, DL, VT, N1: V1, N2: V2,
1862	N3: DAG.getConstant(Val: Imm, DL, VT: GRLenVT));
1863
1864	return DAG.getNode(Opcode: LoongArchISD::VSHUF4I, DL, VT, N1: V1,
1865	N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT));
1866	}
1867
1868	/// Lower VECTOR_SHUFFLE whose result is the reversed source vector.
1869	///
1870	/// It is possible to do optimization for VECTOR_SHUFFLE performing vector
1871	/// reverse whose mask likes:
1872	/// <7, 6, 5, 4, 3, 2, 1, 0>
1873	///
1874	/// When undef's appear in the mask they are treated as if they were whatever
1875	/// value is necessary in order to fit the above forms.
1876	static SDValue
1877	lowerVECTOR_SHUFFLE_IsReverse(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1878	SDValue V1, SelectionDAG &DAG,
1879	const LoongArchSubtarget &Subtarget) {
1880	// Only vectors with i8/i16 elements which cannot match other patterns
1881	// directly needs to do this.
1882	if (VT != MVT::v16i8 && VT != MVT::v8i16 && VT != MVT::v32i8 &&
1883	VT != MVT::v16i16)
1884	return SDValue ();
1885
1886	if (!ShuffleVectorInst::isReverseMask(Mask, NumSrcElts: Mask.size()))
1887	return SDValue ();
1888
1889	int WidenNumElts = VT.getVectorNumElements() / `4`;
1890	SmallVector<int, `16`> WidenMask(WidenNumElts, -`1`);
1891	for (int i = `0`; i < WidenNumElts; ++i)
1892	WidenMask [i] = WidenNumElts - `1` - i;
1893
1894	MVT WidenVT = MVT::getVectorVT(
1895	VT: VT.getVectorElementType() == MVT::i8 ? MVT::i32 : MVT::i64, NumElements: WidenNumElts);
1896	SDValue NewV1 = DAG.getBitcast(VT: WidenVT, V: V1);
1897	SDValue WidenRev = DAG.getVectorShuffle(VT: WidenVT, dl: DL, N1: NewV1,
1898	N2: DAG.getUNDEF(VT: WidenVT), Mask: WidenMask);
1899
1900	return DAG.getNode(Opcode: LoongArchISD::VSHUF4I, DL, VT,
1901	N1: DAG.getBitcast(VT, V: WidenRev),
1902	N2: DAG.getConstant(Val: `27`, DL, VT: Subtarget.getGRLenVT()));
1903	}
1904
1905	/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
1906	///
1907	/// VPACKEV interleaves the even elements from each vector.
1908	///
1909	/// It is possible to lower into VPACKEV when the mask consists of two of the
1910	/// following forms interleaved:
1911	/// <0, 2, 4, ...>
1912	/// <n, n+2, n+4, ...>
1913	/// where n is the number of elements in the vector.
1914	/// For example:
1915	/// <0, 0, 2, 2, 4, 4, ...>
1916	/// <0, n, 2, n+2, 4, n+4, ...>
1917	///
1918	/// When undef's appear in the mask they are treated as if they were whatever
1919	/// value is necessary in order to fit the above forms.
1920	static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
1921	MVT VT, SDValue V1, SDValue V2,
1922	SelectionDAG &DAG) {
1923
1924	const auto &Begin = Mask.begin();
1925	const auto &End = Mask.end();
1926	SDValue OriV1 = V1, OriV2 = V2;
1927
1928	if (fitsRegularPattern<int>(Begin, CheckStride: `2`, End, ExpectedIndex: `0`, ExpectedIndexStride: `2`))
1929	V1 = OriV1;
1930	else if (fitsRegularPattern<int>(Begin, CheckStride: `2`, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: `2`))
1931	V1 = OriV2;
1932	else
1933	return SDValue ();
1934
1935	if (fitsRegularPattern<int>(Begin: Begin + `1`, CheckStride: `2`, End, ExpectedIndex: `0`, ExpectedIndexStride: `2`))
1936	V2 = OriV1;
1937	else if (fitsRegularPattern<int>(Begin: Begin + `1`, CheckStride: `2`, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: `2`))
1938	V2 = OriV2;
1939	else
1940	return SDValue ();
1941
1942	return DAG.getNode(Opcode: LoongArchISD::VPACKEV, DL, VT, N1: V2, N2: V1);
1943	}
1944
1945	/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
1946	///
1947	/// VPACKOD interleaves the odd elements from each vector.
1948	///
1949	/// It is possible to lower into VPACKOD when the mask consists of two of the
1950	/// following forms interleaved:
1951	/// <1, 3, 5, ...>
1952	/// <n+1, n+3, n+5, ...>
1953	/// where n is the number of elements in the vector.
1954	/// For example:
1955	/// <1, 1, 3, 3, 5, 5, ...>
1956	/// <1, n+1, 3, n+3, 5, n+5, ...>
1957	///
1958	/// When undef's appear in the mask they are treated as if they were whatever
1959	/// value is necessary in order to fit the above forms.
1960	static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
1961	MVT VT, SDValue V1, SDValue V2,
1962	SelectionDAG &DAG) {
1963
1964	const auto &Begin = Mask.begin();
1965	const auto &End = Mask.end();
1966	SDValue OriV1 = V1, OriV2 = V2;
1967
1968	if (fitsRegularPattern<int>(Begin, CheckStride: `2`, End, ExpectedIndex: `1`, ExpectedIndexStride: `2`))
1969	V1 = OriV1;
1970	else if (fitsRegularPattern<int>(Begin, CheckStride: `2`, End, ExpectedIndex: Mask.size() + `1`, ExpectedIndexStride: `2`))
1971	V1 = OriV2;
1972	else
1973	return SDValue ();
1974
1975	if (fitsRegularPattern<int>(Begin: Begin + `1`, CheckStride: `2`, End, ExpectedIndex: `1`, ExpectedIndexStride: `2`))
1976	V2 = OriV1;
1977	else if (fitsRegularPattern<int>(Begin: Begin + `1`, CheckStride: `2`, End, ExpectedIndex: Mask.size() + `1`, ExpectedIndexStride: `2`))
1978	V2 = OriV2;
1979	else
1980	return SDValue ();
1981
1982	return DAG.getNode(Opcode: LoongArchISD::VPACKOD, DL, VT, N1: V2, N2: V1);
1983	}
1984
1985	/// Lower VECTOR_SHUFFLE into VILVH (if possible).
1986	///
1987	/// VILVH interleaves consecutive elements from the left (highest-indexed) half
1988	/// of each vector.
1989	///
1990	/// It is possible to lower into VILVH when the mask consists of two of the
1991	/// following forms interleaved:
1992	/// <x, x+1, x+2, ...>
1993	/// <n+x, n+x+1, n+x+2, ...>
1994	/// where n is the number of elements in the vector and x is half n.
1995	/// For example:
1996	/// <x, x, x+1, x+1, x+2, x+2, ...>
1997	/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
1998	///
1999	/// When undef's appear in the mask they are treated as if they were whatever
2000	/// value is necessary in order to fit the above forms.
2001	static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef<int> Mask,
2002	MVT VT, SDValue V1, SDValue V2,
2003	SelectionDAG &DAG) {
2004
2005	const auto &Begin = Mask.begin();
2006	const auto &End = Mask.end();
2007	unsigned HalfSize = Mask.size() / `2`;
2008	SDValue OriV1 = V1, OriV2 = V2;
2009
2010	if (fitsRegularPattern<int>(Begin, CheckStride: `2`, End, ExpectedIndex: HalfSize, ExpectedIndexStride: `1`))
2011	V1 = OriV1;
2012	else if (fitsRegularPattern<int>(Begin, CheckStride: `2`, End, ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: `1`))
2013	V1 = OriV2;
2014	else
2015	return SDValue ();
2016
2017	if (fitsRegularPattern<int>(Begin: Begin + `1`, CheckStride: `2`, End, ExpectedIndex: HalfSize, ExpectedIndexStride: `1`))
2018	V2 = OriV1;
2019	else if (fitsRegularPattern<int>(Begin: Begin + `1`, CheckStride: `2`, End, ExpectedIndex: Mask.size() + HalfSize,
2020	ExpectedIndexStride: `1`))
2021	V2 = OriV2;
2022	else
2023	return SDValue ();
2024
2025	return DAG.getNode(Opcode: LoongArchISD::VILVH, DL, VT, N1: V2, N2: V1);
2026	}
2027
2028	/// Lower VECTOR_SHUFFLE into VILVL (if possible).
2029	///
2030	/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
2031	/// of each vector.
2032	///
2033	/// It is possible to lower into VILVL when the mask consists of two of the
2034	/// following forms interleaved:
2035	/// <0, 1, 2, ...>
2036	/// <n, n+1, n+2, ...>
2037	/// where n is the number of elements in the vector.
2038	/// For example:
2039	/// <0, 0, 1, 1, 2, 2, ...>
2040	/// <0, n, 1, n+1, 2, n+2, ...>
2041	///
2042	/// When undef's appear in the mask they are treated as if they were whatever
2043	/// value is necessary in order to fit the above forms.
2044	static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask,
2045	MVT VT, SDValue V1, SDValue V2,
2046	SelectionDAG &DAG) {
2047
2048	const auto &Begin = Mask.begin();
2049	const auto &End = Mask.end();
2050	SDValue OriV1 = V1, OriV2 = V2;
2051
2052	if (fitsRegularPattern<int>(Begin, CheckStride: `2`, End, ExpectedIndex: `0`, ExpectedIndexStride: `1`))
2053	V1 = OriV1;
2054	else if (fitsRegularPattern<int>(Begin, CheckStride: `2`, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: `1`))
2055	V1 = OriV2;
2056	else
2057	return SDValue ();
2058
2059	if (fitsRegularPattern<int>(Begin: Begin + `1`, CheckStride: `2`, End, ExpectedIndex: `0`, ExpectedIndexStride: `1`))
2060	V2 = OriV1;
2061	else if (fitsRegularPattern<int>(Begin: Begin + `1`, CheckStride: `2`, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: `1`))
2062	V2 = OriV2;
2063	else
2064	return SDValue ();
2065
2066	return DAG.getNode(Opcode: LoongArchISD::VILVL, DL, VT, N1: V2, N2: V1);
2067	}
2068
2069	/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
2070	///
2071	/// VPICKEV copies the even elements of each vector into the result vector.
2072	///
2073	/// It is possible to lower into VPICKEV when the mask consists of two of the
2074	/// following forms concatenated:
2075	/// <0, 2, 4, ...>
2076	/// <n, n+2, n+4, ...>
2077	/// where n is the number of elements in the vector.
2078	/// For example:
2079	/// <0, 2, 4, ..., 0, 2, 4, ...>
2080	/// <0, 2, 4, ..., n, n+2, n+4, ...>
2081	///
2082	/// When undef's appear in the mask they are treated as if they were whatever
2083	/// value is necessary in order to fit the above forms.
2084	static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
2085	MVT VT, SDValue V1, SDValue V2,
2086	SelectionDAG &DAG) {
2087
2088	const auto &Begin = Mask.begin();
2089	const auto &Mid = Mask.begin() + Mask.size() / `2`;
2090	const auto &End = Mask.end();
2091	SDValue OriV1 = V1, OriV2 = V2;
2092
2093	if (fitsRegularPattern<int>(Begin, CheckStride: `1`, End: Mid, ExpectedIndex: `0`, ExpectedIndexStride: `2`))
2094	V1 = OriV1;
2095	else if (fitsRegularPattern<int>(Begin, CheckStride: `1`, End: Mid, ExpectedIndex: Mask.size(), ExpectedIndexStride: `2`))
2096	V1 = OriV2;
2097	else
2098	return SDValue ();
2099
2100	if (fitsRegularPattern<int>(Begin: Mid, CheckStride: `1`, End, ExpectedIndex: `0`, ExpectedIndexStride: `2`))
2101	V2 = OriV1;
2102	else if (fitsRegularPattern<int>(Begin: Mid, CheckStride: `1`, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: `2`))
2103	V2 = OriV2;
2104
2105	else
2106	return SDValue ();
2107
2108	return DAG.getNode(Opcode: LoongArchISD::VPICKEV, DL, VT, N1: V2, N2: V1);
2109	}
2110
2111	/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
2112	///
2113	/// VPICKOD copies the odd elements of each vector into the result vector.
2114	///
2115	/// It is possible to lower into VPICKOD when the mask consists of two of the
2116	/// following forms concatenated:
2117	/// <1, 3, 5, ...>
2118	/// <n+1, n+3, n+5, ...>
2119	/// where n is the number of elements in the vector.
2120	/// For example:
2121	/// <1, 3, 5, ..., 1, 3, 5, ...>
2122	/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
2123	///
2124	/// When undef's appear in the mask they are treated as if they were whatever
2125	/// value is necessary in order to fit the above forms.
2126	static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
2127	MVT VT, SDValue V1, SDValue V2,
2128	SelectionDAG &DAG) {
2129
2130	const auto &Begin = Mask.begin();
2131	const auto &Mid = Mask.begin() + Mask.size() / `2`;
2132	const auto &End = Mask.end();
2133	SDValue OriV1 = V1, OriV2 = V2;
2134
2135	if (fitsRegularPattern<int>(Begin, CheckStride: `1`, End: Mid, ExpectedIndex: `1`, ExpectedIndexStride: `2`))
2136	V1 = OriV1;
2137	else if (fitsRegularPattern<int>(Begin, CheckStride: `1`, End: Mid, ExpectedIndex: Mask.size() + `1`, ExpectedIndexStride: `2`))
2138	V1 = OriV2;
2139	else
2140	return SDValue ();
2141
2142	if (fitsRegularPattern<int>(Begin: Mid, CheckStride: `1`, End, ExpectedIndex: `1`, ExpectedIndexStride: `2`))
2143	V2 = OriV1;
2144	else if (fitsRegularPattern<int>(Begin: Mid, CheckStride: `1`, End, ExpectedIndex: Mask.size() + `1`, ExpectedIndexStride: `2`))
2145	V2 = OriV2;
2146	else
2147	return SDValue ();
2148
2149	return DAG.getNode(Opcode: LoongArchISD::VPICKOD, DL, VT, N1: V2, N2: V1);
2150	}
2151
2152	/// Lower VECTOR_SHUFFLE into VSHUF.
2153	///
2154	/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
2155	/// adding it as an operand to the resulting VSHUF.
2156	static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
2157	MVT VT, SDValue V1, SDValue V2,
2158	SelectionDAG &DAG,
2159	const LoongArchSubtarget &Subtarget) {
2160
2161	SmallVector<SDValue, `16`> Ops;
2162	for (auto M : Mask)
2163	Ops.push_back(Elt: DAG.getSignedConstant(Val: M, DL, VT: Subtarget.getGRLenVT()));
2164
2165	EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2166	SDValue MaskVec = DAG.getBuildVector(VT: MaskVecTy, DL, Ops);
2167
2168	// VECTOR_SHUFFLE concatenates the vectors in an vectorwise fashion.
2169	// <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
2170	// VSHF concatenates the vectors in a bitwise fashion:
2171	// <0b00, 0b01> + <0b10, 0b11> ->
2172	// 0b0100 + 0b1110 -> 0b01001110
2173	// <0b10, 0b11, 0b00, 0b01>
2174	// We must therefore swap the operands to get the correct result.
2175	return DAG.getNode(Opcode: LoongArchISD::VSHUF, DL, VT, N1: MaskVec, N2: V2, N3: V1);
2176	}
2177
2178	/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
2179	///
2180	/// This routine breaks down the specific type of 128-bit shuffle and
2181	/// dispatches to the lowering routines accordingly.
2182	static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2183	SDValue V1, SDValue V2, SelectionDAG &DAG,
2184	const LoongArchSubtarget &Subtarget) {
2185	assert((VT.SimpleTy == MVT::v16i8 \|\| VT.SimpleTy == MVT::v8i16 \|\|
2186	VT.SimpleTy == MVT::v4i32 \|\| VT.SimpleTy == MVT::v2i64 \|\|
2187	VT.SimpleTy == MVT::v4f32 \|\| VT.SimpleTy == MVT::v2f64) &&
2188	"Vector type is unsupported for lsx!");
2189	assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
2190	"Two operands have different types!");
2191	assert(VT.getVectorNumElements() == Mask.size() &&
2192	"Unexpected mask size for shuffle!");
2193	assert(Mask.size() % `2` == `0` && "Expected even mask size.");
2194
2195	APInt KnownUndef, KnownZero;
2196	computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2197	APInt Zeroable = KnownUndef \| KnownZero;
2198
2199	SDValue Result;
2200	// TODO: Add more comparison patterns.
2201	if (V2.isUndef()) {
2202	if ((Result =
2203	lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2204	return Result;
2205	if ((Result =
2206	lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2207	return Result;
2208	if ((Result =
2209	lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
2210	return Result;
2211
2212	// TODO: This comment may be enabled in the future to better match the
2213	// pattern for instruction selection.
2214	/ V2 = V1; /
2215	}
2216
2217	// It is recommended not to change the pattern comparison order for better
2218	// performance.
2219	if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
2220	return Result;
2221	if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
2222	return Result;
2223	if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
2224	return Result;
2225	if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
2226	return Result;
2227	if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
2228	return Result;
2229	if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
2230	return Result;
2231	if ((VT.SimpleTy == MVT::v2i64 \|\| VT.SimpleTy == MVT::v2f64) &&
2232	(Result =
2233	lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2234	return Result;
2235	if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
2236	Zeroable)))
2237	return Result;
2238	if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2239	Zeroable)))
2240	return Result;
2241	if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2242	Subtarget)))
2243	return Result;
2244	if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2245	return NewShuffle;
2246	if ((Result =
2247	lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2248	return Result;
2249	return SDValue ();
2250	}
2251
2252	/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
2253	///
2254	/// It is a XVREPLVEI when the mask is:
2255	/// <x, x, x, ..., x+n, x+n, x+n, ...>
2256	/// where the number of x is equal to n and n is half the length of vector.
2257	///
2258	/// When undef's appear in the mask they are treated as if they were whatever
2259	/// value is necessary in order to fit the above form.
2260	static SDValue
2261	lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2262	SDValue V1, SelectionDAG &DAG,
2263	const LoongArchSubtarget &Subtarget) {
2264	int SplatIndex = -`1`;
2265	for (const auto &M : Mask) {
2266	if (M != -`1`) {
2267	SplatIndex = M;
2268	break;
2269	}
2270	}
2271
2272	if (SplatIndex == -`1`)
2273	return DAG.getUNDEF(VT);
2274
2275	const auto &Begin = Mask.begin();
2276	const auto &End = Mask.end();
2277	int HalfSize = Mask.size() / `2`;
2278
2279	if (SplatIndex >= HalfSize)
2280	return SDValue ();
2281
2282	assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
2283	if (fitsRegularPattern<int>(Begin, CheckStride: `1`, End: End - HalfSize, ExpectedIndex: SplatIndex, ExpectedIndexStride: `0`) &&
2284	fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: `1`, End, ExpectedIndex: SplatIndex + HalfSize,
2285	ExpectedIndexStride: `0`)) {
2286	return DAG.getNode(Opcode: LoongArchISD::VREPLVEI, DL, VT, N1: V1,
2287	N2: DAG.getConstant(Val: SplatIndex, DL, VT: Subtarget.getGRLenVT()));
2288	}
2289
2290	return SDValue ();
2291	}
2292
2293	/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
2294	static SDValue
2295	lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2296	SDValue V1, SDValue V2, SelectionDAG &DAG,
2297	const LoongArchSubtarget &Subtarget) {
2298	// When the size is less than or equal to 4, lower cost instructions may be
2299	// used.
2300	if (Mask.size() <= `4`)
2301	return SDValue ();
2302	return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
2303	}
2304
2305	/// Lower VECTOR_SHUFFLE into XVPERMI (if possible).
2306	static SDValue
2307	lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2308	SDValue V1, SelectionDAG &DAG,
2309	const LoongArchSubtarget &Subtarget) {
2310	// Only consider XVPERMI_D.
2311	if (Mask.size() != `4` \|\| (VT != MVT::v4i64 && VT != MVT::v4f64))
2312	return SDValue ();
2313
2314	unsigned MaskImm = `0`;
2315	for (unsigned i = `0`; i < Mask.size(); ++i) {
2316	if (Mask [i] == -`1`)
2317	continue;
2318	MaskImm \|= Mask [i] << (i * `2`);
2319	}
2320
2321	return DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT, N1: V1,
2322	N2: DAG.getConstant(Val: MaskImm, DL, VT: Subtarget.getGRLenVT()));
2323	}
2324
2325	/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
2326	static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask,
2327	MVT VT, SDValue V1, SelectionDAG &DAG,
2328	const LoongArchSubtarget &Subtarget) {
2329	// LoongArch LASX only have XVPERM_W.
2330	if (Mask.size() != `8` \|\| (VT != MVT::v8i32 && VT != MVT::v8f32))
2331	return SDValue ();
2332
2333	unsigned NumElts = VT.getVectorNumElements();
2334	unsigned HalfSize = NumElts / `2`;
2335	bool FrontLo = true, FrontHi = true;
2336	bool BackLo = true, BackHi = true;
2337
2338	auto inRange = [](int val, int low, int high) {
2339	return (val == -`1`) \|\| (val >= low && val < high);
2340	};
2341
2342	for (unsigned i = `0`; i < HalfSize; ++i) {
2343	int Fronti = Mask [i];
2344	int Backi = Mask [i + HalfSize];
2345
2346	FrontLo &= inRange (Fronti, `0`, HalfSize);
2347	FrontHi &= inRange (Fronti, HalfSize, NumElts);
2348	BackLo &= inRange (Backi, `0`, HalfSize);
2349	BackHi &= inRange (Backi, HalfSize, NumElts);
2350	}
2351
2352	// If both the lower and upper 128-bit parts access only one half of the
2353	// vector (either lower or upper), avoid using xvperm.w. The latency of
2354	// xvperm.w(3) is higher than using xvshuf(1) and xvori(1).
2355	if ((FrontLo \|\| FrontHi) && (BackLo \|\| BackHi))
2356	return SDValue ();
2357
2358	SmallVector<SDValue, `8`> Masks;
2359	MVT GRLenVT = Subtarget.getGRLenVT();
2360	for (unsigned i = `0`; i < NumElts; ++i)
2361	Masks.push_back(Elt: Mask [i] == -`1` ? DAG.getUNDEF(VT: GRLenVT)
2362	: DAG.getConstant(Val: Mask [i], DL, VT: GRLenVT));
2363	SDValue MaskVec = DAG.getBuildVector(VT: MVT::v8i32, DL, Ops: Masks);
2364
2365	return DAG.getNode(Opcode: LoongArchISD::XVPERM, DL, VT, N1: V1, N2: MaskVec);
2366	}
2367
2368	/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
2369	static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
2370	MVT VT, SDValue V1, SDValue V2,
2371	SelectionDAG &DAG) {
2372	return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
2373	}
2374
2375	/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
2376	static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
2377	MVT VT, SDValue V1, SDValue V2,
2378	SelectionDAG &DAG) {
2379	return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
2380	}
2381
2382	/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
2383	static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
2384	MVT VT, SDValue V1, SDValue V2,
2385	SelectionDAG &DAG) {
2386
2387	const auto &Begin = Mask.begin();
2388	const auto &End = Mask.end();
2389	unsigned HalfSize = Mask.size() / `2`;
2390	unsigned LeftSize = HalfSize / `2`;
2391	SDValue OriV1 = V1, OriV2 = V2;
2392
2393	if (fitsRegularPattern<int>(Begin, CheckStride: `2`, End: End - HalfSize, ExpectedIndex: HalfSize - LeftSize,
2394	ExpectedIndexStride: `1`) &&
2395	fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: `2`, End, ExpectedIndex: HalfSize + LeftSize, ExpectedIndexStride: `1`))
2396	V1 = OriV1;
2397	else if (fitsRegularPattern<int>(Begin, CheckStride: `2`, End: End - HalfSize,
2398	ExpectedIndex: Mask.size() + HalfSize - LeftSize, ExpectedIndexStride: `1`) &&
2399	fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: `2`, End,
2400	ExpectedIndex: Mask.size() + HalfSize + LeftSize, ExpectedIndexStride: `1`))
2401	V1 = OriV2;
2402	else
2403	return SDValue ();
2404
2405	if (fitsRegularPattern<int>(Begin: Begin + `1`, CheckStride: `2`, End: End - HalfSize, ExpectedIndex: HalfSize - LeftSize,
2406	ExpectedIndexStride: `1`) &&
2407	fitsRegularPattern<int>(Begin: Begin + `1` + HalfSize, CheckStride: `2`, End, ExpectedIndex: HalfSize + LeftSize,
2408	ExpectedIndexStride: `1`))
2409	V2 = OriV1;
2410	else if (fitsRegularPattern<int>(Begin: Begin + `1`, CheckStride: `2`, End: End - HalfSize,
2411	ExpectedIndex: Mask.size() + HalfSize - LeftSize, ExpectedIndexStride: `1`) &&
2412	fitsRegularPattern<int>(Begin: Begin + `1` + HalfSize, CheckStride: `2`, End,
2413	ExpectedIndex: Mask.size() + HalfSize + LeftSize, ExpectedIndexStride: `1`))
2414	V2 = OriV2;
2415	else
2416	return SDValue ();
2417
2418	return DAG.getNode(Opcode: LoongArchISD::VILVH, DL, VT, N1: V2, N2: V1);
2419	}
2420
2421	/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
2422	static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
2423	MVT VT, SDValue V1, SDValue V2,
2424	SelectionDAG &DAG) {
2425
2426	const auto &Begin = Mask.begin();
2427	const auto &End = Mask.end();
2428	unsigned HalfSize = Mask.size() / `2`;
2429	SDValue OriV1 = V1, OriV2 = V2;
2430
2431	if (fitsRegularPattern<int>(Begin, CheckStride: `2`, End: End - HalfSize, ExpectedIndex: `0`, ExpectedIndexStride: `1`) &&
2432	fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: `2`, End, ExpectedIndex: HalfSize, ExpectedIndexStride: `1`))
2433	V1 = OriV1;
2434	else if (fitsRegularPattern<int>(Begin, CheckStride: `2`, End: End - HalfSize, ExpectedIndex: Mask.size(), ExpectedIndexStride: `1`) &&
2435	fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: `2`, End,
2436	ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: `1`))
2437	V1 = OriV2;
2438	else
2439	return SDValue ();
2440
2441	if (fitsRegularPattern<int>(Begin: Begin + `1`, CheckStride: `2`, End: End - HalfSize, ExpectedIndex: `0`, ExpectedIndexStride: `1`) &&
2442	fitsRegularPattern<int>(Begin: Begin + `1` + HalfSize, CheckStride: `2`, End, ExpectedIndex: HalfSize, ExpectedIndexStride: `1`))
2443	V2 = OriV1;
2444	else if (fitsRegularPattern<int>(Begin: Begin + `1`, CheckStride: `2`, End: End - HalfSize, ExpectedIndex: Mask.size(),
2445	ExpectedIndexStride: `1`) &&
2446	fitsRegularPattern<int>(Begin: Begin + `1` + HalfSize, CheckStride: `2`, End,
2447	ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: `1`))
2448	V2 = OriV2;
2449	else
2450	return SDValue ();
2451
2452	return DAG.getNode(Opcode: LoongArchISD::VILVL, DL, VT, N1: V2, N2: V1);
2453	}
2454
2455	/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
2456	static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
2457	MVT VT, SDValue V1, SDValue V2,
2458	SelectionDAG &DAG) {
2459
2460	const auto &Begin = Mask.begin();
2461	const auto &LeftMid = Mask.begin() + Mask.size() / `4`;
2462	const auto &Mid = Mask.begin() + Mask.size() / `2`;
2463	const auto &RightMid = Mask.end() - Mask.size() / `4`;
2464	const auto &End = Mask.end();
2465	unsigned HalfSize = Mask.size() / `2`;
2466	SDValue OriV1 = V1, OriV2 = V2;
2467
2468	if (fitsRegularPattern<int>(Begin, CheckStride: `1`, End: LeftMid, ExpectedIndex: `0`, ExpectedIndexStride: `2`) &&
2469	fitsRegularPattern<int>(Begin: Mid, CheckStride: `1`, End: RightMid, ExpectedIndex: HalfSize, ExpectedIndexStride: `2`))
2470	V1 = OriV1;
2471	else if (fitsRegularPattern<int>(Begin, CheckStride: `1`, End: LeftMid, ExpectedIndex: Mask.size(), ExpectedIndexStride: `2`) &&
2472	fitsRegularPattern<int>(Begin: Mid, CheckStride: `1`, End: RightMid, ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: `2`))
2473	V1 = OriV2;
2474	else
2475	return SDValue ();
2476
2477	if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: `1`, End: Mid, ExpectedIndex: `0`, ExpectedIndexStride: `2`) &&
2478	fitsRegularPattern<int>(Begin: RightMid, CheckStride: `1`, End, ExpectedIndex: HalfSize, ExpectedIndexStride: `2`))
2479	V2 = OriV1;
2480	else if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: `1`, End: Mid, ExpectedIndex: Mask.size(), ExpectedIndexStride: `2`) &&
2481	fitsRegularPattern<int>(Begin: RightMid, CheckStride: `1`, End, ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: `2`))
2482	V2 = OriV2;
2483
2484	else
2485	return SDValue ();
2486
2487	return DAG.getNode(Opcode: LoongArchISD::VPICKEV, DL, VT, N1: V2, N2: V1);
2488	}
2489
2490	/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
2491	static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
2492	MVT VT, SDValue V1, SDValue V2,
2493	SelectionDAG &DAG) {
2494
2495	const auto &Begin = Mask.begin();
2496	const auto &LeftMid = Mask.begin() + Mask.size() / `4`;
2497	const auto &Mid = Mask.begin() + Mask.size() / `2`;
2498	const auto &RightMid = Mask.end() - Mask.size() / `4`;
2499	const auto &End = Mask.end();
2500	unsigned HalfSize = Mask.size() / `2`;
2501	SDValue OriV1 = V1, OriV2 = V2;
2502
2503	if (fitsRegularPattern<int>(Begin, CheckStride: `1`, End: LeftMid, ExpectedIndex: `1`, ExpectedIndexStride: `2`) &&
2504	fitsRegularPattern<int>(Begin: Mid, CheckStride: `1`, End: RightMid, ExpectedIndex: HalfSize + `1`, ExpectedIndexStride: `2`))
2505	V1 = OriV1;
2506	else if (fitsRegularPattern<int>(Begin, CheckStride: `1`, End: LeftMid, ExpectedIndex: Mask.size() + `1`, ExpectedIndexStride: `2`) &&
2507	fitsRegularPattern<int>(Begin: Mid, CheckStride: `1`, End: RightMid, ExpectedIndex: Mask.size() + HalfSize + `1`,
2508	ExpectedIndexStride: `2`))
2509	V1 = OriV2;
2510	else
2511	return SDValue ();
2512
2513	if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: `1`, End: Mid, ExpectedIndex: `1`, ExpectedIndexStride: `2`) &&
2514	fitsRegularPattern<int>(Begin: RightMid, CheckStride: `1`, End, ExpectedIndex: HalfSize + `1`, ExpectedIndexStride: `2`))
2515	V2 = OriV1;
2516	else if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: `1`, End: Mid, ExpectedIndex: Mask.size() + `1`, ExpectedIndexStride: `2`) &&
2517	fitsRegularPattern<int>(Begin: RightMid, CheckStride: `1`, End, ExpectedIndex: Mask.size() + HalfSize + `1`,
2518	ExpectedIndexStride: `2`))
2519	V2 = OriV2;
2520	else
2521	return SDValue ();
2522
2523	return DAG.getNode(Opcode: LoongArchISD::VPICKOD, DL, VT, N1: V2, N2: V1);
2524	}
2525
2526	/// Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
2527	static SDValue
2528	lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2529	SDValue V1, SDValue V2, SelectionDAG &DAG,
2530	const LoongArchSubtarget &Subtarget) {
2531	// LoongArch LASX only supports xvinsve0.{w/d}.
2532	if (VT != MVT::v8i32 && VT != MVT::v8f32 && VT != MVT::v4i64 &&
2533	VT != MVT::v4f64)
2534	return SDValue ();
2535
2536	MVT GRLenVT = Subtarget.getGRLenVT();
2537	int MaskSize = Mask.size();
2538	assert(MaskSize == (int)VT.getVectorNumElements() && "Unexpected mask size");
2539
2540	// Check if exactly one element of the Mask is replaced by 'Replaced', while
2541	// all other elements are either 'Base + i' or undef (-1). On success, return
2542	// the index of the replaced element. Otherwise, just return -1.
2543	auto checkReplaceOne = [&](int Base, int Replaced) -> int {
2544	int Idx = -`1`;
2545	for (int i = `0`; i < MaskSize; ++i) {
2546	if (Mask [i] == Base + i \|\| Mask [i] == -`1`)
2547	continue;
2548	if (Mask [i] != Replaced)
2549	return -`1`;
2550	if (Idx == -`1`)
2551	Idx = i;
2552	else
2553	return -`1`;
2554	}
2555	return Idx;
2556	};
2557
2558	// Case 1: the lowest element of V2 replaces one element in V1.
2559	int Idx = checkReplaceOne (`0`, MaskSize);
2560	if (Idx != -`1`)
2561	return DAG.getNode(Opcode: LoongArchISD::XVINSVE0, DL, VT, N1: V1, N2: V2,
2562	N3: DAG.getConstant(Val: Idx, DL, VT: GRLenVT));
2563
2564	// Case 2: the lowest element of V1 replaces one element in V2.
2565	Idx = checkReplaceOne (MaskSize, `0`);
2566	if (Idx != -`1`)
2567	return DAG.getNode(Opcode: LoongArchISD::XVINSVE0, DL, VT, N1: V2, N2: V1,
2568	N3: DAG.getConstant(Val: Idx, DL, VT: GRLenVT));
2569
2570	return SDValue ();
2571	}
2572
2573	/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
2574	static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
2575	MVT VT, SDValue V1, SDValue V2,
2576	SelectionDAG &DAG) {
2577
2578	int MaskSize = Mask.size();
2579	int HalfSize = Mask.size() / `2`;
2580	const auto &Begin = Mask.begin();
2581	const auto &Mid = Mask.begin() + HalfSize;
2582	const auto &End = Mask.end();
2583
2584	// VECTOR_SHUFFLE concatenates the vectors:
2585	// <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
2586	// shuffling ->
2587	// <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
2588	//
2589	// XVSHUF concatenates the vectors:
2590	// <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
2591	// shuffling ->
2592	// <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
2593	SmallVector<SDValue, `8`> MaskAlloc;
2594	for (auto it = Begin; it < Mid; it++) {
2595	if (it < `0`) // UNDEF*
2596	MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: `0`, DL, VT: MVT::i64));
2597	else if ((it >= `0` && it < HalfSize) \|\|
2598	(it >= MaskSize && it < MaskSize + HalfSize)) {
2599	int M = it < HalfSize ? it : *it - HalfSize;
2600	MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: M, DL, VT: MVT::i64));
2601	} else
2602	return SDValue ();
2603	}
2604	assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
2605
2606	for (auto it = Mid; it < End; it++) {
2607	if (it < `0`) // UNDEF*
2608	MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: `0`, DL, VT: MVT::i64));
2609	else if ((it >= HalfSize && it < MaskSize) \|\|
2610	(it >= MaskSize + HalfSize && it < MaskSize * `2`)) {
2611	int M = it < MaskSize ? it - HalfSize : *it - MaskSize;
2612	MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: M, DL, VT: MVT::i64));
2613	} else
2614	return SDValue ();
2615	}
2616	assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
2617
2618	EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2619	SDValue MaskVec = DAG.getBuildVector(VT: MaskVecTy, DL, Ops: MaskAlloc);
2620	return DAG.getNode(Opcode: LoongArchISD::VSHUF, DL, VT, N1: MaskVec, N2: V2, N3: V1);
2621	}
2622
2623	/// Shuffle vectors by lane to generate more optimized instructions.
2624	/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
2625	///
2626	/// Therefore, except for the following four cases, other cases are regarded
2627	/// as cross-lane shuffles, where optimization is relatively limited.
2628	///
2629	/// - Shuffle high, low lanes of two inputs vector
2630	/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
2631	/// - Shuffle low, high lanes of two inputs vector
2632	/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
2633	/// - Shuffle low, low lanes of two inputs vector
2634	/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
2635	/// - Shuffle high, high lanes of two inputs vector
2636	/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
2637	///
2638	/// The first case is the closest to LoongArch instructions and the other
2639	/// cases need to be converted to it for processing.
2640	///
2641	/// This function will return true for the last three cases above and will
2642	/// modify V1, V2 and Mask. Otherwise, return false for the first case and
2643	/// cross-lane shuffle cases.
2644	static bool canonicalizeShuffleVectorByLane(
2645	const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
2646	SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
2647
2648	enum HalfMaskType { HighLaneTy, LowLaneTy, None };
2649
2650	int MaskSize = Mask.size();
2651	int HalfSize = Mask.size() / `2`;
2652	MVT GRLenVT = Subtarget.getGRLenVT();
2653
2654	HalfMaskType preMask = None, postMask = None;
2655
2656	if (std::all_of(first: Mask.begin(), last: Mask.begin() + HalfSize, pred: [&](int M) {
2657	return M < `0` \|\| (M >= `0` && M < HalfSize) \|\|
2658	(M >= MaskSize && M < MaskSize + HalfSize);
2659	}))
2660	preMask = HighLaneTy;
2661	else if (std::all_of(first: Mask.begin(), last: Mask.begin() + HalfSize, pred: [&](int M) {
2662	return M < `0` \|\| (M >= HalfSize && M < MaskSize) \|\|
2663	(M >= MaskSize + HalfSize && M < MaskSize * `2`);
2664	}))
2665	preMask = LowLaneTy;
2666
2667	if (std::all_of(first: Mask.begin() + HalfSize, last: Mask.end(), pred: [&](int M) {
2668	return M < `0` \|\| (M >= HalfSize && M < MaskSize) \|\|
2669	(M >= MaskSize + HalfSize && M < MaskSize * `2`);
2670	}))
2671	postMask = LowLaneTy;
2672	else if (std::all_of(first: Mask.begin() + HalfSize, last: Mask.end(), pred: [&](int M) {
2673	return M < `0` \|\| (M >= `0` && M < HalfSize) \|\|
2674	(M >= MaskSize && M < MaskSize + HalfSize);
2675	}))
2676	postMask = HighLaneTy;
2677
2678	// The pre-half of mask is high lane type, and the post-half of mask
2679	// is low lane type, which is closest to the LoongArch instructions.
2680	//
2681	// Note: In the LoongArch architecture, the high lane of mask corresponds
2682	// to the lower 128-bit of vector register, and the low lane of mask
2683	// corresponds the higher 128-bit of vector register.
2684	if (preMask == HighLaneTy && postMask == LowLaneTy) {
2685	return false;
2686	}
2687	if (preMask == LowLaneTy && postMask == HighLaneTy) {
2688	V1 = DAG.getBitcast(VT: MVT::v4i64, V: V1);
2689	V1 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V1,
2690	N2: DAG.getConstant(Val: `0b01001110`, DL, VT: GRLenVT));
2691	V1 = DAG.getBitcast(VT, V: V1);
2692
2693	if (!V2.isUndef()) {
2694	V2 = DAG.getBitcast(VT: MVT::v4i64, V: V2);
2695	V2 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V2,
2696	N2: DAG.getConstant(Val: `0b01001110`, DL, VT: GRLenVT));
2697	V2 = DAG.getBitcast(VT, V: V2);
2698	}
2699
2700	for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2701	it = it < `0` ? it : it - HalfSize;
2702	}
2703	for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2704	it = it < `0` ? it : it + HalfSize;
2705	}
2706	} else if (preMask == LowLaneTy && postMask == LowLaneTy) {
2707	V1 = DAG.getBitcast(VT: MVT::v4i64, V: V1);
2708	V1 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V1,
2709	N2: DAG.getConstant(Val: `0b11101110`, DL, VT: GRLenVT));
2710	V1 = DAG.getBitcast(VT, V: V1);
2711
2712	if (!V2.isUndef()) {
2713	V2 = DAG.getBitcast(VT: MVT::v4i64, V: V2);
2714	V2 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V2,
2715	N2: DAG.getConstant(Val: `0b11101110`, DL, VT: GRLenVT));
2716	V2 = DAG.getBitcast(VT, V: V2);
2717	}
2718
2719	for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2720	it = it < `0` ? it : it - HalfSize;
2721	}
2722	} else if (preMask == HighLaneTy && postMask == HighLaneTy) {
2723	V1 = DAG.getBitcast(VT: MVT::v4i64, V: V1);
2724	V1 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V1,
2725	N2: DAG.getConstant(Val: `0b01000100`, DL, VT: GRLenVT));
2726	V1 = DAG.getBitcast(VT, V: V1);
2727
2728	if (!V2.isUndef()) {
2729	V2 = DAG.getBitcast(VT: MVT::v4i64, V: V2);
2730	V2 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V2,
2731	N2: DAG.getConstant(Val: `0b01000100`, DL, VT: GRLenVT));
2732	V2 = DAG.getBitcast(VT, V: V2);
2733	}
2734
2735	for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2736	it = it < `0` ? it : it + HalfSize;
2737	}
2738	} else { // cross-lane
2739	return false;
2740	}
2741
2742	return true;
2743	}
2744
2745	/// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
2746	/// Only for 256-bit vector.
2747	///
2748	/// For example:
2749	/// %2 = shufflevector <4 x i64> %0, <4 x i64> posion,
2750	/// <4 x i64> <i32 0, i32 3, i32 2, i32 0>
2751	/// is lowerded to:
2752	/// (XVPERMI $xr2, $xr0, 78)
2753	/// (XVSHUF $xr1, $xr2, $xr0)
2754	/// (XVORI $xr0, $xr1, 0)
2755	static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL,
2756	ArrayRef<int> Mask,
2757	MVT VT, SDValue V1,
2758	SDValue V2,
2759	SelectionDAG &DAG) {
2760	assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
2761	int Size = Mask.size();
2762	int LaneSize = Size / `2`;
2763
2764	bool LaneCrossing[`2`] = {false, false};
2765	for (int i = `0`; i < Size; ++i)
2766	if (Mask [i] >= `0` && ((Mask [i] % Size) / LaneSize) != (i / LaneSize))
2767	LaneCrossing[(Mask [i] % Size) / LaneSize] = true;
2768
2769	// Ensure that all lanes ared involved.
2770	if (!LaneCrossing[`0`] && !LaneCrossing[`1`])
2771	return SDValue ();
2772
2773	SmallVector<int> InLaneMask;
2774	InLaneMask.assign(in_start: Mask.begin(), in_end: Mask.end());
2775	for (int i = `0`; i < Size; ++i) {
2776	int &M = InLaneMask [i];
2777	if (M < `0`)
2778	continue;
2779	if (((M % Size) / LaneSize) != (i / LaneSize))
2780	M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
2781	}
2782
2783	SDValue Flipped = DAG.getBitcast(VT: MVT::v4i64, V: V1);
2784	Flipped = DAG.getVectorShuffle(VT: MVT::v4i64, dl: DL, N1: Flipped,
2785	N2: DAG.getUNDEF(VT: MVT::v4i64), Mask: {`2`, `3`, `0`, `1`});
2786	Flipped = DAG.getBitcast(VT, V: Flipped);
2787	return DAG.getVectorShuffle(VT, dl: DL, N1: V1, N2: Flipped, Mask: InLaneMask);
2788	}
2789
2790	/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
2791	///
2792	/// This routine breaks down the specific type of 256-bit shuffle and
2793	/// dispatches to the lowering routines accordingly.
2794	static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2795	SDValue V1, SDValue V2, SelectionDAG &DAG,
2796	const LoongArchSubtarget &Subtarget) {
2797	assert((VT.SimpleTy == MVT::v32i8 \|\| VT.SimpleTy == MVT::v16i16 \|\|
2798	VT.SimpleTy == MVT::v8i32 \|\| VT.SimpleTy == MVT::v4i64 \|\|
2799	VT.SimpleTy == MVT::v8f32 \|\| VT.SimpleTy == MVT::v4f64) &&
2800	"Vector type is unsupported for lasx!");
2801	assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
2802	"Two operands have different types!");
2803	assert(VT.getVectorNumElements() == Mask.size() &&
2804	"Unexpected mask size for shuffle!");
2805	assert(Mask.size() % `2` == `0` && "Expected even mask size.");
2806	assert(Mask.size() >= `4` && "Mask size is less than 4.");
2807
2808	APInt KnownUndef, KnownZero;
2809	computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2810	APInt Zeroable = KnownUndef \| KnownZero;
2811
2812	SDValue Result;
2813	// TODO: Add more comparison patterns.
2814	if (V2.isUndef()) {
2815	if ((Result =
2816	lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2817	return Result;
2818	if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
2819	Subtarget)))
2820	return Result;
2821	// Try to widen vectors to gain more optimization opportunities.
2822	if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2823	return NewShuffle;
2824	if ((Result =
2825	lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, DAG, Subtarget)))
2826	return Result;
2827	if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
2828	return Result;
2829	if ((Result =
2830	lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
2831	return Result;
2832
2833	// TODO: This comment may be enabled in the future to better match the
2834	// pattern for instruction selection.
2835	/ V2 = V1; /
2836	}
2837
2838	// It is recommended not to change the pattern comparison order for better
2839	// performance.
2840	if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask, VT, V1, V2, DAG)))
2841	return Result;
2842	if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask, VT, V1, V2, DAG)))
2843	return Result;
2844	if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask, VT, V1, V2, DAG)))
2845	return Result;
2846	if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask, VT, V1, V2, DAG)))
2847	return Result;
2848	if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask, VT, V1, V2, DAG)))
2849	return Result;
2850	if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask, VT, V1, V2, DAG)))
2851	return Result;
2852	if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2853	Zeroable)))
2854	return Result;
2855	if ((Result =
2856	lowerVECTOR_SHUFFLE_XVINSVE0(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2857	return Result;
2858	if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2859	Subtarget)))
2860	return Result;
2861
2862	// canonicalize non cross-lane shuffle vector
2863	SmallVector<int> NewMask(Mask);
2864	if (canonicalizeShuffleVectorByLane(DL, Mask: NewMask, VT, V1, V2, DAG, Subtarget))
2865	return lower256BitShuffle(DL, Mask: NewMask, VT, V1, V2, DAG, Subtarget);
2866
2867	// FIXME: Handling the remaining cases earlier can degrade performance
2868	// in some situations. Further analysis is required to enable more
2869	// effective optimizations.
2870	if (V2.isUndef()) {
2871	if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, Mask: NewMask, VT,
2872	V1, V2, DAG)))
2873	return Result;
2874	}
2875
2876	if (SDValue NewShuffle = widenShuffleMask(DL, Mask: NewMask, VT, V1, V2, DAG))
2877	return NewShuffle;
2878	if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, Mask: NewMask, VT, V1, V2, DAG)))
2879	return Result;
2880
2881	return SDValue ();
2882	}
2883
2884	SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
2885	SelectionDAG &DAG) const {
2886	ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Val&: Op);
2887	ArrayRef<int> OrigMask = SVOp->getMask();
2888	SDValue V1 = Op.getOperand(i: `0`);
2889	SDValue V2 = Op.getOperand(i: `1`);
2890	MVT VT = Op.getSimpleValueType();
2891	int NumElements = VT.getVectorNumElements();
2892	SDLoc DL(Op);
2893
2894	bool V1IsUndef = V1.isUndef();
2895	bool V2IsUndef = V2.isUndef();
2896	if (V1IsUndef && V2IsUndef)
2897	return DAG.getUNDEF(VT);
2898
2899	// When we create a shuffle node we put the UNDEF node to second operand,
2900	// but in some cases the first operand may be transformed to UNDEF.
2901	// In this case we should just commute the node.
2902	if (V1IsUndef)
2903	return DAG.getCommutedVectorShuffle(SV: *SVOp);
2904
2905	// Check for non-undef masks pointing at an undef vector and make the masks
2906	// undef as well. This makes it easier to match the shuffle based solely on
2907	// the mask.
2908	if (V2IsUndef &&
2909	any_of(Range&: OrigMask, P: [NumElements](int M) { return M >= NumElements; })) {
2910	SmallVector<int, `8`> NewMask(OrigMask);
2911	for (int &M : NewMask)
2912	if (M >= NumElements)
2913	M = -`1`;
2914	return DAG.getVectorShuffle(VT, dl: DL, N1: V1, N2: V2, Mask: NewMask);
2915	}
2916
2917	// Check for illegal shuffle mask element index values.
2918	int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? `1` : `2`);
2919	(void)MaskUpperLimit;
2920	assert(llvm::all_of(OrigMask,
2921	[&](int M) { return -`1` <= M && M < MaskUpperLimit; }) &&
2922	"Out of bounds shuffle index");
2923
2924	// For each vector width, delegate to a specialized lowering routine.
2925	if (VT.is128BitVector())
2926	return lower128BitShuffle(DL, Mask: OrigMask, VT, V1, V2, DAG, Subtarget);
2927
2928	if (VT.is256BitVector())
2929	return lower256BitShuffle(DL, Mask: OrigMask, VT, V1, V2, DAG, Subtarget);
2930
2931	return SDValue ();
2932	}
2933
2934	SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
2935	SelectionDAG &DAG) const {
2936	// Custom lower to ensure the libcall return is passed in an FPR on hard
2937	// float ABIs.
2938	SDLoc DL(Op);
2939	MakeLibCallOptions CallOptions;
2940	SDValue Op0 = Op.getOperand(i: `0`);
2941	SDValue Chain = SDValue ();
2942	RTLIB::Libcall LC = RTLIB::getFPROUND(OpVT: Op0.getValueType(), RetVT: MVT::f16);
2943	SDValue Res;
2944	std::tie(args&: Res, args&: Chain) =
2945	makeLibCall(DAG, LC, RetVT: MVT::f32, Ops: Op0, CallOptions, dl: DL, Chain);
2946	if (Subtarget.is64Bit())
2947	return DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Res);
2948	return DAG.getBitcast(VT: MVT::i32, V: Res);
2949	}
2950
2951	SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
2952	SelectionDAG &DAG) const {
2953	// Custom lower to ensure the libcall argument is passed in an FPR on hard
2954	// float ABIs.
2955	SDLoc DL(Op);
2956	MakeLibCallOptions CallOptions;
2957	SDValue Op0 = Op.getOperand(i: `0`);
2958	SDValue Chain = SDValue ();
2959	SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64,
2960	DL, VT: MVT::f32, Operand: Op0)
2961	: DAG.getBitcast(VT: MVT::f32, V: Op0);
2962	SDValue Res;
2963	std::tie(args&: Res, args&: Chain) = makeLibCall(DAG, LC: RTLIB::FPEXT_F16_F32, RetVT: MVT::f32, Ops: Arg,
2964	CallOptions, dl: DL, Chain);
2965	return Res;
2966	}
2967
2968	SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
2969	SelectionDAG &DAG) const {
2970	assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2971	SDLoc DL(Op);
2972	MakeLibCallOptions CallOptions;
2973	RTLIB::Libcall LC =
2974	RTLIB::getFPROUND(OpVT: Op.getOperand(i: `0`).getValueType(), RetVT: MVT::bf16);
2975	SDValue Res =
2976	makeLibCall(DAG, LC, RetVT: MVT::f32, Ops: Op.getOperand(i: `0`), CallOptions, dl: DL).first;
2977	if (Subtarget.is64Bit())
2978	return DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Res);
2979	return DAG.getBitcast(VT: MVT::i32, V: Res);
2980	}
2981
2982	SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
2983	SelectionDAG &DAG) const {
2984	assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2985	MVT VT = Op.getSimpleValueType();
2986	SDLoc DL(Op);
2987	Op = DAG.getNode(
2988	Opcode: ISD::SHL, DL, VT: Op.getOperand(i: `0`).getValueType(), N1: Op.getOperand(i: `0`),
2989	N2: DAG.getShiftAmountConstant(Val: `16`, VT: Op.getOperand(i: `0`).getValueType(), DL));
2990	SDValue Res = Subtarget.is64Bit() ? DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64,
2991	DL, VT: MVT::f32, Operand: Op)
2992	: DAG.getBitcast(VT: MVT::f32, V: Op);
2993	if (VT != MVT::f32)
2994	return DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT, Operand: Res);
2995	return Res;
2996	}
2997
2998	// Lower BUILD_VECTOR as broadcast load (if possible).
2999	// For example:
3000	// %a = load i8, ptr %ptr
3001	// %b = build_vector %a, %a, %a, %a
3002	// is lowered to :
3003	// (VLDREPL_B $a0, 0)
3004	static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
3005	const SDLoc &DL,
3006	SelectionDAG &DAG) {
3007	MVT VT = BVOp->getSimpleValueType(ResNo: `0`);
3008	int NumOps = BVOp->getNumOperands();
3009
3010	assert((VT.is128BitVector() \|\| VT.is256BitVector()) &&
3011	"Unsupported vector type for broadcast.");
3012
3013	SDValue IdentitySrc;
3014	bool IsIdeneity = true;
3015
3016	for (int i = `0`; i != NumOps; i++) {
3017	SDValue Op = BVOp->getOperand(Num: i);
3018	if (Op.getOpcode() != ISD::LOAD \|\| (IdentitySrc && Op != IdentitySrc)) {
3019	IsIdeneity = false;
3020	break;
3021	}
3022	IdentitySrc = BVOp->getOperand(Num: `0`);
3023	}
3024
3025	// make sure that this load is valid and only has one user.
3026	if (!IsIdeneity \|\| !IdentitySrc \|\| !BVOp->isOnlyUserOf(N: IdentitySrc.getNode()))
3027	return SDValue ();
3028
3029	auto *LN = cast<LoadSDNode>(Val&: IdentitySrc);
3030	auto ExtType = LN->getExtensionType();
3031
3032	if ((ExtType == ISD::EXTLOAD \|\| ExtType == ISD::NON_EXTLOAD) &&
3033	VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
3034	// Indexed loads and stores are not supported on LoongArch.
3035	assert(LN->isUnindexed() && "Unexpected indexed load.");
3036
3037	SDVTList Tys = DAG.getVTList(VT1: VT, VT2: MVT::Other);
3038	// The offset operand of unindexed load is always undefined, so there is
3039	// no need to pass it to VLDREPL.
3040	SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
3041	SDValue BCast = DAG.getNode(Opcode: LoongArchISD::VLDREPL, DL, VTList: Tys, Ops);
3042	DAG.ReplaceAllUsesOfValueWith(From: SDValue (LN, `1`), To: BCast.getValue(R: `1`));
3043	return BCast;
3044	}
3045	return SDValue ();
3046	}
3047
3048	// Sequentially insert elements from Ops into Vector, from low to high indices.
3049	// Note: Ops can have fewer elements than Vector.
3050	static void fillVector(ArrayRef<SDValue> Ops, SelectionDAG &DAG, SDLoc DL,
3051	const LoongArchSubtarget &Subtarget, SDValue &Vector,
3052	EVT ResTy) {
3053	assert(Ops.size() <= ResTy.getVectorNumElements());
3054
3055	SDValue Op0 = Ops [`0`];
3056	if (!Op0.isUndef())
3057	Vector = DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL, VT: ResTy, Operand: Op0);
3058	for (unsigned i = `1`; i < Ops.size(); ++i) {
3059	SDValue Opi = Ops [i];
3060	if (Opi.isUndef())
3061	continue;
3062	Vector = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: ResTy, N1: Vector, N2: Opi,
3063	N3: DAG.getConstant(Val: i, DL, VT: Subtarget.getGRLenVT()));
3064	}
3065	}
3066
3067	// Build a ResTy subvector from Node, taking NumElts elements starting at index
3068	// 'first'.
3069	static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node,
3070	SelectionDAG &DAG, SDLoc DL,
3071	const LoongArchSubtarget &Subtarget,
3072	EVT ResTy, unsigned first) {
3073	unsigned NumElts = ResTy.getVectorNumElements();
3074
3075	assert(first + NumElts <= Node->getSimpleValueType(`0`).getVectorNumElements());
3076
3077	SmallVector<SDValue, `16`> Ops(Node->op_begin() + first,
3078	Node->op_begin() + first + NumElts);
3079	SDValue Vector = DAG.getUNDEF(VT: ResTy);
3080	fillVector(Ops, DAG, DL, Subtarget, Vector, ResTy);
3081	return Vector;
3082	}
3083
3084	SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
3085	SelectionDAG &DAG) const {
3086	BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Val&: Op);
3087	MVT VT = Node->getSimpleValueType(ResNo: `0`);
3088	EVT ResTy = Op ->getValueType(ResNo: `0`);
3089	unsigned NumElts = ResTy.getVectorNumElements();
3090	SDLoc DL(Op);
3091	APInt SplatValue, SplatUndef;
3092	unsigned SplatBitSize;
3093	bool HasAnyUndefs;
3094	bool IsConstant = false;
3095	bool UseSameConstant = true;
3096	SDValue ConstantValue;
3097	bool Is128Vec = ResTy.is128BitVector();
3098	bool Is256Vec = ResTy.is256BitVector();
3099
3100	if ((!Subtarget.hasExtLSX() \|\| !Is128Vec) &&
3101	(!Subtarget.hasExtLASX() \|\| !Is256Vec))
3102	return SDValue ();
3103
3104	if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(BVOp: Node, DL, DAG))
3105	return Result;
3106
3107	if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
3108	/MinSplatBits=/`8`) &&
3109	SplatBitSize <= `64`) {
3110	// We can only cope with 8, 16, 32, or 64-bit elements.
3111	if (SplatBitSize != `8` && SplatBitSize != `16` && SplatBitSize != `32` &&
3112	SplatBitSize != `64`)
3113	return SDValue ();
3114
3115	if (SplatBitSize == `64` && !Subtarget.is64Bit()) {
3116	// We can only handle 64-bit elements that are within
3117	// the signed 10-bit range or match vldi patterns on 32-bit targets.
3118	// See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
3119	if (!SplatValue.isSignedIntN(N: `10`) &&
3120	!isImmVLDILegalForMode1(SplatValue, SplatBitSize).first)
3121	return SDValue ();
3122	if ((Is128Vec && ResTy == MVT::v4i32) \|\|
3123	(Is256Vec && ResTy == MVT::v8i32))
3124	return Op;
3125	}
3126
3127	EVT ViaVecTy;
3128
3129	switch (SplatBitSize) {
3130	default:
3131	return SDValue ();
3132	case `8`:
3133	ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
3134	break;
3135	case `16`:
3136	ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
3137	break;
3138	case `32`:
3139	ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
3140	break;
3141	case `64`:
3142	ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
3143	break;
3144	}
3145
3146	// SelectionDAG::getConstant will promote SplatValue appropriately.
3147	SDValue Result = DAG.getConstant(Val: SplatValue, DL, VT: ViaVecTy);
3148
3149	// Bitcast to the type we originally wanted.
3150	if (ViaVecTy != ResTy)
3151	Result = DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc (Node), VT: ResTy, Operand: Result);
3152
3153	return Result;
3154	}
3155
3156	if (DAG.isSplatValue(V: Op, /AllowUndefs=/false))
3157	return Op;
3158
3159	for (unsigned i = `0`; i < NumElts; ++i) {
3160	SDValue Opi = Node->getOperand(Num: i);
3161	if (isIntOrFPConstant(V: Opi)) {
3162	IsConstant = true;
3163	if (!ConstantValue.getNode())
3164	ConstantValue = Opi;
3165	else if (ConstantValue != Opi)
3166	UseSameConstant = false;
3167	}
3168	}
3169
3170	// If the type of BUILD_VECTOR is v2f64, custom legalizing it has no benefits.
3171	if (IsConstant && UseSameConstant && ResTy != MVT::v2f64) {
3172	SDValue Result = DAG.getSplatBuildVector(VT: ResTy, DL, Op: ConstantValue);
3173	for (unsigned i = `0`; i < NumElts; ++i) {
3174	SDValue Opi = Node->getOperand(Num: i);
3175	if (!isIntOrFPConstant(V: Opi))
3176	Result = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: ResTy, N1: Result, N2: Opi,
3177	N3: DAG.getConstant(Val: i, DL, VT: Subtarget.getGRLenVT()));
3178	}
3179	return Result;
3180	}
3181
3182	if (!IsConstant) {
3183	// If the BUILD_VECTOR has a repeated pattern, use INSERT_VECTOR_ELT to fill
3184	// the sub-sequence of the vector and then broadcast the sub-sequence.
3185	//
3186	// TODO: If the BUILD_VECTOR contains undef elements, consider falling
3187	// back to use INSERT_VECTOR_ELT to materialize the vector, because it
3188	// generates worse code in some cases. This could be further optimized
3189	// with more consideration.
3190	SmallVector<SDValue> Sequence;
3191	BitVector UndefElements;
3192	if (Node->getRepeatedSequence(Sequence, UndefElements: &UndefElements) &&
3193	UndefElements.count() == `0`) {
3194	// Using LSX instructions to fill the sub-sequence of 256-bits vector,
3195	// because the high part can be simply treated as undef.
3196	SDValue Vector = DAG.getUNDEF(VT: ResTy);
3197	EVT FillTy = Is256Vec
3198	? ResTy.getHalfNumVectorElementsVT(Context&: *DAG.getContext())
3199	: ResTy;
3200	SDValue FillVec =
3201	Is256Vec ? DAG.getExtractSubvector(DL, VT: FillTy, Vec: Vector, Idx: `0`) : Vector;
3202
3203	fillVector(Ops: Sequence, DAG, DL, Subtarget, Vector&: FillVec, ResTy: FillTy);
3204
3205	unsigned SeqLen = Sequence.size();
3206	unsigned SplatLen = NumElts / SeqLen;
3207	MVT SplatEltTy = MVT::getIntegerVT(BitWidth: VT.getScalarSizeInBits() * SeqLen);
3208	MVT SplatTy = MVT::getVectorVT(VT: SplatEltTy, NumElements: SplatLen);
3209
3210	// If size of the sub-sequence is half of a 256-bits vector, bitcast the
3211	// vector to v4i64 type in order to match the pattern of XVREPLVE0Q.
3212	if (SplatEltTy == MVT::i128)
3213	SplatTy = MVT::v4i64;
3214
3215	SDValue SplatVec;
3216	SDValue SrcVec = DAG.getBitcast(
3217	VT: SplatTy,
3218	V: Is256Vec ? DAG.getInsertSubvector(DL, Vec: Vector, SubVec: FillVec, Idx: `0`) : FillVec);
3219	if (Is256Vec) {
3220	SplatVec =
3221	DAG.getNode(Opcode: (SplatEltTy == MVT::i128) ? LoongArchISD::XVREPLVE0Q
3222	: LoongArchISD::XVREPLVE0,
3223	DL, VT: SplatTy, Operand: SrcVec);
3224	} else {
3225	SplatVec = DAG.getNode(Opcode: LoongArchISD::VREPLVEI, DL, VT: SplatTy, N1: SrcVec,
3226	N2: DAG.getConstant(Val: `0`, DL, VT: Subtarget.getGRLenVT()));
3227	}
3228
3229	return DAG.getBitcast(VT: ResTy, V: SplatVec);
3230	}
3231
3232	// Use INSERT_VECTOR_ELT operations rather than expand to stores, because
3233	// using memory operations is much lower.
3234	//
3235	// For 256-bit vectors, normally split into two halves and concatenate.
3236	// Special case: for v8i32/v8f32/v4i64/v4f64, if the upper half has only
3237	// one non-undef element, skip spliting to avoid a worse result.
3238	if (ResTy == MVT::v8i32 \|\| ResTy == MVT::v8f32 \|\| ResTy == MVT::v4i64 \|\|
3239	ResTy == MVT::v4f64) {
3240	unsigned NonUndefCount = `0`;
3241	for (unsigned i = NumElts / `2`; i < NumElts; ++i) {
3242	if (!Node->getOperand(Num: i).isUndef()) {
3243	++NonUndefCount;
3244	if (NonUndefCount > `1`)
3245	break;
3246	}
3247	}
3248	if (NonUndefCount == `1`)
3249	return fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, ResTy, first: `0`);
3250	}
3251
3252	EVT VecTy =
3253	Is256Vec ? ResTy.getHalfNumVectorElementsVT(Context&: *DAG.getContext()) : ResTy;
3254	SDValue Vector =
3255	fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, ResTy: VecTy, first: `0`);
3256
3257	if (Is128Vec)
3258	return Vector;
3259
3260	SDValue VectorHi = fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget,
3261	ResTy: VecTy, first: NumElts / `2`);
3262
3263	return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: ResTy, N1: Vector, N2: VectorHi);
3264	}
3265
3266	return SDValue ();
3267	}
3268
3269	SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
3270	SelectionDAG &DAG) const {
3271	SDLoc DL(Op);
3272	MVT ResVT = Op.getSimpleValueType();
3273	assert(ResVT.is256BitVector() && Op.getNumOperands() == `2`);
3274
3275	unsigned NumOperands = Op.getNumOperands();
3276	unsigned NumFreezeUndef = `0`;
3277	unsigned NumZero = `0`;
3278	unsigned NumNonZero = `0`;
3279	unsigned NonZeros = `0`;
3280	SmallSet<SDValue, `4`> Undefs;
3281	for (unsigned i = `0`; i != NumOperands; ++i) {
3282	SDValue SubVec = Op.getOperand(i);
3283	if (SubVec.isUndef())
3284	continue;
3285	if (ISD::isFreezeUndef(N: SubVec.getNode())) {
3286	// If the freeze(undef) has multiple uses then we must fold to zero.
3287	if (SubVec.hasOneUse()) {
3288	++NumFreezeUndef;
3289	} else {
3290	++NumZero;
3291	Undefs.insert(V: SubVec);
3292	}
3293	} else if (ISD::isBuildVectorAllZeros(N: SubVec.getNode()))
3294	++NumZero;
3295	else {
3296	assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
3297	NonZeros \|= `1` << i;
3298	++NumNonZero;
3299	}
3300	}
3301
3302	// If we have more than 2 non-zeros, build each half separately.
3303	if (NumNonZero > `2`) {
3304	MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
3305	ArrayRef<SDUse> Ops = Op ->ops();
3306	SDValue Lo = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: HalfVT,
3307	Ops: Ops.slice(N: `0`, M: NumOperands / `2`));
3308	SDValue Hi = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: HalfVT,
3309	Ops: Ops.slice(N: NumOperands / `2`));
3310	return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: ResVT, N1: Lo, N2: Hi);
3311	}
3312
3313	// Otherwise, build it up through insert_subvectors.
3314	SDValue Vec = NumZero ? DAG.getConstant(Val: `0`, DL, VT: ResVT)
3315	: (NumFreezeUndef ? DAG.getFreeze(V: DAG.getUNDEF(VT: ResVT))
3316	: DAG.getUNDEF(VT: ResVT));
3317
3318	// Replace Undef operands with ZeroVector.
3319	for (SDValue U : Undefs)
3320	DAG.ReplaceAllUsesWith(From: U, To: DAG.getConstant(Val: `0`, DL, VT: U.getSimpleValueType()));
3321
3322	MVT SubVT = Op.getOperand(i: `0`).getSimpleValueType();
3323	unsigned NumSubElems = SubVT.getVectorNumElements();
3324	for (unsigned i = `0`; i != NumOperands; ++i) {
3325	if ((NonZeros & (`1` << i)) == `0`)
3326	continue;
3327
3328	Vec = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: ResVT, N1: Vec, N2: Op.getOperand(i),
3329	N3: DAG.getVectorIdxConstant(Val: i * NumSubElems, DL));
3330	}
3331
3332	return Vec;
3333	}
3334
3335	SDValue
3336	LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3337	SelectionDAG &DAG) const {
3338	MVT EltVT = Op.getSimpleValueType();
3339	SDValue Vec = Op ->getOperand(Num: `0`);
3340	EVT VecTy = Vec ->getValueType(ResNo: `0`);
3341	SDValue Idx = Op ->getOperand(Num: `1`);
3342	SDLoc DL(Op);
3343	MVT GRLenVT = Subtarget.getGRLenVT();
3344
3345	assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");
3346
3347	if (isa<ConstantSDNode>(Val: Idx))
3348	return Op;
3349
3350	switch (VecTy.getSimpleVT().SimpleTy) {
3351	default:
3352	llvm_unreachable("Unexpected type");
3353	case MVT::v32i8:
3354	case MVT::v16i16:
3355	case MVT::v4i64:
3356	case MVT::v4f64: {
3357	// Extract the high half subvector and place it to the low half of a new
3358	// vector. It doesn't matter what the high half of the new vector is.
3359	EVT HalfTy = VecTy.getHalfNumVectorElementsVT(Context&: *DAG.getContext());
3360	SDValue VecHi =
3361	DAG.getExtractSubvector(DL, VT: HalfTy, Vec, Idx: HalfTy.getVectorNumElements());
3362	SDValue TmpVec =
3363	DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: VecTy, N1: DAG.getUNDEF(VT: VecTy),
3364	N2: VecHi, N3: DAG.getConstant(Val: `0`, DL, VT: GRLenVT));
3365
3366	// Shuffle the origin Vec and the TmpVec using MaskVec, the lowest element
3367	// of MaskVec is Idx, the rest do not matter. ResVec[0] will hold the
3368	// desired element.
3369	SDValue IdxCp =
3370	Subtarget.is64Bit()
3371	? DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64, DL, VT: MVT::f32, Operand: Idx)
3372	: DAG.getBitcast(VT: MVT::f32, V: Idx);
3373	SDValue IdxVec = DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL, VT: MVT::v8f32, Operand: IdxCp);
3374	SDValue MaskVec =
3375	DAG.getBitcast(VT: (VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, V: IdxVec);
3376	SDValue ResVec =
3377	DAG.getNode(Opcode: LoongArchISD::VSHUF, DL, VT: VecTy, N1: MaskVec, N2: TmpVec, N3: Vec);
3378
3379	return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: EltVT, N1: ResVec,
3380	N2: DAG.getConstant(Val: `0`, DL, VT: GRLenVT));
3381	}
3382	case MVT::v8i32:
3383	case MVT::v8f32: {
3384	SDValue SplatIdx = DAG.getSplatBuildVector(VT: MVT::v8i32, DL, Op: Idx);
3385	SDValue SplatValue =
3386	DAG.getNode(Opcode: LoongArchISD::XVPERM, DL, VT: VecTy, N1: Vec, N2: SplatIdx);
3387
3388	return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: EltVT, N1: SplatValue,
3389	N2: DAG.getConstant(Val: `0`, DL, VT: GRLenVT));
3390	}
3391	}
3392	}
3393
3394	SDValue
3395	LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3396	SelectionDAG &DAG) const {
3397	MVT VT = Op.getSimpleValueType();
3398	MVT EltVT = VT.getVectorElementType();
3399	unsigned NumElts = VT.getVectorNumElements();
3400	unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
3401	SDLoc DL(Op);
3402	SDValue Op0 = Op.getOperand(i: `0`);
3403	SDValue Op1 = Op.getOperand(i: `1`);
3404	SDValue Op2 = Op.getOperand(i: `2`);
3405
3406	if (isa<ConstantSDNode>(Val: Op2))
3407	return Op;
3408
3409	MVT IdxTy = MVT::getIntegerVT(BitWidth: EltSizeInBits);
3410	MVT IdxVTy = MVT::getVectorVT(VT: IdxTy, NumElements: NumElts);
3411
3412	if (!isTypeLegal(VT) \|\| !isTypeLegal(VT: IdxVTy))
3413	return SDValue ();
3414
3415	SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op: Op1);
3416	SmallVector<SDValue, `32`> RawIndices;
3417	SDValue SplatIdx;
3418	SDValue Indices;
3419
3420	if (!Subtarget.is64Bit() && IdxTy == MVT::i64) {
3421	MVT PairVTy = MVT::getVectorVT(VT: MVT::i32, NumElements: NumElts * `2`);
3422	for (unsigned i = `0`; i < NumElts; ++i) {
3423	RawIndices.push_back(Elt: Op2);
3424	RawIndices.push_back(Elt: DAG.getConstant(Val: `0`, DL, VT: MVT::i32));
3425	}
3426	SplatIdx = DAG.getBuildVector(VT: PairVTy, DL, Ops: RawIndices);
3427	SplatIdx = DAG.getBitcast(VT: IdxVTy, V: SplatIdx);
3428
3429	RawIndices.clear();
3430	for (unsigned i = `0`; i < NumElts; ++i) {
3431	RawIndices.push_back(Elt: DAG.getConstant(Val: i, DL, VT: MVT::i32));
3432	RawIndices.push_back(Elt: DAG.getConstant(Val: `0`, DL, VT: MVT::i32));
3433	}
3434	Indices = DAG.getBuildVector(VT: PairVTy, DL, Ops: RawIndices);
3435	Indices = DAG.getBitcast(VT: IdxVTy, V: Indices);
3436	} else {
3437	SplatIdx = DAG.getSplatBuildVector(VT: IdxVTy, DL, Op: Op2);
3438
3439	for (unsigned i = `0`; i < NumElts; ++i)
3440	RawIndices.push_back(Elt: DAG.getConstant(Val: i, DL, VT: Subtarget.getGRLenVT()));
3441	Indices = DAG.getBuildVector(VT: IdxVTy, DL, Ops: RawIndices);
3442	}
3443
3444	// insert vec, elt, idx
3445	// =>
3446	// select (splatidx == {0,1,2...}) ? splatelt : vec
3447	SDValue SelectCC =
3448	DAG.getSetCC(DL, VT: IdxVTy, LHS: SplatIdx, RHS: Indices, Cond: ISD::CondCode::SETEQ);
3449	return DAG.getNode(Opcode: ISD::VSELECT, DL, VT, N1: SelectCC, N2: SplatElt, N3: Op0);
3450	}
3451
3452	SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3453	SelectionDAG &DAG) const {
3454	SDLoc DL(Op);
3455	SyncScope::ID FenceSSID =
3456	static_cast<SyncScope::ID>(Op.getConstantOperandVal(i: `2`));
3457
3458	// singlethread fences only synchronize with signal handlers on the same
3459	// thread and thus only need to preserve instruction order, not actually
3460	// enforce memory ordering.
3461	if (FenceSSID == SyncScope::SingleThread)
3462	// MEMBARRIER is a compiler barrier; it codegens to a no-op.
3463	return DAG.getNode(Opcode: ISD::MEMBARRIER, DL, VT: MVT::Other, Operand: Op.getOperand(i: `0`));
3464
3465	return Op;
3466	}
3467
3468	SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
3469	SelectionDAG &DAG) const {
3470
3471	if (Subtarget.is64Bit() && Op.getOperand(i: `2`).getValueType() == MVT::i32) {
3472	DAG.getContext()->emitError(
3473	ErrorStr: "On LA64, only 64-bit registers can be written.");
3474	return Op.getOperand(i: `0`);
3475	}
3476
3477	if (!Subtarget.is64Bit() && Op.getOperand(i: `2`).getValueType() == MVT::i64) {
3478	DAG.getContext()->emitError(
3479	ErrorStr: "On LA32, only 32-bit registers can be written.");
3480	return Op.getOperand(i: `0`);
3481	}
3482
3483	return Op;
3484	}
3485
3486	SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
3487	SelectionDAG &DAG) const {
3488	if (!isa<ConstantSDNode>(Val: Op.getOperand(i: `0`))) {
3489	DAG.getContext()->emitError(ErrorStr: "argument to '__builtin_frame_address' must "
3490	"be a constant integer");
3491	return SDValue ();
3492	}
3493
3494	MachineFunction &MF = DAG.getMachineFunction();
3495	MF.getFrameInfo().setFrameAddressIsTaken(true);
3496	Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
3497	EVT VT = Op.getValueType();
3498	SDLoc DL(Op);
3499	SDValue FrameAddr = DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: DL, Reg: FrameReg, VT);
3500	unsigned Depth = Op.getConstantOperandVal(i: `0`);
3501	int GRLenInBytes = Subtarget.getGRLen() / `8`;
3502
3503	while (Depth--) {
3504	int Offset = -(GRLenInBytes * `2`);
3505	SDValue Ptr = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: FrameAddr,
3506	N2: DAG.getSignedConstant(Val: Offset, DL, VT));
3507	FrameAddr =
3508	DAG.getLoad(VT, dl: DL, Chain: DAG.getEntryNode(), Ptr, PtrInfo: MachinePointerInfo ());
3509	}
3510	return FrameAddr;
3511	}
3512
3513	SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
3514	SelectionDAG &DAG) const {
3515	// Currently only support lowering return address for current frame.
3516	if (Op.getConstantOperandVal(i: `0`) != `0`) {
3517	DAG.getContext()->emitError(
3518	ErrorStr: "return address can only be determined for the current frame");
3519	return SDValue ();
3520	}
3521
3522	MachineFunction &MF = DAG.getMachineFunction();
3523	MF.getFrameInfo().setReturnAddressIsTaken(true);
3524	MVT GRLenVT = Subtarget.getGRLenVT();
3525
3526	// Return the value of the return address register, marking it an implicit
3527	// live-in.
3528	Register Reg = MF.addLiveIn(PReg: Subtarget.getRegisterInfo()->getRARegister(),
3529	RC: getRegClassFor(VT: GRLenVT));
3530	return DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: SDLoc (Op), Reg, VT: GRLenVT);
3531	}
3532
3533	SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
3534	SelectionDAG &DAG) const {
3535	MachineFunction &MF = DAG.getMachineFunction();
3536	auto Size = Subtarget.getGRLen() / `8`;
3537	auto FI = MF.getFrameInfo().CreateFixedObject(Size, SPOffset: `0`, IsImmutable: false);
3538	return DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout()));
3539	}
3540
3541	SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
3542	SelectionDAG &DAG) const {
3543	MachineFunction &MF = DAG.getMachineFunction();
3544	auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
3545
3546	SDLoc DL(Op);
3547	SDValue FI = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(),
3548	VT: getPointerTy(DL: MF.getDataLayout()));
3549
3550	// vastart just stores the address of the VarArgsFrameIndex slot into the
3551	// memory location argument.
3552	const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: `2`))->getValue();
3553	return DAG.getStore(Chain: Op.getOperand(i: `0`), dl: DL, Val: FI, Ptr: Op.getOperand(i: `1`),
3554	PtrInfo: MachinePointerInfo (SV));
3555	}
3556
3557	SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
3558	SelectionDAG &DAG) const {
3559	assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3560	!Subtarget.hasBasicD() && "unexpected target features");
3561
3562	SDLoc DL(Op);
3563	SDValue Op0 = Op.getOperand(i: `0`);
3564	if (Op0 ->getOpcode() == ISD::AND) {
3565	auto *C = dyn_cast<ConstantSDNode>(Val: Op0.getOperand(i: `1`));
3566	if (C && C->getZExtValue() < UINT64_C(`0xFFFFFFFF`))
3567	return Op;
3568	}
3569
3570	if (Op0 ->getOpcode() == LoongArchISD::BSTRPICK &&
3571	Op0.getConstantOperandVal(i: `1`) < UINT64_C(`0X1F`) &&
3572	Op0.getConstantOperandVal(i: `2`) == UINT64_C(`0`))
3573	return Op;
3574
3575	if (Op0.getOpcode() == ISD::AssertZext &&
3576	dyn_cast<VTSDNode>(Val: Op0.getOperand(i: `1`))->getVT().bitsLT(VT: MVT::i32))
3577	return Op;
3578
3579	EVT OpVT = Op0.getValueType();
3580	EVT RetVT = Op.getValueType();
3581	RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
3582	MakeLibCallOptions CallOptions;
3583	CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT);
3584	SDValue Chain = SDValue ();
3585	SDValue Result;
3586	std::tie(args&: Result, args&: Chain) =
3587	makeLibCall(DAG, LC, RetVT: Op.getValueType(), Ops: Op0, CallOptions, dl: DL, Chain);
3588	return Result;
3589	}
3590
3591	SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
3592	SelectionDAG &DAG) const {
3593	assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3594	!Subtarget.hasBasicD() && "unexpected target features");
3595
3596	SDLoc DL(Op);
3597	SDValue Op0 = Op.getOperand(i: `0`);
3598
3599	if ((Op0.getOpcode() == ISD::AssertSext \|\|
3600	Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) &&
3601	dyn_cast<VTSDNode>(Val: Op0.getOperand(i: `1`))->getVT().bitsLE(VT: MVT::i32))
3602	return Op;
3603
3604	EVT OpVT = Op0.getValueType();
3605	EVT RetVT = Op.getValueType();
3606	RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
3607	MakeLibCallOptions CallOptions;
3608	CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT);
3609	SDValue Chain = SDValue ();
3610	SDValue Result;
3611	std::tie(args&: Result, args&: Chain) =
3612	makeLibCall(DAG, LC, RetVT: Op.getValueType(), Ops: Op0, CallOptions, dl: DL, Chain);
3613	return Result;
3614	}
3615
3616	SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
3617	SelectionDAG &DAG) const {
3618
3619	SDLoc DL(Op);
3620	EVT VT = Op.getValueType();
3621	SDValue Op0 = Op.getOperand(i: `0`);
3622	EVT Op0VT = Op0.getValueType();
3623
3624	if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
3625	Subtarget.is64Bit() && Subtarget.hasBasicF()) {
3626	SDValue NewOp0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op0);
3627	return DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64, DL, VT: MVT::f32, Operand: NewOp0);
3628	}
3629	if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
3630	SDValue Lo, Hi;
3631	std::tie(args&: Lo, args&: Hi) = DAG.SplitScalar(N: Op0, DL, LoVT: MVT::i32, HiVT: MVT::i32);
3632	return DAG.getNode(Opcode: LoongArchISD::BUILD_PAIR_F64, DL, VT: MVT::f64, N1: Lo, N2: Hi);
3633	}
3634	return Op;
3635	}
3636
3637	SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
3638	SelectionDAG &DAG) const {
3639
3640	SDLoc DL(Op);
3641	SDValue Op0 = Op.getOperand(i: `0`);
3642
3643	if (Op0.getValueType() == MVT::f16)
3644	Op0 = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: MVT::f32, Operand: Op0);
3645
3646	if (Op.getValueSizeInBits() > `32` && Subtarget.hasBasicF() &&
3647	!Subtarget.hasBasicD()) {
3648	SDValue Dst = DAG.getNode(Opcode: LoongArchISD::FTINT, DL, VT: MVT::f32, Operand: Op0);
3649	return DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Dst);
3650	}
3651
3652	EVT FPTy = EVT::getFloatingPointVT(BitWidth: Op.getValueSizeInBits());
3653	SDValue Trunc = DAG.getNode(Opcode: LoongArchISD::FTINT, DL, VT: FPTy, Operand: Op0);
3654	return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: Op.getValueType(), Operand: Trunc);
3655	}
3656
3657	static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
3658	SelectionDAG &DAG, unsigned Flags) {
3659	return DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: `0`, TargetFlags: Flags);
3660	}
3661
3662	static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
3663	SelectionDAG &DAG, unsigned Flags) {
3664	return DAG.getTargetBlockAddress(BA: N->getBlockAddress(), VT: Ty, Offset: N->getOffset(),
3665	TargetFlags: Flags);
3666	}
3667
3668	static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
3669	SelectionDAG &DAG, unsigned Flags) {
3670	return DAG.getTargetConstantPool(C: N->getConstVal(), VT: Ty, Align: N->getAlign(),
3671	Offset: N->getOffset(), TargetFlags: Flags);
3672	}
3673
3674	static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
3675	SelectionDAG &DAG, unsigned Flags) {
3676	return DAG.getTargetJumpTable(JTI: N->getIndex(), VT: Ty, TargetFlags: Flags);
3677	}
3678
3679	template <class NodeTy>
3680	SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
3681	CodeModel::Model M,
3682	bool IsLocal) const {
3683	SDLoc DL(N);
3684	EVT Ty = getPointerTy(DL: DAG.getDataLayout());
3685	SDValue Addr = getTargetNode(N, DL, Ty, DAG, `0`);
3686	SDValue Load;
3687
3688	switch (M) {
3689	default:
3690	report_fatal_error(reason: "Unsupported code model");
3691
3692	case CodeModel::Large: {
3693	assert(Subtarget.is64Bit() && "Large code model requires LA64");
3694
3695	// This is not actually used, but is necessary for successfully matching
3696	// the PseudoLA__LARGE nodes.*
3697	SDValue Tmp = DAG.getConstant(Val: `0`, DL, VT: Ty);
3698	if (IsLocal) {
3699	// This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
3700	// eventually becomes the desired 5-insn code sequence.
3701	Load = SDValue (DAG.getMachineNode(Opcode: LoongArch::PseudoLA_PCREL_LARGE, dl: DL, VT: Ty,
3702	Op1: Tmp, Op2: Addr),
3703	`0`);
3704	} else {
3705	// This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
3706	// eventually becomes the desired 5-insn code sequence.
3707	Load = SDValue (
3708	DAG.getMachineNode(Opcode: LoongArch::PseudoLA_GOT_LARGE, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr),
3709	`0`);
3710	}
3711	break;
3712	}
3713
3714	case CodeModel::Small:
3715	case CodeModel::Medium:
3716	if (IsLocal) {
3717	// This generates the pattern (PseudoLA_PCREL sym), which
3718	//
3719	// for la32r expands to:
3720	// (addi.w (pcaddu12i %pcadd_hi20(sym)) %pcadd_lo12(.Lpcadd_hi)).
3721	//
3722	// for la32s and la64 expands to:
3723	// (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
3724	Load = SDValue (
3725	DAG.getMachineNode(Opcode: LoongArch::PseudoLA_PCREL, dl: DL, VT: Ty, Op1: Addr), `0`);
3726	} else {
3727	// This generates the pattern (PseudoLA_GOT sym), which
3728	//
3729	// for la32r expands to:
3730	// (ld.w (pcaddu12i %got_pcadd_hi20(sym)) %pcadd_lo12(.Lpcadd_hi)).
3731	//
3732	// for la32s and la64 expands to:
3733	// (ld.w/d (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
3734	Load =
3735	SDValue (DAG.getMachineNode(Opcode: LoongArch::PseudoLA_GOT, dl: DL, VT: Ty, Op1: Addr), `0`);
3736	}
3737	}
3738
3739	if (!IsLocal) {
3740	// Mark the load instruction as invariant to enable hoisting in MachineLICM.
3741	MachineFunction &MF = DAG.getMachineFunction();
3742	MachineMemOperand *MemOp = MF.getMachineMemOperand(
3743	PtrInfo: MachinePointerInfo::getGOT(MF),
3744	f: MachineMemOperand::MOLoad \| MachineMemOperand::MODereferenceable \|
3745	MachineMemOperand::MOInvariant,
3746	MemTy: LLT (Ty.getSimpleVT()), base_alignment: Align (Ty.getFixedSizeInBits() / `8`));
3747	DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Load.getNode()), NewMemRefs: {MemOp});
3748	}
3749
3750	return Load;
3751	}
3752
3753	SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
3754	SelectionDAG &DAG) const {
3755	return getAddr(N: cast<BlockAddressSDNode>(Val&: Op), DAG,
3756	M: DAG.getTarget().getCodeModel());
3757	}
3758
3759	SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
3760	SelectionDAG &DAG) const {
3761	return getAddr(N: cast<JumpTableSDNode>(Val&: Op), DAG,
3762	M: DAG.getTarget().getCodeModel());
3763	}
3764
3765	SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
3766	SelectionDAG &DAG) const {
3767	return getAddr(N: cast<ConstantPoolSDNode>(Val&: Op), DAG,
3768	M: DAG.getTarget().getCodeModel());
3769	}
3770
3771	SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
3772	SelectionDAG &DAG) const {
3773	GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Val&: Op);
3774	assert(N->getOffset() == `0` && "unexpected offset in global node");
3775	auto CM = DAG.getTarget().getCodeModel();
3776	const GlobalValue *GV = N->getGlobal();
3777
3778	if (GV->isDSOLocal() && isa<GlobalVariable>(Val: GV)) {
3779	if (auto GCM = dyn_cast<GlobalVariable>(Val: GV)->getCodeModel())
3780	CM = *GCM;
3781	}
3782
3783	return getAddr(N, DAG, M: CM, IsLocal: GV->isDSOLocal());
3784	}
3785
3786	SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
3787	SelectionDAG &DAG,
3788	unsigned Opc, bool UseGOT,
3789	bool Large) const {
3790	SDLoc DL(N);
3791	EVT Ty = getPointerTy(DL: DAG.getDataLayout());
3792	MVT GRLenVT = Subtarget.getGRLenVT();
3793
3794	// This is not actually used, but is necessary for successfully matching the
3795	// PseudoLA__LARGE nodes.*
3796	SDValue Tmp = DAG.getConstant(Val: `0`, DL, VT: Ty);
3797	SDValue Addr = DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: `0`, TargetFlags: `0`);
3798
3799	// Only IE needs an extra argument for large code model.
3800	SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
3801	? SDValue (DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), `0`)
3802	: SDValue (DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Addr), `0`);
3803
3804	// If it is LE for normal/medium code model, the add tp operation will occur
3805	// during the pseudo-instruction expansion.
3806	if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
3807	return Offset;
3808
3809	if (UseGOT) {
3810	// Mark the load instruction as invariant to enable hoisting in MachineLICM.
3811	MachineFunction &MF = DAG.getMachineFunction();
3812	MachineMemOperand *MemOp = MF.getMachineMemOperand(
3813	PtrInfo: MachinePointerInfo::getGOT(MF),
3814	f: MachineMemOperand::MOLoad \| MachineMemOperand::MODereferenceable \|
3815	MachineMemOperand::MOInvariant,
3816	MemTy: LLT (Ty.getSimpleVT()), base_alignment: Align (Ty.getFixedSizeInBits() / `8`));
3817	DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Offset.getNode()), NewMemRefs: {MemOp});
3818	}
3819
3820	// Add the thread pointer.
3821	return DAG.getNode(Opcode: ISD::ADD, DL, VT: Ty, N1: Offset,
3822	N2: DAG.getRegister(Reg: LoongArch::R2, VT: GRLenVT));
3823	}
3824
3825	SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
3826	SelectionDAG &DAG,
3827	unsigned Opc,
3828	bool Large) const {
3829	SDLoc DL(N);
3830	EVT Ty = getPointerTy(DL: DAG.getDataLayout());
3831	IntegerType CallTy = Type::getIntNTy(C&: DAG.getContext(), N: Ty.getSizeInBits());
3832
3833	// This is not actually used, but is necessary for successfully matching the
3834	// PseudoLA__LARGE nodes.*
3835	SDValue Tmp = DAG.getConstant(Val: `0`, DL, VT: Ty);
3836
3837	// Use a PC-relative addressing mode to access the dynamic GOT address.
3838	SDValue Addr = DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: `0`, TargetFlags: `0`);
3839	SDValue Load = Large ? SDValue (DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), `0`)
3840	: SDValue (DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Addr), `0`);
3841
3842	// Prepare argument list to generate call.
3843	ArgListTy Args;
3844	Args.emplace_back(args&: Load, args&: CallTy);
3845
3846	// Setup call to __tls_get_addr.
3847	TargetLowering::CallLoweringInfo CLI(DAG);
3848	CLI.setDebugLoc(DL)
3849	.setChain(DAG.getEntryNode())
3850	.setLibCallee(CC: CallingConv::C, ResultType: CallTy,
3851	Target: DAG.getExternalSymbol(Sym: "__tls_get_addr", VT: Ty),
3852	ArgsList: std::move(Args));
3853
3854	return LowerCallTo(CLI).first;
3855	}
3856
3857	SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
3858	SelectionDAG &DAG, unsigned Opc,
3859	bool Large) const {
3860	SDLoc DL(N);
3861	EVT Ty = getPointerTy(DL: DAG.getDataLayout());
3862	const GlobalValue *GV = N->getGlobal();
3863
3864	// This is not actually used, but is necessary for successfully matching the
3865	// PseudoLA__LARGE nodes.*
3866	SDValue Tmp = DAG.getConstant(Val: `0`, DL, VT: Ty);
3867
3868	// Use a PC-relative addressing mode to access the global dynamic GOT address.
3869	// This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
3870	SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: `0`, TargetFlags: `0`);
3871	return Large ? SDValue (DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), `0`)
3872	: SDValue (DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Addr), `0`);
3873	}
3874
3875	SDValue
3876	LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
3877	SelectionDAG &DAG) const {
3878	if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3879	CallingConv::GHC)
3880	report_fatal_error(reason: "In GHC calling convention TLS is not supported");
3881
3882	bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
3883	assert((!Large \|\| Subtarget.is64Bit()) && "Large code model requires LA64");
3884
3885	GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Val&: Op);
3886	assert(N->getOffset() == `0` && "unexpected offset in global node");
3887
3888	if (DAG.getTarget().useEmulatedTLS())
3889	reportFatalUsageError(reason: "the emulated TLS is prohibited");
3890
3891	bool IsDesc = DAG.getTarget().useTLSDESC();
3892
3893	switch (getTargetMachine().getTLSModel(GV: N->getGlobal())) {
3894	case TLSModel::GeneralDynamic:
3895	// In this model, application code calls the dynamic linker function
3896	// __tls_get_addr to locate TLS offsets into the dynamic thread vector at
3897	// runtime.
3898	if (!IsDesc)
3899	return getDynamicTLSAddr(N, DAG,
3900	Opc: Large ? LoongArch::PseudoLA_TLS_GD_LARGE
3901	: LoongArch::PseudoLA_TLS_GD,
3902	Large);
3903	break;
3904	case TLSModel::LocalDynamic:
3905	// Same as GeneralDynamic, except for assembly modifiers and relocation
3906	// records.
3907	if (!IsDesc)
3908	return getDynamicTLSAddr(N, DAG,
3909	Opc: Large ? LoongArch::PseudoLA_TLS_LD_LARGE
3910	: LoongArch::PseudoLA_TLS_LD,
3911	Large);
3912	break;
3913	case TLSModel::InitialExec:
3914	// This model uses the GOT to resolve TLS offsets.
3915	return getStaticTLSAddr(N, DAG,
3916	Opc: Large ? LoongArch::PseudoLA_TLS_IE_LARGE
3917	: LoongArch::PseudoLA_TLS_IE,
3918	/UseGOT=/true, Large);
3919	case TLSModel::LocalExec:
3920	// This model is used when static linking as the TLS offsets are resolved
3921	// during program linking.
3922	//
3923	// This node doesn't need an extra argument for the large code model.
3924	return getStaticTLSAddr(N, DAG, Opc: LoongArch::PseudoLA_TLS_LE,
3925	/UseGOT=/false, Large);
3926	}
3927
3928	return getTLSDescAddr(N, DAG,
3929	Opc: Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
3930	: LoongArch::PseudoLA_TLS_DESC,
3931	Large);
3932	}
3933
3934	template <unsigned N>
3935	static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
3936	SelectionDAG &DAG, bool IsSigned = false) {
3937	auto *CImm = cast<ConstantSDNode>(Val: Op ->getOperand(Num: ImmOp));
3938	// Check the ImmArg.
3939	if ((IsSigned && !isInt<N>(CImm->getSExtValue())) \|\|
3940	(!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3941	DAG.getContext()->emitError(ErrorStr: Op ->getOperationName(G: `0`) +
3942	": argument out of range.");
3943	return DAG.getNode(Opcode: ISD::UNDEF, DL: SDLoc (Op), VT: Op.getValueType());
3944	}
3945	return SDValue ();
3946	}
3947
3948	SDValue
3949	LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
3950	SelectionDAG &DAG) const {
3951	switch (Op.getConstantOperandVal(i: `0`)) {
3952	default:
3953	return SDValue (); // Don't custom lower most intrinsics.
3954	case Intrinsic::thread_pointer: {
3955	EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
3956	return DAG.getRegister(Reg: LoongArch::R2, VT: PtrVT);
3957	}
3958	case Intrinsic::loongarch_lsx_vpickve2gr_d:
3959	case Intrinsic::loongarch_lsx_vpickve2gr_du:
3960	case Intrinsic::loongarch_lsx_vreplvei_d:
3961	case Intrinsic::loongarch_lasx_xvrepl128vei_d:
3962	return checkIntrinsicImmArg<`1`>(Op, ImmOp: `2`, DAG);
3963	case Intrinsic::loongarch_lsx_vreplvei_w:
3964	case Intrinsic::loongarch_lasx_xvrepl128vei_w:
3965	case Intrinsic::loongarch_lasx_xvpickve2gr_d:
3966	case Intrinsic::loongarch_lasx_xvpickve2gr_du:
3967	case Intrinsic::loongarch_lasx_xvpickve_d:
3968	case Intrinsic::loongarch_lasx_xvpickve_d_f:
3969	return checkIntrinsicImmArg<`2`>(Op, ImmOp: `2`, DAG);
3970	case Intrinsic::loongarch_lasx_xvinsve0_d:
3971	return checkIntrinsicImmArg<`2`>(Op, ImmOp: `3`, DAG);
3972	case Intrinsic::loongarch_lsx_vsat_b:
3973	case Intrinsic::loongarch_lsx_vsat_bu:
3974	case Intrinsic::loongarch_lsx_vrotri_b:
3975	case Intrinsic::loongarch_lsx_vsllwil_h_b:
3976	case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
3977	case Intrinsic::loongarch_lsx_vsrlri_b:
3978	case Intrinsic::loongarch_lsx_vsrari_b:
3979	case Intrinsic::loongarch_lsx_vreplvei_h:
3980	case Intrinsic::loongarch_lasx_xvsat_b:
3981	case Intrinsic::loongarch_lasx_xvsat_bu:
3982	case Intrinsic::loongarch_lasx_xvrotri_b:
3983	case Intrinsic::loongarch_lasx_xvsllwil_h_b:
3984	case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
3985	case Intrinsic::loongarch_lasx_xvsrlri_b:
3986	case Intrinsic::loongarch_lasx_xvsrari_b:
3987	case Intrinsic::loongarch_lasx_xvrepl128vei_h:
3988	case Intrinsic::loongarch_lasx_xvpickve_w:
3989	case Intrinsic::loongarch_lasx_xvpickve_w_f:
3990	return checkIntrinsicImmArg<`3`>(Op, ImmOp: `2`, DAG);
3991	case Intrinsic::loongarch_lasx_xvinsve0_w:
3992	return checkIntrinsicImmArg<`3`>(Op, ImmOp: `3`, DAG);
3993	case Intrinsic::loongarch_lsx_vsat_h:
3994	case Intrinsic::loongarch_lsx_vsat_hu:
3995	case Intrinsic::loongarch_lsx_vrotri_h:
3996	case Intrinsic::loongarch_lsx_vsllwil_w_h:
3997	case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
3998	case Intrinsic::loongarch_lsx_vsrlri_h:
3999	case Intrinsic::loongarch_lsx_vsrari_h:
4000	case Intrinsic::loongarch_lsx_vreplvei_b:
4001	case Intrinsic::loongarch_lasx_xvsat_h:
4002	case Intrinsic::loongarch_lasx_xvsat_hu:
4003	case Intrinsic::loongarch_lasx_xvrotri_h:
4004	case Intrinsic::loongarch_lasx_xvsllwil_w_h:
4005	case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
4006	case Intrinsic::loongarch_lasx_xvsrlri_h:
4007	case Intrinsic::loongarch_lasx_xvsrari_h:
4008	case Intrinsic::loongarch_lasx_xvrepl128vei_b:
4009	return checkIntrinsicImmArg<`4`>(Op, ImmOp: `2`, DAG);
4010	case Intrinsic::loongarch_lsx_vsrlni_b_h:
4011	case Intrinsic::loongarch_lsx_vsrani_b_h:
4012	case Intrinsic::loongarch_lsx_vsrlrni_b_h:
4013	case Intrinsic::loongarch_lsx_vsrarni_b_h:
4014	case Intrinsic::loongarch_lsx_vssrlni_b_h:
4015	case Intrinsic::loongarch_lsx_vssrani_b_h:
4016	case Intrinsic::loongarch_lsx_vssrlni_bu_h:
4017	case Intrinsic::loongarch_lsx_vssrani_bu_h:
4018	case Intrinsic::loongarch_lsx_vssrlrni_b_h:
4019	case Intrinsic::loongarch_lsx_vssrarni_b_h:
4020	case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
4021	case Intrinsic::loongarch_lsx_vssrarni_bu_h:
4022	case Intrinsic::loongarch_lasx_xvsrlni_b_h:
4023	case Intrinsic::loongarch_lasx_xvsrani_b_h:
4024	case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
4025	case Intrinsic::loongarch_lasx_xvsrarni_b_h:
4026	case Intrinsic::loongarch_lasx_xvssrlni_b_h:
4027	case Intrinsic::loongarch_lasx_xvssrani_b_h:
4028	case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
4029	case Intrinsic::loongarch_lasx_xvssrani_bu_h:
4030	case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
4031	case Intrinsic::loongarch_lasx_xvssrarni_b_h:
4032	case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
4033	case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
4034	return checkIntrinsicImmArg<`4`>(Op, ImmOp: `3`, DAG);
4035	case Intrinsic::loongarch_lsx_vsat_w:
4036	case Intrinsic::loongarch_lsx_vsat_wu:
4037	case Intrinsic::loongarch_lsx_vrotri_w:
4038	case Intrinsic::loongarch_lsx_vsllwil_d_w:
4039	case Intrinsic::loongarch_lsx_vsllwil_du_wu:
4040	case Intrinsic::loongarch_lsx_vsrlri_w:
4041	case Intrinsic::loongarch_lsx_vsrari_w:
4042	case Intrinsic::loongarch_lsx_vslei_bu:
4043	case Intrinsic::loongarch_lsx_vslei_hu:
4044	case Intrinsic::loongarch_lsx_vslei_wu:
4045	case Intrinsic::loongarch_lsx_vslei_du:
4046	case Intrinsic::loongarch_lsx_vslti_bu:
4047	case Intrinsic::loongarch_lsx_vslti_hu:
4048	case Intrinsic::loongarch_lsx_vslti_wu:
4049	case Intrinsic::loongarch_lsx_vslti_du:
4050	case Intrinsic::loongarch_lsx_vbsll_v:
4051	case Intrinsic::loongarch_lsx_vbsrl_v:
4052	case Intrinsic::loongarch_lasx_xvsat_w:
4053	case Intrinsic::loongarch_lasx_xvsat_wu:
4054	case Intrinsic::loongarch_lasx_xvrotri_w:
4055	case Intrinsic::loongarch_lasx_xvsllwil_d_w:
4056	case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
4057	case Intrinsic::loongarch_lasx_xvsrlri_w:
4058	case Intrinsic::loongarch_lasx_xvsrari_w:
4059	case Intrinsic::loongarch_lasx_xvslei_bu:
4060	case Intrinsic::loongarch_lasx_xvslei_hu:
4061	case Intrinsic::loongarch_lasx_xvslei_wu:
4062	case Intrinsic::loongarch_lasx_xvslei_du:
4063	case Intrinsic::loongarch_lasx_xvslti_bu:
4064	case Intrinsic::loongarch_lasx_xvslti_hu:
4065	case Intrinsic::loongarch_lasx_xvslti_wu:
4066	case Intrinsic::loongarch_lasx_xvslti_du:
4067	case Intrinsic::loongarch_lasx_xvbsll_v:
4068	case Intrinsic::loongarch_lasx_xvbsrl_v:
4069	return checkIntrinsicImmArg<`5`>(Op, ImmOp: `2`, DAG);
4070	case Intrinsic::loongarch_lsx_vseqi_b:
4071	case Intrinsic::loongarch_lsx_vseqi_h:
4072	case Intrinsic::loongarch_lsx_vseqi_w:
4073	case Intrinsic::loongarch_lsx_vseqi_d:
4074	case Intrinsic::loongarch_lsx_vslei_b:
4075	case Intrinsic::loongarch_lsx_vslei_h:
4076	case Intrinsic::loongarch_lsx_vslei_w:
4077	case Intrinsic::loongarch_lsx_vslei_d:
4078	case Intrinsic::loongarch_lsx_vslti_b:
4079	case Intrinsic::loongarch_lsx_vslti_h:
4080	case Intrinsic::loongarch_lsx_vslti_w:
4081	case Intrinsic::loongarch_lsx_vslti_d:
4082	case Intrinsic::loongarch_lasx_xvseqi_b:
4083	case Intrinsic::loongarch_lasx_xvseqi_h:
4084	case Intrinsic::loongarch_lasx_xvseqi_w:
4085	case Intrinsic::loongarch_lasx_xvseqi_d:
4086	case Intrinsic::loongarch_lasx_xvslei_b:
4087	case Intrinsic::loongarch_lasx_xvslei_h:
4088	case Intrinsic::loongarch_lasx_xvslei_w:
4089	case Intrinsic::loongarch_lasx_xvslei_d:
4090	case Intrinsic::loongarch_lasx_xvslti_b:
4091	case Intrinsic::loongarch_lasx_xvslti_h:
4092	case Intrinsic::loongarch_lasx_xvslti_w:
4093	case Intrinsic::loongarch_lasx_xvslti_d:
4094	return checkIntrinsicImmArg<`5`>(Op, ImmOp: `2`, DAG, /IsSigned=/true);
4095	case Intrinsic::loongarch_lsx_vsrlni_h_w:
4096	case Intrinsic::loongarch_lsx_vsrani_h_w:
4097	case Intrinsic::loongarch_lsx_vsrlrni_h_w:
4098	case Intrinsic::loongarch_lsx_vsrarni_h_w:
4099	case Intrinsic::loongarch_lsx_vssrlni_h_w:
4100	case Intrinsic::loongarch_lsx_vssrani_h_w:
4101	case Intrinsic::loongarch_lsx_vssrlni_hu_w:
4102	case Intrinsic::loongarch_lsx_vssrani_hu_w:
4103	case Intrinsic::loongarch_lsx_vssrlrni_h_w:
4104	case Intrinsic::loongarch_lsx_vssrarni_h_w:
4105	case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
4106	case Intrinsic::loongarch_lsx_vssrarni_hu_w:
4107	case Intrinsic::loongarch_lsx_vfrstpi_b:
4108	case Intrinsic::loongarch_lsx_vfrstpi_h:
4109	case Intrinsic::loongarch_lasx_xvsrlni_h_w:
4110	case Intrinsic::loongarch_lasx_xvsrani_h_w:
4111	case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
4112	case Intrinsic::loongarch_lasx_xvsrarni_h_w:
4113	case Intrinsic::loongarch_lasx_xvssrlni_h_w:
4114	case Intrinsic::loongarch_lasx_xvssrani_h_w:
4115	case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
4116	case Intrinsic::loongarch_lasx_xvssrani_hu_w:
4117	case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
4118	case Intrinsic::loongarch_lasx_xvssrarni_h_w:
4119	case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
4120	case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
4121	case Intrinsic::loongarch_lasx_xvfrstpi_b:
4122	case Intrinsic::loongarch_lasx_xvfrstpi_h:
4123	return checkIntrinsicImmArg<`5`>(Op, ImmOp: `3`, DAG);
4124	case Intrinsic::loongarch_lsx_vsat_d:
4125	case Intrinsic::loongarch_lsx_vsat_du:
4126	case Intrinsic::loongarch_lsx_vrotri_d:
4127	case Intrinsic::loongarch_lsx_vsrlri_d:
4128	case Intrinsic::loongarch_lsx_vsrari_d:
4129	case Intrinsic::loongarch_lasx_xvsat_d:
4130	case Intrinsic::loongarch_lasx_xvsat_du:
4131	case Intrinsic::loongarch_lasx_xvrotri_d:
4132	case Intrinsic::loongarch_lasx_xvsrlri_d:
4133	case Intrinsic::loongarch_lasx_xvsrari_d:
4134	return checkIntrinsicImmArg<`6`>(Op, ImmOp: `2`, DAG);
4135	case Intrinsic::loongarch_lsx_vsrlni_w_d:
4136	case Intrinsic::loongarch_lsx_vsrani_w_d:
4137	case Intrinsic::loongarch_lsx_vsrlrni_w_d:
4138	case Intrinsic::loongarch_lsx_vsrarni_w_d:
4139	case Intrinsic::loongarch_lsx_vssrlni_w_d:
4140	case Intrinsic::loongarch_lsx_vssrani_w_d:
4141	case Intrinsic::loongarch_lsx_vssrlni_wu_d:
4142	case Intrinsic::loongarch_lsx_vssrani_wu_d:
4143	case Intrinsic::loongarch_lsx_vssrlrni_w_d:
4144	case Intrinsic::loongarch_lsx_vssrarni_w_d:
4145	case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
4146	case Intrinsic::loongarch_lsx_vssrarni_wu_d:
4147	case Intrinsic::loongarch_lasx_xvsrlni_w_d:
4148	case Intrinsic::loongarch_lasx_xvsrani_w_d:
4149	case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
4150	case Intrinsic::loongarch_lasx_xvsrarni_w_d:
4151	case Intrinsic::loongarch_lasx_xvssrlni_w_d:
4152	case Intrinsic::loongarch_lasx_xvssrani_w_d:
4153	case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
4154	case Intrinsic::loongarch_lasx_xvssrani_wu_d:
4155	case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
4156	case Intrinsic::loongarch_lasx_xvssrarni_w_d:
4157	case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
4158	case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
4159	return checkIntrinsicImmArg<`6`>(Op, ImmOp: `3`, DAG);
4160	case Intrinsic::loongarch_lsx_vsrlni_d_q:
4161	case Intrinsic::loongarch_lsx_vsrani_d_q:
4162	case Intrinsic::loongarch_lsx_vsrlrni_d_q:
4163	case Intrinsic::loongarch_lsx_vsrarni_d_q:
4164	case Intrinsic::loongarch_lsx_vssrlni_d_q:
4165	case Intrinsic::loongarch_lsx_vssrani_d_q:
4166	case Intrinsic::loongarch_lsx_vssrlni_du_q:
4167	case Intrinsic::loongarch_lsx_vssrani_du_q:
4168	case Intrinsic::loongarch_lsx_vssrlrni_d_q:
4169	case Intrinsic::loongarch_lsx_vssrarni_d_q:
4170	case Intrinsic::loongarch_lsx_vssrlrni_du_q:
4171	case Intrinsic::loongarch_lsx_vssrarni_du_q:
4172	case Intrinsic::loongarch_lasx_xvsrlni_d_q:
4173	case Intrinsic::loongarch_lasx_xvsrani_d_q:
4174	case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
4175	case Intrinsic::loongarch_lasx_xvsrarni_d_q:
4176	case Intrinsic::loongarch_lasx_xvssrlni_d_q:
4177	case Intrinsic::loongarch_lasx_xvssrani_d_q:
4178	case Intrinsic::loongarch_lasx_xvssrlni_du_q:
4179	case Intrinsic::loongarch_lasx_xvssrani_du_q:
4180	case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
4181	case Intrinsic::loongarch_lasx_xvssrarni_d_q:
4182	case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
4183	case Intrinsic::loongarch_lasx_xvssrarni_du_q:
4184	return checkIntrinsicImmArg<`7`>(Op, ImmOp: `3`, DAG);
4185	case Intrinsic::loongarch_lsx_vnori_b:
4186	case Intrinsic::loongarch_lsx_vshuf4i_b:
4187	case Intrinsic::loongarch_lsx_vshuf4i_h:
4188	case Intrinsic::loongarch_lsx_vshuf4i_w:
4189	case Intrinsic::loongarch_lasx_xvnori_b:
4190	case Intrinsic::loongarch_lasx_xvshuf4i_b:
4191	case Intrinsic::loongarch_lasx_xvshuf4i_h:
4192	case Intrinsic::loongarch_lasx_xvshuf4i_w:
4193	case Intrinsic::loongarch_lasx_xvpermi_d:
4194	return checkIntrinsicImmArg<`8`>(Op, ImmOp: `2`, DAG);
4195	case Intrinsic::loongarch_lsx_vshuf4i_d:
4196	case Intrinsic::loongarch_lsx_vpermi_w:
4197	case Intrinsic::loongarch_lsx_vbitseli_b:
4198	case Intrinsic::loongarch_lsx_vextrins_b:
4199	case Intrinsic::loongarch_lsx_vextrins_h:
4200	case Intrinsic::loongarch_lsx_vextrins_w:
4201	case Intrinsic::loongarch_lsx_vextrins_d:
4202	case Intrinsic::loongarch_lasx_xvshuf4i_d:
4203	case Intrinsic::loongarch_lasx_xvpermi_w:
4204	case Intrinsic::loongarch_lasx_xvpermi_q:
4205	case Intrinsic::loongarch_lasx_xvbitseli_b:
4206	case Intrinsic::loongarch_lasx_xvextrins_b:
4207	case Intrinsic::loongarch_lasx_xvextrins_h:
4208	case Intrinsic::loongarch_lasx_xvextrins_w:
4209	case Intrinsic::loongarch_lasx_xvextrins_d:
4210	return checkIntrinsicImmArg<`8`>(Op, ImmOp: `3`, DAG);
4211	case Intrinsic::loongarch_lsx_vrepli_b:
4212	case Intrinsic::loongarch_lsx_vrepli_h:
4213	case Intrinsic::loongarch_lsx_vrepli_w:
4214	case Intrinsic::loongarch_lsx_vrepli_d:
4215	case Intrinsic::loongarch_lasx_xvrepli_b:
4216	case Intrinsic::loongarch_lasx_xvrepli_h:
4217	case Intrinsic::loongarch_lasx_xvrepli_w:
4218	case Intrinsic::loongarch_lasx_xvrepli_d:
4219	return checkIntrinsicImmArg<`10`>(Op, ImmOp: `1`, DAG, /IsSigned=/true);
4220	case Intrinsic::loongarch_lsx_vldi:
4221	case Intrinsic::loongarch_lasx_xvldi:
4222	return checkIntrinsicImmArg<`13`>(Op, ImmOp: `1`, DAG, /IsSigned=/true);
4223	}
4224	}
4225
4226	// Helper function that emits error message for intrinsics with chain and return
4227	// merge values of a UNDEF and the chain.
4228	static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
4229	StringRef ErrorMsg,
4230	SelectionDAG &DAG) {
4231	DAG.getContext()->emitError(ErrorStr: Op ->getOperationName(G: `0`) + ": " + ErrorMsg + ".");
4232	return DAG.getMergeValues(Ops: {DAG.getUNDEF(VT: Op.getValueType()), Op.getOperand(i: `0`)},
4233	dl: SDLoc (Op));
4234	}
4235
4236	SDValue
4237	LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4238	SelectionDAG &DAG) const {
4239	SDLoc DL(Op);
4240	MVT GRLenVT = Subtarget.getGRLenVT();
4241	EVT VT = Op.getValueType();
4242	SDValue Chain = Op.getOperand(i: `0`);
4243	const StringRef ErrorMsgOOR = "argument out of range";
4244	const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4245	const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4246
4247	switch (Op.getConstantOperandVal(i: `1`)) {
4248	default:
4249	return Op;
4250	case Intrinsic::loongarch_crc_w_b_w:
4251	case Intrinsic::loongarch_crc_w_h_w:
4252	case Intrinsic::loongarch_crc_w_w_w:
4253	case Intrinsic::loongarch_crc_w_d_w:
4254	case Intrinsic::loongarch_crcc_w_b_w:
4255	case Intrinsic::loongarch_crcc_w_h_w:
4256	case Intrinsic::loongarch_crcc_w_w_w:
4257	case Intrinsic::loongarch_crcc_w_d_w:
4258	return emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG);
4259	case Intrinsic::loongarch_csrrd_w:
4260	case Intrinsic::loongarch_csrrd_d: {
4261	unsigned Imm = Op.getConstantOperandVal(i: `2`);
4262	return !isUInt<`14`>(x: Imm)
4263	? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
4264	: DAG.getNode(Opcode: LoongArchISD::CSRRD, DL, ResultTys: {GRLenVT, MVT::Other},
4265	Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
4266	}
4267	case Intrinsic::loongarch_csrwr_w:
4268	case Intrinsic::loongarch_csrwr_d: {
4269	unsigned Imm = Op.getConstantOperandVal(i: `3`);
4270	return !isUInt<`14`>(x: Imm)
4271	? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
4272	: DAG.getNode(Opcode: LoongArchISD::CSRWR, DL, ResultTys: {GRLenVT, MVT::Other},
4273	Ops: {Chain, Op.getOperand(i: `2`),
4274	DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
4275	}
4276	case Intrinsic::loongarch_csrxchg_w:
4277	case Intrinsic::loongarch_csrxchg_d: {
4278	unsigned Imm = Op.getConstantOperandVal(i: `4`);
4279	return !isUInt<`14`>(x: Imm)
4280	? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
4281	: DAG.getNode(Opcode: LoongArchISD::CSRXCHG, DL, ResultTys: {GRLenVT, MVT::Other},
4282	Ops: {Chain, Op.getOperand(i: `2`), Op.getOperand(i: `3`),
4283	DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
4284	}
4285	case Intrinsic::loongarch_iocsrrd_d: {
4286	return DAG.getNode(
4287	Opcode: LoongArchISD::IOCSRRD_D, DL, ResultTys: {GRLenVT, MVT::Other},
4288	Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op.getOperand(i: `2`))});
4289	}
4290	#define IOCSRRD_CASE(NAME, NODE) \
4291	case Intrinsic::loongarch_##NAME: { \
4292	return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
4293	{Chain, Op.getOperand(2)}); \
4294	}
4295	IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4296	IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4297	IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4298	#undef IOCSRRD_CASE
4299	case Intrinsic::loongarch_cpucfg: {
4300	return DAG.getNode(Opcode: LoongArchISD::CPUCFG, DL, ResultTys: {GRLenVT, MVT::Other},
4301	Ops: {Chain, Op.getOperand(i: `2`)});
4302	}
4303	case Intrinsic::loongarch_lddir_d: {
4304	unsigned Imm = Op.getConstantOperandVal(i: `3`);
4305	return !isUInt<`8`>(x: Imm)
4306	? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
4307	: Op;
4308	}
4309	case Intrinsic::loongarch_movfcsr2gr: {
4310	if (!Subtarget.hasBasicF())
4311	return emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgReqF, DAG);
4312	unsigned Imm = Op.getConstantOperandVal(i: `2`);
4313	return !isUInt<`2`>(x: Imm)
4314	? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
4315	: DAG.getNode(Opcode: LoongArchISD::MOVFCSR2GR, DL, ResultTys: {VT, MVT::Other},
4316	Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
4317	}
4318	case Intrinsic::loongarch_lsx_vld:
4319	case Intrinsic::loongarch_lsx_vldrepl_b:
4320	case Intrinsic::loongarch_lasx_xvld:
4321	case Intrinsic::loongarch_lasx_xvldrepl_b:
4322	return !isInt<`12`>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: `3`))->getSExtValue())
4323	? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
4324	: SDValue ();
4325	case Intrinsic::loongarch_lsx_vldrepl_h:
4326	case Intrinsic::loongarch_lasx_xvldrepl_h:
4327	return !isShiftedInt<`11`, `1`>(
4328	x: cast<ConstantSDNode>(Val: Op.getOperand(i: `3`))->getSExtValue())
4329	? emitIntrinsicWithChainErrorMessage(
4330	Op, ErrorMsg: "argument out of range or not a multiple of 2", DAG)
4331	: SDValue ();
4332	case Intrinsic::loongarch_lsx_vldrepl_w:
4333	case Intrinsic::loongarch_lasx_xvldrepl_w:
4334	return !isShiftedInt<`10`, `2`>(
4335	x: cast<ConstantSDNode>(Val: Op.getOperand(i: `3`))->getSExtValue())
4336	? emitIntrinsicWithChainErrorMessage(
4337	Op, ErrorMsg: "argument out of range or not a multiple of 4", DAG)
4338	: SDValue ();
4339	case Intrinsic::loongarch_lsx_vldrepl_d:
4340	case Intrinsic::loongarch_lasx_xvldrepl_d:
4341	return !isShiftedInt<`9`, `3`>(
4342	x: cast<ConstantSDNode>(Val: Op.getOperand(i: `3`))->getSExtValue())
4343	? emitIntrinsicWithChainErrorMessage(
4344	Op, ErrorMsg: "argument out of range or not a multiple of 8", DAG)
4345	: SDValue ();
4346	}
4347	}
4348
4349	// Helper function that emits error message for intrinsics with void return
4350	// value and return the chain.
4351	static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
4352	SelectionDAG &DAG) {
4353
4354	DAG.getContext()->emitError(ErrorStr: Op ->getOperationName(G: `0`) + ": " + ErrorMsg + ".");
4355	return Op.getOperand(i: `0`);
4356	}
4357
4358	SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
4359	SelectionDAG &DAG) const {
4360	SDLoc DL(Op);
4361	MVT GRLenVT = Subtarget.getGRLenVT();
4362	SDValue Chain = Op.getOperand(i: `0`);
4363	uint64_t IntrinsicEnum = Op.getConstantOperandVal(i: `1`);
4364	SDValue Op2 = Op.getOperand(i: `2`);
4365	const StringRef ErrorMsgOOR = "argument out of range";
4366	const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4367	const StringRef ErrorMsgReqLA32 = "requires loongarch32";
4368	const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4369
4370	switch (IntrinsicEnum) {
4371	default:
4372	// TODO: Add more Intrinsics.
4373	return SDValue ();
4374	case Intrinsic::loongarch_cacop_d:
4375	case Intrinsic::loongarch_cacop_w: {
4376	if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
4377	return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG);
4378	if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
4379	return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA32, DAG);
4380	// call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
4381	unsigned Imm1 = Op2 ->getAsZExtVal();
4382	int Imm2 = cast<ConstantSDNode>(Val: Op.getOperand(i: `4`))->getSExtValue();
4383	if (!isUInt<`5`>(x: Imm1) \|\| !isInt<`12`>(x: Imm2))
4384	return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG);
4385	return Op;
4386	}
4387	case Intrinsic::loongarch_dbar: {
4388	unsigned Imm = Op2 ->getAsZExtVal();
4389	return !isUInt<`15`>(x: Imm)
4390	? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
4391	: DAG.getNode(Opcode: LoongArchISD::DBAR, DL, VT: MVT::Other, N1: Chain,
4392	N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT));
4393	}
4394	case Intrinsic::loongarch_ibar: {
4395	unsigned Imm = Op2 ->getAsZExtVal();
4396	return !isUInt<`15`>(x: Imm)
4397	? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
4398	: DAG.getNode(Opcode: LoongArchISD::IBAR, DL, VT: MVT::Other, N1: Chain,
4399	N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT));
4400	}
4401	case Intrinsic::loongarch_break: {
4402	unsigned Imm = Op2 ->getAsZExtVal();
4403	return !isUInt<`15`>(x: Imm)
4404	? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
4405	: DAG.getNode(Opcode: LoongArchISD::BREAK, DL, VT: MVT::Other, N1: Chain,
4406	N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT));
4407	}
4408	case Intrinsic::loongarch_movgr2fcsr: {
4409	if (!Subtarget.hasBasicF())
4410	return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqF, DAG);
4411	unsigned Imm = Op2 ->getAsZExtVal();
4412	return !isUInt<`2`>(x: Imm)
4413	? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
4414	: DAG.getNode(Opcode: LoongArchISD::MOVGR2FCSR, DL, VT: MVT::Other, N1: Chain,
4415	N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT),
4416	N3: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: GRLenVT,
4417	Operand: Op.getOperand(i: `3`)));
4418	}
4419	case Intrinsic::loongarch_syscall: {
4420	unsigned Imm = Op2 ->getAsZExtVal();
4421	return !isUInt<`15`>(x: Imm)
4422	? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
4423	: DAG.getNode(Opcode: LoongArchISD::SYSCALL, DL, VT: MVT::Other, N1: Chain,
4424	N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT));
4425	}
4426	#define IOCSRWR_CASE(NAME, NODE) \
4427	case Intrinsic::loongarch_##NAME: { \
4428	SDValue Op3 = Op.getOperand(3); \
4429	return Subtarget.is64Bit() \
4430	? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
4431	DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4432	DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
4433	: DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
4434	Op3); \
4435	}
4436	IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
4437	IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
4438	IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
4439	#undef IOCSRWR_CASE
4440	case Intrinsic::loongarch_iocsrwr_d: {
4441	return !Subtarget.is64Bit()
4442	? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG)
4443	: DAG.getNode(Opcode: LoongArchISD::IOCSRWR_D, DL, VT: MVT::Other, N1: Chain,
4444	N2: Op2,
4445	N3: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64,
4446	Operand: Op.getOperand(i: `3`)));
4447	}
4448	#define ASRT_LE_GT_CASE(NAME) \
4449	case Intrinsic::loongarch_##NAME: { \
4450	return !Subtarget.is64Bit() \
4451	? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
4452	: Op; \
4453	}
4454	ASRT_LE_GT_CASE(asrtle_d)
4455	ASRT_LE_GT_CASE(asrtgt_d)
4456	#undef ASRT_LE_GT_CASE
4457	case Intrinsic::loongarch_ldpte_d: {
4458	unsigned Imm = Op.getConstantOperandVal(i: `3`);
4459	return !Subtarget.is64Bit()
4460	? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG)
4461	: !isUInt<`8`>(x: Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
4462	: Op;
4463	}
4464	case Intrinsic::loongarch_lsx_vst:
4465	case Intrinsic::loongarch_lasx_xvst:
4466	return !isInt<`12`>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: `4`))->getSExtValue())
4467	? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
4468	: SDValue ();
4469	case Intrinsic::loongarch_lasx_xvstelm_b:
4470	return (!isInt<`8`>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: `4`))->getSExtValue()) \|\|
4471	!isUInt<`5`>(x: Op.getConstantOperandVal(i: `5`)))
4472	? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
4473	: SDValue ();
4474	case Intrinsic::loongarch_lsx_vstelm_b:
4475	return (!isInt<`8`>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: `4`))->getSExtValue()) \|\|
4476	!isUInt<`4`>(x: Op.getConstantOperandVal(i: `5`)))
4477	? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
4478	: SDValue ();
4479	case Intrinsic::loongarch_lasx_xvstelm_h:
4480	return (!isShiftedInt<`8`, `1`>(
4481	x: cast<ConstantSDNode>(Val: Op.getOperand(i: `4`))->getSExtValue()) \|\|
4482	!isUInt<`4`>(x: Op.getConstantOperandVal(i: `5`)))
4483	? emitIntrinsicErrorMessage(
4484	Op, ErrorMsg: "argument out of range or not a multiple of 2", DAG)
4485	: SDValue ();
4486	case Intrinsic::loongarch_lsx_vstelm_h:
4487	return (!isShiftedInt<`8`, `1`>(
4488	x: cast<ConstantSDNode>(Val: Op.getOperand(i: `4`))->getSExtValue()) \|\|
4489	!isUInt<`3`>(x: Op.getConstantOperandVal(i: `5`)))
4490	? emitIntrinsicErrorMessage(
4491	Op, ErrorMsg: "argument out of range or not a multiple of 2", DAG)
4492	: SDValue ();
4493	case Intrinsic::loongarch_lasx_xvstelm_w:
4494	return (!isShiftedInt<`8`, `2`>(
4495	x: cast<ConstantSDNode>(Val: Op.getOperand(i: `4`))->getSExtValue()) \|\|
4496	!isUInt<`3`>(x: Op.getConstantOperandVal(i: `5`)))
4497	? emitIntrinsicErrorMessage(
4498	Op, ErrorMsg: "argument out of range or not a multiple of 4", DAG)
4499	: SDValue ();
4500	case Intrinsic::loongarch_lsx_vstelm_w:
4501	return (!isShiftedInt<`8`, `2`>(
4502	x: cast<ConstantSDNode>(Val: Op.getOperand(i: `4`))->getSExtValue()) \|\|
4503	!isUInt<`2`>(x: Op.getConstantOperandVal(i: `5`)))
4504	? emitIntrinsicErrorMessage(
4505	Op, ErrorMsg: "argument out of range or not a multiple of 4", DAG)
4506	: SDValue ();
4507	case Intrinsic::loongarch_lasx_xvstelm_d:
4508	return (!isShiftedInt<`8`, `3`>(
4509	x: cast<ConstantSDNode>(Val: Op.getOperand(i: `4`))->getSExtValue()) \|\|
4510	!isUInt<`2`>(x: Op.getConstantOperandVal(i: `5`)))
4511	? emitIntrinsicErrorMessage(
4512	Op, ErrorMsg: "argument out of range or not a multiple of 8", DAG)
4513	: SDValue ();
4514	case Intrinsic::loongarch_lsx_vstelm_d:
4515	return (!isShiftedInt<`8`, `3`>(
4516	x: cast<ConstantSDNode>(Val: Op.getOperand(i: `4`))->getSExtValue()) \|\|
4517	!isUInt<`1`>(x: Op.getConstantOperandVal(i: `5`)))
4518	? emitIntrinsicErrorMessage(
4519	Op, ErrorMsg: "argument out of range or not a multiple of 8", DAG)
4520	: SDValue ();
4521	}
4522	}
4523
4524	SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
4525	SelectionDAG &DAG) const {
4526	SDLoc DL(Op);
4527	SDValue Lo = Op.getOperand(i: `0`);
4528	SDValue Hi = Op.getOperand(i: `1`);
4529	SDValue Shamt = Op.getOperand(i: `2`);
4530	EVT VT = Lo.getValueType();
4531
4532	// if Shamt-GRLen < 0: // Shamt < GRLen
4533	// Lo = Lo << Shamt
4534	// Hi = (Hi << Shamt) \| ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
4535	// else:
4536	// Lo = 0
4537	// Hi = Lo << (Shamt-GRLen)
4538
4539	SDValue Zero = DAG.getConstant(Val: `0`, DL, VT);
4540	SDValue One = DAG.getConstant(Val: `1`, DL, VT);
4541	SDValue MinusGRLen =
4542	DAG.getSignedConstant(Val: -(int)Subtarget.getGRLen(), DL, VT);
4543	SDValue GRLenMinus1 = DAG.getConstant(Val: Subtarget.getGRLen() - `1`, DL, VT);
4544	SDValue ShamtMinusGRLen = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shamt, N2: MinusGRLen);
4545	SDValue GRLenMinus1Shamt = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Shamt, N2: GRLenMinus1);
4546
4547	SDValue LoTrue = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Lo, N2: Shamt);
4548	SDValue ShiftRight1Lo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo, N2: One);
4549	SDValue ShiftRightLo =
4550	DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: ShiftRight1Lo, N2: GRLenMinus1Shamt);
4551	SDValue ShiftLeftHi = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi, N2: Shamt);
4552	SDValue HiTrue = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShiftLeftHi, N2: ShiftRightLo);
4553	SDValue HiFalse = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Lo, N2: ShamtMinusGRLen);
4554
4555	SDValue CC = DAG.getSetCC(DL, VT, LHS: ShamtMinusGRLen, RHS: Zero, Cond: ISD::SETLT);
4556
4557	Lo = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: LoTrue, N3: Zero);
4558	Hi = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: HiTrue, N3: HiFalse);
4559
4560	SDValue Parts[`2`] = {Lo, Hi};
4561	return DAG.getMergeValues(Ops: Parts, dl: DL);
4562	}
4563
4564	SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
4565	SelectionDAG &DAG,
4566	bool IsSRA) const {
4567	SDLoc DL(Op);
4568	SDValue Lo = Op.getOperand(i: `0`);
4569	SDValue Hi = Op.getOperand(i: `1`);
4570	SDValue Shamt = Op.getOperand(i: `2`);
4571	EVT VT = Lo.getValueType();
4572
4573	// SRA expansion:
4574	// if Shamt-GRLen < 0: // Shamt < GRLen
4575	// Lo = (Lo >>u Shamt) \| ((Hi << 1) << (ShAmt ^ GRLen-1))
4576	// Hi = Hi >>s Shamt
4577	// else:
4578	// Lo = Hi >>s (Shamt-GRLen);
4579	// Hi = Hi >>s (GRLen-1)
4580	//
4581	// SRL expansion:
4582	// if Shamt-GRLen < 0: // Shamt < GRLen
4583	// Lo = (Lo >>u Shamt) \| ((Hi << 1) << (ShAmt ^ GRLen-1))
4584	// Hi = Hi >>u Shamt
4585	// else:
4586	// Lo = Hi >>u (Shamt-GRLen);
4587	// Hi = 0;
4588
4589	unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
4590
4591	SDValue Zero = DAG.getConstant(Val: `0`, DL, VT);
4592	SDValue One = DAG.getConstant(Val: `1`, DL, VT);
4593	SDValue MinusGRLen =
4594	DAG.getSignedConstant(Val: -(int)Subtarget.getGRLen(), DL, VT);
4595	SDValue GRLenMinus1 = DAG.getConstant(Val: Subtarget.getGRLen() - `1`, DL, VT);
4596	SDValue ShamtMinusGRLen = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shamt, N2: MinusGRLen);
4597	SDValue GRLenMinus1Shamt = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Shamt, N2: GRLenMinus1);
4598
4599	SDValue ShiftRightLo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo, N2: Shamt);
4600	SDValue ShiftLeftHi1 = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi, N2: One);
4601	SDValue ShiftLeftHi =
4602	DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: ShiftLeftHi1, N2: GRLenMinus1Shamt);
4603	SDValue LoTrue = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShiftRightLo, N2: ShiftLeftHi);
4604	SDValue HiTrue = DAG.getNode(Opcode: ShiftRightOp, DL, VT, N1: Hi, N2: Shamt);
4605	SDValue LoFalse = DAG.getNode(Opcode: ShiftRightOp, DL, VT, N1: Hi, N2: ShamtMinusGRLen);
4606	SDValue HiFalse =
4607	IsSRA ? DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: Hi, N2: GRLenMinus1) : Zero;
4608
4609	SDValue CC = DAG.getSetCC(DL, VT, LHS: ShamtMinusGRLen, RHS: Zero, Cond: ISD::SETLT);
4610
4611	Lo = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: LoTrue, N3: LoFalse);
4612	Hi = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: HiTrue, N3: HiFalse);
4613
4614	SDValue Parts[`2`] = {Lo, Hi};
4615	return DAG.getMergeValues(Ops: Parts, dl: DL);
4616	}
4617
4618	// Returns the opcode of the target-specific SDNode that implements the 32-bit
4619	// form of the given Opcode.
4620	static unsigned getLoongArchWOpcode(unsigned Opcode) {
4621	switch (Opcode) {
4622	default:
4623	llvm_unreachable("Unexpected opcode");
4624	case ISD::SDIV:
4625	return LoongArchISD::DIV_W;
4626	case ISD::UDIV:
4627	return LoongArchISD::DIV_WU;
4628	case ISD::SREM:
4629	return LoongArchISD::MOD_W;
4630	case ISD::UREM:
4631	return LoongArchISD::MOD_WU;
4632	case ISD::SHL:
4633	return LoongArchISD::SLL_W;
4634	case ISD::SRA:
4635	return LoongArchISD::SRA_W;
4636	case ISD::SRL:
4637	return LoongArchISD::SRL_W;
4638	case ISD::ROTL:
4639	case ISD::ROTR:
4640	return LoongArchISD::ROTR_W;
4641	case ISD::CTTZ:
4642	return LoongArchISD::CTZ_W;
4643	case ISD::CTLZ:
4644	return LoongArchISD::CLZ_W;
4645	}
4646	}
4647
4648	// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
4649	// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
4650	// otherwise be promoted to i64, making it difficult to select the
4651	// SLL_W/.../W later one because the fact the operation was originally of*
4652	// type i8/i16/i32 is lost.
4653	static SDValue customLegalizeToWOp(SDNode N, SelectionDAG &DAG, int* NumOp,
4654	unsigned ExtOpc = ISD::ANY_EXTEND) {
4655	SDLoc DL(N);
4656	unsigned WOpcode = getLoongArchWOpcode(Opcode: N->getOpcode());
4657	SDValue NewOp0, NewRes;
4658
4659	switch (NumOp) {
4660	default:
4661	llvm_unreachable("Unexpected NumOp");
4662	case `1`: {
4663	NewOp0 = DAG.getNode(Opcode: ExtOpc, DL, VT: MVT::i64, Operand: N->getOperand(Num: `0`));
4664	NewRes = DAG.getNode(Opcode: WOpcode, DL, VT: MVT::i64, Operand: NewOp0);
4665	break;
4666	}
4667	case `2`: {
4668	NewOp0 = DAG.getNode(Opcode: ExtOpc, DL, VT: MVT::i64, Operand: N->getOperand(Num: `0`));
4669	SDValue NewOp1 = DAG.getNode(Opcode: ExtOpc, DL, VT: MVT::i64, Operand: N->getOperand(Num: `1`));
4670	if (N->getOpcode() == ISD::ROTL) {
4671	SDValue TmpOp = DAG.getConstant(Val: `32`, DL, VT: MVT::i64);
4672	NewOp1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: MVT::i64, N1: TmpOp, N2: NewOp1);
4673	}
4674	NewRes = DAG.getNode(Opcode: WOpcode, DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1);
4675	break;
4676	}
4677	// TODO:Handle more NumOp.
4678	}
4679
4680	// ReplaceNodeResults requires we maintain the same type for the return
4681	// value.
4682	return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: `0`), Operand: NewRes);
4683	}
4684
4685	// Converts the given 32-bit operation to a i64 operation with signed extension
4686	// semantic to reduce the signed extension instructions.
4687	static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
4688	SDLoc DL(N);
4689	SDValue NewOp0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: `0`));
4690	SDValue NewOp1 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: `1`));
4691	SDValue NewWOp = DAG.getNode(Opcode: N->getOpcode(), DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1);
4692	SDValue NewRes = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: MVT::i64, N1: NewWOp,
4693	N2: DAG.getValueType(MVT::i32));
4694	return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: NewRes);
4695	}
4696
4697	// Helper function that emits error message for intrinsics with/without chain
4698	// and return a UNDEF or and the chain as the results.
4699	static void emitErrorAndReplaceIntrinsicResults(
4700	SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
4701	StringRef ErrorMsg, bool WithChain = true) {
4702	DAG.getContext()->emitError(ErrorStr: N->getOperationName(G: `0`) + ": " + ErrorMsg + ".");
4703	Results.push_back(Elt: DAG.getUNDEF(VT: N->getValueType(ResNo: `0`)));
4704	if (!WithChain)
4705	return;
4706	Results.push_back(Elt: N->getOperand(Num: `0`));
4707	}
4708
4709	template <unsigned N>
4710	static void
4711	replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
4712	SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
4713	unsigned ResOp) {
4714	const StringRef ErrorMsgOOR = "argument out of range";
4715	unsigned Imm = Node->getConstantOperandVal(Num: `2`);
4716	if (!isUInt<N>(Imm)) {
4717	emitErrorAndReplaceIntrinsicResults(N: Node, Results, DAG, ErrorMsg: ErrorMsgOOR,
4718	/WithChain=/false);
4719	return;
4720	}
4721	SDLoc DL(Node);
4722	SDValue Vec = Node->getOperand(Num: `1`);
4723
4724	SDValue PickElt =
4725	DAG.getNode(Opcode: ResOp, DL, VT: Subtarget.getGRLenVT(), N1: Vec,
4726	N2: DAG.getConstant(Val: Imm, DL, VT: Subtarget.getGRLenVT()),
4727	N3: DAG.getValueType(Vec.getValueType().getVectorElementType()));
4728	Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: Node->getValueType(ResNo: `0`),
4729	Operand: PickElt.getValue(R: `0`)));
4730	}
4731
4732	static void replaceVecCondBranchResults(SDNode *N,
4733	SmallVectorImpl<SDValue> &Results,
4734	SelectionDAG &DAG,
4735	const LoongArchSubtarget &Subtarget,
4736	unsigned ResOp) {
4737	SDLoc DL(N);
4738	SDValue Vec = N->getOperand(Num: `1`);
4739
4740	SDValue CB = DAG.getNode(Opcode: ResOp, DL, VT: Subtarget.getGRLenVT(), Operand: Vec);
4741	Results.push_back(
4742	Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: `0`), Operand: CB.getValue(R: `0`)));
4743	}
4744
4745	static void
4746	replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
4747	SelectionDAG &DAG,
4748	const LoongArchSubtarget &Subtarget) {
4749	switch (N->getConstantOperandVal(Num: `0`)) {
4750	default:
4751	llvm_unreachable("Unexpected Intrinsic.");
4752	case Intrinsic::loongarch_lsx_vpickve2gr_b:
4753	replaceVPICKVE2GRResults<`4`>(Node: N, Results, DAG, Subtarget,
4754	ResOp: LoongArchISD::VPICK_SEXT_ELT);
4755	break;
4756	case Intrinsic::loongarch_lsx_vpickve2gr_h:
4757	case Intrinsic::loongarch_lasx_xvpickve2gr_w:
4758	replaceVPICKVE2GRResults<`3`>(Node: N, Results, DAG, Subtarget,
4759	ResOp: LoongArchISD::VPICK_SEXT_ELT);
4760	break;
4761	case Intrinsic::loongarch_lsx_vpickve2gr_w:
4762	replaceVPICKVE2GRResults<`2`>(Node: N, Results, DAG, Subtarget,
4763	ResOp: LoongArchISD::VPICK_SEXT_ELT);
4764	break;
4765	case Intrinsic::loongarch_lsx_vpickve2gr_bu:
4766	replaceVPICKVE2GRResults<`4`>(Node: N, Results, DAG, Subtarget,
4767	ResOp: LoongArchISD::VPICK_ZEXT_ELT);
4768	break;
4769	case Intrinsic::loongarch_lsx_vpickve2gr_hu:
4770	case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
4771	replaceVPICKVE2GRResults<`3`>(Node: N, Results, DAG, Subtarget,
4772	ResOp: LoongArchISD::VPICK_ZEXT_ELT);
4773	break;
4774	case Intrinsic::loongarch_lsx_vpickve2gr_wu:
4775	replaceVPICKVE2GRResults<`2`>(Node: N, Results, DAG, Subtarget,
4776	ResOp: LoongArchISD::VPICK_ZEXT_ELT);
4777	break;
4778	case Intrinsic::loongarch_lsx_bz_b:
4779	case Intrinsic::loongarch_lsx_bz_h:
4780	case Intrinsic::loongarch_lsx_bz_w:
4781	case Intrinsic::loongarch_lsx_bz_d:
4782	case Intrinsic::loongarch_lasx_xbz_b:
4783	case Intrinsic::loongarch_lasx_xbz_h:
4784	case Intrinsic::loongarch_lasx_xbz_w:
4785	case Intrinsic::loongarch_lasx_xbz_d:
4786	replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4787	ResOp: LoongArchISD::VALL_ZERO);
4788	break;
4789	case Intrinsic::loongarch_lsx_bz_v:
4790	case Intrinsic::loongarch_lasx_xbz_v:
4791	replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4792	ResOp: LoongArchISD::VANY_ZERO);
4793	break;
4794	case Intrinsic::loongarch_lsx_bnz_b:
4795	case Intrinsic::loongarch_lsx_bnz_h:
4796	case Intrinsic::loongarch_lsx_bnz_w:
4797	case Intrinsic::loongarch_lsx_bnz_d:
4798	case Intrinsic::loongarch_lasx_xbnz_b:
4799	case Intrinsic::loongarch_lasx_xbnz_h:
4800	case Intrinsic::loongarch_lasx_xbnz_w:
4801	case Intrinsic::loongarch_lasx_xbnz_d:
4802	replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4803	ResOp: LoongArchISD::VALL_NONZERO);
4804	break;
4805	case Intrinsic::loongarch_lsx_bnz_v:
4806	case Intrinsic::loongarch_lasx_xbnz_v:
4807	replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4808	ResOp: LoongArchISD::VANY_NONZERO);
4809	break;
4810	}
4811	}
4812
4813	static void replaceCMP_XCHG_128Results(SDNode *N,
4814	SmallVectorImpl<SDValue> &Results,
4815	SelectionDAG &DAG) {
4816	assert(N->getValueType(`0`) == MVT::i128 &&
4817	"AtomicCmpSwap on types less than 128 should be legal");
4818	MachineMemOperand *MemOp = cast<MemSDNode>(Val: N)->getMemOperand();
4819
4820	unsigned Opcode;
4821	switch (MemOp->getMergedOrdering()) {
4822	case AtomicOrdering::Acquire:
4823	case AtomicOrdering::AcquireRelease:
4824	case AtomicOrdering::SequentiallyConsistent:
4825	Opcode = LoongArch::PseudoCmpXchg128Acquire;
4826	break;
4827	case AtomicOrdering::Monotonic:
4828	case AtomicOrdering::Release:
4829	Opcode = LoongArch::PseudoCmpXchg128;
4830	break;
4831	default:
4832	llvm_unreachable("Unexpected ordering!");
4833	}
4834
4835	SDLoc DL(N);
4836	auto CmpVal = DAG.SplitScalar(N: N->getOperand(Num: `2`), DL, LoVT: MVT::i64, HiVT: MVT::i64);
4837	auto NewVal = DAG.SplitScalar(N: N->getOperand(Num: `3`), DL, LoVT: MVT::i64, HiVT: MVT::i64);
4838	SDValue Ops[] = {N->getOperand(Num: `1`), CmpVal.first, CmpVal.second,
4839	NewVal.first, NewVal.second, N->getOperand(Num: `0`)};
4840
4841	SDNode *CmpSwap = DAG.getMachineNode(
4842	Opcode, dl: SDLoc (N), VTs: DAG.getVTList(VT1: MVT::i64, VT2: MVT::i64, VT3: MVT::i64, VT4: MVT::Other),
4843	Ops);
4844	DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: CmpSwap), NewMemRefs: {MemOp});
4845	Results.push_back(Elt: DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: MVT::i128,
4846	N1: SDValue (CmpSwap, `0`), N2: SDValue (CmpSwap, `1`)));
4847	Results.push_back(Elt: SDValue (CmpSwap, `3`));
4848	}
4849
4850	void LoongArchTargetLowering::ReplaceNodeResults(
4851	SDNode N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const* {
4852	SDLoc DL(N);
4853	EVT VT = N->getValueType(ResNo: `0`);
4854	switch (N->getOpcode()) {
4855	default:
4856	llvm_unreachable("Don't know how to legalize this operation");
4857	case ISD::ADD:
4858	case ISD::SUB:
4859	assert(N->getValueType(`0`) == MVT::i32 && Subtarget.is64Bit() &&
4860	"Unexpected custom legalisation");
4861	Results.push_back(Elt: customLegalizeToWOpWithSExt(N, DAG));
4862	break;
4863	case ISD::SDIV:
4864	case ISD::UDIV:
4865	case ISD::SREM:
4866	case ISD::UREM:
4867	assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4868	"Unexpected custom legalisation");
4869	Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: `2`,
4870	ExtOpc: Subtarget.hasDiv32() && VT == MVT::i32
4871	? ISD::ANY_EXTEND
4872	: ISD::SIGN_EXTEND));
4873	break;
4874	case ISD::SHL:
4875	case ISD::SRA:
4876	case ISD::SRL:
4877	assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4878	"Unexpected custom legalisation");
4879	if (N->getOperand(Num: `1`).getOpcode() != ISD::Constant) {
4880	Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: `2`));
4881	break;
4882	}
4883	break;
4884	case ISD::ROTL:
4885	case ISD::ROTR:
4886	assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4887	"Unexpected custom legalisation");
4888	Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: `2`));
4889	break;
4890	case ISD::FP_TO_SINT: {
4891	assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4892	"Unexpected custom legalisation");
4893	SDValue Src = N->getOperand(Num: `0`);
4894	EVT FVT = EVT::getFloatingPointVT(BitWidth: N->getValueSizeInBits(ResNo: `0`));
4895	if (getTypeAction(Context&: *DAG.getContext(), VT: Src.getValueType()) !=
4896	TargetLowering::TypeSoftenFloat) {
4897	if (!isTypeLegal(VT: Src.getValueType()))
4898	return;
4899	if (Src.getValueType() == MVT::f16)
4900	Src = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: MVT::f32, Operand: Src);
4901	SDValue Dst = DAG.getNode(Opcode: LoongArchISD::FTINT, DL, VT: FVT, Operand: Src);
4902	Results.push_back(Elt: DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Dst));
4903	return;
4904	}
4905	// If the FP type needs to be softened, emit a library call using the 'si'
4906	// version. If we left it to default legalization we'd end up with 'di'.
4907	RTLIB::Libcall LC;
4908	LC = RTLIB::getFPTOSINT(OpVT: Src.getValueType(), RetVT: VT);
4909	MakeLibCallOptions CallOptions;
4910	EVT OpVT = Src.getValueType();
4911	CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT: VT);
4912	SDValue Chain = SDValue ();
4913	SDValue Result;
4914	std::tie(args&: Result, args&: Chain) =
4915	makeLibCall(DAG, LC, RetVT: VT, Ops: Src, CallOptions, dl: DL, Chain);
4916	Results.push_back(Elt: Result);
4917	break;
4918	}
4919	case ISD::BITCAST: {
4920	SDValue Src = N->getOperand(Num: `0`);
4921	EVT SrcVT = Src.getValueType();
4922	if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
4923	Subtarget.hasBasicF()) {
4924	SDValue Dst =
4925	DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Src);
4926	Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Dst));
4927	} else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
4928	SDValue NewReg = DAG.getNode(Opcode: LoongArchISD::SPLIT_PAIR_F64, DL,
4929	VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32), N: Src);
4930	SDValue RetReg = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: MVT::i64,
4931	N1: NewReg.getValue(R: `0`), N2: NewReg.getValue(R: `1`));
4932	Results.push_back(Elt: RetReg);
4933	}
4934	break;
4935	}
4936	case ISD::FP_TO_UINT: {
4937	assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4938	"Unexpected custom legalisation");
4939	auto &TLI = DAG.getTargetLoweringInfo();
4940	SDValue Tmp1, Tmp2;
4941	TLI.expandFP_TO_UINT(N, Result&: Tmp1, Chain&: Tmp2, DAG);
4942	Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Tmp1));
4943	break;
4944	}
4945	case ISD::BSWAP: {
4946	SDValue Src = N->getOperand(Num: `0`);
4947	assert((VT == MVT::i16 \|\| VT == MVT::i32) &&
4948	"Unexpected custom legalization");
4949	MVT GRLenVT = Subtarget.getGRLenVT();
4950	SDValue NewSrc = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: GRLenVT, Operand: Src);
4951	SDValue Tmp;
4952	switch (VT.getSizeInBits()) {
4953	default:
4954	llvm_unreachable("Unexpected operand width");
4955	case `16`:
4956	Tmp = DAG.getNode(Opcode: LoongArchISD::REVB_2H, DL, VT: GRLenVT, Operand: NewSrc);
4957	break;
4958	case `32`:
4959	// Only LA64 will get to here due to the size mismatch between VT and
4960	// GRLenVT, LA32 lowering is directly defined in LoongArchInstrInfo.
4961	Tmp = DAG.getNode(Opcode: LoongArchISD::REVB_2W, DL, VT: GRLenVT, Operand: NewSrc);
4962	break;
4963	}
4964	Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Tmp));
4965	break;
4966	}
4967	case ISD::BITREVERSE: {
4968	SDValue Src = N->getOperand(Num: `0`);
4969	assert((VT == MVT::i8 \|\| (VT == MVT::i32 && Subtarget.is64Bit())) &&
4970	"Unexpected custom legalization");
4971	MVT GRLenVT = Subtarget.getGRLenVT();
4972	SDValue NewSrc = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: GRLenVT, Operand: Src);
4973	SDValue Tmp;
4974	switch (VT.getSizeInBits()) {
4975	default:
4976	llvm_unreachable("Unexpected operand width");
4977	case `8`:
4978	Tmp = DAG.getNode(Opcode: LoongArchISD::BITREV_4B, DL, VT: GRLenVT, Operand: NewSrc);
4979	break;
4980	case `32`:
4981	Tmp = DAG.getNode(Opcode: LoongArchISD::BITREV_W, DL, VT: GRLenVT, Operand: NewSrc);
4982	break;
4983	}
4984	Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Tmp));
4985	break;
4986	}
4987	case ISD::CTLZ:
4988	case ISD::CTTZ: {
4989	assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4990	"Unexpected custom legalisation");
4991	Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: `1`));
4992	break;
4993	}
4994	case ISD::INTRINSIC_W_CHAIN: {
4995	SDValue Chain = N->getOperand(Num: `0`);
4996	SDValue Op2 = N->getOperand(Num: `2`);
4997	MVT GRLenVT = Subtarget.getGRLenVT();
4998	const StringRef ErrorMsgOOR = "argument out of range";
4999	const StringRef ErrorMsgReqLA64 = "requires loongarch64";
5000	const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
5001
5002	switch (N->getConstantOperandVal(Num: `1`)) {
5003	default:
5004	llvm_unreachable("Unexpected Intrinsic.");
5005	case Intrinsic::loongarch_movfcsr2gr: {
5006	if (!Subtarget.hasBasicF()) {
5007	emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgReqF);
5008	return;
5009	}
5010	unsigned Imm = Op2 ->getAsZExtVal();
5011	if (!isUInt<`2`>(x: Imm)) {
5012	emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR);
5013	return;
5014	}
5015	SDValue MOVFCSR2GRResults = DAG.getNode(
5016	Opcode: LoongArchISD::MOVFCSR2GR, DL: SDLoc (N), ResultTys: {MVT::i64, MVT::Other},
5017	Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
5018	Results.push_back(
5019	Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: MOVFCSR2GRResults.getValue(R: `0`)));
5020	Results.push_back(Elt: MOVFCSR2GRResults.getValue(R: `1`));
5021	break;
5022	}
5023	#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
5024	case Intrinsic::loongarch_##NAME: { \
5025	SDValue NODE = DAG.getNode( \
5026	LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5027	{Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
5028	DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
5029	Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
5030	Results.push_back(NODE.getValue(1)); \
5031	break; \
5032	}
5033	CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
5034	CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
5035	CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
5036	CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
5037	CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
5038	CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
5039	#undef CRC_CASE_EXT_BINARYOP
5040
5041	#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
5042	case Intrinsic::loongarch_##NAME: { \
5043	SDValue NODE = DAG.getNode( \
5044	LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5045	{Chain, Op2, \
5046	DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
5047	Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
5048	Results.push_back(NODE.getValue(1)); \
5049	break; \
5050	}
5051	CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
5052	CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
5053	#undef CRC_CASE_EXT_UNARYOP
5054	#define CSR_CASE(ID) \
5055	case Intrinsic::loongarch_##ID: { \
5056	if (!Subtarget.is64Bit()) \
5057	emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
5058	break; \
5059	}
5060	CSR_CASE(csrrd_d);
5061	CSR_CASE(csrwr_d);
5062	CSR_CASE(csrxchg_d);
5063	CSR_CASE(iocsrrd_d);
5064	#undef CSR_CASE
5065	case Intrinsic::loongarch_csrrd_w: {
5066	unsigned Imm = Op2 ->getAsZExtVal();
5067	if (!isUInt<`14`>(x: Imm)) {
5068	emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR);
5069	return;
5070	}
5071	SDValue CSRRDResults =
5072	DAG.getNode(Opcode: LoongArchISD::CSRRD, DL, ResultTys: {GRLenVT, MVT::Other},
5073	Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
5074	Results.push_back(
5075	Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRRDResults.getValue(R: `0`)));
5076	Results.push_back(Elt: CSRRDResults.getValue(R: `1`));
5077	break;
5078	}
5079	case Intrinsic::loongarch_csrwr_w: {
5080	unsigned Imm = N->getConstantOperandVal(Num: `3`);
5081	if (!isUInt<`14`>(x: Imm)) {
5082	emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR);
5083	return;
5084	}
5085	SDValue CSRWRResults =
5086	DAG.getNode(Opcode: LoongArchISD::CSRWR, DL, ResultTys: {GRLenVT, MVT::Other},
5087	Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op2),
5088	DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
5089	Results.push_back(
5090	Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRWRResults.getValue(R: `0`)));
5091	Results.push_back(Elt: CSRWRResults.getValue(R: `1`));
5092	break;
5093	}
5094	case Intrinsic::loongarch_csrxchg_w: {
5095	unsigned Imm = N->getConstantOperandVal(Num: `4`);
5096	if (!isUInt<`14`>(x: Imm)) {
5097	emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR);
5098	return;
5099	}
5100	SDValue CSRXCHGResults = DAG.getNode(
5101	Opcode: LoongArchISD::CSRXCHG, DL, ResultTys: {GRLenVT, MVT::Other},
5102	Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op2),
5103	DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: `3`)),
5104	DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
5105	Results.push_back(
5106	Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRXCHGResults.getValue(R: `0`)));
5107	Results.push_back(Elt: CSRXCHGResults.getValue(R: `1`));
5108	break;
5109	}
5110	#define IOCSRRD_CASE(NAME, NODE) \
5111	case Intrinsic::loongarch_##NAME: { \
5112	SDValue IOCSRRDResults = \
5113	DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5114	{Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
5115	Results.push_back( \
5116	DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
5117	Results.push_back(IOCSRRDResults.getValue(1)); \
5118	break; \
5119	}
5120	IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
5121	IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
5122	IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
5123	#undef IOCSRRD_CASE
5124	case Intrinsic::loongarch_cpucfg: {
5125	SDValue CPUCFGResults =
5126	DAG.getNode(Opcode: LoongArchISD::CPUCFG, DL, ResultTys: {GRLenVT, MVT::Other},
5127	Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op2)});
5128	Results.push_back(
5129	Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CPUCFGResults.getValue(R: `0`)));
5130	Results.push_back(Elt: CPUCFGResults.getValue(R: `1`));
5131	break;
5132	}
5133	case Intrinsic::loongarch_lddir_d: {
5134	if (!Subtarget.is64Bit()) {
5135	emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgReqLA64);
5136	return;
5137	}
5138	break;
5139	}
5140	}
5141	break;
5142	}
5143	case ISD::READ_REGISTER: {
5144	if (Subtarget.is64Bit())
5145	DAG.getContext()->emitError(
5146	ErrorStr: "On LA64, only 64-bit registers can be read.");
5147	else
5148	DAG.getContext()->emitError(
5149	ErrorStr: "On LA32, only 32-bit registers can be read.");
5150	Results.push_back(Elt: DAG.getUNDEF(VT));
5151	Results.push_back(Elt: N->getOperand(Num: `0`));
5152	break;
5153	}
5154	case ISD::INTRINSIC_WO_CHAIN: {
5155	replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
5156	break;
5157	}
5158	case ISD::LROUND: {
5159	SDValue Op0 = N->getOperand(Num: `0`);
5160	EVT OpVT = Op0.getValueType();
5161	RTLIB::Libcall LC =
5162	OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
5163	MakeLibCallOptions CallOptions;
5164	CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT: MVT::i64);
5165	SDValue Result = makeLibCall(DAG, LC, RetVT: MVT::i64, Ops: Op0, CallOptions, dl: DL).first;
5166	Result = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Result);
5167	Results.push_back(Elt: Result);
5168	break;
5169	}
5170	case ISD::ATOMIC_CMP_SWAP: {
5171	replaceCMP_XCHG_128Results(N, Results, DAG);
5172	break;
5173	}
5174	case ISD::TRUNCATE: {
5175	MVT VT = N->getSimpleValueType(ResNo: `0`);
5176	if (getTypeAction(Context&: *DAG.getContext(), VT) != TypeWidenVector)
5177	return;
5178
5179	MVT WidenVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT).getSimpleVT();
5180	SDValue In = N->getOperand(Num: `0`);
5181	EVT InVT = In.getValueType();
5182	EVT InEltVT = InVT.getVectorElementType();
5183	EVT EltVT = VT.getVectorElementType();
5184	unsigned MinElts = VT.getVectorNumElements();
5185	unsigned WidenNumElts = WidenVT.getVectorNumElements();
5186	unsigned InBits = InVT.getSizeInBits();
5187
5188	if ((`128` % InBits) == `0` && WidenVT.is128BitVector()) {
5189	if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == `0`) {
5190	int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
5191	SmallVector<int, `16`> TruncMask(WidenNumElts, -`1`);
5192	for (unsigned I = `0`; I < MinElts; ++I)
5193	TruncMask [I] = Scale * I;
5194
5195	unsigned WidenNumElts = `128` / In.getScalarValueSizeInBits();
5196	MVT SVT = In.getSimpleValueType().getScalarType();
5197	MVT VT = MVT::getVectorVT(VT: SVT, NumElements: WidenNumElts);
5198	SDValue WidenIn =
5199	DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT, N1: DAG.getUNDEF(VT), N2: In,
5200	N3: DAG.getVectorIdxConstant(Val: `0`, DL));
5201	assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
5202	"Illegal vector type in truncation");
5203	WidenIn = DAG.getBitcast(VT: WidenVT, V: WidenIn);
5204	Results.push_back(
5205	Elt: DAG.getVectorShuffle(VT: WidenVT, dl: DL, N1: WidenIn, N2: WidenIn, Mask: TruncMask));
5206	return;
5207	}
5208	}
5209
5210	break;
5211	}
5212	}
5213	}
5214
5215	/// Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
5216	static SDValue combineAndNotIntoVANDN(SDNode N, const* SDLoc &DL,
5217	SelectionDAG &DAG) {
5218	assert(N->getOpcode() == ISD::AND && "Unexpected opcode combine into ANDN");
5219
5220	MVT VT = N->getSimpleValueType(ResNo: `0`);
5221	if (!VT.is128BitVector() && !VT.is256BitVector())
5222	return SDValue ();
5223
5224	SDValue X, Y;
5225	SDValue N0 = N->getOperand(Num: `0`);
5226	SDValue N1 = N->getOperand(Num: `1`);
5227
5228	if (SDValue Not = isNOT(V: N0, DAG)) {
5229	X = Not;
5230	Y = N1;
5231	} else if (SDValue Not = isNOT(V: N1, DAG)) {
5232	X = Not;
5233	Y = N0;
5234	} else
5235	return SDValue ();
5236
5237	X = DAG.getBitcast(VT, V: X);
5238	Y = DAG.getBitcast(VT, V: Y);
5239	return DAG.getNode(Opcode: LoongArchISD::VANDN, DL, VT, N1: X, N2: Y);
5240	}
5241
5242	static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
5243	TargetLowering::DAGCombinerInfo &DCI,
5244	const LoongArchSubtarget &Subtarget) {
5245	if (DCI.isBeforeLegalizeOps())
5246	return SDValue ();
5247
5248	SDValue FirstOperand = N->getOperand(Num: `0`);
5249	SDValue SecondOperand = N->getOperand(Num: `1`);
5250	unsigned FirstOperandOpc = FirstOperand.getOpcode();
5251	EVT ValTy = N->getValueType(ResNo: `0`);
5252	SDLoc DL(N);
5253	uint64_t lsb, msb;
5254	unsigned SMIdx, SMLen;
5255	ConstantSDNode *CN;
5256	SDValue NewOperand;
5257	MVT GRLenVT = Subtarget.getGRLenVT();
5258
5259	if (SDValue R = combineAndNotIntoVANDN(N, DL, DAG))
5260	return R;
5261
5262	// BSTRPICK requires the 32S feature.
5263	if (!Subtarget.has32S())
5264	return SDValue ();
5265
5266	// Op's second operand must be a shifted mask.
5267	if (!(CN = dyn_cast<ConstantSDNode>(Val&: SecondOperand)) \|\|
5268	!isShiftedMask_64(Value: CN->getZExtValue(), MaskIdx&: SMIdx, MaskLen&: SMLen))
5269	return SDValue ();
5270
5271	if (FirstOperandOpc == ISD::SRA \|\| FirstOperandOpc == ISD::SRL) {
5272	// Pattern match BSTRPICK.
5273	// $dst = and ((sra or srl) $src , lsb), (2len - 1)
5274	// => BSTRPICK $dst, $src, msb, lsb
5275	// where msb = lsb + len - 1
5276
5277	// The second operand of the shift must be an immediate.
5278	if (!(CN = dyn_cast<ConstantSDNode>(Val: FirstOperand.getOperand(i: `1`))))
5279	return SDValue ();
5280
5281	lsb = CN->getZExtValue();
5282
5283	// Return if the shifted mask does not start at bit 0 or the sum of its
5284	// length and lsb exceeds the word's size.
5285	if (SMIdx != `0` \|\| lsb + SMLen > ValTy.getSizeInBits())
5286	return SDValue ();
5287
5288	NewOperand = FirstOperand.getOperand(i: `0`);
5289	} else {
5290	// Pattern match BSTRPICK.
5291	// $dst = and $src, (2len- 1) , if len > 12
5292	// => BSTRPICK $dst, $src, msb, lsb
5293	// where lsb = 0 and msb = len - 1
5294
5295	// If the mask is <= 0xfff, andi can be used instead.
5296	if (CN->getZExtValue() <= `0xfff`)
5297	return SDValue ();
5298
5299	// Return if the MSB exceeds.
5300	if (SMIdx + SMLen > ValTy.getSizeInBits())
5301	return SDValue ();
5302
5303	if (SMIdx > `0`) {
5304	// Omit if the constant has more than 2 uses. This a conservative
5305	// decision. Whether it is a win depends on the HW microarchitecture.
5306	// However it should always be better for 1 and 2 uses.
5307	if (CN->use_size() > `2`)
5308	return SDValue ();
5309	// Return if the constant can be composed by a single LU12I.W.
5310	if ((CN->getZExtValue() & `0xfff`) == `0`)
5311	return SDValue ();
5312	// Return if the constand can be composed by a single ADDI with
5313	// the zero register.
5314	if (CN->getSExtValue() >= -`2048` && CN->getSExtValue() < `0`)
5315	return SDValue ();
5316	}
5317
5318	lsb = SMIdx;
5319	NewOperand = FirstOperand;
5320	}
5321
5322	msb = lsb + SMLen - `1`;
5323	SDValue NR0 = DAG.getNode(Opcode: LoongArchISD::BSTRPICK, DL, VT: ValTy, N1: NewOperand,
5324	N2: DAG.getConstant(Val: msb, DL, VT: GRLenVT),
5325	N3: DAG.getConstant(Val: lsb, DL, VT: GRLenVT));
5326	if (FirstOperandOpc == ISD::SRA \|\| FirstOperandOpc == ISD::SRL \|\| lsb == `0`)
5327	return NR0;
5328	// Try to optimize to
5329	// bstrpick $Rd, $Rs, msb, lsb
5330	// slli $Rd, $Rd, lsb
5331	return DAG.getNode(Opcode: ISD::SHL, DL, VT: ValTy, N1: NR0,
5332	N2: DAG.getConstant(Val: lsb, DL, VT: GRLenVT));
5333	}
5334
5335	static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
5336	TargetLowering::DAGCombinerInfo &DCI,
5337	const LoongArchSubtarget &Subtarget) {
5338	// BSTRPICK requires the 32S feature.
5339	if (!Subtarget.has32S())
5340	return SDValue ();
5341
5342	if (DCI.isBeforeLegalizeOps())
5343	return SDValue ();
5344
5345	// $dst = srl (and $src, Mask), Shamt
5346	// =>
5347	// BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
5348	// when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
5349	//
5350
5351	SDValue FirstOperand = N->getOperand(Num: `0`);
5352	ConstantSDNode *CN;
5353	EVT ValTy = N->getValueType(ResNo: `0`);
5354	SDLoc DL(N);
5355	MVT GRLenVT = Subtarget.getGRLenVT();
5356	unsigned MaskIdx, MaskLen;
5357	uint64_t Shamt;
5358
5359	// The first operand must be an AND and the second operand of the AND must be
5360	// a shifted mask.
5361	if (FirstOperand.getOpcode() != ISD::AND \|\|
5362	!(CN = dyn_cast<ConstantSDNode>(Val: FirstOperand.getOperand(i: `1`))) \|\|
5363	!isShiftedMask_64(Value: CN->getZExtValue(), MaskIdx, MaskLen))
5364	return SDValue ();
5365
5366	// The second operand (shift amount) must be an immediate.
5367	if (!(CN = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: `1`))))
5368	return SDValue ();
5369
5370	Shamt = CN->getZExtValue();
5371	if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - `1`)
5372	return DAG.getNode(Opcode: LoongArchISD::BSTRPICK, DL, VT: ValTy,
5373	N1: FirstOperand ->getOperand(Num: `0`),
5374	N2: DAG.getConstant(Val: MaskIdx + MaskLen - `1`, DL, VT: GRLenVT),
5375	N3: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT));
5376
5377	return SDValue ();
5378	}
5379
5380	// Helper to peek through bitops/trunc/setcc to determine size of source vector.
5381	// Allows BITCASTCombine to determine what size vector generated a <X x i1>.
5382	static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
5383	unsigned Depth) {
5384	// Limit recursion.
5385	if (Depth >= SelectionDAG::MaxRecursionDepth)
5386	return false;
5387	switch (Src.getOpcode()) {
5388	case ISD::SETCC:
5389	case ISD::TRUNCATE:
5390	return Src.getOperand(i: `0`).getValueSizeInBits() == Size;
5391	case ISD::FREEZE:
5392	return checkBitcastSrcVectorSize(Src: Src.getOperand(i: `0`), Size, Depth: Depth + `1`);
5393	case ISD::AND:
5394	case ISD::XOR:
5395	case ISD::OR:
5396	return checkBitcastSrcVectorSize(Src: Src.getOperand(i: `0`), Size, Depth: Depth + `1`) &&
5397	checkBitcastSrcVectorSize(Src: Src.getOperand(i: `1`), Size, Depth: Depth + `1`);
5398	case ISD::SELECT:
5399	case ISD::VSELECT:
5400	return Src.getOperand(i: `0`).getScalarValueSizeInBits() == `1` &&
5401	checkBitcastSrcVectorSize(Src: Src.getOperand(i: `1`), Size, Depth: Depth + `1`) &&
5402	checkBitcastSrcVectorSize(Src: Src.getOperand(i: `2`), Size, Depth: Depth + `1`);
5403	case ISD::BUILD_VECTOR:
5404	return ISD::isBuildVectorAllZeros(N: Src.getNode()) \|\|
5405	ISD::isBuildVectorAllOnes(N: Src.getNode());
5406	}
5407	return false;
5408	}
5409
5410	// Helper to push sign extension of vXi1 SETCC result through bitops.
5411	static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT,
5412	SDValue Src, const SDLoc &DL) {
5413	switch (Src.getOpcode()) {
5414	case ISD::SETCC:
5415	case ISD::FREEZE:
5416	case ISD::TRUNCATE:
5417	case ISD::BUILD_VECTOR:
5418	return DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: SExtVT, Operand: Src);
5419	case ISD::AND:
5420	case ISD::XOR:
5421	case ISD::OR:
5422	return DAG.getNode(
5423	Opcode: Src.getOpcode(), DL, VT: SExtVT,
5424	N1: signExtendBitcastSrcVector(DAG, SExtVT, Src: Src.getOperand(i: `0`), DL),
5425	N2: signExtendBitcastSrcVector(DAG, SExtVT, Src: Src.getOperand(i: `1`), DL));
5426	case ISD::SELECT:
5427	case ISD::VSELECT:
5428	return DAG.getSelect(
5429	DL, VT: SExtVT, Cond: Src.getOperand(i: `0`),
5430	LHS: signExtendBitcastSrcVector(DAG, SExtVT, Src: Src.getOperand(i: `1`), DL),
5431	RHS: signExtendBitcastSrcVector(DAG, SExtVT, Src: Src.getOperand(i: `2`), DL));
5432	}
5433	llvm_unreachable("Unexpected node type for vXi1 sign extension");
5434	}
5435
5436	static SDValue
5437	performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG,
5438	TargetLowering::DAGCombinerInfo &DCI,
5439	const LoongArchSubtarget &Subtarget) {
5440	SDLoc DL(N);
5441	EVT VT = N->getValueType(ResNo: `0`);
5442	SDValue Src = N->getOperand(Num: `0`);
5443	EVT SrcVT = Src.getValueType();
5444
5445	if (Src.getOpcode() != ISD::SETCC \|\| !Src.hasOneUse())
5446	return SDValue ();
5447
5448	bool UseLASX;
5449	unsigned Opc = ISD::DELETED_NODE;
5450	EVT CmpVT = Src.getOperand(i: `0`).getValueType();
5451	EVT EltVT = CmpVT.getVectorElementType();
5452
5453	if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == `128`)
5454	UseLASX = false;
5455	else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
5456	CmpVT.getSizeInBits() == `256`)
5457	UseLASX = true;
5458	else
5459	return SDValue ();
5460
5461	SDValue SrcN1 = Src.getOperand(i: `1`);
5462	switch (cast<CondCodeSDNode>(Val: Src.getOperand(i: `2`))->get()) {
5463	default:
5464	break;
5465	case ISD::SETEQ:
5466	// x == 0 => not (vmsknez.b x)
5467	if (ISD::isBuildVectorAllZeros(N: SrcN1.getNode()) && EltVT == MVT::i8)
5468	Opc = UseLASX ? LoongArchISD::XVMSKEQZ : LoongArchISD::VMSKEQZ;
5469	break;
5470	case ISD::SETGT:
5471	// x > -1 => vmskgez.b x
5472	if (ISD::isBuildVectorAllOnes(N: SrcN1.getNode()) && EltVT == MVT::i8)
5473	Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
5474	break;
5475	case ISD::SETGE:
5476	// x >= 0 => vmskgez.b x
5477	if (ISD::isBuildVectorAllZeros(N: SrcN1.getNode()) && EltVT == MVT::i8)
5478	Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
5479	break;
5480	case ISD::SETLT:
5481	// x < 0 => vmskltz.{b,h,w,d} x
5482	if (ISD::isBuildVectorAllZeros(N: SrcN1.getNode()) &&
5483	(EltVT == MVT::i8 \|\| EltVT == MVT::i16 \|\| EltVT == MVT::i32 \|\|
5484	EltVT == MVT::i64))
5485	Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5486	break;
5487	case ISD::SETLE:
5488	// x <= -1 => vmskltz.{b,h,w,d} x
5489	if (ISD::isBuildVectorAllOnes(N: SrcN1.getNode()) &&
5490	(EltVT == MVT::i8 \|\| EltVT == MVT::i16 \|\| EltVT == MVT::i32 \|\|
5491	EltVT == MVT::i64))
5492	Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5493	break;
5494	case ISD::SETNE:
5495	// x != 0 => vmsknez.b x
5496	if (ISD::isBuildVectorAllZeros(N: SrcN1.getNode()) && EltVT == MVT::i8)
5497	Opc = UseLASX ? LoongArchISD::XVMSKNEZ : LoongArchISD::VMSKNEZ;
5498	break;
5499	}
5500
5501	if (Opc == ISD::DELETED_NODE)
5502	return SDValue ();
5503
5504	SDValue V = DAG.getNode(Opcode: Opc, DL, VT: Subtarget.getGRLenVT(), Operand: Src.getOperand(i: `0`));
5505	EVT T = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: SrcVT.getVectorNumElements());
5506	V = DAG.getZExtOrTrunc(Op: V, DL, VT: T);
5507	return DAG.getBitcast(VT, V);
5508	}
5509
5510	static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
5511	TargetLowering::DAGCombinerInfo &DCI,
5512	const LoongArchSubtarget &Subtarget) {
5513	SDLoc DL(N);
5514	EVT VT = N->getValueType(ResNo: `0`);
5515	SDValue Src = N->getOperand(Num: `0`);
5516	EVT SrcVT = Src.getValueType();
5517	MVT GRLenVT = Subtarget.getGRLenVT();
5518
5519	if (!DCI.isBeforeLegalizeOps())
5520	return SDValue ();
5521
5522	if (!SrcVT.isSimple() \|\| SrcVT.getScalarType() != MVT::i1)
5523	return SDValue ();
5524
5525	// Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
5526	SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
5527	if (Res)
5528	return Res;
5529
5530	// Generate vXi1 using [X]VMSKLTZ
5531	MVT SExtVT;
5532	unsigned Opc;
5533	bool UseLASX = false;
5534	bool PropagateSExt = false;
5535
5536	if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
5537	EVT CmpVT = Src.getOperand(i: `0`).getValueType();
5538	if (CmpVT.getSizeInBits() > `256`)
5539	return SDValue ();
5540	}
5541
5542	switch (SrcVT.getSimpleVT().SimpleTy) {
5543	default:
5544	return SDValue ();
5545	case MVT::v2i1:
5546	SExtVT = MVT::v2i64;
5547	break;
5548	case MVT::v4i1:
5549	SExtVT = MVT::v4i32;
5550	if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, Size: `256`, Depth: `0`)) {
5551	SExtVT = MVT::v4i64;
5552	UseLASX = true;
5553	PropagateSExt = true;
5554	}
5555	break;
5556	case MVT::v8i1:
5557	SExtVT = MVT::v8i16;
5558	if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, Size: `256`, Depth: `0`)) {
5559	SExtVT = MVT::v8i32;
5560	UseLASX = true;
5561	PropagateSExt = true;
5562	}
5563	break;
5564	case MVT::v16i1:
5565	SExtVT = MVT::v16i8;
5566	if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, Size: `256`, Depth: `0`)) {
5567	SExtVT = MVT::v16i16;
5568	UseLASX = true;
5569	PropagateSExt = true;
5570	}
5571	break;
5572	case MVT::v32i1:
5573	SExtVT = MVT::v32i8;
5574	UseLASX = true;
5575	break;
5576	};
5577	Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
5578	: DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: SExtVT, Operand: Src);
5579
5580	SDValue V;
5581	if (!Subtarget.has32S() \|\| !Subtarget.hasExtLASX()) {
5582	if (Src.getSimpleValueType() == MVT::v32i8) {
5583	SDValue Lo, Hi;
5584	std::tie(args&: Lo, args&: Hi) = DAG.SplitVector(N: Src, DL);
5585	Lo = DAG.getNode(Opcode: LoongArchISD::VMSKLTZ, DL, VT: GRLenVT, Operand: Lo);
5586	Hi = DAG.getNode(Opcode: LoongArchISD::VMSKLTZ, DL, VT: GRLenVT, Operand: Hi);
5587	Hi = DAG.getNode(Opcode: ISD::SHL, DL, VT: GRLenVT, N1: Hi,
5588	N2: DAG.getShiftAmountConstant(Val: `16`, VT: GRLenVT, DL));
5589	V = DAG.getNode(Opcode: ISD::OR, DL, VT: GRLenVT, N1: Lo, N2: Hi);
5590	} else if (UseLASX) {
5591	return SDValue ();
5592	}
5593	}
5594
5595	if (!V) {
5596	Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5597	V = DAG.getNode(Opcode: Opc, DL, VT: GRLenVT, Operand: Src);
5598	}
5599
5600	EVT T = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: SrcVT.getVectorNumElements());
5601	V = DAG.getZExtOrTrunc(Op: V, DL, VT: T);
5602	return DAG.getBitcast(VT, V);
5603	}
5604
5605	static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
5606	TargetLowering::DAGCombinerInfo &DCI,
5607	const LoongArchSubtarget &Subtarget) {
5608	MVT GRLenVT = Subtarget.getGRLenVT();
5609	EVT ValTy = N->getValueType(ResNo: `0`);
5610	SDValue N0 = N->getOperand(Num: `0`), N1 = N->getOperand(Num: `1`);
5611	ConstantSDNode CN0, CN1;
5612	SDLoc DL(N);
5613	unsigned ValBits = ValTy.getSizeInBits();
5614	unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
5615	unsigned Shamt;
5616	bool SwapAndRetried = false;
5617
5618	// BSTRPICK requires the 32S feature.
5619	if (!Subtarget.has32S())
5620	return SDValue ();
5621
5622	if (DCI.isBeforeLegalizeOps())
5623	return SDValue ();
5624
5625	if (ValBits != `32` && ValBits != `64`)
5626	return SDValue ();
5627
5628	Retry:
5629	// 1st pattern to match BSTRINS:
5630	// R = or (and X, mask0), (and (shl Y, lsb), mask1)
5631	// where mask1 = (2size - 1) << lsb, mask0 = ~mask1
5632	// =>
5633	// R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
5634	if (N0.getOpcode() == ISD::AND &&
5635	(CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: `1`))) &&
5636	isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
5637	N1.getOpcode() == ISD::AND && N1.getOperand(i: `0`).getOpcode() == ISD::SHL &&
5638	(CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: `1`))) &&
5639	isShiftedMask_64(Value: CN1->getZExtValue(), MaskIdx&: MaskIdx1, MaskLen&: MaskLen1) &&
5640	MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
5641	(CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: `0`).getOperand(i: `1`))) &&
5642	(Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5643	(MaskIdx0 + MaskLen0 <= ValBits)) {
5644	LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
5645	return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: `0`),
5646	N2: N1.getOperand(i: `0`).getOperand(i: `0`),
5647	N3: DAG.getConstant(Val: (MaskIdx0 + MaskLen0 - `1`), DL, VT: GRLenVT),
5648	N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT));
5649	}
5650
5651	// 2nd pattern to match BSTRINS:
5652	// R = or (and X, mask0), (shl (and Y, mask1), lsb)
5653	// where mask1 = (2size - 1), mask0 = ~(mask1 << lsb)
5654	// =>
5655	// R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
5656	if (N0.getOpcode() == ISD::AND &&
5657	(CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: `1`))) &&
5658	isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
5659	N1.getOpcode() == ISD::SHL && N1.getOperand(i: `0`).getOpcode() == ISD::AND &&
5660	(CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: `1`))) &&
5661	(Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5662	(CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: `0`).getOperand(i: `1`))) &&
5663	isShiftedMask_64(Value: CN1->getZExtValue(), MaskIdx&: MaskIdx1, MaskLen&: MaskLen1) &&
5664	MaskLen0 == MaskLen1 && MaskIdx1 == `0` &&
5665	(MaskIdx0 + MaskLen0 <= ValBits)) {
5666	LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
5667	return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: `0`),
5668	N2: N1.getOperand(i: `0`).getOperand(i: `0`),
5669	N3: DAG.getConstant(Val: (MaskIdx0 + MaskLen0 - `1`), DL, VT: GRLenVT),
5670	N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT));
5671	}
5672
5673	// 3rd pattern to match BSTRINS:
5674	// R = or (and X, mask0), (and Y, mask1)
5675	// where ~mask0 = (2size - 1) << lsb, mask0 & mask1 = 0
5676	// =>
5677	// R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
5678	// where msb = lsb + size - 1
5679	if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5680	(CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: `1`))) &&
5681	isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
5682	(MaskIdx0 + MaskLen0 <= `64`) &&
5683	(CN1 = dyn_cast<ConstantSDNode>(Val: N1 ->getOperand(Num: `1`))) &&
5684	(CN1->getSExtValue() & CN0->getSExtValue()) == `0`) {
5685	LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
5686	return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: `0`),
5687	N2: DAG.getNode(Opcode: ISD::SRL, DL, VT: N1 ->getValueType(ResNo: `0`), N1,
5688	N2: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT)),
5689	N3: DAG.getConstant(Val: ValBits == `32`
5690	? (MaskIdx0 + (MaskLen0 & `31`) - `1`)
5691	: (MaskIdx0 + MaskLen0 - `1`),
5692	DL, VT: GRLenVT),
5693	N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT));
5694	}
5695
5696	// 4th pattern to match BSTRINS:
5697	// R = or (and X, mask), (shl Y, shamt)
5698	// where mask = (2shamt - 1)
5699	// =>
5700	// R = BSTRINS X, Y, ValBits - 1, shamt
5701	// where ValBits = 32 or 64
5702	if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
5703	(CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: `1`))) &&
5704	isShiftedMask_64(Value: CN0->getZExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
5705	MaskIdx0 == `0` && (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: `1`))) &&
5706	(Shamt = CN1->getZExtValue()) == MaskLen0 &&
5707	(MaskIdx0 + MaskLen0 <= ValBits)) {
5708	LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
5709	return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: `0`),
5710	N2: N1.getOperand(i: `0`),
5711	N3: DAG.getConstant(Val: (ValBits - `1`), DL, VT: GRLenVT),
5712	N4: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT));
5713	}
5714
5715	// 5th pattern to match BSTRINS:
5716	// R = or (and X, mask), const
5717	// where ~mask = (2size - 1) << lsb, mask & const = 0
5718	// =>
5719	// R = BSTRINS X, (const >> lsb), msb, lsb
5720	// where msb = lsb + size - 1
5721	if (N0.getOpcode() == ISD::AND &&
5722	(CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: `1`))) &&
5723	isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
5724	(CN1 = dyn_cast<ConstantSDNode>(Val&: N1)) &&
5725	(CN1->getSExtValue() & CN0->getSExtValue()) == `0`) {
5726	LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
5727	return DAG.getNode(
5728	Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: `0`),
5729	N2: DAG.getSignedConstant(Val: CN1->getSExtValue() >> MaskIdx0, DL, VT: ValTy),
5730	N3: DAG.getConstant(Val: ValBits == `32` ? (MaskIdx0 + (MaskLen0 & `31`) - `1`)
5731	: (MaskIdx0 + MaskLen0 - `1`),
5732	DL, VT: GRLenVT),
5733	N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT));
5734	}
5735
5736	// 6th pattern.
5737	// a = b \| ((c & mask) << shamt), where all positions in b to be overwritten
5738	// by the incoming bits are known to be zero.
5739	// =>
5740	// a = BSTRINS b, c, shamt + MaskLen - 1, shamt
5741	//
5742	// Note that the 1st pattern is a special situation of the 6th, i.e. the 6th
5743	// pattern is more common than the 1st. So we put the 1st before the 6th in
5744	// order to match as many nodes as possible.
5745	ConstantSDNode CNMask, CNShamt;
5746	unsigned MaskIdx, MaskLen;
5747	if (N1.getOpcode() == ISD::SHL && N1.getOperand(i: `0`).getOpcode() == ISD::AND &&
5748	(CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: `0`).getOperand(i: `1`))) &&
5749	isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5750	MaskIdx == `0` && (CNShamt = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: `1`))) &&
5751	CNShamt->getZExtValue() + MaskLen <= ValBits) {
5752	Shamt = CNShamt->getZExtValue();
5753	APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
5754	if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) {
5755	LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
5756	return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0,
5757	N2: N1.getOperand(i: `0`).getOperand(i: `0`),
5758	N3: DAG.getConstant(Val: Shamt + MaskLen - `1`, DL, VT: GRLenVT),
5759	N4: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT));
5760	}
5761	}
5762
5763	// 7th pattern.
5764	// a = b \| ((c << shamt) & shifted_mask), where all positions in b to be
5765	// overwritten by the incoming bits are known to be zero.
5766	// =>
5767	// a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
5768	//
5769	// Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
5770	// before the 7th in order to match as many nodes as possible.
5771	if (N1.getOpcode() == ISD::AND &&
5772	(CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: `1`))) &&
5773	isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5774	N1.getOperand(i: `0`).getOpcode() == ISD::SHL &&
5775	(CNShamt = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: `0`).getOperand(i: `1`))) &&
5776	CNShamt->getZExtValue() == MaskIdx) {
5777	APInt ShMask(ValBits, CNMask->getZExtValue());
5778	if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) {
5779	LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
5780	return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0,
5781	N2: N1.getOperand(i: `0`).getOperand(i: `0`),
5782	N3: DAG.getConstant(Val: MaskIdx + MaskLen - `1`, DL, VT: GRLenVT),
5783	N4: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT));
5784	}
5785	}
5786
5787	// (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
5788	if (!SwapAndRetried) {
5789	std::swap(a&: N0, b&: N1);
5790	SwapAndRetried = true;
5791	goto Retry;
5792	}
5793
5794	SwapAndRetried = false;
5795	Retry2:
5796	// 8th pattern.
5797	// a = b \| (c & shifted_mask), where all positions in b to be overwritten by
5798	// the incoming bits are known to be zero.
5799	// =>
5800	// a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
5801	//
5802	// Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
5803	// we put it here in order to match as many nodes as possible or generate less
5804	// instructions.
5805	if (N1.getOpcode() == ISD::AND &&
5806	(CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: `1`))) &&
5807	isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen)) {
5808	APInt ShMask(ValBits, CNMask->getZExtValue());
5809	if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) {
5810	LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
5811	return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0,
5812	N2: DAG.getNode(Opcode: ISD::SRL, DL, VT: N1 ->getValueType(ResNo: `0`),
5813	N1: N1 ->getOperand(Num: `0`),
5814	N2: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT)),
5815	N3: DAG.getConstant(Val: MaskIdx + MaskLen - `1`, DL, VT: GRLenVT),
5816	N4: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT));
5817	}
5818	}
5819	// Swap N0/N1 and retry.
5820	if (!SwapAndRetried) {
5821	std::swap(a&: N0, b&: N1);
5822	SwapAndRetried = true;
5823	goto Retry2;
5824	}
5825
5826	return SDValue ();
5827	}
5828
5829	static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
5830	ExtType = ISD::NON_EXTLOAD;
5831
5832	switch (V.getNode()->getOpcode()) {
5833	case ISD::LOAD: {
5834	LoadSDNode *LoadNode = cast<LoadSDNode>(Val: V.getNode());
5835	if ((LoadNode->getMemoryVT() == MVT::i8) \|\|
5836	(LoadNode->getMemoryVT() == MVT::i16)) {
5837	ExtType = LoadNode->getExtensionType();
5838	return true;
5839	}
5840	return false;
5841	}
5842	case ISD::AssertSext: {
5843	VTSDNode *TypeNode = cast<VTSDNode>(Val: V.getNode()->getOperand(Num: `1`));
5844	if ((TypeNode->getVT() == MVT::i8) \|\| (TypeNode->getVT() == MVT::i16)) {
5845	ExtType = ISD::SEXTLOAD;
5846	return true;
5847	}
5848	return false;
5849	}
5850	case ISD::AssertZext: {
5851	VTSDNode *TypeNode = cast<VTSDNode>(Val: V.getNode()->getOperand(Num: `1`));
5852	if ((TypeNode->getVT() == MVT::i8) \|\| (TypeNode->getVT() == MVT::i16)) {
5853	ExtType = ISD::ZEXTLOAD;
5854	return true;
5855	}
5856	return false;
5857	}
5858	default:
5859	return false;
5860	}
5861
5862	return false;
5863	}
5864
// Eliminate redundant truncation and zero-extension nodes.
// * Case 1:
//  +------------+ +------------+ +------------+
//  |   Input1   | |   Input2   | |     CC     |
//  +------------+ +------------+ +------------+
//         |              |              |
//         V              V              +----+
//  +------------+ +------------+             |
//  |  TRUNCATE  | |  TRUNCATE  |             |
//  +------------+ +------------+             |
//         |              |                   |
//         V              V                   |
//  +------------+ +------------+             |
//  |  ZERO_EXT  | |  ZERO_EXT  |             |
//  +------------+ +------------+             |
//         |              |                   |
//         |              +-------------+     |
//         V              V             |     |
//        +----------------+            |     |
//        |      AND       |            |     |
//        +----------------+            |     |
//                |                     |     |
//                +---------------+     |     |
//                                |     |     |
//                                V     V     V
//                               +-------------+
//                               |     CMP     |
//                               +-------------+
// * Case 2:
//  +------------+ +------------+ +-------------+ +------------+ +------------+
//  |   Input1   | |   Input2   | | Constant -1 | | Constant 0 | |     CC     |
//  +------------+ +------------+ +-------------+ +------------+ +------------+
//         |              |             |               |               |
//         V              |             |               |               |
//  +------------+        |             |               |               |
//  |    XOR     |<---------------------+               |               |
//  +------------+        |                             |               |
//         |              |                             |               |
//         V              V             +---------------+               |
//  +------------+ +------------+       |                               |
//  |  TRUNCATE  | |  TRUNCATE  |       |     +-------------------------+
//  +------------+ +------------+       |     |
//         |              |             |     |
//         V              V             |     |
//  +------------+ +------------+       |     |
//  |  ZERO_EXT  | |  ZERO_EXT  |       |     |
//  +------------+ +------------+       |     |
//         |              |             |     |
//         V              V             |     |
//        +----------------+            |     |
//        |      AND       |            |     |
//        +----------------+            |     |
//                |                     |     |
//                +---------------+     |     |
//                                |     |     |
//                                V     V     V
//                               +-------------+
//                               |     CMP     |
//                               +-------------+
5924	static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
5925	TargetLowering::DAGCombinerInfo &DCI,
5926	const LoongArchSubtarget &Subtarget) {
5927	ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: `2`))->get();
5928
5929	SDNode *AndNode = N->getOperand(Num: `0`).getNode();
5930	if (AndNode->getOpcode() != ISD::AND)
5931	return SDValue ();
5932
5933	SDValue AndInputValue2 = AndNode->getOperand(Num: `1`);
5934	if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
5935	return SDValue ();
5936
5937	SDValue CmpInputValue = N->getOperand(Num: `1`);
5938	SDValue AndInputValue1 = AndNode->getOperand(Num: `0`);
5939	if (AndInputValue1.getOpcode() == ISD::XOR) {
5940	if (CC != ISD::SETEQ && CC != ISD::SETNE)
5941	return SDValue ();
5942	ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val: AndInputValue1.getOperand(i: `1`));
5943	if (!CN \|\| !CN->isAllOnes())
5944	return SDValue ();
5945	CN = dyn_cast<ConstantSDNode>(Val&: CmpInputValue);
5946	if (!CN \|\| !CN->isZero())
5947	return SDValue ();
5948	AndInputValue1 = AndInputValue1.getOperand(i: `0`);
5949	if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
5950	return SDValue ();
5951	} else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
5952	if (AndInputValue2 != CmpInputValue)
5953	return SDValue ();
5954	} else {
5955	return SDValue ();
5956	}
5957
5958	SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(Num: `0`);
5959	if (TruncValue1.getOpcode() != ISD::TRUNCATE)
5960	return SDValue ();
5961
5962	SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(Num: `0`);
5963	if (TruncValue2.getOpcode() != ISD::TRUNCATE)
5964	return SDValue ();
5965
5966	SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(Num: `0`);
5967	SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(Num: `0`);
5968	ISD::LoadExtType ExtType1;
5969	ISD::LoadExtType ExtType2;
5970
5971	if (!checkValueWidth(V: TruncInputValue1, ExtType&: ExtType1) \|\|
5972	!checkValueWidth(V: TruncInputValue2, ExtType&: ExtType2))
5973	return SDValue ();
5974
5975	if (TruncInputValue1 ->getValueType(ResNo: `0`) != TruncInputValue2 ->getValueType(ResNo: `0`) \|\|
5976	AndNode->getValueType(ResNo: `0`) != TruncInputValue1 ->getValueType(ResNo: `0`))
5977	return SDValue ();
5978
5979	if ((ExtType2 != ISD::ZEXTLOAD) &&
5980	((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
5981	return SDValue ();
5982
5983	// These truncation and zero-extension nodes are not necessary, remove them.
5984	SDValue NewAnd = DAG.getNode(Opcode: ISD::AND, DL: SDLoc (N), VT: AndNode->getValueType(ResNo: `0`),
5985	N1: TruncInputValue1, N2: TruncInputValue2);
5986	SDValue NewSetCC =
5987	DAG.getSetCC(DL: SDLoc (N), VT: N->getValueType(ResNo: `0`), LHS: NewAnd, RHS: TruncInputValue2, Cond: CC);
5988	DAG.ReplaceAllUsesWith(From: N, To: NewSetCC.getNode());
5989	return SDValue (N, `0`);
5990	}
5991
5992	// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
5993	static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
5994	TargetLowering::DAGCombinerInfo &DCI,
5995	const LoongArchSubtarget &Subtarget) {
5996	if (DCI.isBeforeLegalizeOps())
5997	return SDValue ();
5998
5999	SDValue Src = N->getOperand(Num: `0`);
6000	if (Src.getOpcode() != LoongArchISD::REVB_2W)
6001	return SDValue ();
6002
6003	return DAG.getNode(Opcode: LoongArchISD::BITREV_4B, DL: SDLoc (N), VT: N->getValueType(ResNo: `0`),
6004	Operand: Src.getOperand(i: `0`));
6005	}
6006
6007	// Perform common combines for BR_CC and SELECT_CC conditions.
6008	static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
6009	SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
6010	ISD::CondCode CCVal = cast<CondCodeSDNode>(Val&: CC)->get();
6011
6012	// As far as arithmetic right shift always saves the sign,
6013	// shift can be omitted.
6014	// Fold setlt (sra X, N), 0 -> setlt X, 0 and
6015	// setge (sra X, N), 0 -> setge X, 0
6016	if (isNullConstant(V: RHS) && (CCVal == ISD::SETGE \|\| CCVal == ISD::SETLT) &&
6017	LHS.getOpcode() == ISD::SRA) {
6018	LHS = LHS.getOperand(i: `0`);
6019	return true;
6020	}
6021
6022	if (!ISD::isIntEqualitySetCC(Code: CCVal))
6023	return false;
6024
6025	// Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
6026	// Sometimes the setcc is introduced after br_cc/select_cc has been formed.
6027	if (LHS.getOpcode() == ISD::SETCC && isNullConstant(V: RHS) &&
6028	LHS.getOperand(i: `0`).getValueType() == Subtarget.getGRLenVT()) {
6029	// If we're looking for eq 0 instead of ne 0, we need to invert the
6030	// condition.
6031	bool Invert = CCVal == ISD::SETEQ;
6032	CCVal = cast<CondCodeSDNode>(Val: LHS.getOperand(i: `2`))->get();
6033	if (Invert)
6034	CCVal = ISD::getSetCCInverse(Operation: CCVal, Type: LHS.getValueType());
6035
6036	RHS = LHS.getOperand(i: `1`);
6037	LHS = LHS.getOperand(i: `0`);
6038	translateSetCCForBranch(DL, LHS, RHS, CC&: CCVal, DAG);
6039
6040	CC = DAG.getCondCode(Cond: CCVal);
6041	return true;
6042	}
6043
6044	// Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, GRLen-1-C), 0, ge/lt)
6045	if (isNullConstant(V: RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
6046	LHS.getOperand(i: `1`).getOpcode() == ISD::Constant) {
6047	SDValue LHS0 = LHS.getOperand(i: `0`);
6048	if (LHS0.getOpcode() == ISD::AND &&
6049	LHS0.getOperand(i: `1`).getOpcode() == ISD::Constant) {
6050	uint64_t Mask = LHS0.getConstantOperandVal(i: `1`);
6051	uint64_t ShAmt = LHS.getConstantOperandVal(i: `1`);
6052	if (isPowerOf2_64(Value: Mask) && Log2_64(Value: Mask) == ShAmt) {
6053	CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
6054	CC = DAG.getCondCode(Cond: CCVal);
6055
6056	ShAmt = LHS.getValueSizeInBits() - `1` - ShAmt;
6057	LHS = LHS0.getOperand(i: `0`);
6058	if (ShAmt != `0`)
6059	LHS =
6060	DAG.getNode(Opcode: ISD::SHL, DL, VT: LHS.getValueType(), N1: LHS0.getOperand(i: `0`),
6061	N2: DAG.getConstant(Val: ShAmt, DL, VT: LHS.getValueType()));
6062	return true;
6063	}
6064	}
6065	}
6066
6067	// (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
6068	// This can occur when legalizing some floating point comparisons.
6069	APInt Mask = APInt::getBitsSetFrom(numBits: LHS.getValueSizeInBits(), loBit: `1`);
6070	if (isOneConstant(V: RHS) && DAG.MaskedValueIsZero(Op: LHS, Mask)) {
6071	CCVal = ISD::getSetCCInverse(Operation: CCVal, Type: LHS.getValueType());
6072	CC = DAG.getCondCode(Cond: CCVal);
6073	RHS = DAG.getConstant(Val: `0`, DL, VT: LHS.getValueType());
6074	return true;
6075	}
6076
6077	return false;
6078	}
6079
6080	static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG,
6081	TargetLowering::DAGCombinerInfo &DCI,
6082	const LoongArchSubtarget &Subtarget) {
6083	SDValue LHS = N->getOperand(Num: `1`);
6084	SDValue RHS = N->getOperand(Num: `2`);
6085	SDValue CC = N->getOperand(Num: `3`);
6086	SDLoc DL(N);
6087
6088	if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
6089	return DAG.getNode(Opcode: LoongArchISD::BR_CC, DL, VT: N->getValueType(ResNo: `0`),
6090	N1: N->getOperand(Num: `0`), N2: LHS, N3: RHS, N4: CC, N5: N->getOperand(Num: `4`));
6091
6092	return SDValue ();
6093	}
6094
6095	static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
6096	TargetLowering::DAGCombinerInfo &DCI,
6097	const LoongArchSubtarget &Subtarget) {
6098	// Transform
6099	SDValue LHS = N->getOperand(Num: `0`);
6100	SDValue RHS = N->getOperand(Num: `1`);
6101	SDValue CC = N->getOperand(Num: `2`);
6102	ISD::CondCode CCVal = cast<CondCodeSDNode>(Val&: CC)->get();
6103	SDValue TrueV = N->getOperand(Num: `3`);
6104	SDValue FalseV = N->getOperand(Num: `4`);
6105	SDLoc DL(N);
6106	EVT VT = N->getValueType(ResNo: `0`);
6107
6108	// If the True and False values are the same, we don't need a select_cc.
6109	if (TrueV == FalseV)
6110	return TrueV;
6111
6112	// (select (x < 0), y, z) -> x >> (GRLEN - 1) & (y - z) + z
6113	// (select (x >= 0), y, z) -> x >> (GRLEN - 1) & (z - y) + y
6114	if (isa<ConstantSDNode>(Val: TrueV) && isa<ConstantSDNode>(Val: FalseV) &&
6115	isNullConstant(V: RHS) &&
6116	(CCVal == ISD::CondCode::SETLT \|\| CCVal == ISD::CondCode::SETGE)) {
6117	if (CCVal == ISD::CondCode::SETGE)
6118	std::swap(a&: TrueV, b&: FalseV);
6119
6120	int64_t TrueSImm = cast<ConstantSDNode>(Val&: TrueV)->getSExtValue();
6121	int64_t FalseSImm = cast<ConstantSDNode>(Val&: FalseV)->getSExtValue();
6122	// Only handle simm12, if it is not in this range, it can be considered as
6123	// register.
6124	if (isInt<`12`>(x: TrueSImm) && isInt<`12`>(x: FalseSImm) &&
6125	isInt<`12`>(x: TrueSImm - FalseSImm)) {
6126	SDValue SRA =
6127	DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: LHS,
6128	N2: DAG.getConstant(Val: Subtarget.getGRLen() - `1`, DL, VT));
6129	SDValue AND =
6130	DAG.getNode(Opcode: ISD::AND, DL, VT, N1: SRA,
6131	N2: DAG.getSignedConstant(Val: TrueSImm - FalseSImm, DL, VT));
6132	return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: AND, N2: FalseV);
6133	}
6134
6135	if (CCVal == ISD::CondCode::SETGE)
6136	std::swap(a&: TrueV, b&: FalseV);
6137	}
6138
6139	if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
6140	return DAG.getNode(Opcode: LoongArchISD::SELECT_CC, DL, VT: N->getValueType(ResNo: `0`),
6141	Ops: {LHS, RHS, CC, TrueV, FalseV});
6142
6143	return SDValue ();
6144	}
6145
6146	template <unsigned N>
6147	static SDValue legalizeIntrinsicImmArg(SDNode Node, unsigned* ImmOp,
6148	SelectionDAG &DAG,
6149	const LoongArchSubtarget &Subtarget,
6150	bool IsSigned = false) {
6151	SDLoc DL(Node);
6152	auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: ImmOp));
6153	// Check the ImmArg.
6154	if ((IsSigned && !isInt<N>(CImm->getSExtValue())) \|\|
6155	(!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
6156	DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: `0`) +
6157	": argument out of range.");
6158	return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: Subtarget.getGRLenVT());
6159	}
6160	return DAG.getConstant(Val: CImm->getZExtValue(), DL, VT: Subtarget.getGRLenVT());
6161	}
6162
6163	template <unsigned N>
6164	static SDValue lowerVectorSplatImm(SDNode Node, unsigned* ImmOp,
6165	SelectionDAG &DAG, bool IsSigned = false) {
6166	SDLoc DL(Node);
6167	EVT ResTy = Node->getValueType(ResNo: `0`);
6168	auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: ImmOp));
6169
6170	// Check the ImmArg.
6171	if ((IsSigned && !isInt<N>(CImm->getSExtValue())) \|\|
6172	(!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
6173	DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: `0`) +
6174	": argument out of range.");
6175	return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy);
6176	}
6177	return DAG.getConstant(
6178	Val: APInt (ResTy.getScalarType().getSizeInBits(),
6179	IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
6180	DL, VT: ResTy);
6181	}
6182
6183	static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
6184	SDLoc DL(Node);
6185	EVT ResTy = Node->getValueType(ResNo: `0`);
6186	SDValue Vec = Node->getOperand(Num: `2`);
6187	SDValue Mask = DAG.getConstant(Val: Vec.getScalarValueSizeInBits() - `1`, DL, VT: ResTy);
6188	return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Vec, N2: Mask);
6189	}
6190
6191	static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
6192	SDLoc DL(Node);
6193	EVT ResTy = Node->getValueType(ResNo: `0`);
6194	SDValue One = DAG.getConstant(Val: `1`, DL, VT: ResTy);
6195	SDValue Bit =
6196	DAG.getNode(Opcode: ISD::SHL, DL, VT: ResTy, N1: One, N2: truncateVecElts(Node, DAG));
6197
6198	return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Node->getOperand(Num: `1`),
6199	N2: DAG.getNOT(DL, Val: Bit, VT: ResTy));
6200	}
6201
6202	template <unsigned N>
6203	static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
6204	SDLoc DL(Node);
6205	EVT ResTy = Node->getValueType(ResNo: `0`);
6206	auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: `2`));
6207	// Check the unsigned ImmArg.
6208	if (!isUInt<N>(CImm->getZExtValue())) {
6209	DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: `0`) +
6210	": argument out of range.");
6211	return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy);
6212	}
6213
6214	APInt BitImm = APInt (ResTy.getScalarSizeInBits(), `1`) << CImm->getAPIntValue();
6215	SDValue Mask = DAG.getConstant(Val: ~BitImm, DL, VT: ResTy);
6216
6217	return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Node->getOperand(Num: `1`), N2: Mask);
6218	}
6219
6220	template <unsigned N>
6221	static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
6222	SDLoc DL(Node);
6223	EVT ResTy = Node->getValueType(ResNo: `0`);
6224	auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: `2`));
6225	// Check the unsigned ImmArg.
6226	if (!isUInt<N>(CImm->getZExtValue())) {
6227	DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: `0`) +
6228	": argument out of range.");
6229	return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy);
6230	}
6231
6232	APInt Imm = APInt (ResTy.getScalarSizeInBits(), `1`) << CImm->getAPIntValue();
6233	SDValue BitImm = DAG.getConstant(Val: Imm, DL, VT: ResTy);
6234	return DAG.getNode(Opcode: ISD::OR, DL, VT: ResTy, N1: Node->getOperand(Num: `1`), N2: BitImm);
6235	}
6236
6237	template <unsigned N>
6238	static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
6239	SDLoc DL(Node);
6240	EVT ResTy = Node->getValueType(ResNo: `0`);
6241	auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: `2`));
6242	// Check the unsigned ImmArg.
6243	if (!isUInt<N>(CImm->getZExtValue())) {
6244	DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: `0`) +
6245	": argument out of range.");
6246	return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy);
6247	}
6248
6249	APInt Imm = APInt (ResTy.getScalarSizeInBits(), `1`) << CImm->getAPIntValue();
6250	SDValue BitImm = DAG.getConstant(Val: Imm, DL, VT: ResTy);
6251	return DAG.getNode(Opcode: ISD::XOR, DL, VT: ResTy, N1: Node->getOperand(Num: `1`), N2: BitImm);
6252	}
6253
6254	template <unsigned W>
6255	static SDValue lowerVectorPickVE2GR(SDNode *N, SelectionDAG &DAG,
6256	unsigned ResOp) {
6257	unsigned Imm = N->getConstantOperandVal(Num: `2`);
6258	if (!isUInt<W>(Imm)) {
6259	const StringRef ErrorMsg = "argument out of range";
6260	DAG.getContext()->emitError(ErrorStr: N->getOperationName(G: `0`) + ": " + ErrorMsg + ".");
6261	return DAG.getUNDEF(VT: N->getValueType(ResNo: `0`));
6262	}
6263	SDLoc DL(N);
6264	SDValue Vec = N->getOperand(Num: `1`);
6265	SDValue Idx = DAG.getConstant(Val: Imm, DL, VT: MVT::i32);
6266	SDValue EltVT = DAG.getValueType(Vec.getValueType().getVectorElementType());
6267	return DAG.getNode(Opcode: ResOp, DL, VT: N->getValueType(ResNo: `0`), N1: Vec, N2: Idx, N3: EltVT);
6268	}
6269
6270	static SDValue
6271	performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
6272	TargetLowering::DAGCombinerInfo &DCI,
6273	const LoongArchSubtarget &Subtarget) {
6274	SDLoc DL(N);
6275	switch (N->getConstantOperandVal(Num: `0`)) {
6276	default:
6277	break;
6278	case Intrinsic::loongarch_lsx_vadd_b:
6279	case Intrinsic::loongarch_lsx_vadd_h:
6280	case Intrinsic::loongarch_lsx_vadd_w:
6281	case Intrinsic::loongarch_lsx_vadd_d:
6282	case Intrinsic::loongarch_lasx_xvadd_b:
6283	case Intrinsic::loongarch_lasx_xvadd_h:
6284	case Intrinsic::loongarch_lasx_xvadd_w:
6285	case Intrinsic::loongarch_lasx_xvadd_d:
6286	return DAG.getNode(Opcode: ISD::ADD, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6287	N2: N->getOperand(Num: `2`));
6288	case Intrinsic::loongarch_lsx_vaddi_bu:
6289	case Intrinsic::loongarch_lsx_vaddi_hu:
6290	case Intrinsic::loongarch_lsx_vaddi_wu:
6291	case Intrinsic::loongarch_lsx_vaddi_du:
6292	case Intrinsic::loongarch_lasx_xvaddi_bu:
6293	case Intrinsic::loongarch_lasx_xvaddi_hu:
6294	case Intrinsic::loongarch_lasx_xvaddi_wu:
6295	case Intrinsic::loongarch_lasx_xvaddi_du:
6296	return DAG.getNode(Opcode: ISD::ADD, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6297	N2: lowerVectorSplatImm<`5`>(Node: N, ImmOp: `2`, DAG));
6298	case Intrinsic::loongarch_lsx_vsub_b:
6299	case Intrinsic::loongarch_lsx_vsub_h:
6300	case Intrinsic::loongarch_lsx_vsub_w:
6301	case Intrinsic::loongarch_lsx_vsub_d:
6302	case Intrinsic::loongarch_lasx_xvsub_b:
6303	case Intrinsic::loongarch_lasx_xvsub_h:
6304	case Intrinsic::loongarch_lasx_xvsub_w:
6305	case Intrinsic::loongarch_lasx_xvsub_d:
6306	return DAG.getNode(Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6307	N2: N->getOperand(Num: `2`));
6308	case Intrinsic::loongarch_lsx_vsubi_bu:
6309	case Intrinsic::loongarch_lsx_vsubi_hu:
6310	case Intrinsic::loongarch_lsx_vsubi_wu:
6311	case Intrinsic::loongarch_lsx_vsubi_du:
6312	case Intrinsic::loongarch_lasx_xvsubi_bu:
6313	case Intrinsic::loongarch_lasx_xvsubi_hu:
6314	case Intrinsic::loongarch_lasx_xvsubi_wu:
6315	case Intrinsic::loongarch_lasx_xvsubi_du:
6316	return DAG.getNode(Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6317	N2: lowerVectorSplatImm<`5`>(Node: N, ImmOp: `2`, DAG));
6318	case Intrinsic::loongarch_lsx_vneg_b:
6319	case Intrinsic::loongarch_lsx_vneg_h:
6320	case Intrinsic::loongarch_lsx_vneg_w:
6321	case Intrinsic::loongarch_lsx_vneg_d:
6322	case Intrinsic::loongarch_lasx_xvneg_b:
6323	case Intrinsic::loongarch_lasx_xvneg_h:
6324	case Intrinsic::loongarch_lasx_xvneg_w:
6325	case Intrinsic::loongarch_lasx_xvneg_d:
6326	return DAG.getNode(
6327	Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: `0`),
6328	N1: DAG.getConstant(
6329	Val: APInt (N->getValueType(ResNo: `0`).getScalarType().getSizeInBits(), `0`,
6330	/isSigned=/true),
6331	DL: SDLoc (N), VT: N->getValueType(ResNo: `0`)),
6332	N2: N->getOperand(Num: `1`));
6333	case Intrinsic::loongarch_lsx_vmax_b:
6334	case Intrinsic::loongarch_lsx_vmax_h:
6335	case Intrinsic::loongarch_lsx_vmax_w:
6336	case Intrinsic::loongarch_lsx_vmax_d:
6337	case Intrinsic::loongarch_lasx_xvmax_b:
6338	case Intrinsic::loongarch_lasx_xvmax_h:
6339	case Intrinsic::loongarch_lasx_xvmax_w:
6340	case Intrinsic::loongarch_lasx_xvmax_d:
6341	return DAG.getNode(Opcode: ISD::SMAX, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6342	N2: N->getOperand(Num: `2`));
6343	case Intrinsic::loongarch_lsx_vmax_bu:
6344	case Intrinsic::loongarch_lsx_vmax_hu:
6345	case Intrinsic::loongarch_lsx_vmax_wu:
6346	case Intrinsic::loongarch_lsx_vmax_du:
6347	case Intrinsic::loongarch_lasx_xvmax_bu:
6348	case Intrinsic::loongarch_lasx_xvmax_hu:
6349	case Intrinsic::loongarch_lasx_xvmax_wu:
6350	case Intrinsic::loongarch_lasx_xvmax_du:
6351	return DAG.getNode(Opcode: ISD::UMAX, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6352	N2: N->getOperand(Num: `2`));
6353	case Intrinsic::loongarch_lsx_vmaxi_b:
6354	case Intrinsic::loongarch_lsx_vmaxi_h:
6355	case Intrinsic::loongarch_lsx_vmaxi_w:
6356	case Intrinsic::loongarch_lsx_vmaxi_d:
6357	case Intrinsic::loongarch_lasx_xvmaxi_b:
6358	case Intrinsic::loongarch_lasx_xvmaxi_h:
6359	case Intrinsic::loongarch_lasx_xvmaxi_w:
6360	case Intrinsic::loongarch_lasx_xvmaxi_d:
6361	return DAG.getNode(Opcode: ISD::SMAX, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6362	N2: lowerVectorSplatImm<`5`>(Node: N, ImmOp: `2`, DAG, /IsSigned=/true));
6363	case Intrinsic::loongarch_lsx_vmaxi_bu:
6364	case Intrinsic::loongarch_lsx_vmaxi_hu:
6365	case Intrinsic::loongarch_lsx_vmaxi_wu:
6366	case Intrinsic::loongarch_lsx_vmaxi_du:
6367	case Intrinsic::loongarch_lasx_xvmaxi_bu:
6368	case Intrinsic::loongarch_lasx_xvmaxi_hu:
6369	case Intrinsic::loongarch_lasx_xvmaxi_wu:
6370	case Intrinsic::loongarch_lasx_xvmaxi_du:
6371	return DAG.getNode(Opcode: ISD::UMAX, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6372	N2: lowerVectorSplatImm<`5`>(Node: N, ImmOp: `2`, DAG));
6373	case Intrinsic::loongarch_lsx_vmin_b:
6374	case Intrinsic::loongarch_lsx_vmin_h:
6375	case Intrinsic::loongarch_lsx_vmin_w:
6376	case Intrinsic::loongarch_lsx_vmin_d:
6377	case Intrinsic::loongarch_lasx_xvmin_b:
6378	case Intrinsic::loongarch_lasx_xvmin_h:
6379	case Intrinsic::loongarch_lasx_xvmin_w:
6380	case Intrinsic::loongarch_lasx_xvmin_d:
6381	return DAG.getNode(Opcode: ISD::SMIN, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6382	N2: N->getOperand(Num: `2`));
6383	case Intrinsic::loongarch_lsx_vmin_bu:
6384	case Intrinsic::loongarch_lsx_vmin_hu:
6385	case Intrinsic::loongarch_lsx_vmin_wu:
6386	case Intrinsic::loongarch_lsx_vmin_du:
6387	case Intrinsic::loongarch_lasx_xvmin_bu:
6388	case Intrinsic::loongarch_lasx_xvmin_hu:
6389	case Intrinsic::loongarch_lasx_xvmin_wu:
6390	case Intrinsic::loongarch_lasx_xvmin_du:
6391	return DAG.getNode(Opcode: ISD::UMIN, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6392	N2: N->getOperand(Num: `2`));
6393	case Intrinsic::loongarch_lsx_vmini_b:
6394	case Intrinsic::loongarch_lsx_vmini_h:
6395	case Intrinsic::loongarch_lsx_vmini_w:
6396	case Intrinsic::loongarch_lsx_vmini_d:
6397	case Intrinsic::loongarch_lasx_xvmini_b:
6398	case Intrinsic::loongarch_lasx_xvmini_h:
6399	case Intrinsic::loongarch_lasx_xvmini_w:
6400	case Intrinsic::loongarch_lasx_xvmini_d:
6401	return DAG.getNode(Opcode: ISD::SMIN, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6402	N2: lowerVectorSplatImm<`5`>(Node: N, ImmOp: `2`, DAG, /IsSigned=/true));
6403	case Intrinsic::loongarch_lsx_vmini_bu:
6404	case Intrinsic::loongarch_lsx_vmini_hu:
6405	case Intrinsic::loongarch_lsx_vmini_wu:
6406	case Intrinsic::loongarch_lsx_vmini_du:
6407	case Intrinsic::loongarch_lasx_xvmini_bu:
6408	case Intrinsic::loongarch_lasx_xvmini_hu:
6409	case Intrinsic::loongarch_lasx_xvmini_wu:
6410	case Intrinsic::loongarch_lasx_xvmini_du:
6411	return DAG.getNode(Opcode: ISD::UMIN, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6412	N2: lowerVectorSplatImm<`5`>(Node: N, ImmOp: `2`, DAG));
6413	case Intrinsic::loongarch_lsx_vmul_b:
6414	case Intrinsic::loongarch_lsx_vmul_h:
6415	case Intrinsic::loongarch_lsx_vmul_w:
6416	case Intrinsic::loongarch_lsx_vmul_d:
6417	case Intrinsic::loongarch_lasx_xvmul_b:
6418	case Intrinsic::loongarch_lasx_xvmul_h:
6419	case Intrinsic::loongarch_lasx_xvmul_w:
6420	case Intrinsic::loongarch_lasx_xvmul_d:
6421	return DAG.getNode(Opcode: ISD::MUL, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6422	N2: N->getOperand(Num: `2`));
6423	case Intrinsic::loongarch_lsx_vmadd_b:
6424	case Intrinsic::loongarch_lsx_vmadd_h:
6425	case Intrinsic::loongarch_lsx_vmadd_w:
6426	case Intrinsic::loongarch_lsx_vmadd_d:
6427	case Intrinsic::loongarch_lasx_xvmadd_b:
6428	case Intrinsic::loongarch_lasx_xvmadd_h:
6429	case Intrinsic::loongarch_lasx_xvmadd_w:
6430	case Intrinsic::loongarch_lasx_xvmadd_d: {
6431	EVT ResTy = N->getValueType(ResNo: `0`);
6432	return DAG.getNode(Opcode: ISD::ADD, DL: SDLoc (N), VT: ResTy, N1: N->getOperand(Num: `1`),
6433	N2: DAG.getNode(Opcode: ISD::MUL, DL: SDLoc (N), VT: ResTy, N1: N->getOperand(Num: `2`),
6434	N2: N->getOperand(Num: `3`)));
6435	}
6436	case Intrinsic::loongarch_lsx_vmsub_b:
6437	case Intrinsic::loongarch_lsx_vmsub_h:
6438	case Intrinsic::loongarch_lsx_vmsub_w:
6439	case Intrinsic::loongarch_lsx_vmsub_d:
6440	case Intrinsic::loongarch_lasx_xvmsub_b:
6441	case Intrinsic::loongarch_lasx_xvmsub_h:
6442	case Intrinsic::loongarch_lasx_xvmsub_w:
6443	case Intrinsic::loongarch_lasx_xvmsub_d: {
6444	EVT ResTy = N->getValueType(ResNo: `0`);
6445	return DAG.getNode(Opcode: ISD::SUB, DL: SDLoc (N), VT: ResTy, N1: N->getOperand(Num: `1`),
6446	N2: DAG.getNode(Opcode: ISD::MUL, DL: SDLoc (N), VT: ResTy, N1: N->getOperand(Num: `2`),
6447	N2: N->getOperand(Num: `3`)));
6448	}
6449	case Intrinsic::loongarch_lsx_vdiv_b:
6450	case Intrinsic::loongarch_lsx_vdiv_h:
6451	case Intrinsic::loongarch_lsx_vdiv_w:
6452	case Intrinsic::loongarch_lsx_vdiv_d:
6453	case Intrinsic::loongarch_lasx_xvdiv_b:
6454	case Intrinsic::loongarch_lasx_xvdiv_h:
6455	case Intrinsic::loongarch_lasx_xvdiv_w:
6456	case Intrinsic::loongarch_lasx_xvdiv_d:
6457	return DAG.getNode(Opcode: ISD::SDIV, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6458	N2: N->getOperand(Num: `2`));
6459	case Intrinsic::loongarch_lsx_vdiv_bu:
6460	case Intrinsic::loongarch_lsx_vdiv_hu:
6461	case Intrinsic::loongarch_lsx_vdiv_wu:
6462	case Intrinsic::loongarch_lsx_vdiv_du:
6463	case Intrinsic::loongarch_lasx_xvdiv_bu:
6464	case Intrinsic::loongarch_lasx_xvdiv_hu:
6465	case Intrinsic::loongarch_lasx_xvdiv_wu:
6466	case Intrinsic::loongarch_lasx_xvdiv_du:
6467	return DAG.getNode(Opcode: ISD::UDIV, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6468	N2: N->getOperand(Num: `2`));
6469	case Intrinsic::loongarch_lsx_vmod_b:
6470	case Intrinsic::loongarch_lsx_vmod_h:
6471	case Intrinsic::loongarch_lsx_vmod_w:
6472	case Intrinsic::loongarch_lsx_vmod_d:
6473	case Intrinsic::loongarch_lasx_xvmod_b:
6474	case Intrinsic::loongarch_lasx_xvmod_h:
6475	case Intrinsic::loongarch_lasx_xvmod_w:
6476	case Intrinsic::loongarch_lasx_xvmod_d:
6477	return DAG.getNode(Opcode: ISD::SREM, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6478	N2: N->getOperand(Num: `2`));
6479	case Intrinsic::loongarch_lsx_vmod_bu:
6480	case Intrinsic::loongarch_lsx_vmod_hu:
6481	case Intrinsic::loongarch_lsx_vmod_wu:
6482	case Intrinsic::loongarch_lsx_vmod_du:
6483	case Intrinsic::loongarch_lasx_xvmod_bu:
6484	case Intrinsic::loongarch_lasx_xvmod_hu:
6485	case Intrinsic::loongarch_lasx_xvmod_wu:
6486	case Intrinsic::loongarch_lasx_xvmod_du:
6487	return DAG.getNode(Opcode: ISD::UREM, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6488	N2: N->getOperand(Num: `2`));
6489	case Intrinsic::loongarch_lsx_vand_v:
6490	case Intrinsic::loongarch_lasx_xvand_v:
6491	return DAG.getNode(Opcode: ISD::AND, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6492	N2: N->getOperand(Num: `2`));
6493	case Intrinsic::loongarch_lsx_vor_v:
6494	case Intrinsic::loongarch_lasx_xvor_v:
6495	return DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6496	N2: N->getOperand(Num: `2`));
6497	case Intrinsic::loongarch_lsx_vxor_v:
6498	case Intrinsic::loongarch_lasx_xvxor_v:
6499	return DAG.getNode(Opcode: ISD::XOR, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6500	N2: N->getOperand(Num: `2`));
6501	case Intrinsic::loongarch_lsx_vnor_v:
6502	case Intrinsic::loongarch_lasx_xvnor_v: {
6503	SDValue Res = DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6504	N2: N->getOperand(Num: `2`));
6505	return DAG.getNOT(DL, Val: Res, VT: Res ->getValueType(ResNo: `0`));
6506	}
6507	case Intrinsic::loongarch_lsx_vandi_b:
6508	case Intrinsic::loongarch_lasx_xvandi_b:
6509	return DAG.getNode(Opcode: ISD::AND, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6510	N2: lowerVectorSplatImm<`8`>(Node: N, ImmOp: `2`, DAG));
6511	case Intrinsic::loongarch_lsx_vori_b:
6512	case Intrinsic::loongarch_lasx_xvori_b:
6513	return DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6514	N2: lowerVectorSplatImm<`8`>(Node: N, ImmOp: `2`, DAG));
6515	case Intrinsic::loongarch_lsx_vxori_b:
6516	case Intrinsic::loongarch_lasx_xvxori_b:
6517	return DAG.getNode(Opcode: ISD::XOR, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6518	N2: lowerVectorSplatImm<`8`>(Node: N, ImmOp: `2`, DAG));
6519	case Intrinsic::loongarch_lsx_vsll_b:
6520	case Intrinsic::loongarch_lsx_vsll_h:
6521	case Intrinsic::loongarch_lsx_vsll_w:
6522	case Intrinsic::loongarch_lsx_vsll_d:
6523	case Intrinsic::loongarch_lasx_xvsll_b:
6524	case Intrinsic::loongarch_lasx_xvsll_h:
6525	case Intrinsic::loongarch_lasx_xvsll_w:
6526	case Intrinsic::loongarch_lasx_xvsll_d:
6527	return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6528	N2: truncateVecElts(Node: N, DAG));
6529	case Intrinsic::loongarch_lsx_vslli_b:
6530	case Intrinsic::loongarch_lasx_xvslli_b:
6531	return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6532	N2: lowerVectorSplatImm<`3`>(Node: N, ImmOp: `2`, DAG));
6533	case Intrinsic::loongarch_lsx_vslli_h:
6534	case Intrinsic::loongarch_lasx_xvslli_h:
6535	return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6536	N2: lowerVectorSplatImm<`4`>(Node: N, ImmOp: `2`, DAG));
6537	case Intrinsic::loongarch_lsx_vslli_w:
6538	case Intrinsic::loongarch_lasx_xvslli_w:
6539	return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6540	N2: lowerVectorSplatImm<`5`>(Node: N, ImmOp: `2`, DAG));
6541	case Intrinsic::loongarch_lsx_vslli_d:
6542	case Intrinsic::loongarch_lasx_xvslli_d:
6543	return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6544	N2: lowerVectorSplatImm<`6`>(Node: N, ImmOp: `2`, DAG));
6545	case Intrinsic::loongarch_lsx_vsrl_b:
6546	case Intrinsic::loongarch_lsx_vsrl_h:
6547	case Intrinsic::loongarch_lsx_vsrl_w:
6548	case Intrinsic::loongarch_lsx_vsrl_d:
6549	case Intrinsic::loongarch_lasx_xvsrl_b:
6550	case Intrinsic::loongarch_lasx_xvsrl_h:
6551	case Intrinsic::loongarch_lasx_xvsrl_w:
6552	case Intrinsic::loongarch_lasx_xvsrl_d:
6553	return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6554	N2: truncateVecElts(Node: N, DAG));
6555	case Intrinsic::loongarch_lsx_vsrli_b:
6556	case Intrinsic::loongarch_lasx_xvsrli_b:
6557	return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6558	N2: lowerVectorSplatImm<`3`>(Node: N, ImmOp: `2`, DAG));
6559	case Intrinsic::loongarch_lsx_vsrli_h:
6560	case Intrinsic::loongarch_lasx_xvsrli_h:
6561	return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6562	N2: lowerVectorSplatImm<`4`>(Node: N, ImmOp: `2`, DAG));
6563	case Intrinsic::loongarch_lsx_vsrli_w:
6564	case Intrinsic::loongarch_lasx_xvsrli_w:
6565	return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6566	N2: lowerVectorSplatImm<`5`>(Node: N, ImmOp: `2`, DAG));
6567	case Intrinsic::loongarch_lsx_vsrli_d:
6568	case Intrinsic::loongarch_lasx_xvsrli_d:
6569	return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6570	N2: lowerVectorSplatImm<`6`>(Node: N, ImmOp: `2`, DAG));
6571	case Intrinsic::loongarch_lsx_vsra_b:
6572	case Intrinsic::loongarch_lsx_vsra_h:
6573	case Intrinsic::loongarch_lsx_vsra_w:
6574	case Intrinsic::loongarch_lsx_vsra_d:
6575	case Intrinsic::loongarch_lasx_xvsra_b:
6576	case Intrinsic::loongarch_lasx_xvsra_h:
6577	case Intrinsic::loongarch_lasx_xvsra_w:
6578	case Intrinsic::loongarch_lasx_xvsra_d:
6579	return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6580	N2: truncateVecElts(Node: N, DAG));
6581	case Intrinsic::loongarch_lsx_vsrai_b:
6582	case Intrinsic::loongarch_lasx_xvsrai_b:
6583	return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6584	N2: lowerVectorSplatImm<`3`>(Node: N, ImmOp: `2`, DAG));
6585	case Intrinsic::loongarch_lsx_vsrai_h:
6586	case Intrinsic::loongarch_lasx_xvsrai_h:
6587	return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6588	N2: lowerVectorSplatImm<`4`>(Node: N, ImmOp: `2`, DAG));
6589	case Intrinsic::loongarch_lsx_vsrai_w:
6590	case Intrinsic::loongarch_lasx_xvsrai_w:
6591	return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6592	N2: lowerVectorSplatImm<`5`>(Node: N, ImmOp: `2`, DAG));
6593	case Intrinsic::loongarch_lsx_vsrai_d:
6594	case Intrinsic::loongarch_lasx_xvsrai_d:
6595	return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6596	N2: lowerVectorSplatImm<`6`>(Node: N, ImmOp: `2`, DAG));
6597	case Intrinsic::loongarch_lsx_vclz_b:
6598	case Intrinsic::loongarch_lsx_vclz_h:
6599	case Intrinsic::loongarch_lsx_vclz_w:
6600	case Intrinsic::loongarch_lsx_vclz_d:
6601	case Intrinsic::loongarch_lasx_xvclz_b:
6602	case Intrinsic::loongarch_lasx_xvclz_h:
6603	case Intrinsic::loongarch_lasx_xvclz_w:
6604	case Intrinsic::loongarch_lasx_xvclz_d:
6605	return DAG.getNode(Opcode: ISD::CTLZ, DL, VT: N->getValueType(ResNo: `0`), Operand: N->getOperand(Num: `1`));
6606	case Intrinsic::loongarch_lsx_vpcnt_b:
6607	case Intrinsic::loongarch_lsx_vpcnt_h:
6608	case Intrinsic::loongarch_lsx_vpcnt_w:
6609	case Intrinsic::loongarch_lsx_vpcnt_d:
6610	case Intrinsic::loongarch_lasx_xvpcnt_b:
6611	case Intrinsic::loongarch_lasx_xvpcnt_h:
6612	case Intrinsic::loongarch_lasx_xvpcnt_w:
6613	case Intrinsic::loongarch_lasx_xvpcnt_d:
6614	return DAG.getNode(Opcode: ISD::CTPOP, DL, VT: N->getValueType(ResNo: `0`), Operand: N->getOperand(Num: `1`));
6615	case Intrinsic::loongarch_lsx_vbitclr_b:
6616	case Intrinsic::loongarch_lsx_vbitclr_h:
6617	case Intrinsic::loongarch_lsx_vbitclr_w:
6618	case Intrinsic::loongarch_lsx_vbitclr_d:
6619	case Intrinsic::loongarch_lasx_xvbitclr_b:
6620	case Intrinsic::loongarch_lasx_xvbitclr_h:
6621	case Intrinsic::loongarch_lasx_xvbitclr_w:
6622	case Intrinsic::loongarch_lasx_xvbitclr_d:
6623	return lowerVectorBitClear(Node: N, DAG);
6624	case Intrinsic::loongarch_lsx_vbitclri_b:
6625	case Intrinsic::loongarch_lasx_xvbitclri_b:
6626	return lowerVectorBitClearImm<`3`>(Node: N, DAG);
6627	case Intrinsic::loongarch_lsx_vbitclri_h:
6628	case Intrinsic::loongarch_lasx_xvbitclri_h:
6629	return lowerVectorBitClearImm<`4`>(Node: N, DAG);
6630	case Intrinsic::loongarch_lsx_vbitclri_w:
6631	case Intrinsic::loongarch_lasx_xvbitclri_w:
6632	return lowerVectorBitClearImm<`5`>(Node: N, DAG);
6633	case Intrinsic::loongarch_lsx_vbitclri_d:
6634	case Intrinsic::loongarch_lasx_xvbitclri_d:
6635	return lowerVectorBitClearImm<`6`>(Node: N, DAG);
6636	case Intrinsic::loongarch_lsx_vbitset_b:
6637	case Intrinsic::loongarch_lsx_vbitset_h:
6638	case Intrinsic::loongarch_lsx_vbitset_w:
6639	case Intrinsic::loongarch_lsx_vbitset_d:
6640	case Intrinsic::loongarch_lasx_xvbitset_b:
6641	case Intrinsic::loongarch_lasx_xvbitset_h:
6642	case Intrinsic::loongarch_lasx_xvbitset_w:
6643	case Intrinsic::loongarch_lasx_xvbitset_d: {
6644	EVT VecTy = N->getValueType(ResNo: `0`);
6645	SDValue One = DAG.getConstant(Val: `1`, DL, VT: VecTy);
6646	return DAG.getNode(
6647	Opcode: ISD::OR, DL, VT: VecTy, N1: N->getOperand(Num: `1`),
6648	N2: DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: One, N2: truncateVecElts(Node: N, DAG)));
6649	}
6650	case Intrinsic::loongarch_lsx_vbitseti_b:
6651	case Intrinsic::loongarch_lasx_xvbitseti_b:
6652	return lowerVectorBitSetImm<`3`>(Node: N, DAG);
6653	case Intrinsic::loongarch_lsx_vbitseti_h:
6654	case Intrinsic::loongarch_lasx_xvbitseti_h:
6655	return lowerVectorBitSetImm<`4`>(Node: N, DAG);
6656	case Intrinsic::loongarch_lsx_vbitseti_w:
6657	case Intrinsic::loongarch_lasx_xvbitseti_w:
6658	return lowerVectorBitSetImm<`5`>(Node: N, DAG);
6659	case Intrinsic::loongarch_lsx_vbitseti_d:
6660	case Intrinsic::loongarch_lasx_xvbitseti_d:
6661	return lowerVectorBitSetImm<`6`>(Node: N, DAG);
6662	case Intrinsic::loongarch_lsx_vbitrev_b:
6663	case Intrinsic::loongarch_lsx_vbitrev_h:
6664	case Intrinsic::loongarch_lsx_vbitrev_w:
6665	case Intrinsic::loongarch_lsx_vbitrev_d:
6666	case Intrinsic::loongarch_lasx_xvbitrev_b:
6667	case Intrinsic::loongarch_lasx_xvbitrev_h:
6668	case Intrinsic::loongarch_lasx_xvbitrev_w:
6669	case Intrinsic::loongarch_lasx_xvbitrev_d: {
6670	EVT VecTy = N->getValueType(ResNo: `0`);
6671	SDValue One = DAG.getConstant(Val: `1`, DL, VT: VecTy);
6672	return DAG.getNode(
6673	Opcode: ISD::XOR, DL, VT: VecTy, N1: N->getOperand(Num: `1`),
6674	N2: DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: One, N2: truncateVecElts(Node: N, DAG)));
6675	}
6676	case Intrinsic::loongarch_lsx_vbitrevi_b:
6677	case Intrinsic::loongarch_lasx_xvbitrevi_b:
6678	return lowerVectorBitRevImm<`3`>(Node: N, DAG);
6679	case Intrinsic::loongarch_lsx_vbitrevi_h:
6680	case Intrinsic::loongarch_lasx_xvbitrevi_h:
6681	return lowerVectorBitRevImm<`4`>(Node: N, DAG);
6682	case Intrinsic::loongarch_lsx_vbitrevi_w:
6683	case Intrinsic::loongarch_lasx_xvbitrevi_w:
6684	return lowerVectorBitRevImm<`5`>(Node: N, DAG);
6685	case Intrinsic::loongarch_lsx_vbitrevi_d:
6686	case Intrinsic::loongarch_lasx_xvbitrevi_d:
6687	return lowerVectorBitRevImm<`6`>(Node: N, DAG);
6688	case Intrinsic::loongarch_lsx_vfadd_s:
6689	case Intrinsic::loongarch_lsx_vfadd_d:
6690	case Intrinsic::loongarch_lasx_xvfadd_s:
6691	case Intrinsic::loongarch_lasx_xvfadd_d:
6692	return DAG.getNode(Opcode: ISD::FADD, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6693	N2: N->getOperand(Num: `2`));
6694	case Intrinsic::loongarch_lsx_vfsub_s:
6695	case Intrinsic::loongarch_lsx_vfsub_d:
6696	case Intrinsic::loongarch_lasx_xvfsub_s:
6697	case Intrinsic::loongarch_lasx_xvfsub_d:
6698	return DAG.getNode(Opcode: ISD::FSUB, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6699	N2: N->getOperand(Num: `2`));
6700	case Intrinsic::loongarch_lsx_vfmul_s:
6701	case Intrinsic::loongarch_lsx_vfmul_d:
6702	case Intrinsic::loongarch_lasx_xvfmul_s:
6703	case Intrinsic::loongarch_lasx_xvfmul_d:
6704	return DAG.getNode(Opcode: ISD::FMUL, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6705	N2: N->getOperand(Num: `2`));
6706	case Intrinsic::loongarch_lsx_vfdiv_s:
6707	case Intrinsic::loongarch_lsx_vfdiv_d:
6708	case Intrinsic::loongarch_lasx_xvfdiv_s:
6709	case Intrinsic::loongarch_lasx_xvfdiv_d:
6710	return DAG.getNode(Opcode: ISD::FDIV, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6711	N2: N->getOperand(Num: `2`));
6712	case Intrinsic::loongarch_lsx_vfmadd_s:
6713	case Intrinsic::loongarch_lsx_vfmadd_d:
6714	case Intrinsic::loongarch_lasx_xvfmadd_s:
6715	case Intrinsic::loongarch_lasx_xvfmadd_d:
6716	return DAG.getNode(Opcode: ISD::FMA, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `1`),
6717	N2: N->getOperand(Num: `2`), N3: N->getOperand(Num: `3`));
6718	case Intrinsic::loongarch_lsx_vinsgr2vr_b:
6719	return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc (N), VT: N->getValueType(ResNo: `0`),
6720	N1: N->getOperand(Num: `1`), N2: N->getOperand(Num: `2`),
6721	N3: legalizeIntrinsicImmArg<`4`>(Node: N, ImmOp: `3`, DAG, Subtarget));
6722	case Intrinsic::loongarch_lsx_vinsgr2vr_h:
6723	case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
6724	return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc (N), VT: N->getValueType(ResNo: `0`),
6725	N1: N->getOperand(Num: `1`), N2: N->getOperand(Num: `2`),
6726	N3: legalizeIntrinsicImmArg<`3`>(Node: N, ImmOp: `3`, DAG, Subtarget));
6727	case Intrinsic::loongarch_lsx_vinsgr2vr_w:
6728	case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
6729	return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc (N), VT: N->getValueType(ResNo: `0`),
6730	N1: N->getOperand(Num: `1`), N2: N->getOperand(Num: `2`),
6731	N3: legalizeIntrinsicImmArg<`2`>(Node: N, ImmOp: `3`, DAG, Subtarget));
6732	case Intrinsic::loongarch_lsx_vinsgr2vr_d:
6733	return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc (N), VT: N->getValueType(ResNo: `0`),
6734	N1: N->getOperand(Num: `1`), N2: N->getOperand(Num: `2`),
6735	N3: legalizeIntrinsicImmArg<`1`>(Node: N, ImmOp: `3`, DAG, Subtarget));
6736	case Intrinsic::loongarch_lsx_vreplgr2vr_b:
6737	case Intrinsic::loongarch_lsx_vreplgr2vr_h:
6738	case Intrinsic::loongarch_lsx_vreplgr2vr_w:
6739	case Intrinsic::loongarch_lsx_vreplgr2vr_d:
6740	case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
6741	case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
6742	case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
6743	case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
6744	return DAG.getNode(Opcode: LoongArchISD::VREPLGR2VR, DL, VT: N->getValueType(ResNo: `0`),
6745	Operand: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getGRLenVT(),
6746	Operand: N->getOperand(Num: `1`)));
6747	case Intrinsic::loongarch_lsx_vreplve_b:
6748	case Intrinsic::loongarch_lsx_vreplve_h:
6749	case Intrinsic::loongarch_lsx_vreplve_w:
6750	case Intrinsic::loongarch_lsx_vreplve_d:
6751	case Intrinsic::loongarch_lasx_xvreplve_b:
6752	case Intrinsic::loongarch_lasx_xvreplve_h:
6753	case Intrinsic::loongarch_lasx_xvreplve_w:
6754	case Intrinsic::loongarch_lasx_xvreplve_d:
6755	return DAG.getNode(Opcode: LoongArchISD::VREPLVE, DL, VT: N->getValueType(ResNo: `0`),
6756	N1: N->getOperand(Num: `1`),
6757	N2: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getGRLenVT(),
6758	Operand: N->getOperand(Num: `2`)));
6759	case Intrinsic::loongarch_lsx_vpickve2gr_b:
6760	if (!Subtarget.is64Bit())
6761	return lowerVectorPickVE2GR<`4`>(N, DAG, ResOp: LoongArchISD::VPICK_SEXT_ELT);
6762	break;
6763	case Intrinsic::loongarch_lsx_vpickve2gr_h:
6764	case Intrinsic::loongarch_lasx_xvpickve2gr_w:
6765	if (!Subtarget.is64Bit())
6766	return lowerVectorPickVE2GR<`3`>(N, DAG, ResOp: LoongArchISD::VPICK_SEXT_ELT);
6767	break;
6768	case Intrinsic::loongarch_lsx_vpickve2gr_w:
6769	if (!Subtarget.is64Bit())
6770	return lowerVectorPickVE2GR<`2`>(N, DAG, ResOp: LoongArchISD::VPICK_SEXT_ELT);
6771	break;
6772	case Intrinsic::loongarch_lsx_vpickve2gr_bu:
6773	if (!Subtarget.is64Bit())
6774	return lowerVectorPickVE2GR<`4`>(N, DAG, ResOp: LoongArchISD::VPICK_ZEXT_ELT);
6775	break;
6776	case Intrinsic::loongarch_lsx_vpickve2gr_hu:
6777	case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
6778	if (!Subtarget.is64Bit())
6779	return lowerVectorPickVE2GR<`3`>(N, DAG, ResOp: LoongArchISD::VPICK_ZEXT_ELT);
6780	break;
6781	case Intrinsic::loongarch_lsx_vpickve2gr_wu:
6782	if (!Subtarget.is64Bit())
6783	return lowerVectorPickVE2GR<`2`>(N, DAG, ResOp: LoongArchISD::VPICK_ZEXT_ELT);
6784	break;
6785	case Intrinsic::loongarch_lsx_bz_b:
6786	case Intrinsic::loongarch_lsx_bz_h:
6787	case Intrinsic::loongarch_lsx_bz_w:
6788	case Intrinsic::loongarch_lsx_bz_d:
6789	case Intrinsic::loongarch_lasx_xbz_b:
6790	case Intrinsic::loongarch_lasx_xbz_h:
6791	case Intrinsic::loongarch_lasx_xbz_w:
6792	case Intrinsic::loongarch_lasx_xbz_d:
6793	if (!Subtarget.is64Bit())
6794	return DAG.getNode(Opcode: LoongArchISD::VALL_ZERO, DL, VT: N->getValueType(ResNo: `0`),
6795	Operand: N->getOperand(Num: `1`));
6796	break;
6797	case Intrinsic::loongarch_lsx_bz_v:
6798	case Intrinsic::loongarch_lasx_xbz_v:
6799	if (!Subtarget.is64Bit())
6800	return DAG.getNode(Opcode: LoongArchISD::VANY_ZERO, DL, VT: N->getValueType(ResNo: `0`),
6801	Operand: N->getOperand(Num: `1`));
6802	break;
6803	case Intrinsic::loongarch_lsx_bnz_b:
6804	case Intrinsic::loongarch_lsx_bnz_h:
6805	case Intrinsic::loongarch_lsx_bnz_w:
6806	case Intrinsic::loongarch_lsx_bnz_d:
6807	case Intrinsic::loongarch_lasx_xbnz_b:
6808	case Intrinsic::loongarch_lasx_xbnz_h:
6809	case Intrinsic::loongarch_lasx_xbnz_w:
6810	case Intrinsic::loongarch_lasx_xbnz_d:
6811	if (!Subtarget.is64Bit())
6812	return DAG.getNode(Opcode: LoongArchISD::VALL_NONZERO, DL, VT: N->getValueType(ResNo: `0`),
6813	Operand: N->getOperand(Num: `1`));
6814	break;
6815	case Intrinsic::loongarch_lsx_bnz_v:
6816	case Intrinsic::loongarch_lasx_xbnz_v:
6817	if (!Subtarget.is64Bit())
6818	return DAG.getNode(Opcode: LoongArchISD::VANY_NONZERO, DL, VT: N->getValueType(ResNo: `0`),
6819	Operand: N->getOperand(Num: `1`));
6820	break;
6821	case Intrinsic::loongarch_lasx_concat_128_s:
6822	case Intrinsic::loongarch_lasx_concat_128_d:
6823	case Intrinsic::loongarch_lasx_concat_128:
6824	return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: N->getValueType(ResNo: `0`),
6825	N1: N->getOperand(Num: `1`), N2: N->getOperand(Num: `2`));
6826	}
6827	return SDValue ();
6828	}
6829
6830	static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG,
6831	TargetLowering::DAGCombinerInfo &DCI,
6832	const LoongArchSubtarget &Subtarget) {
6833	// If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 the the
6834	// conversion is unnecessary and can be replaced with the
6835	// MOVFR2GR_S_LA64 operand.
6836	SDValue Op0 = N->getOperand(Num: `0`);
6837	if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64)
6838	return Op0.getOperand(i: `0`);
6839	return SDValue ();
6840	}
6841
6842	static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG,
6843	TargetLowering::DAGCombinerInfo &DCI,
6844	const LoongArchSubtarget &Subtarget) {
6845	// If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
6846	// conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
6847	// operand.
6848	SDValue Op0 = N->getOperand(Num: `0`);
6849	if (Op0 ->getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) {
6850	assert(Op0.getOperand(`0`).getValueType() == N->getSimpleValueType(`0`) &&
6851	"Unexpected value type!");
6852	return Op0.getOperand(i: `0`);
6853	}
6854	return SDValue ();
6855	}
6856
6857	static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG,
6858	TargetLowering::DAGCombinerInfo &DCI,
6859	const LoongArchSubtarget &Subtarget) {
6860	MVT VT = N->getSimpleValueType(ResNo: `0`);
6861	unsigned NumBits = VT.getScalarSizeInBits();
6862
6863	// Simplify the inputs.
6864	const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6865	APInt DemandedMask(APInt::getAllOnes(numBits: NumBits));
6866	if (TLI.SimplifyDemandedBits(Op: SDValue (N, `0`), DemandedBits: DemandedMask, DCI))
6867	return SDValue (N, `0`);
6868
6869	return SDValue ();
6870	}
6871
6872	static SDValue
6873	performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG,
6874	TargetLowering::DAGCombinerInfo &DCI,
6875	const LoongArchSubtarget &Subtarget) {
6876	SDValue Op0 = N->getOperand(Num: `0`);
6877	SDLoc DL(N);
6878
6879	// If the input to SplitPairF64 is just BuildPairF64 then the operation is
6880	// redundant. Instead, use BuildPairF64's operands directly.
6881	if (Op0 ->getOpcode() == LoongArchISD::BUILD_PAIR_F64)
6882	return DCI.CombineTo(N, Res0: Op0.getOperand(i: `0`), Res1: Op0.getOperand(i: `1`));
6883
6884	if (Op0 ->isUndef()) {
6885	SDValue Lo = DAG.getUNDEF(VT: MVT::i32);
6886	SDValue Hi = DAG.getUNDEF(VT: MVT::i32);
6887	return DCI.CombineTo(N, Res0: Lo, Res1: Hi);
6888	}
6889
6890	// It's cheaper to materialise two 32-bit integers than to load a double
6891	// from the constant pool and transfer it to integer registers through the
6892	// stack.
6893	if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val&: Op0)) {
6894	APInt V = C->getValueAPF().bitcastToAPInt();
6895	SDValue Lo = DAG.getConstant(Val: V.trunc(width: `32`), DL, VT: MVT::i32);
6896	SDValue Hi = DAG.getConstant(Val: V.lshr(shiftAmt: `32`).trunc(width: `32`), DL, VT: MVT::i32);
6897	return DCI.CombineTo(N, Res0: Lo, Res1: Hi);
6898	}
6899
6900	return SDValue ();
6901	}
6902
6903	/// Do target-specific dag combines on LoongArchISD::VANDN nodes.
6904	static SDValue performVANDNCombine(SDNode *N, SelectionDAG &DAG,
6905	TargetLowering::DAGCombinerInfo &DCI,
6906	const LoongArchSubtarget &Subtarget) {
6907	SDValue N0 = N->getOperand(Num: `0`);
6908	SDValue N1 = N->getOperand(Num: `1`);
6909	MVT VT = N->getSimpleValueType(ResNo: `0`);
6910	SDLoc DL(N);
6911
6912	// VANDN(undef, x) -> 0
6913	// VANDN(x, undef) -> 0
6914	if (N0.isUndef() \|\| N1.isUndef())
6915	return DAG.getConstant(Val: `0`, DL, VT);
6916
6917	// VANDN(0, x) -> x
6918	if (ISD::isBuildVectorAllZeros(N: N0.getNode()))
6919	return N1;
6920
6921	// VANDN(x, 0) -> 0
6922	if (ISD::isBuildVectorAllZeros(N: N1.getNode()))
6923	return DAG.getConstant(Val: `0`, DL, VT);
6924
6925	// VANDN(x, -1) -> NOT(x) -> XOR(x, -1)
6926	if (ISD::isBuildVectorAllOnes(N: N1.getNode()))
6927	return DAG.getNOT(DL, Val: N0, VT);
6928
6929	// Turn VANDN back to AND if input is inverted.
6930	if (SDValue Not = isNOT(V: N0, DAG))
6931	return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: DAG.getBitcast(VT, V: Not), N2: N1);
6932
6933	// Folds for better commutativity:
6934	if (N1 ->hasOneUse()) {
6935	// VANDN(x,NOT(y)) -> AND(NOT(x),NOT(y)) -> NOT(OR(X,Y)).
6936	if (SDValue Not = isNOT(V: N1, DAG))
6937	return DAG.getNOT(
6938	DL, Val: DAG.getNode(Opcode: ISD::OR, DL, VT, N1: N0, N2: DAG.getBitcast(VT, V: Not)), VT);
6939
6940	// VANDN(x, SplatVector(Imm)) -> AND(NOT(x), NOT(SplatVector(~Imm)))
6941	// -> NOT(OR(x, SplatVector(-Imm))
6942	// Combination is performed only when VT is v16i8/v32i8, using `vnori.b` to
6943	// gain benefits.
6944	if (!DCI.isBeforeLegalizeOps() && (VT == MVT::v16i8 \|\| VT == MVT::v32i8) &&
6945	N1.getOpcode() == ISD::BUILD_VECTOR) {
6946	if (SDValue SplatValue =
6947	cast<BuildVectorSDNode>(Val: N1.getNode())->getSplatValue()) {
6948	if (!N1 ->isOnlyUserOf(N: SplatValue.getNode()))
6949	return SDValue ();
6950
6951	if (auto *C = dyn_cast<ConstantSDNode>(Val&: SplatValue)) {
6952	uint8_t NCVal = static_cast<uint8_t>(~(C->getSExtValue()));
6953	SDValue Not =
6954	DAG.getSplat(VT, DL, Op: DAG.getTargetConstant(Val: NCVal, DL, VT: MVT::i8));
6955	return DAG.getNOT(
6956	DL, Val: DAG.getNode(Opcode: ISD::OR, DL, VT, N1: N0, N2: DAG.getBitcast(VT, V: Not)),
6957	VT);
6958	}
6959	}
6960	}
6961	}
6962
6963	return SDValue ();
6964	}
6965
6966	static SDValue performSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
6967	TargetLowering::DAGCombinerInfo &DCI,
6968	const LoongArchSubtarget &Subtarget) {
6969	SDLoc DL(N);
6970	EVT VT = N->getValueType(ResNo: `0`);
6971
6972	if (VT != MVT::f32 && VT != MVT::f64)
6973	return SDValue ();
6974	if (VT == MVT::f32 && !Subtarget.hasBasicF())
6975	return SDValue ();
6976	if (VT == MVT::f64 && !Subtarget.hasBasicD())
6977	return SDValue ();
6978
6979	// Only optimize when the source and destination types have the same width.
6980	if (VT.getSizeInBits() != N->getOperand(Num: `0`).getValueSizeInBits())
6981	return SDValue ();
6982
6983	SDValue Src = N->getOperand(Num: `0`);
6984	// If the result of an integer load is only used by an integer-to-float
6985	// conversion, use a fp load instead. This eliminates an integer-to-float-move
6986	// (movgr2fr) instruction.
6987	if (ISD::isNormalLoad(N: Src.getNode()) && Src.hasOneUse() &&
6988	// Do not change the width of a volatile load. This condition check is
6989	// inspired by AArch64.
6990	!cast<LoadSDNode>(Val&: Src)->isVolatile()) {
6991	LoadSDNode *LN0 = cast<LoadSDNode>(Val&: Src);
6992	SDValue Load = DAG.getLoad(VT, dl: DL, Chain: LN0->getChain(), Ptr: LN0->getBasePtr(),
6993	PtrInfo: LN0->getPointerInfo(), Alignment: LN0->getAlign(),
6994	MMOFlags: LN0->getMemOperand()->getFlags());
6995
6996	// Make sure successors of the original load stay after it by updating them
6997	// to use the new Chain.
6998	DAG.ReplaceAllUsesOfValueWith(From: SDValue (LN0, `1`), To: Load.getValue(R: `1`));
6999	return DAG.getNode(Opcode: LoongArchISD::SITOF, DL: SDLoc (N), VT, Operand: Load);
7000	}
7001
7002	return SDValue ();
7003	}
7004
7005	// Try to widen AND, OR and XOR nodes to VT in order to remove casts around
7006	// logical operations, like in the example below.
7007	// or (and (truncate x, truncate y)),
7008	// (xor (truncate z, build_vector (constants)))
7009	// Given a target type \p VT, we generate
7010	// or (and x, y), (xor z, zext(build_vector (constants)))
7011	// given x, y and z are of type \p VT. We can do so, if operands are either
7012	// truncates from VT types, the second operand is a vector of constants, can
7013	// be recursively promoted or is an existing extension we can extend further.
7014	static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL, EVT VT,
7015	SelectionDAG &DAG,
7016	const LoongArchSubtarget &Subtarget,
7017	unsigned Depth) {
7018	// Limit recursion to avoid excessive compile times.
7019	if (Depth >= SelectionDAG::MaxRecursionDepth)
7020	return SDValue ();
7021
7022	if (!ISD::isBitwiseLogicOp(Opcode: N.getOpcode()))
7023	return SDValue ();
7024
7025	SDValue N0 = N.getOperand(i: `0`);
7026	SDValue N1 = N.getOperand(i: `1`);
7027
7028	const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7029	if (!TLI.isOperationLegalOrPromote(Op: N.getOpcode(), VT))
7030	return SDValue ();
7031
7032	if (SDValue NN0 =
7033	PromoteMaskArithmetic(N: N0, DL, VT, DAG, Subtarget, Depth: Depth + `1`))
7034	N0 = NN0;
7035	else {
7036	// The left side has to be a 'trunc'.
7037	bool LHSTrunc = N0.getOpcode() == ISD::TRUNCATE &&
7038	N0.getOperand(i: `0`).getValueType() == VT;
7039	if (LHSTrunc)
7040	N0 = N0.getOperand(i: `0`);
7041	else
7042	return SDValue ();
7043	}
7044
7045	if (SDValue NN1 =
7046	PromoteMaskArithmetic(N: N1, DL, VT, DAG, Subtarget, Depth: Depth + `1`))
7047	N1 = NN1;
7048	else {
7049	// The right side has to be a 'trunc', a (foldable) constant or an
7050	// existing extension we can extend further.
7051	bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE &&
7052	N1.getOperand(i: `0`).getValueType() == VT;
7053	if (RHSTrunc)
7054	N1 = N1.getOperand(i: `0`);
7055	else if (ISD::isExtVecInRegOpcode(Opcode: N1.getOpcode()) && VT.is256BitVector() &&
7056	Subtarget.hasExtLASX() && N1.hasOneUse())
7057	N1 = DAG.getNode(Opcode: N1.getOpcode(), DL, VT, Operand: N1.getOperand(i: `0`));
7058	// On 32-bit platform, i64 is an illegal integer scalar type, and
7059	// FoldConstantArithmetic will fail for v4i64. This may be optimized in the
7060	// future.
7061	else if (SDValue Cst =
7062	DAG.FoldConstantArithmetic(Opcode: ISD::ZERO_EXTEND, DL, VT, Ops: {N1}))
7063	N1 = Cst;
7064	else
7065	return SDValue ();
7066	}
7067
7068	return DAG.getNode(Opcode: N.getOpcode(), DL, VT, N1: N0, N2: N1);
7069	}
7070
7071	// On LASX the type v4i1/v8i1/v16i1 may be legalized to v4i32/v8i16/v16i8, which
7072	// is LSX-sized register. In most cases we actually compare or select LASX-sized
7073	// registers and mixing the two types creates horrible code. This method
7074	// optimizes some of the transition sequences.
7075	static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL,
7076	SelectionDAG &DAG,
7077	const LoongArchSubtarget &Subtarget) {
7078	EVT VT = N.getValueType();
7079	assert(VT.isVector() && "Expected vector type");
7080	assert((N.getOpcode() == ISD::ANY_EXTEND \|\|
7081	N.getOpcode() == ISD::ZERO_EXTEND \|\|
7082	N.getOpcode() == ISD::SIGN_EXTEND) &&
7083	"Invalid Node");
7084
7085	if (!Subtarget.hasExtLASX() \|\| !VT.is256BitVector())
7086	return SDValue ();
7087
7088	SDValue Narrow = N.getOperand(i: `0`);
7089	EVT NarrowVT = Narrow.getValueType();
7090
7091	// Generate the wide operation.
7092	SDValue Op = PromoteMaskArithmetic(N: Narrow, DL, VT, DAG, Subtarget, Depth: `0`);
7093	if (!Op)
7094	return SDValue ();
7095	switch (N.getOpcode()) {
7096	default:
7097	llvm_unreachable("Unexpected opcode");
7098	case ISD::ANY_EXTEND:
7099	return Op;
7100	case ISD::ZERO_EXTEND:
7101	return DAG.getZeroExtendInReg(Op, DL, VT: NarrowVT);
7102	case ISD::SIGN_EXTEND:
7103	return DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT, N1: Op,
7104	N2: DAG.getValueType(NarrowVT));
7105	}
7106	}
7107
7108	static SDValue performEXTENDCombine(SDNode *N, SelectionDAG &DAG,
7109	TargetLowering::DAGCombinerInfo &DCI,
7110	const LoongArchSubtarget &Subtarget) {
7111	EVT VT = N->getValueType(ResNo: `0`);
7112	SDLoc DL(N);
7113
7114	if (VT.isVector())
7115	if (SDValue R = PromoteMaskArithmetic(N: SDValue (N, `0`), DL, DAG, Subtarget))
7116	return R;
7117
7118	return SDValue ();
7119	}
7120
7121	SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
7122	DAGCombinerInfo &DCI) const {
7123	SelectionDAG &DAG = DCI.DAG;
7124	switch (N->getOpcode()) {
7125	default:
7126	break;
7127	case ISD::AND:
7128	return performANDCombine(N, DAG, DCI, Subtarget);
7129	case ISD::OR:
7130	return performORCombine(N, DAG, DCI, Subtarget);
7131	case ISD::SETCC:
7132	return performSETCCCombine(N, DAG, DCI, Subtarget);
7133	case ISD::SRL:
7134	return performSRLCombine(N, DAG, DCI, Subtarget);
7135	case ISD::BITCAST:
7136	return performBITCASTCombine(N, DAG, DCI, Subtarget);
7137	case ISD::ANY_EXTEND:
7138	case ISD::ZERO_EXTEND:
7139	case ISD::SIGN_EXTEND:
7140	return performEXTENDCombine(N, DAG, DCI, Subtarget);
7141	case ISD::SINT_TO_FP:
7142	return performSINT_TO_FPCombine(N, DAG, DCI, Subtarget);
7143	case LoongArchISD::BITREV_W:
7144	return performBITREV_WCombine(N, DAG, DCI, Subtarget);
7145	case LoongArchISD::BR_CC:
7146	return performBR_CCCombine(N, DAG, DCI, Subtarget);
7147	case LoongArchISD::SELECT_CC:
7148	return performSELECT_CCCombine(N, DAG, DCI, Subtarget);
7149	case ISD::INTRINSIC_WO_CHAIN:
7150	return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
7151	case LoongArchISD::MOVGR2FR_W_LA64:
7152	return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
7153	case LoongArchISD::MOVFR2GR_S_LA64:
7154	return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
7155	case LoongArchISD::VMSKLTZ:
7156	case LoongArchISD::XVMSKLTZ:
7157	return performVMSKLTZCombine(N, DAG, DCI, Subtarget);
7158	case LoongArchISD::SPLIT_PAIR_F64:
7159	return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
7160	case LoongArchISD::VANDN:
7161	return performVANDNCombine(N, DAG, DCI, Subtarget);
7162	}
7163	return SDValue ();
7164	}
7165
7166	static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
7167	MachineBasicBlock *MBB) {
7168	if (!ZeroDivCheck)
7169	return MBB;
7170
7171	// Build instructions:
7172	// MBB:
7173	// div(or mod) $dst, $dividend, $divisor
7174	// bne $divisor, $zero, SinkMBB
7175	// BreakMBB:
7176	// break 7 // BRK_DIVZERO
7177	// SinkMBB:
7178	// fallthrough
7179	const BasicBlock *LLVM_BB = MBB->getBasicBlock();
7180	MachineFunction::iterator It = ++MBB->getIterator();
7181	MachineFunction *MF = MBB->getParent();
7182	auto BreakMBB = MF->CreateMachineBasicBlock(BB: LLVM_BB);
7183	auto SinkMBB = MF->CreateMachineBasicBlock(BB: LLVM_BB);
7184	MF->insert(MBBI: It, MBB: BreakMBB);
7185	MF->insert(MBBI: It, MBB: SinkMBB);
7186
7187	// Transfer the remainder of MBB and its successor edges to SinkMBB.
7188	SinkMBB->splice(Where: SinkMBB->end(), Other: MBB, From: std::next(x: MI.getIterator()), To: MBB->end());
7189	SinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB);
7190
7191	const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
7192	DebugLoc DL = MI.getDebugLoc();
7193	MachineOperand &Divisor = MI.getOperand(i: `2`);
7194	Register DivisorReg = Divisor.getReg();
7195
7196	// MBB:
7197	BuildMI(BB: MBB, MIMD: DL, MCID: TII.get(Opcode: LoongArch::BNE))
7198	.addReg(RegNo: DivisorReg, Flags: getKillRegState(B: Divisor.isKill()))
7199	.addReg(RegNo: LoongArch::R0)
7200	.addMBB(MBB: SinkMBB);
7201	MBB->addSuccessor(Succ: BreakMBB);
7202	MBB->addSuccessor(Succ: SinkMBB);
7203
7204	// BreakMBB:
7205	// See linux header file arch/loongarch/include/uapi/asm/break.h for the
7206	// definition of BRK_DIVZERO.
7207	BuildMI(BB: BreakMBB, MIMD: DL, MCID: TII.get(Opcode: LoongArch::BREAK)).addImm(Val: `7` /BRK_DIVZERO/);
7208	BreakMBB->addSuccessor(Succ: SinkMBB);
7209
7210	// Clear Divisor's kill flag.
7211	Divisor.setIsKill(false);
7212
7213	return SinkMBB;
7214	}
7215
7216	static MachineBasicBlock *
7217	emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
7218	const LoongArchSubtarget &Subtarget) {
7219	unsigned CondOpc;
7220	switch (MI.getOpcode()) {
7221	default:
7222	llvm_unreachable("Unexpected opcode");
7223	case LoongArch::PseudoVBZ:
7224	CondOpc = LoongArch::VSETEQZ_V;
7225	break;
7226	case LoongArch::PseudoVBZ_B:
7227	CondOpc = LoongArch::VSETANYEQZ_B;
7228	break;
7229	case LoongArch::PseudoVBZ_H:
7230	CondOpc = LoongArch::VSETANYEQZ_H;
7231	break;
7232	case LoongArch::PseudoVBZ_W:
7233	CondOpc = LoongArch::VSETANYEQZ_W;
7234	break;
7235	case LoongArch::PseudoVBZ_D:
7236	CondOpc = LoongArch::VSETANYEQZ_D;
7237	break;
7238	case LoongArch::PseudoVBNZ:
7239	CondOpc = LoongArch::VSETNEZ_V;
7240	break;
7241	case LoongArch::PseudoVBNZ_B:
7242	CondOpc = LoongArch::VSETALLNEZ_B;
7243	break;
7244	case LoongArch::PseudoVBNZ_H:
7245	CondOpc = LoongArch::VSETALLNEZ_H;
7246	break;
7247	case LoongArch::PseudoVBNZ_W:
7248	CondOpc = LoongArch::VSETALLNEZ_W;
7249	break;
7250	case LoongArch::PseudoVBNZ_D:
7251	CondOpc = LoongArch::VSETALLNEZ_D;
7252	break;
7253	case LoongArch::PseudoXVBZ:
7254	CondOpc = LoongArch::XVSETEQZ_V;
7255	break;
7256	case LoongArch::PseudoXVBZ_B:
7257	CondOpc = LoongArch::XVSETANYEQZ_B;
7258	break;
7259	case LoongArch::PseudoXVBZ_H:
7260	CondOpc = LoongArch::XVSETANYEQZ_H;
7261	break;
7262	case LoongArch::PseudoXVBZ_W:
7263	CondOpc = LoongArch::XVSETANYEQZ_W;
7264	break;
7265	case LoongArch::PseudoXVBZ_D:
7266	CondOpc = LoongArch::XVSETANYEQZ_D;
7267	break;
7268	case LoongArch::PseudoXVBNZ:
7269	CondOpc = LoongArch::XVSETNEZ_V;
7270	break;
7271	case LoongArch::PseudoXVBNZ_B:
7272	CondOpc = LoongArch::XVSETALLNEZ_B;
7273	break;
7274	case LoongArch::PseudoXVBNZ_H:
7275	CondOpc = LoongArch::XVSETALLNEZ_H;
7276	break;
7277	case LoongArch::PseudoXVBNZ_W:
7278	CondOpc = LoongArch::XVSETALLNEZ_W;
7279	break;
7280	case LoongArch::PseudoXVBNZ_D:
7281	CondOpc = LoongArch::XVSETALLNEZ_D;
7282	break;
7283	}
7284
7285	const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7286	const BasicBlock *LLVM_BB = BB->getBasicBlock();
7287	DebugLoc DL = MI.getDebugLoc();
7288	MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
7289	MachineFunction::iterator It = ++BB->getIterator();
7290
7291	MachineFunction *F = BB->getParent();
7292	MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
7293	MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
7294	MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
7295
7296	F->insert(MBBI: It, MBB: FalseBB);
7297	F->insert(MBBI: It, MBB: TrueBB);
7298	F->insert(MBBI: It, MBB: SinkBB);
7299
7300	// Transfer the remainder of MBB and its successor edges to Sink.
7301	SinkBB->splice(Where: SinkBB->end(), Other: BB, From: std::next(x: MI.getIterator()), To: BB->end());
7302	SinkBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
7303
7304	// Insert the real instruction to BB.
7305	Register FCC = MRI.createVirtualRegister(RegClass: &LoongArch::CFRRegClass);
7306	BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: CondOpc), DestReg: FCC).addReg(RegNo: MI.getOperand(i: `1`).getReg());
7307
7308	// Insert branch.
7309	BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::BCNEZ)).addReg(RegNo: FCC).addMBB(MBB: TrueBB);
7310	BB->addSuccessor(Succ: FalseBB);
7311	BB->addSuccessor(Succ: TrueBB);
7312
7313	// FalseBB.
7314	Register RD1 = MRI.createVirtualRegister(RegClass: &LoongArch::GPRRegClass);
7315	BuildMI(BB: FalseBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::ADDI_W), DestReg: RD1)
7316	.addReg(RegNo: LoongArch::R0)
7317	.addImm(Val: `0`);
7318	BuildMI(BB: FalseBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::PseudoBR)).addMBB(MBB: SinkBB);
7319	FalseBB->addSuccessor(Succ: SinkBB);
7320
7321	// TrueBB.
7322	Register RD2 = MRI.createVirtualRegister(RegClass: &LoongArch::GPRRegClass);
7323	BuildMI(BB: TrueBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::ADDI_W), DestReg: RD2)
7324	.addReg(RegNo: LoongArch::R0)
7325	.addImm(Val: `1`);
7326	TrueBB->addSuccessor(Succ: SinkBB);
7327
7328	// SinkBB: merge the results.
7329	BuildMI(BB&: *SinkBB, I: SinkBB->begin(), MIMD: DL, MCID: TII->get(Opcode: LoongArch::PHI),
7330	DestReg: MI.getOperand(i: `0`).getReg())
7331	.addReg(RegNo: RD1)
7332	.addMBB(MBB: FalseBB)
7333	.addReg(RegNo: RD2)
7334	.addMBB(MBB: TrueBB);
7335
7336	// The pseudo instruction is gone now.
7337	MI.eraseFromParent();
7338	return SinkBB;
7339	}
7340
7341	static MachineBasicBlock *
7342	emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
7343	const LoongArchSubtarget &Subtarget) {
7344	unsigned InsOp;
7345	unsigned BroadcastOp;
7346	unsigned HalfSize;
7347	switch (MI.getOpcode()) {
7348	default:
7349	llvm_unreachable("Unexpected opcode");
7350	case LoongArch::PseudoXVINSGR2VR_B:
7351	HalfSize = `16`;
7352	BroadcastOp = LoongArch::XVREPLGR2VR_B;
7353	InsOp = LoongArch::XVEXTRINS_B;
7354	break;
7355	case LoongArch::PseudoXVINSGR2VR_H:
7356	HalfSize = `8`;
7357	BroadcastOp = LoongArch::XVREPLGR2VR_H;
7358	InsOp = LoongArch::XVEXTRINS_H;
7359	break;
7360	}
7361	const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7362	const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
7363	const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
7364	DebugLoc DL = MI.getDebugLoc();
7365	MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
7366	// XDst = vector_insert XSrc, Elt, Idx
7367	Register XDst = MI.getOperand(i: `0`).getReg();
7368	Register XSrc = MI.getOperand(i: `1`).getReg();
7369	Register Elt = MI.getOperand(i: `2`).getReg();
7370	unsigned Idx = MI.getOperand(i: `3`).getImm();
7371
7372	if (XSrc.isVirtual() && MRI.getVRegDef(Reg: XSrc)->isImplicitDef() &&
7373	Idx < HalfSize) {
7374	Register ScratchSubReg1 = MRI.createVirtualRegister(RegClass: SubRC);
7375	Register ScratchSubReg2 = MRI.createVirtualRegister(RegClass: SubRC);
7376
7377	BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::COPY), DestReg: ScratchSubReg1)
7378	.addReg(RegNo: XSrc, Flags: {}, SubReg: LoongArch::sub_128);
7379	BuildMI(BB&: *BB, I&: MI, MIMD: DL,
7380	MCID: TII->get(Opcode: HalfSize == `8` ? LoongArch::VINSGR2VR_H
7381	: LoongArch::VINSGR2VR_B),
7382	DestReg: ScratchSubReg2)
7383	.addReg(RegNo: ScratchSubReg1)
7384	.addReg(RegNo: Elt)
7385	.addImm(Val: Idx);
7386
7387	BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::SUBREG_TO_REG), DestReg: XDst)
7388	.addReg(RegNo: ScratchSubReg2)
7389	.addImm(Val: LoongArch::sub_128);
7390	} else {
7391	Register ScratchReg1 = MRI.createVirtualRegister(RegClass: RC);
7392	Register ScratchReg2 = MRI.createVirtualRegister(RegClass: RC);
7393
7394	BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: BroadcastOp), DestReg: ScratchReg1).addReg(RegNo: Elt);
7395
7396	BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::XVPERMI_Q), DestReg: ScratchReg2)
7397	.addReg(RegNo: ScratchReg1)
7398	.addReg(RegNo: XSrc)
7399	.addImm(Val: Idx >= HalfSize ? `48` : `18`);
7400
7401	BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: InsOp), DestReg: XDst)
7402	.addReg(RegNo: XSrc)
7403	.addReg(RegNo: ScratchReg2)
7404	.addImm(Val: (Idx >= HalfSize ? Idx - HalfSize : Idx) * `17`);
7405	}
7406
7407	MI.eraseFromParent();
7408	return BB;
7409	}
7410
7411	static MachineBasicBlock *emitPseudoCTPOP(MachineInstr &MI,
7412	MachineBasicBlock *BB,
7413	const LoongArchSubtarget &Subtarget) {
7414	assert(Subtarget.hasExtLSX());
7415	const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7416	const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
7417	DebugLoc DL = MI.getDebugLoc();
7418	MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
7419	Register Dst = MI.getOperand(i: `0`).getReg();
7420	Register Src = MI.getOperand(i: `1`).getReg();
7421	Register ScratchReg1 = MRI.createVirtualRegister(RegClass: RC);
7422	Register ScratchReg2 = MRI.createVirtualRegister(RegClass: RC);
7423	Register ScratchReg3 = MRI.createVirtualRegister(RegClass: RC);
7424
7425	BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::VLDI), DestReg: ScratchReg1).addImm(Val: `0`);
7426	BuildMI(BB&: *BB, I&: MI, MIMD: DL,
7427	MCID: TII->get(Opcode: Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
7428	: LoongArch::VINSGR2VR_W),
7429	DestReg: ScratchReg2)
7430	.addReg(RegNo: ScratchReg1)
7431	.addReg(RegNo: Src)
7432	.addImm(Val: `0`);
7433	BuildMI(
7434	BB&: *BB, I&: MI, MIMD: DL,
7435	MCID: TII->get(Opcode: Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
7436	DestReg: ScratchReg3)
7437	.addReg(RegNo: ScratchReg2);
7438	BuildMI(BB&: *BB, I&: MI, MIMD: DL,
7439	MCID: TII->get(Opcode: Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
7440	: LoongArch::VPICKVE2GR_W),
7441	DestReg: Dst)
7442	.addReg(RegNo: ScratchReg3)
7443	.addImm(Val: `0`);
7444
7445	MI.eraseFromParent();
7446	return BB;
7447	}
7448
7449	static MachineBasicBlock *
7450	emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB,
7451	const LoongArchSubtarget &Subtarget) {
7452	const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7453	const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
7454	const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
7455	MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
7456	Register Dst = MI.getOperand(i: `0`).getReg();
7457	Register Src = MI.getOperand(i: `1`).getReg();
7458	DebugLoc DL = MI.getDebugLoc();
7459	unsigned EleBits = `8`;
7460	unsigned NotOpc = `0`;
7461	unsigned MskOpc;
7462
7463	switch (MI.getOpcode()) {
7464	default:
7465	llvm_unreachable("Unexpected opcode");
7466	case LoongArch::PseudoVMSKLTZ_B:
7467	MskOpc = LoongArch::VMSKLTZ_B;
7468	break;
7469	case LoongArch::PseudoVMSKLTZ_H:
7470	MskOpc = LoongArch::VMSKLTZ_H;
7471	EleBits = `16`;
7472	break;
7473	case LoongArch::PseudoVMSKLTZ_W:
7474	MskOpc = LoongArch::VMSKLTZ_W;
7475	EleBits = `32`;
7476	break;
7477	case LoongArch::PseudoVMSKLTZ_D:
7478	MskOpc = LoongArch::VMSKLTZ_D;
7479	EleBits = `64`;
7480	break;
7481	case LoongArch::PseudoVMSKGEZ_B:
7482	MskOpc = LoongArch::VMSKGEZ_B;
7483	break;
7484	case LoongArch::PseudoVMSKEQZ_B:
7485	MskOpc = LoongArch::VMSKNZ_B;
7486	NotOpc = LoongArch::VNOR_V;
7487	break;
7488	case LoongArch::PseudoVMSKNEZ_B:
7489	MskOpc = LoongArch::VMSKNZ_B;
7490	break;
7491	case LoongArch::PseudoXVMSKLTZ_B:
7492	MskOpc = LoongArch::XVMSKLTZ_B;
7493	RC = &LoongArch::LASX256RegClass;
7494	break;
7495	case LoongArch::PseudoXVMSKLTZ_H:
7496	MskOpc = LoongArch::XVMSKLTZ_H;
7497	RC = &LoongArch::LASX256RegClass;
7498	EleBits = `16`;
7499	break;
7500	case LoongArch::PseudoXVMSKLTZ_W:
7501	MskOpc = LoongArch::XVMSKLTZ_W;
7502	RC = &LoongArch::LASX256RegClass;
7503	EleBits = `32`;
7504	break;
7505	case LoongArch::PseudoXVMSKLTZ_D:
7506	MskOpc = LoongArch::XVMSKLTZ_D;
7507	RC = &LoongArch::LASX256RegClass;
7508	EleBits = `64`;
7509	break;
7510	case LoongArch::PseudoXVMSKGEZ_B:
7511	MskOpc = LoongArch::XVMSKGEZ_B;
7512	RC = &LoongArch::LASX256RegClass;
7513	break;
7514	case LoongArch::PseudoXVMSKEQZ_B:
7515	MskOpc = LoongArch::XVMSKNZ_B;
7516	NotOpc = LoongArch::XVNOR_V;
7517	RC = &LoongArch::LASX256RegClass;
7518	break;
7519	case LoongArch::PseudoXVMSKNEZ_B:
7520	MskOpc = LoongArch::XVMSKNZ_B;
7521	RC = &LoongArch::LASX256RegClass;
7522	break;
7523	}
7524
7525	Register Msk = MRI.createVirtualRegister(RegClass: RC);
7526	if (NotOpc) {
7527	Register Tmp = MRI.createVirtualRegister(RegClass: RC);
7528	BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: MskOpc), DestReg: Tmp).addReg(RegNo: Src);
7529	BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: NotOpc), DestReg: Msk)
7530	.addReg(RegNo: Tmp, Flags: RegState::Kill)
7531	.addReg(RegNo: Tmp, Flags: RegState::Kill);
7532	} else {
7533	BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: MskOpc), DestReg: Msk).addReg(RegNo: Src);
7534	}
7535
7536	if (TRI->getRegSizeInBits(RC: *RC) > `128`) {
7537	Register Lo = MRI.createVirtualRegister(RegClass: &LoongArch::GPRRegClass);
7538	Register Hi = MRI.createVirtualRegister(RegClass: &LoongArch::GPRRegClass);
7539	BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::XVPICKVE2GR_WU), DestReg: Lo)
7540	.addReg(RegNo: Msk)
7541	.addImm(Val: `0`);
7542	BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::XVPICKVE2GR_WU), DestReg: Hi)
7543	.addReg(RegNo: Msk, Flags: RegState::Kill)
7544	.addImm(Val: `4`);
7545	BuildMI(BB&: *BB, I&: MI, MIMD: DL,
7546	MCID: TII->get(Opcode: Subtarget.is64Bit() ? LoongArch::BSTRINS_D
7547	: LoongArch::BSTRINS_W),
7548	DestReg: Dst)
7549	.addReg(RegNo: Lo, Flags: RegState::Kill)
7550	.addReg(RegNo: Hi, Flags: RegState::Kill)
7551	.addImm(Val: `256` / EleBits - `1`)
7552	.addImm(Val: `128` / EleBits);
7553	} else {
7554	BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::VPICKVE2GR_HU), DestReg: Dst)
7555	.addReg(RegNo: Msk, Flags: RegState::Kill)
7556	.addImm(Val: `0`);
7557	}
7558
7559	MI.eraseFromParent();
7560	return BB;
7561	}
7562
7563	static MachineBasicBlock *
7564	emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB,
7565	const LoongArchSubtarget &Subtarget) {
7566	assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
7567	"Unexpected instruction");
7568
7569	MachineFunction &MF = *BB->getParent();
7570	DebugLoc DL = MI.getDebugLoc();
7571	const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
7572	Register LoReg = MI.getOperand(i: `0`).getReg();
7573	Register HiReg = MI.getOperand(i: `1`).getReg();
7574	Register SrcReg = MI.getOperand(i: `2`).getReg();
7575
7576	BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: LoongArch::MOVFR2GR_S_64), DestReg: LoReg).addReg(RegNo: SrcReg);
7577	BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: LoongArch::MOVFRH2GR_S), DestReg: HiReg)
7578	.addReg(RegNo: SrcReg, Flags: getKillRegState(B: MI.getOperand(i: `2`).isKill()));
7579	MI.eraseFromParent(); // The pseudo instruction is gone now.
7580	return BB;
7581	}
7582
7583	static MachineBasicBlock *
7584	emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB,
7585	const LoongArchSubtarget &Subtarget) {
7586	assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
7587	"Unexpected instruction");
7588
7589	MachineFunction &MF = *BB->getParent();
7590	DebugLoc DL = MI.getDebugLoc();
7591	const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
7592	MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
7593	Register TmpReg = MRI.createVirtualRegister(RegClass: &LoongArch::FPR64RegClass);
7594	Register DstReg = MI.getOperand(i: `0`).getReg();
7595	Register LoReg = MI.getOperand(i: `1`).getReg();
7596	Register HiReg = MI.getOperand(i: `2`).getReg();
7597
7598	BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: LoongArch::MOVGR2FR_W_64), DestReg: TmpReg)
7599	.addReg(RegNo: LoReg, Flags: getKillRegState(B: MI.getOperand(i: `1`).isKill()));
7600	BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: LoongArch::MOVGR2FRH_W), DestReg: DstReg)
7601	.addReg(RegNo: TmpReg, Flags: RegState::Kill)
7602	.addReg(RegNo: HiReg, Flags: getKillRegState(B: MI.getOperand(i: `2`).isKill()));
7603	MI.eraseFromParent(); // The pseudo instruction is gone now.
7604	return BB;
7605	}
7606
7607	static bool isSelectPseudo(MachineInstr &MI) {
7608	switch (MI.getOpcode()) {
7609	default:
7610	return false;
7611	case LoongArch::Select_GPR_Using_CC_GPR:
7612	return true;
7613	}
7614	}
7615
7616	static MachineBasicBlock *
7617	emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB,
7618	const LoongArchSubtarget &Subtarget) {
7619	// To "insert" Select_ instructions, we actually have to insert the triangle*
7620	// control-flow pattern. The incoming instructions know the destination vreg
7621	// to set, the condition code register to branch on, the true/false values to
7622	// select between, and the condcode to use to select the appropriate branch.
7623	//
7624	// We produce the following control flow:
7625	// HeadMBB
7626	// \| \
7627	// \| IfFalseMBB
7628	// \| /
7629	// TailMBB
7630	//
7631	// When we find a sequence of selects we attempt to optimize their emission
7632	// by sharing the control flow. Currently we only handle cases where we have
7633	// multiple selects with the exact same condition (same LHS, RHS and CC).
7634	// The selects may be interleaved with other instructions if the other
7635	// instructions meet some requirements we deem safe:
7636	// - They are not pseudo instructions.
7637	// - They are debug instructions. Otherwise,
7638	// - They do not have side-effects, do not access memory and their inputs do
7639	// not depend on the results of the select pseudo-instructions.
7640	// The TrueV/FalseV operands of the selects cannot depend on the result of
7641	// previous selects in the sequence.
7642	// These conditions could be further relaxed. See the X86 target for a
7643	// related approach and more information.
7644
7645	Register LHS = MI.getOperand(i: `1`).getReg();
7646	Register RHS;
7647	if (MI.getOperand(i: `2`).isReg())
7648	RHS = MI.getOperand(i: `2`).getReg();
7649	auto CC = static_cast<unsigned>(MI.getOperand(i: `3`).getImm());
7650
7651	SmallVector<MachineInstr *, `4`> SelectDebugValues;
7652	SmallSet<Register, `4`> SelectDests;
7653	SelectDests.insert(V: MI.getOperand(i: `0`).getReg());
7654
7655	MachineInstr *LastSelectPseudo = &MI;
7656	for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator (MI);
7657	SequenceMBBI != E; ++SequenceMBBI) {
7658	if (SequenceMBBI ->isDebugInstr())
7659	continue;
7660	if (isSelectPseudo(MI&: *SequenceMBBI)) {
7661	if (SequenceMBBI ->getOperand(i: `1`).getReg() != LHS \|\|
7662	!SequenceMBBI ->getOperand(i: `2`).isReg() \|\|
7663	SequenceMBBI ->getOperand(i: `2`).getReg() != RHS \|\|
7664	SequenceMBBI ->getOperand(i: `3`).getImm() != CC \|\|
7665	SelectDests.count(V: SequenceMBBI ->getOperand(i: `4`).getReg()) \|\|
7666	SelectDests.count(V: SequenceMBBI ->getOperand(i: `5`).getReg()))
7667	break;
7668	LastSelectPseudo = &*SequenceMBBI;
7669	SequenceMBBI ->collectDebugValues(DbgValues&: SelectDebugValues);
7670	SelectDests.insert(V: SequenceMBBI ->getOperand(i: `0`).getReg());
7671	continue;
7672	}
7673	if (SequenceMBBI ->hasUnmodeledSideEffects() \|\|
7674	SequenceMBBI ->mayLoadOrStore() \|\|
7675	SequenceMBBI ->usesCustomInsertionHook())
7676	break;
7677	if (llvm::any_of(Range: SequenceMBBI ->operands(), P: [&](MachineOperand &MO) {
7678	return MO.isReg() && MO.isUse() && SelectDests.count(V: MO.getReg());
7679	}))
7680	break;
7681	}
7682
7683	const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
7684	const BasicBlock *LLVM_BB = BB->getBasicBlock();
7685	DebugLoc DL = MI.getDebugLoc();
7686	MachineFunction::iterator I = ++BB->getIterator();
7687
7688	MachineBasicBlock *HeadMBB = BB;
7689	MachineFunction *F = BB->getParent();
7690	MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
7691	MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
7692
7693	F->insert(MBBI: I, MBB: IfFalseMBB);
7694	F->insert(MBBI: I, MBB: TailMBB);
7695
7696	// Set the call frame size on entry to the new basic blocks.
7697	unsigned CallFrameSize = TII.getCallFrameSizeAt(MI&: *LastSelectPseudo);
7698	IfFalseMBB->setCallFrameSize(CallFrameSize);
7699	TailMBB->setCallFrameSize(CallFrameSize);
7700
7701	// Transfer debug instructions associated with the selects to TailMBB.
7702	for (MachineInstr *DebugInstr : SelectDebugValues) {
7703	TailMBB->push_back(MI: DebugInstr->removeFromParent());
7704	}
7705
7706	// Move all instructions after the sequence to TailMBB.
7707	TailMBB->splice(Where: TailMBB->end(), Other: HeadMBB,
7708	From: std::next(x: LastSelectPseudo->getIterator()), To: HeadMBB->end());
7709	// Update machine-CFG edges by transferring all successors of the current
7710	// block to the new block which will contain the Phi nodes for the selects.
7711	TailMBB->transferSuccessorsAndUpdatePHIs(FromMBB: HeadMBB);
7712	// Set the successors for HeadMBB.
7713	HeadMBB->addSuccessor(Succ: IfFalseMBB);
7714	HeadMBB->addSuccessor(Succ: TailMBB);
7715
7716	// Insert appropriate branch.
7717	if (MI.getOperand(i: `2`).isImm())
7718	BuildMI(BB: HeadMBB, MIMD: DL, MCID: TII.get(Opcode: CC))
7719	.addReg(RegNo: LHS)
7720	.addImm(Val: MI.getOperand(i: `2`).getImm())
7721	.addMBB(MBB: TailMBB);
7722	else
7723	BuildMI(BB: HeadMBB, MIMD: DL, MCID: TII.get(Opcode: CC)).addReg(RegNo: LHS).addReg(RegNo: RHS).addMBB(MBB: TailMBB);
7724
7725	// IfFalseMBB just falls through to TailMBB.
7726	IfFalseMBB->addSuccessor(Succ: TailMBB);
7727
7728	// Create PHIs for all of the select pseudo-instructions.
7729	auto SelectMBBI = MI.getIterator();
7730	auto SelectEnd = std::next(x: LastSelectPseudo->getIterator());
7731	auto InsertionPoint = TailMBB->begin();
7732	while (SelectMBBI != SelectEnd) {
7733	auto Next = std::next(x: SelectMBBI);
7734	if (isSelectPseudo(MI&: *SelectMBBI)) {
7735	// %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
7736	BuildMI(BB&: *TailMBB, I: InsertionPoint, MIMD: SelectMBBI ->getDebugLoc(),
7737	MCID: TII.get(Opcode: LoongArch::PHI), DestReg: SelectMBBI ->getOperand(i: `0`).getReg())
7738	.addReg(RegNo: SelectMBBI ->getOperand(i: `4`).getReg())
7739	.addMBB(MBB: HeadMBB)
7740	.addReg(RegNo: SelectMBBI ->getOperand(i: `5`).getReg())
7741	.addMBB(MBB: IfFalseMBB);
7742	SelectMBBI ->eraseFromParent();
7743	}
7744	SelectMBBI = Next;
7745	}
7746
7747	F->getProperties().resetNoPHIs();
7748	return TailMBB;
7749	}
7750
7751	MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
7752	MachineInstr &MI, MachineBasicBlock BB) const* {
7753	const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7754	DebugLoc DL = MI.getDebugLoc();
7755
7756	switch (MI.getOpcode()) {
7757	default:
7758	llvm_unreachable("Unexpected instr type to insert");
7759	case LoongArch::DIV_W:
7760	case LoongArch::DIV_WU:
7761	case LoongArch::MOD_W:
7762	case LoongArch::MOD_WU:
7763	case LoongArch::DIV_D:
7764	case LoongArch::DIV_DU:
7765	case LoongArch::MOD_D:
7766	case LoongArch::MOD_DU:
7767	return insertDivByZeroTrap(MI, MBB: BB);
7768	break;
7769	case LoongArch::WRFCSR: {
7770	BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::MOVGR2FCSR),
7771	DestReg: LoongArch::FCSR0 + MI.getOperand(i: `0`).getImm())
7772	.addReg(RegNo: MI.getOperand(i: `1`).getReg());
7773	MI.eraseFromParent();
7774	return BB;
7775	}
7776	case LoongArch::RDFCSR: {
7777	MachineInstr *ReadFCSR =
7778	BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::MOVFCSR2GR),
7779	DestReg: MI.getOperand(i: `0`).getReg())
7780	.addReg(RegNo: LoongArch::FCSR0 + MI.getOperand(i: `1`).getImm());
7781	ReadFCSR->getOperand(i: `1`).setIsUndef();
7782	MI.eraseFromParent();
7783	return BB;
7784	}
7785	case LoongArch::Select_GPR_Using_CC_GPR:
7786	return emitSelectPseudo(MI, BB, Subtarget);
7787	case LoongArch::BuildPairF64Pseudo:
7788	return emitBuildPairF64Pseudo(MI, BB, Subtarget);
7789	case LoongArch::SplitPairF64Pseudo:
7790	return emitSplitPairF64Pseudo(MI, BB, Subtarget);
7791	case LoongArch::PseudoVBZ:
7792	case LoongArch::PseudoVBZ_B:
7793	case LoongArch::PseudoVBZ_H:
7794	case LoongArch::PseudoVBZ_W:
7795	case LoongArch::PseudoVBZ_D:
7796	case LoongArch::PseudoVBNZ:
7797	case LoongArch::PseudoVBNZ_B:
7798	case LoongArch::PseudoVBNZ_H:
7799	case LoongArch::PseudoVBNZ_W:
7800	case LoongArch::PseudoVBNZ_D:
7801	case LoongArch::PseudoXVBZ:
7802	case LoongArch::PseudoXVBZ_B:
7803	case LoongArch::PseudoXVBZ_H:
7804	case LoongArch::PseudoXVBZ_W:
7805	case LoongArch::PseudoXVBZ_D:
7806	case LoongArch::PseudoXVBNZ:
7807	case LoongArch::PseudoXVBNZ_B:
7808	case LoongArch::PseudoXVBNZ_H:
7809	case LoongArch::PseudoXVBNZ_W:
7810	case LoongArch::PseudoXVBNZ_D:
7811	return emitVecCondBranchPseudo(MI, BB, Subtarget);
7812	case LoongArch::PseudoXVINSGR2VR_B:
7813	case LoongArch::PseudoXVINSGR2VR_H:
7814	return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
7815	case LoongArch::PseudoCTPOP:
7816	return emitPseudoCTPOP(MI, BB, Subtarget);
7817	case LoongArch::PseudoVMSKLTZ_B:
7818	case LoongArch::PseudoVMSKLTZ_H:
7819	case LoongArch::PseudoVMSKLTZ_W:
7820	case LoongArch::PseudoVMSKLTZ_D:
7821	case LoongArch::PseudoVMSKGEZ_B:
7822	case LoongArch::PseudoVMSKEQZ_B:
7823	case LoongArch::PseudoVMSKNEZ_B:
7824	case LoongArch::PseudoXVMSKLTZ_B:
7825	case LoongArch::PseudoXVMSKLTZ_H:
7826	case LoongArch::PseudoXVMSKLTZ_W:
7827	case LoongArch::PseudoXVMSKLTZ_D:
7828	case LoongArch::PseudoXVMSKGEZ_B:
7829	case LoongArch::PseudoXVMSKEQZ_B:
7830	case LoongArch::PseudoXVMSKNEZ_B:
7831	return emitPseudoVMSKCOND(MI, BB, Subtarget);
7832	case TargetOpcode::STATEPOINT:
7833	// STATEPOINT is a pseudo instruction which has no implicit defs/uses
7834	// while bl call instruction (where statepoint will be lowered at the
7835	// end) has implicit def. This def is early-clobber as it will be set at
7836	// the moment of the call and earlier than any use is read.
7837	// Add this implicit dead def here as a workaround.
7838	MI.addOperand(MF&: *MI.getMF(),
7839	Op: MachineOperand::CreateReg(
7840	Reg: LoongArch::R1, /isDef/ true,
7841	/isImp/ true, /isKill/ false, /isDead/ true,
7842	/isUndef/ false, /isEarlyClobber/ true));
7843	if (!Subtarget.is64Bit())
7844	report_fatal_error(reason: "STATEPOINT is only supported on 64-bit targets");
7845	return emitPatchPoint(MI, MBB: BB);
7846	}
7847	}
7848
7849	bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
7850	EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
7851	unsigned Fast) const* {
7852	if (!Subtarget.hasUAL())
7853	return false;
7854
7855	// TODO: set reasonable speed number.
7856	if (Fast)
7857	*Fast = `1`;
7858	return true;
7859	}
7860
7861	//===----------------------------------------------------------------------===//
7862	// Calling Convention Implementation
7863	//===----------------------------------------------------------------------===//
7864
7865	// Eight general-purpose registers a0-a7 used for passing integer arguments,
7866	// with a0-a1 reused to return values. Generally, the GPRs are used to pass
7867	// fixed-point arguments, and floating-point arguments when no FPR is available
7868	// or with soft float ABI.
7869	const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
7870	LoongArch::R7, LoongArch::R8, LoongArch::R9,
7871	LoongArch::R10, LoongArch::R11};
7872
7873	// PreserveNone calling convention:
7874	// Arguments may be passed in any general-purpose registers except:
7875	// - R1 : return address register
7876	// - R22 : frame pointer
7877	// - R31 : base pointer
7878	//
7879	// All general-purpose registers are treated as caller-saved,
7880	// except R1 (RA) and R22 (FP).
7881	//
7882	// Non-volatile registers are allocated first so that a function
7883	// can call normal functions without having to spill and reload
7884	// argument registers.
7885	const MCPhysReg PreserveNoneArgGPRs[] = {
7886	LoongArch::R23, LoongArch::R24, LoongArch::R25, LoongArch::R26,
7887	LoongArch::R27, LoongArch::R28, LoongArch::R29, LoongArch::R30,
7888	LoongArch::R4, LoongArch::R5, LoongArch::R6, LoongArch::R7,
7889	LoongArch::R8, LoongArch::R9, LoongArch::R10, LoongArch::R11,
7890	LoongArch::R12, LoongArch::R13, LoongArch::R14, LoongArch::R15,
7891	LoongArch::R16, LoongArch::R17, LoongArch::R18, LoongArch::R19,
7892	LoongArch::R20};
7893
7894	// Eight floating-point registers fa0-fa7 used for passing floating-point
7895	// arguments, and fa0-fa1 are also used to return values.
7896	const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
7897	LoongArch::F3, LoongArch::F4, LoongArch::F5,
7898	LoongArch::F6, LoongArch::F7};
7899	// FPR32 and FPR64 alias each other.
7900	const MCPhysReg ArgFPR64s[] = {
7901	LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
7902	LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
7903
7904	const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
7905	LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
7906	LoongArch::VR6, LoongArch::VR7};
7907
7908	const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
7909	LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
7910	LoongArch::XR6, LoongArch::XR7};
7911
7912	static Register allocateArgGPR(CCState &State) {
7913	switch (State.getCallingConv()) {
7914	case CallingConv::PreserveNone:
7915	if (!State.isVarArg())
7916	return State.AllocateReg(Regs: PreserveNoneArgGPRs);
7917	[[fallthrough]];
7918	default:
7919	return State.AllocateReg(Regs: ArgGPRs);
7920	}
7921	}
7922
7923	// Pass a 2GRLen argument that has been split into two GRLen values through*
7924	// registers or the stack as necessary.
7925	static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
7926	CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
7927	unsigned ValNo2, MVT ValVT2, MVT LocVT2,
7928	ISD::ArgFlagsTy ArgFlags2) {
7929	unsigned GRLenInBytes = GRLen / `8`;
7930	if (Register Reg = allocateArgGPR(State)) {
7931	// At least one half can be passed via register.
7932	State.addLoc(V: CCValAssign::getReg(ValNo: VA1.getValNo(), ValVT: VA1.getValVT(), Reg,
7933	LocVT: VA1.getLocVT(), HTP: CCValAssign::Full));
7934	} else {
7935	// Both halves must be passed on the stack, with proper alignment.
7936	Align StackAlign =
7937	std::max(a: Align (GRLenInBytes), b: ArgFlags1.getNonZeroOrigAlign());
7938	State.addLoc(
7939	V: CCValAssign::getMem(ValNo: VA1.getValNo(), ValVT: VA1.getValVT(),
7940	Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: StackAlign),
7941	LocVT: VA1.getLocVT(), HTP: CCValAssign::Full));
7942	State.addLoc(V: CCValAssign::getMem(
7943	ValNo: ValNo2, ValVT: ValVT2, Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: Align (GRLenInBytes)),
7944	LocVT: LocVT2, HTP: CCValAssign::Full));
7945	return false;
7946	}
7947	if (Register Reg = allocateArgGPR(State)) {
7948	// The second half can also be passed via register.
7949	State.addLoc(
7950	V: CCValAssign::getReg(ValNo: ValNo2, ValVT: ValVT2, Reg, LocVT: LocVT2, HTP: CCValAssign::Full));
7951	} else {
7952	// The second half is passed via the stack, without additional alignment.
7953	State.addLoc(V: CCValAssign::getMem(
7954	ValNo: ValNo2, ValVT: ValVT2, Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: Align (GRLenInBytes)),
7955	LocVT: LocVT2, HTP: CCValAssign::Full));
7956	}
7957	return false;
7958	}
7959
7960	// Implements the LoongArch calling convention. Returns true upon failure.
7961	static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
7962	unsigned ValNo, MVT ValVT,
7963	CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
7964	CCState &State, bool IsRet, Type *OrigTy) {
7965	unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
7966	assert((GRLen == `32` \|\| GRLen == `64`) && "Unspport GRLen");
7967	MVT GRLenVT = GRLen == `32` ? MVT::i32 : MVT::i64;
7968	MVT LocVT = ValVT;
7969
7970	// Any return value split into more than two values can't be returned
7971	// directly.
7972	if (IsRet && ValNo > `1`)
7973	return true;
7974
7975	// If passing a variadic argument, or if no FPR is available.
7976	bool UseGPRForFloat = true;
7977
7978	switch (ABI) {
7979	default:
7980	llvm_unreachable("Unexpected ABI");
7981	break;
7982	case LoongArchABI::ABI_ILP32F:
7983	case LoongArchABI::ABI_LP64F:
7984	case LoongArchABI::ABI_ILP32D:
7985	case LoongArchABI::ABI_LP64D:
7986	UseGPRForFloat = ArgFlags.isVarArg();
7987	break;
7988	case LoongArchABI::ABI_ILP32S:
7989	case LoongArchABI::ABI_LP64S:
7990	break;
7991	}
7992
7993	// If this is a variadic argument, the LoongArch calling convention requires
7994	// that it is assigned an 'even' or 'aligned' register if it has (2GRLen)/8*
7995	// byte alignment. An aligned register should be used regardless of whether
7996	// the original argument was split during legalisation or not. The argument
7997	// will not be passed by registers if the original type is larger than
7998	// 2GRLen, so the register alignment rule does not apply.*
7999	unsigned TwoGRLenInBytes = (`2` * GRLen) / `8`;
8000	if (ArgFlags.isVarArg() &&
8001	ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
8002	DL.getTypeAllocSize(Ty: OrigTy) == TwoGRLenInBytes) {
8003	unsigned RegIdx = State.getFirstUnallocated(Regs: ArgGPRs);
8004	// Skip 'odd' register if necessary.
8005	if (RegIdx != std::size(ArgGPRs) && RegIdx % `2` == `1`)
8006	State.AllocateReg(Regs: ArgGPRs);
8007	}
8008
8009	SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
8010	SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
8011	State.getPendingArgFlags();
8012
8013	assert(PendingLocs.size() == PendingArgFlags.size() &&
8014	"PendingLocs and PendingArgFlags out of sync");
8015
8016	// FPR32 and FPR64 alias each other.
8017	if (State.getFirstUnallocated(Regs: ArgFPR32s) == std::size(ArgFPR32s))
8018	UseGPRForFloat = true;
8019
8020	if (UseGPRForFloat && ValVT == MVT::f32) {
8021	LocVT = GRLenVT;
8022	LocInfo = CCValAssign::BCvt;
8023	} else if (UseGPRForFloat && GRLen == `64` && ValVT == MVT::f64) {
8024	LocVT = MVT::i64;
8025	LocInfo = CCValAssign::BCvt;
8026	} else if (UseGPRForFloat && GRLen == `32` && ValVT == MVT::f64) {
8027	// Handle passing f64 on LA32D with a soft float ABI or when floating point
8028	// registers are exhausted.
8029	assert(PendingLocs.empty() && "Can't lower f64 if it is split");
8030	// Depending on available argument GPRS, f64 may be passed in a pair of
8031	// GPRs, split between a GPR and the stack, or passed completely on the
8032	// stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
8033	// cases.
8034	MCRegister Reg = allocateArgGPR(State);
8035	if (!Reg) {
8036	int64_t StackOffset = State.AllocateStack(Size: `8`, Alignment: Align (`8`));
8037	State.addLoc(
8038	V: CCValAssign::getMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo));
8039	return false;
8040	}
8041	LocVT = MVT::i32;
8042	State.addLoc(V: CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, HTP: LocInfo));
8043	MCRegister HiReg = allocateArgGPR(State);
8044	if (HiReg) {
8045	State.addLoc(
8046	V: CCValAssign::getCustomReg(ValNo, ValVT, Reg: HiReg, LocVT, HTP: LocInfo));
8047	} else {
8048	int64_t StackOffset = State.AllocateStack(Size: `4`, Alignment: Align (`4`));
8049	State.addLoc(
8050	V: CCValAssign::getCustomMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo));
8051	}
8052	return false;
8053	}
8054
8055	// Split arguments might be passed indirectly, so keep track of the pending
8056	// values.
8057	if (ValVT.isScalarInteger() && (ArgFlags.isSplit() \|\| !PendingLocs.empty())) {
8058	LocVT = GRLenVT;
8059	LocInfo = CCValAssign::Indirect;
8060	PendingLocs.push_back(
8061	Elt: CCValAssign::getPending(ValNo, ValVT, LocVT, HTP: LocInfo));
8062	PendingArgFlags.push_back(Elt: ArgFlags);
8063	if (!ArgFlags.isSplitEnd()) {
8064	return false;
8065	}
8066	}
8067
8068	// If the split argument only had two elements, it should be passed directly
8069	// in registers or on the stack.
8070	if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
8071	PendingLocs.size() <= `2`) {
8072	assert(PendingLocs.size() == `2` && "Unexpected PendingLocs.size()");
8073	// Apply the normal calling convention rules to the first half of the
8074	// split argument.
8075	CCValAssign VA = PendingLocs [`0`];
8076	ISD::ArgFlagsTy AF = PendingArgFlags [`0`];
8077	PendingLocs.clear();
8078	PendingArgFlags.clear();
8079	return CC_LoongArchAssign2GRLen(GRLen, State, VA1: VA, ArgFlags1: AF, ValNo2: ValNo, ValVT2: ValVT, LocVT2: LocVT,
8080	ArgFlags2: ArgFlags);
8081	}
8082
8083	// Allocate to a register if possible, or else a stack slot.
8084	Register Reg;
8085	unsigned StoreSizeBytes = GRLen / `8`;
8086	Align StackAlign = Align (GRLen / `8`);
8087
8088	if (ValVT == MVT::f32 && !UseGPRForFloat) {
8089	Reg = State.AllocateReg(Regs: ArgFPR32s);
8090	} else if (ValVT == MVT::f64 && !UseGPRForFloat) {
8091	Reg = State.AllocateReg(Regs: ArgFPR64s);
8092	} else if (ValVT.is128BitVector()) {
8093	Reg = State.AllocateReg(Regs: ArgVRs);
8094	UseGPRForFloat = false;
8095	StoreSizeBytes = `16`;
8096	StackAlign = Align (`16`);
8097	} else if (ValVT.is256BitVector()) {
8098	Reg = State.AllocateReg(Regs: ArgXRs);
8099	UseGPRForFloat = false;
8100	StoreSizeBytes = `32`;
8101	StackAlign = Align (`32`);
8102	} else {
8103	Reg = allocateArgGPR(State);
8104	}
8105
8106	unsigned StackOffset =
8107	Reg ? `0` : State.AllocateStack(Size: StoreSizeBytes, Alignment: StackAlign);
8108
8109	// If we reach this point and PendingLocs is non-empty, we must be at the
8110	// end of a split argument that must be passed indirectly.
8111	if (!PendingLocs.empty()) {
8112	assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
8113	assert(PendingLocs.size() > `2` && "Unexpected PendingLocs.size()");
8114	for (auto &It : PendingLocs) {
8115	if (Reg)
8116	It.convertToReg(Reg);
8117	else
8118	It.convertToMem(Offset: StackOffset);
8119	State.addLoc(V: It);
8120	}
8121	PendingLocs.clear();
8122	PendingArgFlags.clear();
8123	return false;
8124	}
8125	assert((!UseGPRForFloat \|\| LocVT == GRLenVT) &&
8126	"Expected an GRLenVT at this stage");
8127
8128	if (Reg) {
8129	State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, HTP: LocInfo));
8130	return false;
8131	}
8132
8133	// When a floating-point value is passed on the stack, no bit-cast is needed.
8134	if (ValVT.isFloatingPoint()) {
8135	LocVT = ValVT;
8136	LocInfo = CCValAssign::Full;
8137	}
8138
8139	State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo));
8140	return false;
8141	}
8142
8143	void LoongArchTargetLowering::analyzeInputArgs(
8144	MachineFunction &MF, CCState &CCInfo,
8145	const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
8146	LoongArchCCAssignFn Fn) const {
8147	FunctionType *FType = MF.getFunction().getFunctionType();
8148	for (unsigned i = `0`, e = Ins.size(); i != e; ++i) {
8149	MVT ArgVT = Ins [i].VT;
8150	Type ArgTy = nullptr*;
8151	if (IsRet)
8152	ArgTy = FType->getReturnType();
8153	else if (Ins [i].isOrigArg())
8154	ArgTy = FType->getParamType(i: Ins [i].getOrigArgIndex());
8155	LoongArchABI::ABI ABI =
8156	MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8157	if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins [i].Flags,
8158	CCInfo, IsRet, ArgTy)) {
8159	LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
8160	<< `'\n'`);
8161	llvm_unreachable("");
8162	}
8163	}
8164	}
8165
8166	void LoongArchTargetLowering::analyzeOutputArgs(
8167	MachineFunction &MF, CCState &CCInfo,
8168	const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
8169	CallLoweringInfo CLI, LoongArchCCAssignFn Fn) const* {
8170	for (unsigned i = `0`, e = Outs.size(); i != e; ++i) {
8171	MVT ArgVT = Outs [i].VT;
8172	Type OrigTy = CLI ? CLI->getArgs()[Outs [i].OrigArgIndex].Ty : nullptr*;
8173	LoongArchABI::ABI ABI =
8174	MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8175	if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs [i].Flags,
8176	CCInfo, IsRet, OrigTy)) {
8177	LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
8178	<< "\n");
8179	llvm_unreachable("");
8180	}
8181	}
8182	}
8183
8184	// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
8185	// values.
8186	static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
8187	const CCValAssign &VA, const SDLoc &DL) {
8188	switch (VA.getLocInfo()) {
8189	default:
8190	llvm_unreachable("Unexpected CCValAssign::LocInfo");
8191	case CCValAssign::Full:
8192	case CCValAssign::Indirect:
8193	break;
8194	case CCValAssign::BCvt:
8195	if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
8196	Val = DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64, DL, VT: MVT::f32, Operand: Val);
8197	else
8198	Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: VA.getValVT(), Operand: Val);
8199	break;
8200	}
8201	return Val;
8202	}
8203
8204	static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
8205	const CCValAssign &VA, const SDLoc &DL,
8206	const ISD::InputArg &In,
8207	const LoongArchTargetLowering &TLI) {
8208	MachineFunction &MF = DAG.getMachineFunction();
8209	MachineRegisterInfo &RegInfo = MF.getRegInfo();
8210	EVT LocVT = VA.getLocVT();
8211	SDValue Val;
8212	const TargetRegisterClass *RC = TLI.getRegClassFor(VT: LocVT.getSimpleVT());
8213	Register VReg = RegInfo.createVirtualRegister(RegClass: RC);
8214	RegInfo.addLiveIn(Reg: VA.getLocReg(), vreg: VReg);
8215	Val = DAG.getCopyFromReg(Chain, dl: DL, Reg: VReg, VT: LocVT);
8216
8217	// If input is sign extended from 32 bits, note it for the OptW pass.
8218	if (In.isOrigArg()) {
8219	Argument *OrigArg = MF.getFunction().getArg(i: In.getOrigArgIndex());
8220	if (OrigArg->getType()->isIntegerTy()) {
8221	unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
8222	// An input zero extended from i31 can also be considered sign extended.
8223	if ((BitWidth <= `32` && In.Flags.isSExt()) \|\|
8224	(BitWidth < `32` && In.Flags.isZExt())) {
8225	LoongArchMachineFunctionInfo *LAFI =
8226	MF.getInfo<LoongArchMachineFunctionInfo>();
8227	LAFI->addSExt32Register(Reg: VReg);
8228	}
8229	}
8230	}
8231
8232	return convertLocVTToValVT(DAG, Val, VA, DL);
8233	}
8234
8235	// The caller is responsible for loading the full value if the argument is
8236	// passed with CCValAssign::Indirect.
8237	static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
8238	const CCValAssign &VA, const SDLoc &DL) {
8239	MachineFunction &MF = DAG.getMachineFunction();
8240	MachineFrameInfo &MFI = MF.getFrameInfo();
8241	EVT ValVT = VA.getValVT();
8242	int FI = MFI.CreateFixedObject(Size: ValVT.getStoreSize(), SPOffset: VA.getLocMemOffset(),
8243	/IsImmutable=/true);
8244	SDValue FIN = DAG.getFrameIndex(
8245	FI, VT: MVT::getIntegerVT(BitWidth: DAG.getDataLayout().getPointerSizeInBits(AS: `0`)));
8246
8247	ISD::LoadExtType ExtType;
8248	switch (VA.getLocInfo()) {
8249	default:
8250	llvm_unreachable("Unexpected CCValAssign::LocInfo");
8251	case CCValAssign::Full:
8252	case CCValAssign::Indirect:
8253	case CCValAssign::BCvt:
8254	ExtType = ISD::NON_EXTLOAD;
8255	break;
8256	}
8257	return DAG.getExtLoad(
8258	ExtType, dl: DL, VT: VA.getLocVT(), Chain, Ptr: FIN,
8259	PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI), MemVT: ValVT);
8260	}
8261
8262	static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain,
8263	const CCValAssign &VA,
8264	const CCValAssign &HiVA,
8265	const SDLoc &DL) {
8266	assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
8267	"Unexpected VA");
8268	MachineFunction &MF = DAG.getMachineFunction();
8269	MachineFrameInfo &MFI = MF.getFrameInfo();
8270	MachineRegisterInfo &RegInfo = MF.getRegInfo();
8271
8272	assert(VA.isRegLoc() && "Expected register VA assignment");
8273
8274	Register LoVReg = RegInfo.createVirtualRegister(RegClass: &LoongArch::GPRRegClass);
8275	RegInfo.addLiveIn(Reg: VA.getLocReg(), vreg: LoVReg);
8276	SDValue Lo = DAG.getCopyFromReg(Chain, dl: DL, Reg: LoVReg, VT: MVT::i32);
8277	SDValue Hi;
8278	if (HiVA.isMemLoc()) {
8279	// Second half of f64 is passed on the stack.
8280	int FI = MFI.CreateFixedObject(Size: `4`, SPOffset: HiVA.getLocMemOffset(),
8281	/IsImmutable=/true);
8282	SDValue FIN = DAG.getFrameIndex(FI, VT: MVT::i32);
8283	Hi = DAG.getLoad(VT: MVT::i32, dl: DL, Chain, Ptr: FIN,
8284	PtrInfo: MachinePointerInfo::getFixedStack(MF, FI));
8285	} else {
8286	// Second half of f64 is passed in another GPR.
8287	Register HiVReg = RegInfo.createVirtualRegister(RegClass: &LoongArch::GPRRegClass);
8288	RegInfo.addLiveIn(Reg: HiVA.getLocReg(), vreg: HiVReg);
8289	Hi = DAG.getCopyFromReg(Chain, dl: DL, Reg: HiVReg, VT: MVT::i32);
8290	}
8291	return DAG.getNode(Opcode: LoongArchISD::BUILD_PAIR_F64, DL, VT: MVT::f64, N1: Lo, N2: Hi);
8292	}
8293
8294	static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
8295	const CCValAssign &VA, const SDLoc &DL) {
8296	EVT LocVT = VA.getLocVT();
8297
8298	switch (VA.getLocInfo()) {
8299	default:
8300	llvm_unreachable("Unexpected CCValAssign::LocInfo");
8301	case CCValAssign::Full:
8302	break;
8303	case CCValAssign::BCvt:
8304	if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
8305	Val = DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Val);
8306	else
8307	Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: LocVT, Operand: Val);
8308	break;
8309	}
8310	return Val;
8311	}
8312
8313	static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
8314	CCValAssign::LocInfo LocInfo,
8315	ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
8316	CCState &State) {
8317	if (LocVT == MVT::i32 \|\| LocVT == MVT::i64) {
8318	// Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
8319	// s0 s1 s2 s3 s4 s5 s6 s7 s8
8320	static const MCPhysReg GPRList[] = {
8321	LoongArch::R23, LoongArch::R24, LoongArch::R25,
8322	LoongArch::R26, LoongArch::R27, LoongArch::R28,
8323	LoongArch::R29, LoongArch::R30, LoongArch::R31};
8324	if (MCRegister Reg = State.AllocateReg(Regs: GPRList)) {
8325	State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, HTP: LocInfo));
8326	return false;
8327	}
8328	}
8329
8330	if (LocVT == MVT::f32) {
8331	// Pass in STG registers: F1, F2, F3, F4
8332	// fs0,fs1,fs2,fs3
8333	static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
8334	LoongArch::F26, LoongArch::F27};
8335	if (MCRegister Reg = State.AllocateReg(Regs: FPR32List)) {
8336	State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, HTP: LocInfo));
8337	return false;
8338	}
8339	}
8340
8341	if (LocVT == MVT::f64) {
8342	// Pass in STG registers: D1, D2, D3, D4
8343	// fs4,fs5,fs6,fs7
8344	static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
8345	LoongArch::F30_64, LoongArch::F31_64};
8346	if (MCRegister Reg = State.AllocateReg(Regs: FPR64List)) {
8347	State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, HTP: LocInfo));
8348	return false;
8349	}
8350	}
8351
8352	report_fatal_error(reason: "No registers left in GHC calling convention");
8353	return true;
8354	}
8355
8356	// Transform physical registers into virtual registers.
8357	SDValue LoongArchTargetLowering::LowerFormalArguments(
8358	SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
8359	const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
8360	SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
8361
8362	MachineFunction &MF = DAG.getMachineFunction();
8363	auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
8364
8365	switch (CallConv) {
8366	default:
8367	llvm_unreachable("Unsupported calling convention");
8368	case CallingConv::C:
8369	case CallingConv::Fast:
8370	case CallingConv::PreserveNone:
8371	case CallingConv::PreserveMost:
8372	break;
8373	case CallingConv::GHC:
8374	if (!MF.getSubtarget().hasFeature(Feature: LoongArch::FeatureBasicF) \|\|
8375	!MF.getSubtarget().hasFeature(Feature: LoongArch::FeatureBasicD))
8376	report_fatal_error(
8377	reason: "GHC calling convention requires the F and D extensions");
8378	}
8379
8380	EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
8381	MVT GRLenVT = Subtarget.getGRLenVT();
8382	unsigned GRLenInBytes = Subtarget.getGRLen() / `8`;
8383	// Used with varargs to acumulate store chains.
8384	std::vector<SDValue> OutChains;
8385
8386	// Assign locations to all of the incoming arguments.
8387	SmallVector<CCValAssign> ArgLocs;
8388	CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
8389
8390	if (CallConv == CallingConv::GHC)
8391	CCInfo.AnalyzeFormalArguments(Ins, Fn: CC_LoongArch_GHC);
8392	else
8393	analyzeInputArgs(MF, CCInfo, Ins, /IsRet=/false, Fn: CC_LoongArch);
8394
8395	for (unsigned i = `0`, e = ArgLocs.size(), InsIdx = `0`; i != e; ++i, ++InsIdx) {
8396	CCValAssign &VA = ArgLocs [i];
8397	SDValue ArgValue;
8398	// Passing f64 on LA32D with a soft float ABI must be handled as a special
8399	// case.
8400	if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8401	assert(VA.needsCustom());
8402	ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, HiVA: ArgLocs [++i], DL);
8403	} else if (VA.isRegLoc())
8404	ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, In: Ins [InsIdx], TLI: *this);
8405	else
8406	ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
8407	if (VA.getLocInfo() == CCValAssign::Indirect) {
8408	// If the original argument was split and passed by reference, we need to
8409	// load all parts of it here (using the same address).
8410	InVals.push_back(Elt: DAG.getLoad(VT: VA.getValVT(), dl: DL, Chain, Ptr: ArgValue,
8411	PtrInfo: MachinePointerInfo ()));
8412	unsigned ArgIndex = Ins [InsIdx].OrigArgIndex;
8413	unsigned ArgPartOffset = Ins [InsIdx].PartOffset;
8414	assert(ArgPartOffset == `0`);
8415	while (i + `1` != e && Ins [InsIdx + `1`].OrigArgIndex == ArgIndex) {
8416	CCValAssign &PartVA = ArgLocs [i + `1`];
8417	unsigned PartOffset = Ins [InsIdx + `1`].PartOffset - ArgPartOffset;
8418	SDValue Offset = DAG.getIntPtrConstant(Val: PartOffset, DL);
8419	SDValue Address = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: ArgValue, N2: Offset);
8420	InVals.push_back(Elt: DAG.getLoad(VT: PartVA.getValVT(), dl: DL, Chain, Ptr: Address,
8421	PtrInfo: MachinePointerInfo ()));
8422	++i;
8423	++InsIdx;
8424	}
8425	continue;
8426	}
8427	InVals.push_back(Elt: ArgValue);
8428	if (Ins [InsIdx].Flags.isByVal())
8429	LoongArchFI->addIncomingByValArgs(Val: ArgValue);
8430	}
8431
8432	if (IsVarArg) {
8433	ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
8434	unsigned Idx = CCInfo.getFirstUnallocated(Regs: ArgRegs);
8435	const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
8436	MachineFrameInfo &MFI = MF.getFrameInfo();
8437	MachineRegisterInfo &RegInfo = MF.getRegInfo();
8438
8439	// Offset of the first variable argument from stack pointer, and size of
8440	// the vararg save area. For now, the varargs save area is either zero or
8441	// large enough to hold a0-a7.
8442	int VaArgOffset, VarArgsSaveSize;
8443
8444	// If all registers are allocated, then all varargs must be passed on the
8445	// stack and we don't need to save any argregs.
8446	if (ArgRegs.size() == Idx) {
8447	VaArgOffset = CCInfo.getStackSize();
8448	VarArgsSaveSize = `0`;
8449	} else {
8450	VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
8451	VaArgOffset = -VarArgsSaveSize;
8452	}
8453
8454	// Record the frame index of the first variable argument
8455	// which is a value necessary to VASTART.
8456	int FI = MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset, IsImmutable: true);
8457	LoongArchFI->setVarArgsFrameIndex(FI);
8458
8459	// If saving an odd number of registers then create an extra stack slot to
8460	// ensure that the frame pointer is 2GRLen-aligned, which in turn ensures*
8461	// offsets to even-numbered registered remain 2GRLen-aligned.*
8462	if (Idx % `2`) {
8463	MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset - (int)GRLenInBytes,
8464	IsImmutable: true);
8465	VarArgsSaveSize += GRLenInBytes;
8466	}
8467
8468	// Copy the integer registers that may have been used for passing varargs
8469	// to the vararg save area.
8470	for (unsigned I = Idx; I < ArgRegs.size();
8471	++I, VaArgOffset += GRLenInBytes) {
8472	const Register Reg = RegInfo.createVirtualRegister(RegClass: RC);
8473	RegInfo.addLiveIn(Reg: ArgRegs [I], vreg: Reg);
8474	SDValue ArgValue = DAG.getCopyFromReg(Chain, dl: DL, Reg, VT: GRLenVT);
8475	FI = MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset, IsImmutable: true);
8476	SDValue PtrOff = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout()));
8477	SDValue Store = DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: PtrOff,
8478	PtrInfo: MachinePointerInfo::getFixedStack(MF, FI));
8479	cast<StoreSDNode>(Val: Store.getNode())
8480	->getMemOperand()
8481	->setValue((Value )nullptr*);
8482	OutChains.push_back(x: Store);
8483	}
8484	LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
8485	}
8486
8487	LoongArchFI->setArgumentStackSize(CCInfo.getStackSize());
8488
8489	// All stores are grouped in one node to allow the matching between
8490	// the size of Ins and InVals. This only happens for vararg functions.
8491	if (!OutChains.empty()) {
8492	OutChains.push_back(x: Chain);
8493	Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
8494	}
8495
8496	return Chain;
8497	}
8498
8499	bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst CI) const* {
8500	return CI->isTailCall();
8501	}
8502
8503	// Check if the return value is used as only a return value, as otherwise
8504	// we can't perform a tail-call.
8505	bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
8506	SDValue &Chain) const {
8507	if (N->getNumValues() != `1`)
8508	return false;
8509	if (!N->hasNUsesOfValue(NUses: `1`, Value: `0`))
8510	return false;
8511
8512	SDNode Copy = N->user_begin();
8513	if (Copy->getOpcode() != ISD::CopyToReg)
8514	return false;
8515
8516	// If the ISD::CopyToReg has a glue operand, we conservatively assume it
8517	// isn't safe to perform a tail call.
8518	if (Copy->getGluedNode())
8519	return false;
8520
8521	// The copy must be used by a LoongArchISD::RET, and nothing else.
8522	bool HasRet = false;
8523	for (SDNode *Node : Copy->users()) {
8524	if (Node->getOpcode() != LoongArchISD::RET)
8525	return false;
8526	HasRet = true;
8527	}
8528
8529	if (!HasRet)
8530	return false;
8531
8532	Chain = Copy->getOperand(Num: `0`);
8533	return true;
8534	}
8535
8536	// Check whether the call is eligible for tail call optimization.
8537	bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
8538	CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
8539	const SmallVectorImpl<CCValAssign> &ArgLocs) const {
8540
8541	auto CalleeCC = CLI.CallConv;
8542	auto &Outs = CLI.Outs;
8543	auto &Caller = MF.getFunction();
8544	auto CallerCC = Caller.getCallingConv();
8545	auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
8546
8547	// If the stack arguments for this call do not fit into our own save area then
8548	// the call cannot be made tail.
8549	if (CCInfo.getStackSize() > LoongArchFI->getArgumentStackSize())
8550	return false;
8551
8552	// Do not tail call opt if any parameters need to be passed indirectly.
8553	for (auto &VA : ArgLocs)
8554	if (VA.getLocInfo() == CCValAssign::Indirect)
8555	return false;
8556
8557	// Do not tail call opt if either caller or callee uses struct return
8558	// semantics.
8559	auto IsCallerStructRet = Caller.hasStructRetAttr();
8560	auto IsCalleeStructRet = Outs.empty() ? false : Outs [`0`].Flags.isSRet();
8561	if (IsCallerStructRet != IsCalleeStructRet)
8562	return false;
8563
8564	// Do not tail call opt if caller's and callee's byval arguments do not match.
8565	for (unsigned i = `0`, j = `0`; i < Outs.size(); i++) {
8566	if (!Outs [i].Flags.isByVal())
8567	continue;
8568	if (j++ >= LoongArchFI->getIncomingByValArgsSize())
8569	return false;
8570	if (LoongArchFI->getIncomingByValArgs(Idx: i).getValueType() != Outs [i].ArgVT)
8571	return false;
8572	}
8573
8574	// The callee has to preserve all registers the caller needs to preserve.
8575	const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
8576	const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
8577	if (CalleeCC != CallerCC) {
8578	const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
8579	if (!TRI->regmaskSubsetEqual(mask0: CallerPreserved, mask1: CalleePreserved))
8580	return false;
8581	}
8582
8583	// If the callee takes no arguments then go on to check the results of the
8584	// call.
8585	const MachineRegisterInfo &MRI = MF.getRegInfo();
8586	const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
8587	if (!parametersInCSRMatch(MRI, CallerPreservedMask: CallerPreserved, ArgLocs, OutVals))
8588	return false;
8589
8590	return true;
8591	}
8592
8593	static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
8594	return DAG.getDataLayout().getPrefTypeAlign(
8595	Ty: VT.getTypeForEVT(Context&: *DAG.getContext()));
8596	}
8597
8598	// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
8599	// and output parameter nodes.
8600	SDValue
8601	LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
8602	SmallVectorImpl<SDValue> &InVals) const {
8603	SelectionDAG &DAG = CLI.DAG;
8604	SDLoc &DL = CLI.DL;
8605	SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
8606	SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
8607	SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
8608	SDValue Chain = CLI.Chain;
8609	SDValue Callee = CLI.Callee;
8610	CallingConv::ID CallConv = CLI.CallConv;
8611	bool IsVarArg = CLI.IsVarArg;
8612	EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
8613	MVT GRLenVT = Subtarget.getGRLenVT();
8614	bool &IsTailCall = CLI.IsTailCall;
8615
8616	MachineFunction &MF = DAG.getMachineFunction();
8617	auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
8618
8619	// Analyze the operands of the call, assigning locations to each operand.
8620	SmallVector<CCValAssign> ArgLocs;
8621	CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
8622
8623	if (CallConv == CallingConv::GHC)
8624	ArgCCInfo.AnalyzeCallOperands(Outs, Fn: CC_LoongArch_GHC);
8625	else
8626	analyzeOutputArgs(MF, CCInfo&: ArgCCInfo, Outs, /IsRet=/false, CLI: &CLI, Fn: CC_LoongArch);
8627
8628	// Check if it's really possible to do a tail call.
8629	if (IsTailCall)
8630	IsTailCall = isEligibleForTailCallOptimization(CCInfo&: ArgCCInfo, CLI, MF, ArgLocs);
8631
8632	if (IsTailCall)
8633	++NumTailCalls;
8634	else if (CLI.CB && CLI.CB->isMustTailCall())
8635	report_fatal_error(reason: "failed to perform tail call elimination on a call "
8636	"site marked musttail");
8637
8638	// Get a count of how many bytes are to be pushed on the stack.
8639	unsigned NumBytes = ArgCCInfo.getStackSize();
8640
8641	// Create local copies for byval args.
8642	SmallVector<SDValue> ByValArgs;
8643	for (unsigned i = `0`, j = `0`, e = Outs.size(); i != e; ++i) {
8644	ISD::ArgFlagsTy Flags = Outs [i].Flags;
8645	if (!Flags.isByVal())
8646	continue;
8647
8648	SDValue Arg = OutVals [i];
8649	unsigned Size = Flags.getByValSize();
8650	Align Alignment = Flags.getNonZeroByValAlign();
8651	SDValue SizeNode = DAG.getConstant(Val: Size, DL, VT: GRLenVT);
8652	SDValue Dst;
8653
8654	if (IsTailCall) {
8655	SDValue CallerArg = LoongArchFI->getIncomingByValArgs(Idx: j++);
8656	if (isa<GlobalAddressSDNode>(Val: Arg) \|\| isa<ExternalSymbolSDNode>(Val: Arg) \|\|
8657	isa<FrameIndexSDNode>(Val: Arg))
8658	Dst = CallerArg;
8659	} else {
8660	int FI =
8661	MF.getFrameInfo().CreateStackObject(Size, Alignment, /isSS=/isSpillSlot: false);
8662	Dst = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout()));
8663	}
8664	if (Dst) {
8665	Chain =
8666	DAG.getMemcpy(Chain, dl: DL, Dst, Src: Arg, Size: SizeNode, Alignment,
8667	/IsVolatile=/isVol: false,
8668	/AlwaysInline=/false, /CI=/nullptr, OverrideTailCall: std::nullopt,
8669	DstPtrInfo: MachinePointerInfo (), SrcPtrInfo: MachinePointerInfo ());
8670	ByValArgs.push_back(Elt: Dst);
8671	}
8672	}
8673
8674	if (!IsTailCall)
8675	Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: `0`, DL: CLI.DL);
8676
8677	// During a tail call, stores to the argument area must happen after all of
8678	// the function's incoming arguments have been loaded because they may alias.
8679	// This is done by folding in a TokenFactor from LowerFormalArguments, but
8680	// there's no point in doing so repeatedly so this tracks whether that's
8681	// happened yet.
8682	bool AfterFormalArgLoads = false;
8683
8684	// Copy argument values to their designated locations.
8685	SmallVector<std::pair<Register, SDValue>> RegsToPass;
8686	SmallVector<SDValue> MemOpChains;
8687	SDValue StackPtr;
8688	for (unsigned i = `0`, j = `0`, e = ArgLocs.size(), OutIdx = `0`; i != e;
8689	++i, ++OutIdx) {
8690	CCValAssign &VA = ArgLocs [i];
8691	SDValue ArgValue = OutVals [OutIdx];
8692	ISD::ArgFlagsTy Flags = Outs [OutIdx].Flags;
8693
8694	// Handle passing f64 on LA32D with a soft float ABI as a special case.
8695	if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8696	assert(VA.isRegLoc() && "Expected register VA assignment");
8697	assert(VA.needsCustom());
8698	SDValue SplitF64 =
8699	DAG.getNode(Opcode: LoongArchISD::SPLIT_PAIR_F64, DL,
8700	VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32), N: ArgValue);
8701	SDValue Lo = SplitF64.getValue(R: `0`);
8702	SDValue Hi = SplitF64.getValue(R: `1`);
8703
8704	Register RegLo = VA.getLocReg();
8705	RegsToPass.push_back(Elt: std::make_pair(x&: RegLo, y&: Lo));
8706
8707	// Get the CCValAssign for the Hi part.
8708	CCValAssign &HiVA = ArgLocs [++i];
8709
8710	if (HiVA.isMemLoc()) {
8711	// Second half of f64 is passed on the stack.
8712	if (!StackPtr.getNode())
8713	StackPtr = DAG.getCopyFromReg(Chain, dl: DL, Reg: LoongArch::R3, VT: PtrVT);
8714	SDValue Address =
8715	DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr,
8716	N2: DAG.getIntPtrConstant(Val: HiVA.getLocMemOffset(), DL));
8717	// Emit the store.
8718	MemOpChains.push_back(Elt: DAG.getStore(
8719	Chain, dl: DL, Val: Hi, Ptr: Address,
8720	PtrInfo: MachinePointerInfo::getStack(MF, Offset: HiVA.getLocMemOffset())));
8721	} else {
8722	// Second half of f64 is passed in another GPR.
8723	Register RegHigh = HiVA.getLocReg();
8724	RegsToPass.push_back(Elt: std::make_pair(x&: RegHigh, y&: Hi));
8725	}
8726	continue;
8727	}
8728
8729	// Promote the value if needed.
8730	// For now, only handle fully promoted and indirect arguments.
8731	if (VA.getLocInfo() == CCValAssign::Indirect) {
8732	// Store the argument in a stack slot and pass its address.
8733	Align StackAlign =
8734	std::max(a: getPrefTypeAlign(VT: Outs [OutIdx].ArgVT, DAG),
8735	b: getPrefTypeAlign(VT: ArgValue.getValueType(), DAG));
8736	TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
8737	// If the original argument was split and passed by reference, we need to
8738	// store the required parts of it here (and pass just one address).
8739	unsigned ArgIndex = Outs [OutIdx].OrigArgIndex;
8740	unsigned ArgPartOffset = Outs [OutIdx].PartOffset;
8741	assert(ArgPartOffset == `0`);
8742	// Calculate the total size to store. We don't have access to what we're
8743	// actually storing other than performing the loop and collecting the
8744	// info.
8745	SmallVector<std::pair<SDValue, SDValue>> Parts;
8746	while (i + `1` != e && Outs [OutIdx + `1`].OrigArgIndex == ArgIndex) {
8747	SDValue PartValue = OutVals [OutIdx + `1`];
8748	unsigned PartOffset = Outs [OutIdx + `1`].PartOffset - ArgPartOffset;
8749	SDValue Offset = DAG.getIntPtrConstant(Val: PartOffset, DL);
8750	EVT PartVT = PartValue.getValueType();
8751
8752	StoredSize += PartVT.getStoreSize();
8753	StackAlign = std::max(a: StackAlign, b: getPrefTypeAlign(VT: PartVT, DAG));
8754	Parts.push_back(Elt: std::make_pair(x&: PartValue, y&: Offset));
8755	++i;
8756	++OutIdx;
8757	}
8758	SDValue SpillSlot = DAG.CreateStackTemporary(Bytes: StoredSize, Alignment: StackAlign);
8759	int FI = cast<FrameIndexSDNode>(Val&: SpillSlot)->getIndex();
8760	MemOpChains.push_back(
8761	Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: SpillSlot,
8762	PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)));
8763	for (const auto &Part : Parts) {
8764	SDValue PartValue = Part.first;
8765	SDValue PartOffset = Part.second;
8766	SDValue Address =
8767	DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: SpillSlot, N2: PartOffset);
8768	MemOpChains.push_back(
8769	Elt: DAG.getStore(Chain, dl: DL, Val: PartValue, Ptr: Address,
8770	PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)));
8771	}
8772	ArgValue = SpillSlot;
8773	} else {
8774	ArgValue = convertValVTToLocVT(DAG, Val: ArgValue, VA, DL);
8775	}
8776
8777	// Use local copy if it is a byval arg.
8778	if (Flags.isByVal()) {
8779	if (!IsTailCall \|\| (isa<GlobalAddressSDNode>(Val: ArgValue) \|\|
8780	isa<ExternalSymbolSDNode>(Val: ArgValue) \|\|
8781	isa<FrameIndexSDNode>(Val: ArgValue)))
8782	ArgValue = ByValArgs [j++];
8783	}
8784
8785	if (VA.isRegLoc()) {
8786	// Queue up the argument copies and emit them at the end.
8787	RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: ArgValue));
8788	} else {
8789	assert(VA.isMemLoc() && "Argument not register or memory");
8790	SDValue DstAddr;
8791	MachinePointerInfo DstInfo;
8792	int32_t Offset = VA.getLocMemOffset();
8793
8794	// Work out the address of the stack slot.
8795	if (!StackPtr.getNode())
8796	StackPtr = DAG.getCopyFromReg(Chain, dl: DL, Reg: LoongArch::R3, VT: PtrVT);
8797
8798	if (IsTailCall) {
8799	unsigned OpSize = divideCeil(Numerator: VA.getValVT().getSizeInBits(), Denominator: `8`);
8800	int FI = MF.getFrameInfo().CreateFixedObject(Size: OpSize, SPOffset: Offset, IsImmutable: true);
8801	DstAddr = DAG.getFrameIndex(FI, VT: PtrVT);
8802	DstInfo = MachinePointerInfo::getFixedStack(MF, FI);
8803	if (!AfterFormalArgLoads) {
8804	Chain = DAG.getStackArgumentTokenFactor(Chain);
8805	AfterFormalArgLoads = true;
8806	}
8807	} else {
8808	SDValue PtrOff = DAG.getIntPtrConstant(Val: Offset, DL);
8809	DstAddr = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr, N2: PtrOff);
8810	DstInfo = MachinePointerInfo::getStack(MF, Offset);
8811	}
8812
8813	// Emit the store.
8814	MemOpChains.push_back(
8815	Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: DstAddr, PtrInfo: DstInfo));
8816	}
8817	}
8818
8819	// Join the stores, which are independent of one another.
8820	if (!MemOpChains.empty())
8821	Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: MemOpChains);
8822
8823	SDValue Glue;
8824
8825	// Build a sequence of copy-to-reg nodes, chained and glued together.
8826	for (auto &Reg : RegsToPass) {
8827	Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: Reg.first, N: Reg.second, Glue);
8828	Glue = Chain.getValue(R: `1`);
8829	}
8830
8831	// If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
8832	// TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
8833	// split it and then direct call can be matched by PseudoCALL_SMALL.
8834	if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Val&: Callee)) {
8835	const GlobalValue *GV = S->getGlobal();
8836	unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
8837	? LoongArchII::MO_CALL
8838	: LoongArchII::MO_CALL_PLT;
8839	Callee = DAG.getTargetGlobalAddress(GV: S->getGlobal(), DL, VT: PtrVT, offset: `0`, TargetFlags: OpFlags);
8840	} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Val&: Callee)) {
8841	unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV: nullptr)
8842	? LoongArchII::MO_CALL
8843	: LoongArchII::MO_CALL_PLT;
8844	Callee = DAG.getTargetExternalSymbol(Sym: S->getSymbol(), VT: PtrVT, TargetFlags: OpFlags);
8845	}
8846
8847	// The first call operand is the chain and the second is the target address.
8848	SmallVector<SDValue> Ops;
8849	Ops.push_back(Elt: Chain);
8850	Ops.push_back(Elt: Callee);
8851
8852	// Add argument registers to the end of the list so that they are
8853	// known live into the call.
8854	for (auto &Reg : RegsToPass)
8855	Ops.push_back(Elt: DAG.getRegister(Reg: Reg.first, VT: Reg.second.getValueType()));
8856
8857	if (!IsTailCall) {
8858	// Add a register mask operand representing the call-preserved registers.
8859	const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
8860	const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
8861	assert(Mask && "Missing call preserved mask for calling convention");
8862	Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask));
8863	}
8864
8865	// Glue the call to the argument copies, if any.
8866	if (Glue.getNode())
8867	Ops.push_back(Elt: Glue);
8868
8869	// Emit the call.
8870	SDVTList NodeTys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue);
8871	unsigned Op;
8872	switch (DAG.getTarget().getCodeModel()) {
8873	default:
8874	report_fatal_error(reason: "Unsupported code model");
8875	case CodeModel::Small:
8876	Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
8877	break;
8878	case CodeModel::Medium:
8879	Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
8880	break;
8881	case CodeModel::Large:
8882	assert(Subtarget.is64Bit() && "Large code model requires LA64");
8883	Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
8884	break;
8885	}
8886
8887	if (IsTailCall) {
8888	MF.getFrameInfo().setHasTailCall();
8889	SDValue Ret = DAG.getNode(Opcode: Op, DL, VTList: NodeTys, Ops);
8890	DAG.addNoMergeSiteInfo(Node: Ret.getNode(), NoMerge: CLI.NoMerge);
8891	return Ret;
8892	}
8893
8894	Chain = DAG.getNode(Opcode: Op, DL, VTList: NodeTys, Ops);
8895	DAG.addNoMergeSiteInfo(Node: Chain.getNode(), NoMerge: CLI.NoMerge);
8896	Glue = Chain.getValue(R: `1`);
8897
8898	// Mark the end of the call, which is glued to the call itself.
8899	Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytes, Size2: `0`, Glue, DL);
8900	Glue = Chain.getValue(R: `1`);
8901
8902	// Assign locations to each value returned by this call.
8903	SmallVector<CCValAssign> RVLocs;
8904	CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
8905	analyzeInputArgs(MF, CCInfo&: RetCCInfo, Ins, /IsRet=/true, Fn: CC_LoongArch);
8906
8907	// Copy all of the result registers out of their specified physreg.
8908	for (unsigned i = `0`, e = RVLocs.size(); i != e; ++i) {
8909	auto &VA = RVLocs [i];
8910	// Copy the value out.
8911	SDValue RetValue =
8912	DAG.getCopyFromReg(Chain, dl: DL, Reg: VA.getLocReg(), VT: VA.getLocVT(), Glue);
8913	// Glue the RetValue to the end of the call sequence.
8914	Chain = RetValue.getValue(R: `1`);
8915	Glue = RetValue.getValue(R: `2`);
8916
8917	if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8918	assert(VA.needsCustom());
8919	SDValue RetValue2 = DAG.getCopyFromReg(Chain, dl: DL, Reg: RVLocs [++i].getLocReg(),
8920	VT: MVT::i32, Glue);
8921	Chain = RetValue2.getValue(R: `1`);
8922	Glue = RetValue2.getValue(R: `2`);
8923	RetValue = DAG.getNode(Opcode: LoongArchISD::BUILD_PAIR_F64, DL, VT: MVT::f64,
8924	N1: RetValue, N2: RetValue2);
8925	} else
8926	RetValue = convertLocVTToValVT(DAG, Val: RetValue, VA, DL);
8927
8928	InVals.push_back(Elt: RetValue);
8929	}
8930
8931	return Chain;
8932	}
8933
8934	bool LoongArchTargetLowering::CanLowerReturn(
8935	CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
8936	const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
8937	const Type RetTy) const* {
8938	SmallVector<CCValAssign> RVLocs;
8939	CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
8940
8941	for (unsigned i = `0`, e = Outs.size(); i != e; ++i) {
8942	LoongArchABI::ABI ABI =
8943	MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8944	if (CC_LoongArch(DL: MF.getDataLayout(), ABI, ValNo: i, ValVT: Outs [i].VT, LocInfo: CCValAssign::Full,
8945	ArgFlags: Outs [i].Flags, State&: CCInfo, /IsRet=/true, OrigTy: nullptr))
8946	return false;
8947	}
8948	return true;
8949	}
8950
8951	SDValue LoongArchTargetLowering::LowerReturn(
8952	SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
8953	const SmallVectorImpl<ISD::OutputArg> &Outs,
8954	const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
8955	SelectionDAG &DAG) const {
8956	// Stores the assignment of the return value to a location.
8957	SmallVector<CCValAssign> RVLocs;
8958
8959	// Info about the registers and stack slot.
8960	CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
8961	*DAG.getContext());
8962
8963	analyzeOutputArgs(MF&: DAG.getMachineFunction(), CCInfo, Outs, /IsRet=/true,
8964	CLI: nullptr, Fn: CC_LoongArch);
8965	if (CallConv == CallingConv::GHC && !RVLocs.empty())
8966	report_fatal_error(reason: "GHC functions return void only");
8967	SDValue Glue;
8968	SmallVector<SDValue, `4`> RetOps(`1`, Chain);
8969
8970	// Copy the result values into the output registers.
8971	for (unsigned i = `0`, e = RVLocs.size(), OutIdx = `0`; i < e; ++i, ++OutIdx) {
8972	SDValue Val = OutVals [OutIdx];
8973	CCValAssign &VA = RVLocs [i];
8974	assert(VA.isRegLoc() && "Can only return in registers!");
8975
8976	if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8977	// Handle returning f64 on LA32D with a soft float ABI.
8978	assert(VA.isRegLoc() && "Expected return via registers");
8979	assert(VA.needsCustom());
8980	SDValue SplitF64 = DAG.getNode(Opcode: LoongArchISD::SPLIT_PAIR_F64, DL,
8981	VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32), N: Val);
8982	SDValue Lo = SplitF64.getValue(R: `0`);
8983	SDValue Hi = SplitF64.getValue(R: `1`);
8984	Register RegLo = VA.getLocReg();
8985	Register RegHi = RVLocs [++i].getLocReg();
8986
8987	Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: RegLo, N: Lo, Glue);
8988	Glue = Chain.getValue(R: `1`);
8989	RetOps.push_back(Elt: DAG.getRegister(Reg: RegLo, VT: MVT::i32));
8990	Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: RegHi, N: Hi, Glue);
8991	Glue = Chain.getValue(R: `1`);
8992	RetOps.push_back(Elt: DAG.getRegister(Reg: RegHi, VT: MVT::i32));
8993	} else {
8994	// Handle a 'normal' return.
8995	Val = convertValVTToLocVT(DAG, Val, VA, DL);
8996	Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: VA.getLocReg(), N: Val, Glue);
8997
8998	// Guarantee that all emitted copies are stuck together.
8999	Glue = Chain.getValue(R: `1`);
9000	RetOps.push_back(Elt: DAG.getRegister(Reg: VA.getLocReg(), VT: VA.getLocVT()));
9001	}
9002	}
9003
9004	RetOps [`0`] = Chain; // Update chain.
9005
9006	// Add the glue node if we have it.
9007	if (Glue.getNode())
9008	RetOps.push_back(Elt: Glue);
9009
9010	return DAG.getNode(Opcode: LoongArchISD::RET, DL, VT: MVT::Other, Ops: RetOps);
9011	}
9012
9013	// Check if a constant splat can be generated using [x]vldi, where imm[12] == 1.
9014	// Note: The following prefixes are excluded:
9015	// imm[11:8] == 4'b0000, 4'b0100, 4'b1000
9016	// as they can be represented using [x]vrepli.[whb]
9017	std::pair<bool, uint64_t> LoongArchTargetLowering::isImmVLDILegalForMode1(
9018	const APInt &SplatValue, const unsigned SplatBitSize) const {
9019	uint64_t RequiredImm = `0`;
9020	uint64_t V = SplatValue.getZExtValue();
9021	if (SplatBitSize == `16` && !(V & `0x00FF`)) {
9022	// 4'b0101
9023	RequiredImm = (`0b10101` << `8`) \| (V >> `8`);
9024	return {true, RequiredImm};
9025	} else if (SplatBitSize == `32`) {
9026	// 4'b0001
9027	if (!(V & `0xFFFF00FF`)) {
9028	RequiredImm = (`0b10001` << `8`) \| (V >> `8`);
9029	return {true, RequiredImm};
9030	}
9031	// 4'b0010
9032	if (!(V & `0xFF00FFFF`)) {
9033	RequiredImm = (`0b10010` << `8`) \| (V >> `16`);
9034	return {true, RequiredImm};
9035	}
9036	// 4'b0011
9037	if (!(V & `0x00FFFFFF`)) {
9038	RequiredImm = (`0b10011` << `8`) \| (V >> `24`);
9039	return {true, RequiredImm};
9040	}
9041	// 4'b0110
9042	if ((V & `0xFFFF00FF`) == `0xFF`) {
9043	RequiredImm = (`0b10110` << `8`) \| (V >> `8`);
9044	return {true, RequiredImm};
9045	}
9046	// 4'b0111
9047	if ((V & `0xFF00FFFF`) == `0xFFFF`) {
9048	RequiredImm = (`0b10111` << `8`) \| (V >> `16`);
9049	return {true, RequiredImm};
9050	}
9051	// 4'b1010
9052	if ((V & `0x7E07FFFF`) == `0x3E000000` \|\| (V & `0x7E07FFFF`) == `0x40000000`) {
9053	RequiredImm =
9054	(`0b11010` << `8`) \| (((V >> `24`) & `0xC0`) ^ `0x40`) \| ((V >> `19`) & `0x3F`);
9055	return {true, RequiredImm};
9056	}
9057	} else if (SplatBitSize == `64`) {
9058	// 4'b1011
9059	if ((V & `0xFFFFFFFF7E07FFFFULL`) == `0x3E000000ULL` \|\|
9060	(V & `0xFFFFFFFF7E07FFFFULL`) == `0x40000000ULL`) {
9061	RequiredImm =
9062	(`0b11011` << `8`) \| (((V >> `24`) & `0xC0`) ^ `0x40`) \| ((V >> `19`) & `0x3F`);
9063	return {true, RequiredImm};
9064	}
9065	// 4'b1100
9066	if ((V & `0x7FC0FFFFFFFFFFFFULL`) == `0x4000000000000000ULL` \|\|
9067	(V & `0x7FC0FFFFFFFFFFFFULL`) == `0x3FC0000000000000ULL`) {
9068	RequiredImm =
9069	(`0b11100` << `8`) \| (((V >> `56`) & `0xC0`) ^ `0x40`) \| ((V >> `48`) & `0x3F`);
9070	return {true, RequiredImm};
9071	}
9072	// 4'b1001
9073	auto sameBitsPreByte = [](uint64_t x) -> std::pair<bool, uint8_t> {
9074	uint8_t res = `0`;
9075	for (int i = `0`; i < `8`; ++i) {
9076	uint8_t byte = x & `0xFF`;
9077	if (byte == `0` \|\| byte == `0xFF`)
9078	res \|= ((byte & `1`) << i);
9079	else
9080	return {false, `0`};
9081	x >>= `8`;
9082	}
9083	return {true, res};
9084	};
9085	auto [IsSame, Suffix] = sameBitsPreByte (V);
9086	if (IsSame) {
9087	RequiredImm = (`0b11001` << `8`) \| Suffix;
9088	return {true, RequiredImm};
9089	}
9090	}
9091	return {false, RequiredImm};
9092	}
9093
9094	bool LoongArchTargetLowering::isFPImmVLDILegal(const APFloat &Imm,
9095	EVT VT) const {
9096	if (!Subtarget.hasExtLSX())
9097	return false;
9098
9099	if (VT == MVT::f32) {
9100	uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & `0x7e07ffff`;
9101	return (masked == `0x3e000000` \|\| masked == `0x40000000`);
9102	}
9103
9104	if (VT == MVT::f64) {
9105	uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & `0x7fc0ffffffffffff`;
9106	return (masked == `0x3fc0000000000000` \|\| masked == `0x4000000000000000`);
9107	}
9108
9109	return false;
9110	}
9111
9112	bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
9113	bool ForCodeSize) const {
9114	// TODO: Maybe need more checks here after vector extension is supported.
9115	if (VT == MVT::f32 && !Subtarget.hasBasicF())
9116	return false;
9117	if (VT == MVT::f64 && !Subtarget.hasBasicD())
9118	return false;
9119	return (Imm.isZero() \|\| Imm.isExactlyValue(V: `1.0`) \|\| isFPImmVLDILegal(Imm, VT));
9120	}
9121
9122	bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type ) const* {
9123	return true;
9124	}
9125
9126	bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type ) const* {
9127	return true;
9128	}
9129
9130	bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
9131	const Instruction I) const* {
9132	if (!Subtarget.is64Bit())
9133	return isa<LoadInst>(Val: I) \|\| isa<StoreInst>(Val: I);
9134
9135	if (isa<LoadInst>(Val: I))
9136	return true;
9137
9138	// On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
9139	// require fences beacuse we can use amswap_db.[w/d].
9140	Type *Ty = I->getOperand(i: `0`)->getType();
9141	if (isa<StoreInst>(Val: I) && Ty->isIntegerTy()) {
9142	unsigned Size = Ty->getIntegerBitWidth();
9143	return (Size == `8` \|\| Size == `16`);
9144	}
9145
9146	return false;
9147	}
9148
9149	EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
9150	LLVMContext &Context,
9151	EVT VT) const {
9152	if (!VT.isVector())
9153	return getPointerTy(DL);
9154	return VT.changeVectorElementTypeToInteger();
9155	}
9156
9157	bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
9158	EVT VT = Y.getValueType();
9159
9160	if (VT.isVector())
9161	return Subtarget.hasExtLSX() && VT.isInteger();
9162
9163	return VT.isScalarInteger() && !isa<ConstantSDNode>(Val: Y);
9164	}
9165
9166	void LoongArchTargetLowering::getTgtMemIntrinsic(
9167	SmallVectorImpl<IntrinsicInfo> &Infos, const CallBase &I,
9168	MachineFunction &MF, unsigned Intrinsic) const {
9169	switch (Intrinsic) {
9170	default:
9171	return;
9172	case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
9173	case Intrinsic::loongarch_masked_atomicrmw_add_i32:
9174	case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
9175	case Intrinsic::loongarch_masked_atomicrmw_nand_i32: {
9176	IntrinsicInfo Info;
9177	Info.opc = ISD::INTRINSIC_W_CHAIN;
9178	Info.memVT = MVT::i32;
9179	Info.ptrVal = I.getArgOperand(i: `0`);
9180	Info.offset = `0`;
9181	Info.align = Align (`4`);
9182	Info.flags = MachineMemOperand::MOLoad \| MachineMemOperand::MOStore \|
9183	MachineMemOperand::MOVolatile;
9184	Infos.push_back(Elt: Info);
9185	return;
9186	// TODO: Add more Intrinsics later.
9187	}
9188	}
9189	}
9190
9191	// When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8,
9192	// atomicrmw and/or/xor operations with operands less than 32 bits cannot be
9193	// expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent
9194	// regression, we need to implement it manually.
9195	void LoongArchTargetLowering::emitExpandAtomicRMW(AtomicRMWInst AI) const* {
9196	AtomicRMWInst::BinOp Op = AI->getOperation();
9197
9198	assert((Op == AtomicRMWInst::Or \|\| Op == AtomicRMWInst::Xor \|\|
9199	Op == AtomicRMWInst::And) &&
9200	"Unable to expand");
9201	unsigned MinWordSize = `4`;
9202
9203	IRBuilder<> Builder(AI);
9204	LLVMContext &Ctx = Builder.getContext();
9205	const DataLayout &DL = AI->getDataLayout();
9206	Type *ValueType = AI->getType();
9207	Type WordType = Type::getIntNTy(C&: Ctx, N: MinWordSize `8`);
9208
9209	Value *Addr = AI->getPointerOperand();
9210	PointerType *PtrTy = cast<PointerType>(Val: Addr->getType());
9211	IntegerType *IntTy = DL.getIndexType(C&: Ctx, AddressSpace: PtrTy->getAddressSpace());
9212
9213	Value *AlignedAddr = Builder.CreateIntrinsic(
9214	ID: Intrinsic::ptrmask, Types: {PtrTy, IntTy},
9215	Args: {Addr, ConstantInt::get(Ty: IntTy, V: ~(uint64_t)(MinWordSize - `1`))}, FMFSource: nullptr,
9216	Name: "AlignedAddr");
9217
9218	Value *AddrInt = Builder.CreatePtrToInt(V: Addr, DestTy: IntTy);
9219	Value *PtrLSB = Builder.CreateAnd(LHS: AddrInt, RHS: MinWordSize - `1`, Name: "PtrLSB");
9220	Value *ShiftAmt = Builder.CreateShl(LHS: PtrLSB, RHS: `3`);
9221	ShiftAmt = Builder.CreateTrunc(V: ShiftAmt, DestTy: WordType, Name: "ShiftAmt");
9222	Value *Mask = Builder.CreateShl(
9223	LHS: ConstantInt::get(Ty: WordType,
9224	V: (`1` << (DL.getTypeStoreSize(Ty: ValueType) * `8`)) - `1`),
9225	RHS: ShiftAmt, Name: "Mask");
9226	Value *Inv_Mask = Builder.CreateNot(V: Mask, Name: "Inv_Mask");
9227	Value *ValOperand_Shifted =
9228	Builder.CreateShl(LHS: Builder.CreateZExt(V: AI->getValOperand(), DestTy: WordType),
9229	RHS: ShiftAmt, Name: "ValOperand_Shifted");
9230	Value *NewOperand;
9231	if (Op == AtomicRMWInst::And)
9232	NewOperand = Builder.CreateOr(LHS: ValOperand_Shifted, RHS: Inv_Mask, Name: "AndOperand");
9233	else
9234	NewOperand = ValOperand_Shifted;
9235
9236	AtomicRMWInst *NewAI =
9237	Builder.CreateAtomicRMW(Op, Ptr: AlignedAddr, Val: NewOperand, Align: Align (MinWordSize),
9238	Ordering: AI->getOrdering(), SSID: AI->getSyncScopeID());
9239
9240	Value *Shift = Builder.CreateLShr(LHS: NewAI, RHS: ShiftAmt, Name: "shifted");
9241	Value *Trunc = Builder.CreateTrunc(V: Shift, DestTy: ValueType, Name: "extracted");
9242	Value *FinalOldResult = Builder.CreateBitCast(V: Trunc, DestTy: ValueType);
9243	AI->replaceAllUsesWith(V: FinalOldResult);
9244	AI->eraseFromParent();
9245	}
9246
9247	TargetLowering::AtomicExpansionKind
9248	LoongArchTargetLowering::shouldExpandAtomicRMWInIR(
9249	const AtomicRMWInst AI) const* {
9250	// TODO: Add more AtomicRMWInst that needs to be extended.
9251
9252	// Since floating-point operation requires a non-trivial set of data
9253	// operations, use CmpXChg to expand.
9254	if (AI->isFloatingPointOperation() \|\|
9255	AI->getOperation() == AtomicRMWInst::UIncWrap \|\|
9256	AI->getOperation() == AtomicRMWInst::UDecWrap \|\|
9257	AI->getOperation() == AtomicRMWInst::USubCond \|\|
9258	AI->getOperation() == AtomicRMWInst::USubSat)
9259	return AtomicExpansionKind::CmpXChg;
9260
9261	if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
9262	(AI->getOperation() == AtomicRMWInst::Xchg \|\|
9263	AI->getOperation() == AtomicRMWInst::Add \|\|
9264	AI->getOperation() == AtomicRMWInst::Sub)) {
9265	return AtomicExpansionKind::None;
9266	}
9267
9268	unsigned Size = AI->getType()->getPrimitiveSizeInBits();
9269	if (Subtarget.hasLAMCAS()) {
9270	if (Size < `32` && (AI->getOperation() == AtomicRMWInst::And \|\|
9271	AI->getOperation() == AtomicRMWInst::Or \|\|
9272	AI->getOperation() == AtomicRMWInst::Xor))
9273	return AtomicExpansionKind::CustomExpand;
9274	if (AI->getOperation() == AtomicRMWInst::Nand \|\| Size < `32`)
9275	return AtomicExpansionKind::CmpXChg;
9276	}
9277
9278	if (Size == `8` \|\| Size == `16`)
9279	return AtomicExpansionKind::MaskedIntrinsic;
9280	return AtomicExpansionKind::None;
9281	}
9282
9283	static Intrinsic::ID
9284	getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
9285	AtomicRMWInst::BinOp BinOp) {
9286	if (GRLen == `64`) {
9287	switch (BinOp) {
9288	default:
9289	llvm_unreachable("Unexpected AtomicRMW BinOp");
9290	case AtomicRMWInst::Xchg:
9291	return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
9292	case AtomicRMWInst::Add:
9293	return Intrinsic::loongarch_masked_atomicrmw_add_i64;
9294	case AtomicRMWInst::Sub:
9295	return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
9296	case AtomicRMWInst::Nand:
9297	return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
9298	case AtomicRMWInst::UMax:
9299	return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
9300	case AtomicRMWInst::UMin:
9301	return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
9302	case AtomicRMWInst::Max:
9303	return Intrinsic::loongarch_masked_atomicrmw_max_i64;
9304	case AtomicRMWInst::Min:
9305	return Intrinsic::loongarch_masked_atomicrmw_min_i64;
9306	// TODO: support other AtomicRMWInst.
9307	}
9308	}
9309
9310	if (GRLen == `32`) {
9311	switch (BinOp) {
9312	default:
9313	llvm_unreachable("Unexpected AtomicRMW BinOp");
9314	case AtomicRMWInst::Xchg:
9315	return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
9316	case AtomicRMWInst::Add:
9317	return Intrinsic::loongarch_masked_atomicrmw_add_i32;
9318	case AtomicRMWInst::Sub:
9319	return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
9320	case AtomicRMWInst::Nand:
9321	return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
9322	case AtomicRMWInst::UMax:
9323	return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
9324	case AtomicRMWInst::UMin:
9325	return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
9326	case AtomicRMWInst::Max:
9327	return Intrinsic::loongarch_masked_atomicrmw_max_i32;
9328	case AtomicRMWInst::Min:
9329	return Intrinsic::loongarch_masked_atomicrmw_min_i32;
9330	// TODO: support other AtomicRMWInst.
9331	}
9332	}
9333
9334	llvm_unreachable("Unexpected GRLen\n");
9335	}
9336
9337	TargetLowering::AtomicExpansionKind
9338	LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
9339	const AtomicCmpXchgInst CI) const* {
9340
9341	if (Subtarget.hasLAMCAS())
9342	return AtomicExpansionKind::None;
9343
9344	unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
9345	if (Size == `8` \|\| Size == `16`)
9346	return AtomicExpansionKind::MaskedIntrinsic;
9347	return AtomicExpansionKind::None;
9348	}
9349
9350	Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
9351	IRBuilderBase &Builder, AtomicCmpXchgInst CI, Value AlignedAddr,
9352	Value CmpVal, Value NewVal, Value Mask, AtomicOrdering Ord) const* {
9353	unsigned GRLen = Subtarget.getGRLen();
9354	AtomicOrdering FailOrd = CI->getFailureOrdering();
9355	Value *FailureOrdering =
9356	Builder.getIntN(N: Subtarget.getGRLen(), C: static_cast<uint64_t>(FailOrd));
9357	Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
9358	if (GRLen == `64`) {
9359	CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
9360	CmpVal = Builder.CreateSExt(V: CmpVal, DestTy: Builder.getInt64Ty());
9361	NewVal = Builder.CreateSExt(V: NewVal, DestTy: Builder.getInt64Ty());
9362	Mask = Builder.CreateSExt(V: Mask, DestTy: Builder.getInt64Ty());
9363	}
9364	Type *Tys[] = {AlignedAddr->getType()};
9365	Value *Result = Builder.CreateIntrinsic(
9366	ID: CmpXchgIntrID, Types: Tys, Args: {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
9367	if (GRLen == `64`)
9368	Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty());
9369	return Result;
9370	}
9371
9372	Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
9373	IRBuilderBase &Builder, AtomicRMWInst AI, Value AlignedAddr, Value *Incr,
9374	Value Mask, Value ShiftAmt, AtomicOrdering Ord) const {
9375	// In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
9376	// the atomic instruction with an AtomicRMWInst::And/Or with appropriate
9377	// mask, as this produces better code than the LL/SC loop emitted by
9378	// int_loongarch_masked_atomicrmw_xchg.
9379	if (AI->getOperation() == AtomicRMWInst::Xchg &&
9380	isa<ConstantInt>(Val: AI->getValOperand())) {
9381	ConstantInt *CVal = cast<ConstantInt>(Val: AI->getValOperand());
9382	if (CVal->isZero())
9383	return Builder.CreateAtomicRMW(Op: AtomicRMWInst::And, Ptr: AlignedAddr,
9384	Val: Builder.CreateNot(V: Mask, Name: "Inv_Mask"),
9385	Align: AI->getAlign(), Ordering: Ord);
9386	if (CVal->isMinusOne())
9387	return Builder.CreateAtomicRMW(Op: AtomicRMWInst::Or, Ptr: AlignedAddr, Val: Mask,
9388	Align: AI->getAlign(), Ordering: Ord);
9389	}
9390
9391	unsigned GRLen = Subtarget.getGRLen();
9392	Value *Ordering =
9393	Builder.getIntN(N: GRLen, C: static_cast<uint64_t>(AI->getOrdering()));
9394	Type *Tys[] = {AlignedAddr->getType()};
9395	Function *LlwOpScwLoop = Intrinsic::getOrInsertDeclaration(
9396	M: AI->getModule(),
9397	id: getIntrinsicForMaskedAtomicRMWBinOp(GRLen, BinOp: AI->getOperation()), Tys);
9398
9399	if (GRLen == `64`) {
9400	Incr = Builder.CreateSExt(V: Incr, DestTy: Builder.getInt64Ty());
9401	Mask = Builder.CreateSExt(V: Mask, DestTy: Builder.getInt64Ty());
9402	ShiftAmt = Builder.CreateSExt(V: ShiftAmt, DestTy: Builder.getInt64Ty());
9403	}
9404
9405	Value *Result;
9406
9407	// Must pass the shift amount needed to sign extend the loaded value prior
9408	// to performing a signed comparison for min/max. ShiftAmt is the number of
9409	// bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
9410	// is the number of bits to left+right shift the value in order to
9411	// sign-extend.
9412	if (AI->getOperation() == AtomicRMWInst::Min \|\|
9413	AI->getOperation() == AtomicRMWInst::Max) {
9414	const DataLayout &DL = AI->getDataLayout();
9415	unsigned ValWidth =
9416	DL.getTypeStoreSizeInBits(Ty: AI->getValOperand()->getType());
9417	Value *SextShamt =
9418	Builder.CreateSub(LHS: Builder.getIntN(N: GRLen, C: GRLen - ValWidth), RHS: ShiftAmt);
9419	Result = Builder.CreateCall(Callee: LlwOpScwLoop,
9420	Args: {AlignedAddr, Incr, Mask, SextShamt, Ordering});
9421	} else {
9422	Result =
9423	Builder.CreateCall(Callee: LlwOpScwLoop, Args: {AlignedAddr, Incr, Mask, Ordering});
9424	}
9425
9426	if (GRLen == `64`)
9427	Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty());
9428	return Result;
9429	}
9430
9431	bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
9432	const MachineFunction &MF, EVT VT) const {
9433	VT = VT.getScalarType();
9434
9435	if (!VT.isSimple())
9436	return false;
9437
9438	switch (VT.getSimpleVT().SimpleTy) {
9439	case MVT::f32:
9440	case MVT::f64:
9441	return true;
9442	default:
9443	break;
9444	}
9445
9446	return false;
9447	}
9448
9449	Register LoongArchTargetLowering::getExceptionPointerRegister(
9450	const Constant PersonalityFn) const* {
9451	return LoongArch::R4;
9452	}
9453
9454	Register LoongArchTargetLowering::getExceptionSelectorRegister(
9455	const Constant PersonalityFn) const* {
9456	return LoongArch::R5;
9457	}
9458
9459	//===----------------------------------------------------------------------===//
9460	// Target Optimization Hooks
9461	//===----------------------------------------------------------------------===//
9462
9463	static int getEstimateRefinementSteps(EVT VT,
9464	const LoongArchSubtarget &Subtarget) {
9465	// Feature FRECIPE instrucions relative accuracy is 2^-14.
9466	// IEEE float has 23 digits and double has 52 digits.
9467	int RefinementSteps = VT.getScalarType() == MVT::f64 ? `2` : `1`;
9468	return RefinementSteps;
9469	}
9470
9471	SDValue LoongArchTargetLowering::getSqrtEstimate(SDValue Operand,
9472	SelectionDAG &DAG, int Enabled,
9473	int &RefinementSteps,
9474	bool &UseOneConstNR,
9475	bool Reciprocal) const {
9476	if (Subtarget.hasFrecipe()) {
9477	SDLoc DL(Operand);
9478	EVT VT = Operand.getValueType();
9479
9480	if (VT == MVT::f32 \|\| (VT == MVT::f64 && Subtarget.hasBasicD()) \|\|
9481	(VT == MVT::v4f32 && Subtarget.hasExtLSX()) \|\|
9482	(VT == MVT::v2f64 && Subtarget.hasExtLSX()) \|\|
9483	(VT == MVT::v8f32 && Subtarget.hasExtLASX()) \|\|
9484	(VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
9485
9486	if (RefinementSteps == ReciprocalEstimate::Unspecified)
9487	RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9488
9489	SDValue Estimate = DAG.getNode(Opcode: LoongArchISD::FRSQRTE, DL, VT, Operand);
9490	if (Reciprocal)
9491	Estimate = DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: Operand, N2: Estimate);
9492
9493	return Estimate;
9494	}
9495	}
9496
9497	return SDValue ();
9498	}
9499
9500	SDValue LoongArchTargetLowering::getRecipEstimate(SDValue Operand,
9501	SelectionDAG &DAG,
9502	int Enabled,
9503	int &RefinementSteps) const {
9504	if (Subtarget.hasFrecipe()) {
9505	SDLoc DL(Operand);
9506	EVT VT = Operand.getValueType();
9507
9508	if (VT == MVT::f32 \|\| (VT == MVT::f64 && Subtarget.hasBasicD()) \|\|
9509	(VT == MVT::v4f32 && Subtarget.hasExtLSX()) \|\|
9510	(VT == MVT::v2f64 && Subtarget.hasExtLSX()) \|\|
9511	(VT == MVT::v8f32 && Subtarget.hasExtLASX()) \|\|
9512	(VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
9513
9514	if (RefinementSteps == ReciprocalEstimate::Unspecified)
9515	RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9516
9517	return DAG.getNode(Opcode: LoongArchISD::FRECIPE, DL, VT, Operand);
9518	}
9519	}
9520
9521	return SDValue ();
9522	}
9523
9524	//===----------------------------------------------------------------------===//
9525	// LoongArch Inline Assembly Support
9526	//===----------------------------------------------------------------------===//
9527
9528	LoongArchTargetLowering::ConstraintType
9529	LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
9530	// LoongArch specific constraints in GCC: config/loongarch/constraints.md
9531	//
9532	// 'f': A floating-point register (if available).
9533	// 'k': A memory operand whose address is formed by a base register and
9534	// (optionally scaled) index register.
9535	// 'l': A signed 16-bit constant.
9536	// 'm': A memory operand whose address is formed by a base register and
9537	// offset that is suitable for use in instructions with the same
9538	// addressing mode as st.w and ld.w.
9539	// 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg
9540	// instruction)
9541	// 'I': A signed 12-bit constant (for arithmetic instructions).
9542	// 'J': Integer zero.
9543	// 'K': An unsigned 12-bit constant (for logic instructions).
9544	// "ZB": An address that is held in a general-purpose register. The offset is
9545	// zero.
9546	// "ZC": A memory operand whose address is formed by a base register and
9547	// offset that is suitable for use in instructions with the same
9548	// addressing mode as ll.w and sc.w.
9549	if (Constraint.size() == `1`) {
9550	switch (Constraint [`0`]) {
9551	default:
9552	break;
9553	case `'f'`:
9554	case `'q'`:
9555	return C_RegisterClass;
9556	case `'l'`:
9557	case `'I'`:
9558	case `'J'`:
9559	case `'K'`:
9560	return C_Immediate;
9561	case `'k'`:
9562	return C_Memory;
9563	}
9564	}
9565
9566	if (Constraint == "ZC" \|\| Constraint == "ZB")
9567	return C_Memory;
9568
9569	// 'm' is handled here.
9570	return TargetLowering::getConstraintType(Constraint);
9571	}
9572
9573	InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
9574	StringRef ConstraintCode) const {
9575	return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
9576	.Case(S: "k", Value: InlineAsm::ConstraintCode::k)
9577	.Case(S: "ZB", Value: InlineAsm::ConstraintCode::ZB)
9578	.Case(S: "ZC", Value: InlineAsm::ConstraintCode::ZC)
9579	.Default(Value: TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
9580	}
9581
9582	std::pair<unsigned, const TargetRegisterClass *>
9583	LoongArchTargetLowering::getRegForInlineAsmConstraint(
9584	const TargetRegisterInfo TRI, StringRef Constraint, MVT VT) const* {
9585	// First, see if this is a constraint that directly corresponds to a LoongArch
9586	// register class.
9587	if (Constraint.size() == `1`) {
9588	switch (Constraint [`0`]) {
9589	case `'r'`:
9590	// TODO: Support fixed vectors up to GRLen?
9591	if (VT.isVector())
9592	break;
9593	return std::make_pair(x: `0U`, y: &LoongArch::GPRRegClass);
9594	case `'q'`:
9595	return std::make_pair(x: `0U`, y: &LoongArch::GPRNoR0R1RegClass);
9596	case `'f'`:
9597	if (Subtarget.hasBasicF() && VT == MVT::f32)
9598	return std::make_pair(x: `0U`, y: &LoongArch::FPR32RegClass);
9599	if (Subtarget.hasBasicD() && VT == MVT::f64)
9600	return std::make_pair(x: `0U`, y: &LoongArch::FPR64RegClass);
9601	if (Subtarget.hasExtLSX() &&
9602	TRI->isTypeLegalForClass(RC: LoongArch::LSX128RegClass, T: VT))
9603	return std::make_pair(x: `0U`, y: &LoongArch::LSX128RegClass);
9604	if (Subtarget.hasExtLASX() &&
9605	TRI->isTypeLegalForClass(RC: LoongArch::LASX256RegClass, T: VT))
9606	return std::make_pair(x: `0U`, y: &LoongArch::LASX256RegClass);
9607	break;
9608	default:
9609	break;
9610	}
9611	}
9612
9613	// TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
9614	// record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
9615	// constraints while the official register name is prefixed with a '$'. So we
9616	// clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
9617	// before it being parsed. And TargetLowering::getRegForInlineAsmConstraint is
9618	// case insensitive, so no need to convert the constraint to upper case here.
9619	//
9620	// For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
9621	// decode the usage of register name aliases into their official names. And
9622	// AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
9623	// official register names.
9624	if (Constraint.starts_with(Prefix: "{$r") \|\| Constraint.starts_with(Prefix: "{$f") \|\|
9625	Constraint.starts_with(Prefix: "{$vr") \|\| Constraint.starts_with(Prefix: "{$xr")) {
9626	bool IsFP = Constraint [`2`] == `'f'`;
9627	std::pair<StringRef, StringRef> Temp = Constraint.split(Separator: `'$'`);
9628	std::pair<unsigned, const TargetRegisterClass *> R;
9629	R = TargetLowering::getRegForInlineAsmConstraint(
9630	TRI, Constraint: join_items(Separator: "", Items&: Temp.first, Items&: Temp.second), VT);
9631	// Match those names to the widest floating point register type available.
9632	if (IsFP) {
9633	unsigned RegNo = R.first;
9634	if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
9635	if (Subtarget.hasBasicD() && (VT == MVT::f64 \|\| VT == MVT::Other)) {
9636	unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
9637	return std::make_pair(x&: DReg, y: &LoongArch::FPR64RegClass);
9638	}
9639	}
9640	}
9641	return R;
9642	}
9643
9644	return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
9645	}
9646
9647	void LoongArchTargetLowering::LowerAsmOperandForConstraint(
9648	SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
9649	SelectionDAG &DAG) const {
9650	// Currently only support length 1 constraints.
9651	if (Constraint.size() == `1`) {
9652	switch (Constraint [`0`]) {
9653	case `'l'`:
9654	// Validate & create a 16-bit signed immediate operand.
9655	if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) {
9656	uint64_t CVal = C->getSExtValue();
9657	if (isInt<`16`>(x: CVal))
9658	Ops.push_back(x: DAG.getSignedTargetConstant(Val: CVal, DL: SDLoc (Op),
9659	VT: Subtarget.getGRLenVT()));
9660	}
9661	return;
9662	case `'I'`:
9663	// Validate & create a 12-bit signed immediate operand.
9664	if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) {
9665	uint64_t CVal = C->getSExtValue();
9666	if (isInt<`12`>(x: CVal))
9667	Ops.push_back(x: DAG.getSignedTargetConstant(Val: CVal, DL: SDLoc (Op),
9668	VT: Subtarget.getGRLenVT()));
9669	}
9670	return;
9671	case `'J'`:
9672	// Validate & create an integer zero operand.
9673	if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op))
9674	if (C->getZExtValue() == `0`)
9675	Ops.push_back(
9676	x: DAG.getTargetConstant(Val: `0`, DL: SDLoc (Op), VT: Subtarget.getGRLenVT()));
9677	return;
9678	case `'K'`:
9679	// Validate & create a 12-bit unsigned immediate operand.
9680	if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) {
9681	uint64_t CVal = C->getZExtValue();
9682	if (isUInt<`12`>(x: CVal))
9683	Ops.push_back(
9684	x: DAG.getTargetConstant(Val: CVal, DL: SDLoc (Op), VT: Subtarget.getGRLenVT()));
9685	}
9686	return;
9687	default:
9688	break;
9689	}
9690	}
9691	TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
9692	}
9693
9694	#define GET_REGISTER_MATCHER
9695	#include "LoongArchGenAsmMatcher.inc"
9696
9697	Register
9698	LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
9699	const MachineFunction &MF) const {
9700	std::pair<StringRef, StringRef> Name = StringRef (RegName).split(Separator: `'$'`);
9701	std::string NewRegName = Name.second.str();
9702	Register Reg = MatchRegisterAltName(Name: NewRegName);
9703	if (!Reg)
9704	Reg = MatchRegisterName(Name: NewRegName);
9705	if (!Reg)
9706	return Reg;
9707	BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
9708	if (!ReservedRegs.test(Idx: Reg))
9709	report_fatal_error(reason: Twine("Trying to obtain non-reserved register \"" +
9710	StringRef (RegName) + "\"."));
9711	return Reg;
9712	}
9713
9714	bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
9715	EVT VT, SDValue C) const {
9716	// TODO: Support vectors.
9717	if (!VT.isScalarInteger())
9718	return false;
9719
9720	// Omit the optimization if the data size exceeds GRLen.
9721	if (VT.getSizeInBits() > Subtarget.getGRLen())
9722	return false;
9723
9724	if (auto *ConstNode = dyn_cast<ConstantSDNode>(Val: C.getNode())) {
9725	const APInt &Imm = ConstNode->getAPIntValue();
9726	// Break MUL into (SLLI + ADD/SUB) or ALSL.
9727	if ((Imm + `1`).isPowerOf2() \|\| (Imm - `1`).isPowerOf2() \|\|
9728	(`1` - Imm).isPowerOf2() \|\| (-`1` - Imm).isPowerOf2())
9729	return true;
9730	// Break MUL into (ALSL x, (SLLI x, imm0), imm1).
9731	if (ConstNode->hasOneUse() &&
9732	((Imm - `2`).isPowerOf2() \|\| (Imm - `4`).isPowerOf2() \|\|
9733	(Imm - `8`).isPowerOf2() \|\| (Imm - `16`).isPowerOf2()))
9734	return true;
9735	// Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
9736	// in which the immediate has two set bits. Or Break (MUL x, imm)
9737	// into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
9738	// equals to (1 << s0) - (1 << s1).
9739	if (ConstNode->hasOneUse() && !(Imm.sge(RHS: -`2048`) && Imm.sle(RHS: `4095`))) {
9740	unsigned Shifts = Imm.countr_zero();
9741	// Reject immediates which can be composed via a single LUI.
9742	if (Shifts >= `12`)
9743	return false;
9744	// Reject multiplications can be optimized to
9745	// (SLLI (ALSL x, x, 1/2/3/4), s).
9746	APInt ImmPop = Imm.ashr(ShiftAmt: Shifts);
9747	if (ImmPop == `3` \|\| ImmPop == `5` \|\| ImmPop == `9` \|\| ImmPop == `17`)
9748	return false;
9749	// We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
9750	// since it needs one more instruction than other 3 cases.
9751	APInt ImmSmall = APInt (Imm.getBitWidth(), `1ULL` << Shifts, true);
9752	if ((Imm - ImmSmall).isPowerOf2() \|\| (Imm + ImmSmall).isPowerOf2() \|\|
9753	(ImmSmall - Imm).isPowerOf2())
9754	return true;
9755	}
9756	}
9757
9758	return false;
9759	}
9760
9761	bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
9762	const AddrMode &AM,
9763	Type Ty, unsigned* AS,
9764	Instruction I) const* {
9765	// LoongArch has four basic addressing modes:
9766	// 1. reg
9767	// 2. reg + 12-bit signed offset
9768	// 3. reg + 14-bit signed offset left-shifted by 2
9769	// 4. reg1 + reg2
9770	// TODO: Add more checks after support vector extension.
9771
9772	// No global is ever allowed as a base.
9773	if (AM.BaseGV)
9774	return false;
9775
9776	// Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
9777	// with `UAL` feature.
9778	if (!isInt<`12`>(x: AM.BaseOffs) &&
9779	!(isShiftedInt<`14`, `2`>(x: AM.BaseOffs) && Subtarget.hasUAL()))
9780	return false;
9781
9782	switch (AM.Scale) {
9783	case `0`:
9784	// "r+i" or just "i", depending on HasBaseReg.
9785	break;
9786	case `1`:
9787	// "r+r+i" is not allowed.
9788	if (AM.HasBaseReg && AM.BaseOffs)
9789	return false;
9790	// Otherwise we have "r+r" or "r+i".
9791	break;
9792	case `2`:
9793	// "2r+r" or "2r+i" is not allowed.
9794	if (AM.HasBaseReg \|\| AM.BaseOffs)
9795	return false;
9796	// Allow "2r" as "r+r".*
9797	break;
9798	default:
9799	return false;
9800	}
9801
9802	return true;
9803	}
9804
9805	bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
9806	return isInt<`12`>(x: Imm);
9807	}
9808
9809	bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {
9810	return isInt<`12`>(x: Imm);
9811	}
9812
9813	bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
9814	// Zexts are free if they can be combined with a load.
9815	// Don't advertise i32->i64 zextload as being free for LA64. It interacts
9816	// poorly with type legalization of compares preferring sext.
9817	if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
9818	EVT MemVT = LD->getMemoryVT();
9819	if ((MemVT == MVT::i8 \|\| MemVT == MVT::i16) &&
9820	(LD->getExtensionType() == ISD::NON_EXTLOAD \|\|
9821	LD->getExtensionType() == ISD::ZEXTLOAD))
9822	return true;
9823	}
9824
9825	return TargetLowering::isZExtFree(Val, VT2);
9826	}
9827
9828	bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,
9829	EVT DstVT) const {
9830	return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
9831	}
9832
9833	bool LoongArchTargetLowering::signExtendConstant(const ConstantInt CI) const* {
9834	return Subtarget.is64Bit() && CI->getType()->isIntegerTy(Bitwidth: `32`);
9835	}
9836
9837	bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
9838	// TODO: Support vectors.
9839	if (Y.getValueType().isVector())
9840	return false;
9841
9842	return !isa<ConstantSDNode>(Val: Y);
9843	}
9844
9845	ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
9846	// LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
9847	return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
9848	}
9849
9850	bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall(
9851	Type Ty, bool* IsSigned) const {
9852	if (Subtarget.is64Bit() && Ty->isIntegerTy(Bitwidth: `32`))
9853	return true;
9854
9855	return IsSigned;
9856	}
9857
9858	bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
9859	// Return false to suppress the unnecessary extensions if the LibCall
9860	// arguments or return value is a float narrower than GRLEN on a soft FP ABI.
9861	if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
9862	Type.getSizeInBits() < Subtarget.getGRLen()))
9863	return false;
9864	return true;
9865	}
9866
9867	// memcpy, and other memory intrinsics, typically tries to use wider load/store
9868	// if the source/dest is aligned and the copy size is large enough. We therefore
9869	// want to align such objects passed to memory intrinsics.
9870	bool LoongArchTargetLowering::shouldAlignPointerArgs(CallInst *CI,
9871	unsigned &MinSize,
9872	Align &PrefAlign) const {
9873	if (!isa<MemIntrinsic>(Val: CI))
9874	return false;
9875
9876	if (Subtarget.is64Bit()) {
9877	MinSize = `8`;
9878	PrefAlign = Align (`8`);
9879	} else {
9880	MinSize = `4`;
9881	PrefAlign = Align (`4`);
9882	}
9883
9884	return true;
9885	}
9886
9887	TargetLoweringBase::LegalizeTypeAction
9888	LoongArchTargetLowering::getPreferredVectorAction(MVT VT) const {
9889	if (!VT.isScalableVector() && VT.getVectorNumElements() != `1` &&
9890	VT.getVectorElementType() != MVT::i1)
9891	return TypeWidenVector;
9892
9893	return TargetLoweringBase::getPreferredVectorAction(VT);
9894	}
9895
9896	bool LoongArchTargetLowering::splitValueIntoRegisterParts(
9897	SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
9898	unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
9899	bool IsABIRegCopy = CC.has_value();
9900	EVT ValueVT = Val.getValueType();
9901
9902	if (IsABIRegCopy && (ValueVT == MVT::f16 \|\| ValueVT == MVT::bf16) &&
9903	PartVT == MVT::f32) {
9904	// Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
9905	// nan, and cast to f32.
9906	Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::i16, Operand: Val);
9907	Val = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i32, Operand: Val);
9908	Val = DAG.getNode(Opcode: ISD::OR, DL, VT: MVT::i32, N1: Val,
9909	N2: DAG.getConstant(Val: `0xFFFF0000`, DL, VT: MVT::i32));
9910	Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::f32, Operand: Val);
9911	Parts[`0`] = Val;
9912	return true;
9913	}
9914
9915	return false;
9916	}
9917
9918	SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
9919	SelectionDAG &DAG, const SDLoc &DL, const SDValue Parts, unsigned* NumParts,
9920	MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
9921	bool IsABIRegCopy = CC.has_value();
9922
9923	if (IsABIRegCopy && (ValueVT == MVT::f16 \|\| ValueVT == MVT::bf16) &&
9924	PartVT == MVT::f32) {
9925	SDValue Val = Parts[`0`];
9926
9927	// Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
9928	Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::i32, Operand: Val);
9929	Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i16, Operand: Val);
9930	Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ValueVT, Operand: Val);
9931	return Val;
9932	}
9933
9934	return SDValue ();
9935	}
9936
9937	MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
9938	CallingConv::ID CC,
9939	EVT VT) const {
9940	// Use f32 to pass f16.
9941	if (VT == MVT::f16 && Subtarget.hasBasicF())
9942	return MVT::f32;
9943
9944	return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
9945	}
9946
9947	unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
9948	LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
9949	// Use f32 to pass f16.
9950	if (VT == MVT::f16 && Subtarget.hasBasicF())
9951	return `1`;
9952
9953	return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
9954	}
9955
9956	bool LoongArchTargetLowering::SimplifyDemandedBitsForTargetNode(
9957	SDValue Op, const APInt &OriginalDemandedBits,
9958	const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
9959	unsigned Depth) const {
9960	EVT VT = Op.getValueType();
9961	unsigned BitWidth = OriginalDemandedBits.getBitWidth();
9962	unsigned Opc = Op.getOpcode();
9963	switch (Opc) {
9964	default:
9965	break;
9966	case LoongArchISD::VMSKLTZ:
9967	case LoongArchISD::XVMSKLTZ: {
9968	SDValue Src = Op.getOperand(i: `0`);
9969	MVT SrcVT = Src.getSimpleValueType();
9970	unsigned SrcBits = SrcVT.getScalarSizeInBits();
9971	unsigned NumElts = SrcVT.getVectorNumElements();
9972
9973	// If we don't need the sign bits at all just return zero.
9974	if (OriginalDemandedBits.countr_zero() >= NumElts)
9975	return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: `0`, DL: SDLoc (Op), VT));
9976
9977	// Only demand the vector elements of the sign bits we need.
9978	APInt KnownUndef, KnownZero;
9979	APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(width: NumElts);
9980	if (SimplifyDemandedVectorElts(Op: Src, DemandedEltMask: DemandedElts, KnownUndef, KnownZero,
9981	TLO, Depth: Depth + `1`))
9982	return true;
9983
9984	Known.Zero = KnownZero.zext(width: BitWidth);
9985	Known.Zero.setHighBits(BitWidth - NumElts);
9986
9987	// [X]VMSKLTZ only uses the MSB from each vector element.
9988	KnownBits KnownSrc;
9989	APInt DemandedSrcBits = APInt::getSignMask(BitWidth: SrcBits);
9990	if (SimplifyDemandedBits(Op: Src, DemandedBits: DemandedSrcBits, DemandedElts, Known&: KnownSrc, TLO,
9991	Depth: Depth + `1`))
9992	return true;
9993
9994	if (KnownSrc.One [SrcBits - `1`])
9995	Known.One.setLowBits(NumElts);
9996	else if (KnownSrc.Zero [SrcBits - `1`])
9997	Known.Zero.setLowBits(NumElts);
9998
9999	// Attempt to avoid multi-use ops if we don't need anything from it.
10000	if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
10001	Op: Src, DemandedBits: DemandedSrcBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + `1`))
10002	return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: SDLoc (Op), VT, Operand: NewSrc));
10003	return false;
10004	}
10005	}
10006
10007	return TargetLowering::SimplifyDemandedBitsForTargetNode(
10008	Op, DemandedBits: OriginalDemandedBits, DemandedElts: OriginalDemandedElts, Known, TLO, Depth);
10009	}
10010
10011	bool LoongArchTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
10012	unsigned Opc = VecOp.getOpcode();
10013
10014	// Assume target opcodes can't be scalarized.
10015	// TODO - do we have any exceptions?
10016	if (Opc >= ISD::BUILTIN_OP_END \|\| !isBinOp(Opcode: Opc))
10017	return false;
10018
10019	// If the vector op is not supported, try to convert to scalar.
10020	EVT VecVT = VecOp.getValueType();
10021	if (!isOperationLegalOrCustomOrPromote(Op: Opc, VT: VecVT))
10022	return true;
10023
10024	// If the vector op is supported, but the scalar op is not, the transform may
10025	// not be worthwhile.
10026	EVT ScalarVT = VecVT.getScalarType();
10027	return isOperationLegalOrCustomOrPromote(Op: Opc, VT: ScalarVT);
10028	}
10029
10030	bool LoongArchTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
10031	unsigned Index) const {
10032	if (!isOperationLegalOrCustom(Op: ISD::EXTRACT_SUBVECTOR, VT: ResVT))
10033	return false;
10034
10035	// Extract a 128-bit subvector from index 0 of a 256-bit vector is free.
10036	return Index == `0`;
10037	}
10038
10039	bool LoongArchTargetLowering::isExtractVecEltCheap(EVT VT,
10040	unsigned Index) const {
10041	EVT EltVT = VT.getScalarType();
10042
10043	// Extract a scalar FP value from index 0 of a vector is free.
10044	return (EltVT == MVT::f32 \|\| EltVT == MVT::f64) && Index == `0`;
10045	}
10046

Browse the source code of llvm_projects/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp