//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that LoongArch uses to lower LLVM code into
// a selection DAG.
//
//===----------------------------------------------------------------------===//

#include "LoongArchISelLowering.h"
#include "LoongArch.h"
#include "LoongArchMachineFunctionInfo.h"
#include "LoongArchRegisterInfo.h"
#include "LoongArchSubtarget.h"
#include "MCTargetDesc/LoongArchBaseInfo.h"
#include "MCTargetDesc/LoongArchMCTargetDesc.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/RuntimeLibcallUtil.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsLoongArch.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"

using namespace llvm;

#define DEBUG_TYPE "loongarch-isel-lowering"

STATISTIC(NumTailCalls, "Number of tail calls");

static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
                                  cl::desc("Trap on integer division by zero."),
                                  cl::init(false));

LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
                                                 const LoongArchSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  MVT GRLenVT = Subtarget.getGRLenVT();

  // Set up the register classes.

  addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
  if (Subtarget.hasBasicF())
    addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
  if (Subtarget.hasBasicD())
    addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);

  static const MVT::SimpleValueType LSXVTs[] = {
      MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
  static const MVT::SimpleValueType LASXVTs[] = {
      MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};

  if (Subtarget.hasExtLSX())
    for (MVT VT : LSXVTs)
      addRegisterClass(VT, &LoongArch::LSX128RegClass);

  if (Subtarget.hasExtLASX())
    for (MVT VT : LASXVTs)
      addRegisterClass(VT, &LoongArch::LASX256RegClass);

  // Set operations for LA32 and LA64.

  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
                   MVT::i1, Promote);

  setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom);
  setOperationAction(ISD::ROTL, GRLenVT, Expand);
  setOperationAction(ISD::CTPOP, GRLenVT, Expand);

  setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
                      ISD::JumpTable, ISD::GlobalTLSAddress},
                     GRLenVT, Custom);

  setOperationAction(ISD::EH_DWARF_CFA, GRLenVT, Custom);

  setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
  setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);

  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setOperationAction(ISD::PREFETCH, MVT::Other, Custom);

  // BITREV/REVB requires the 32S feature.
  if (STI.has32S()) {
    // Expand bitreverse.i16 with native-width bitrev and shift for now, before
    // we get to know which of sll and revb.2h is faster.
    setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
    setOperationAction(ISD::BITREVERSE, GRLenVT, Legal);

    // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
    // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
    // and i32 could still be byte-swapped relatively cheaply.
    setOperationAction(ISD::BSWAP, MVT::i16, Custom);
  } else {
    setOperationAction(ISD::BSWAP, GRLenVT, Expand);
    setOperationAction(ISD::CTTZ, GRLenVT, Expand);
    setOperationAction(ISD::CTLZ, GRLenVT, Expand);
    setOperationAction(ISD::ROTR, GRLenVT, Expand);
    setOperationAction(ISD::SELECT, GRLenVT, Custom);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  }

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, GRLenVT, Expand);
  setOperationAction(ISD::SELECT_CC, GRLenVT, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand);

  setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom);
  setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand);

  // Set operations for LA64 only.

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::ADD, MVT::i32, Custom);
    setOperationAction(ISD::SUB, MVT::i32, Custom);
    setOperationAction(ISD::SHL, MVT::i32, Custom);
    setOperationAction(ISD::SRA, MVT::i32, Custom);
    setOperationAction(ISD::SRL, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);
    setOperationAction(ISD::ROTR, MVT::i32, Custom);
    setOperationAction(ISD::ROTL, MVT::i32, Custom);
    setOperationAction(ISD::CTTZ, MVT::i32, Custom);
    setOperationAction(ISD::CTLZ, MVT::i32, Custom);
    setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
    setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom);
    setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);

    setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
    setOperationAction(ISD::BSWAP, MVT::i32, Custom);
    setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, MVT::i32,
                       Custom);
    setOperationAction(ISD::LROUND, MVT::i32, Custom);
  }

  // Set operations for LA32 only.

  if (!Subtarget.is64Bit()) {
    setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
    setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
    if (Subtarget.hasBasicD())
      setOperationAction(ISD::BITCAST, MVT::i64, Custom);
  }

  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  static const ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
      ISD::SETGE,  ISD::SETNE,  ISD::SETGT};

  // Set operations for 'F' feature.

  if (Subtarget.hasBasicF()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
    setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
    setCondCodeAction(FPCCToExpand, MVT::f32, Expand);

    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    setOperationAction(ISD::FMA, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
    setOperationAction(ISD::IS_FPCLASS, MVT::f32, Legal);
    setOperationAction(ISD::FSIN, MVT::f32, Expand);
    setOperationAction(ISD::FCOS, MVT::f32, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
    setOperationAction(ISD::FPOW, MVT::f32, Expand);
    setOperationAction(ISD::FREM, MVT::f32, Expand);
    setOperationAction(ISD::FP16_TO_FP, MVT::f32,
                       Subtarget.isSoftFPABI() ? LibCall : Custom);
    setOperationAction(ISD::FP_TO_FP16, MVT::f32,
                       Subtarget.isSoftFPABI() ? LibCall : Custom);
    setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
    setOperationAction(ISD::FP_TO_BF16, MVT::f32,
                       Subtarget.isSoftFPABI() ? LibCall : Custom);

    if (Subtarget.is64Bit())
      setOperationAction(ISD::FRINT, MVT::f32, Legal);

    if (!Subtarget.hasBasicD()) {
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
      if (Subtarget.is64Bit()) {
        setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
        setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
      }
    }
  }

  // Set operations for 'D' feature.

  if (Subtarget.hasBasicD()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
    setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    setCondCodeAction(FPCCToExpand, MVT::f64, Expand);

    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
    setOperationAction(ISD::FMA, MVT::f64, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    setOperationAction(ISD::IS_FPCLASS, MVT::f64, Legal);
    setOperationAction(ISD::FSIN, MVT::f64, Expand);
    setOperationAction(ISD::FCOS, MVT::f64, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
    setOperationAction(ISD::FPOW, MVT::f64, Expand);
    setOperationAction(ISD::FREM, MVT::f64, Expand);
    setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
    setOperationAction(ISD::FP_TO_FP16, MVT::f64,
                       Subtarget.isSoftFPABI() ? LibCall : Custom);
    setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
    setOperationAction(ISD::FP_TO_BF16, MVT::f64,
                       Subtarget.isSoftFPABI() ? LibCall : Custom);

    if (Subtarget.is64Bit())
      setOperationAction(ISD::FRINT, MVT::f64, Legal);
  }

  // Set operations for 'LSX' feature.

  if (Subtarget.hasExtLSX()) {
    for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
      // Expand all truncating stores and extending loads.
      for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
      // By default everything must be expanded. Then we will selectively turn
      // on ones that can be effectively codegen'd.
      for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
        setOperationAction(Op, VT, Expand);
    }

    for (MVT VT : LSXVTs) {
      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);

      setOperationAction(ISD::SETCC, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    }
    for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
      setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
      setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
                         Legal);
      setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
                         VT, Legal);
      setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
      setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
      setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
      setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
      setCondCodeAction(
          {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
          Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
      setOperationAction(ISD::ABDS, VT, Legal);
      setOperationAction(ISD::ABDU, VT, Legal);
    }
    for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
      setOperationAction(ISD::BITREVERSE, VT, Custom);
    for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
      setOperationAction(ISD::BSWAP, VT, Legal);
    for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
      setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
    }
    for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
      setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
      setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
      setOperationAction(ISD::FMA, VT, Legal);
      setOperationAction(ISD::FSQRT, VT, Legal);
      setOperationAction(ISD::FNEG, VT, Legal);
      setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
                         ISD::SETUGE, ISD::SETUGT},
                        VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
    }
    setOperationAction(ISD::CTPOP, GRLenVT, Legal);
    setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
    setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
    setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
    setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);

    for (MVT VT :
         {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
          MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
      setOperationAction(ISD::TRUNCATE, VT, Custom);
    }
  }

  // Set operations for 'LASX' feature.

  if (Subtarget.hasExtLASX()) {
    for (MVT VT : LASXVTs) {
      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);

      setOperationAction(ISD::SETCC, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    }
    for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
      setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
      setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
                         Legal);
      setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
                         VT, Legal);
      setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
      setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
      setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
      setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
      setCondCodeAction(
          {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
          Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
      setOperationAction(ISD::ABDS, VT, Legal);
      setOperationAction(ISD::ABDU, VT, Legal);
    }
    for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
      setOperationAction(ISD::BITREVERSE, VT, Custom);
    for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
      setOperationAction(ISD::BSWAP, VT, Legal);
    for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
      setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
    }
    for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
      setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
      setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
      setOperationAction(ISD::FMA, VT, Legal);
      setOperationAction(ISD::FSQRT, VT, Legal);
      setOperationAction(ISD::FNEG, VT, Legal);
      setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
                         ISD::SETUGE, ISD::SETUGT},
                        VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
    }
  }

  // Set DAG combine for LA32 and LA64.

  setTargetDAGCombine(ISD::AND);
  setTargetDAGCombine(ISD::OR);
  setTargetDAGCombine(ISD::SRL);
  setTargetDAGCombine(ISD::SETCC);

  // Set DAG combine for 'LSX' feature.

  if (Subtarget.hasExtLSX()) {
    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
    setTargetDAGCombine(ISD::BITCAST);
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(LoongArch::R3);

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());

  setMinCmpXchgSizeInBits(32);

  // Function alignments.
  setMinFunctionAlignment(Align(4));
  // Set preferred alignments.
  setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
  setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
  setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());

  // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
  if (Subtarget.hasLAMCAS())
    setMinCmpXchgSizeInBits(8);

  if (Subtarget.hasSCQ()) {
    setMaxAtomicSizeInBitsSupported(128);
    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
  }
}

bool LoongArchTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  // In order to maximise the opportunity for common subexpression elimination,
  // keep a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  return false;
}

SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
                                                SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::ATOMIC_FENCE:
    return lowerATOMIC_FENCE(Op, DAG);
  case ISD::EH_DWARF_CFA:
    return lowerEH_DWARF_CFA(Op, DAG);
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN:
    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID:
    return lowerINTRINSIC_VOID(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, /*IsSRA=*/true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, /*IsSRA=*/false);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::FP_TO_SINT:
    return lowerFP_TO_SINT(Op, DAG);
  case ISD::BITCAST:
    return lowerBITCAST(Op, DAG);
  case ISD::UINT_TO_FP:
    return lowerUINT_TO_FP(Op, DAG);
  case ISD::SINT_TO_FP:
    return lowerSINT_TO_FP(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::WRITE_REGISTER:
    return lowerWRITE_REGISTER(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::BUILD_VECTOR:
    return lowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return lowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::BITREVERSE:
    return lowerBITREVERSE(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return lowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::PREFETCH:
    return lowerPREFETCH(Op, DAG);
  case ISD::SELECT:
    return lowerSELECT(Op, DAG);
  case ISD::FP_TO_FP16:
    return lowerFP_TO_FP16(Op, DAG);
  case ISD::FP16_TO_FP:
    return lowerFP16_TO_FP(Op, DAG);
  case ISD::FP_TO_BF16:
    return lowerFP_TO_BF16(Op, DAG);
  case ISD::BF16_TO_FP:
    return lowerBF16_TO_FP(Op, DAG);
  }
  return SDValue();
}

SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
                                               SelectionDAG &DAG) const {
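  // ISD::PREFETCH operands are (chain, address, rw, locality, is-data), so
  // operand 4 distinguishes data prefetches from instruction prefetches.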
  unsigned IsData = Op.getConstantOperandVal(4);

  // We don't support non-data prefetch.
  // Just preserve the chain.
  if (!IsData)
    return Op.getOperand(0);

  return Op;
}

// Return true if Val is equal to (setcc LHS, RHS, CC).
// Return false if Val is the inverse of (setcc LHS, RHS, CC).
// Otherwise, return std::nullopt.
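// For example, with LHS = a, RHS = b and CC = setlt:
//   Val = (setcc a, b, setlt) -> true
//   Val = (setcc a, b, setge) -> false (inverse condition)
//   Val = (setcc b, a, setgt) -> true  (swapped operands)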
static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
                                      ISD::CondCode CC, SDValue Val) {
  assert(Val->getOpcode() == ISD::SETCC);
  SDValue LHS2 = Val.getOperand(0);
  SDValue RHS2 = Val.getOperand(1);
  ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();

  if (LHS == LHS2 && RHS == RHS2) {
    if (CC == CC2)
      return true;
    if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
      return false;
  } else if (LHS == RHS2 && RHS == LHS2) {
    CC2 = ISD::getSetCCSwappedOperands(CC2);
    if (CC == CC2)
      return true;
    if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
      return false;
  }

  return std::nullopt;
}

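// Fold (select cond, truev, falsev) into plain bitwise or arithmetic ops when
// one arm is a 0/-1 constant or when both arms are setcc results, so that no
// conditional-move sequence is needed.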
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
                                    const LoongArchSubtarget &Subtarget) {
  SDValue CondV = N->getOperand(0);
  SDValue TrueV = N->getOperand(1);
  SDValue FalseV = N->getOperand(2);
  MVT VT = N->getSimpleValueType(0);
  SDLoc DL(N);

  // (select c, -1, y) -> -c | y
  if (isAllOnesConstant(TrueV)) {
    SDValue Neg = DAG.getNegative(CondV, DL, VT);
    return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
  }
  // (select c, y, -1) -> (c-1) | y
  if (isAllOnesConstant(FalseV)) {
    SDValue Neg =
        DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
    return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
  }

  // (select c, 0, y) -> (c-1) & y
  if (isNullConstant(TrueV)) {
    SDValue Neg =
        DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
    return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
  }
  // (select c, y, 0) -> -c & y
  if (isNullConstant(FalseV)) {
    SDValue Neg = DAG.getNegative(CondV, DL, VT);
    return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
  }

  // select c, ~x, x --> xor -c, x
  if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
    const APInt &TrueVal = TrueV->getAsAPIntVal();
    const APInt &FalseVal = FalseV->getAsAPIntVal();
    if (~TrueVal == FalseVal) {
      SDValue Neg = DAG.getNegative(CondV, DL, VT);
      return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
    }
  }

  // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
  // when both truev and falsev are also setcc.
  if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
      FalseV.getOpcode() == ISD::SETCC) {
    SDValue LHS = CondV.getOperand(0);
    SDValue RHS = CondV.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();

    // (select x, x, y) -> x | y
    // (select !x, x, y) -> x & y
    if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
      return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
                         DAG.getFreeze(FalseV));
    }
    // (select x, y, x) -> x & y
    // (select !x, y, x) -> x | y
    if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
      return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
                         DAG.getFreeze(TrueV), FalseV);
    }
  }

  return SDValue();
}

// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
// For now we only consider the transformation profitable if `binOp(c0, c1)`
// ends up being `0` or `-1`. In such cases we can replace `select` with `and`.
// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
// than `c0`?
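// For example, with c0 = 0 and an AND mask c1:
//   (and (select cond, x, 0), c1) -> (select cond, (and x, c1), 0)
// since `binOp(c0, c1)` = `0 & c1` folds to zero.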
static SDValue
foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
                                const LoongArchSubtarget &Subtarget) {
  unsigned SelOpNo = 0;
  SDValue Sel = BO->getOperand(0);
  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
    SelOpNo = 1;
    Sel = BO->getOperand(1);
  }

  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
    return SDValue();

  unsigned ConstSelOpNo = 1;
  unsigned OtherSelOpNo = 2;
  if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
    ConstSelOpNo = 2;
    OtherSelOpNo = 1;
  }
  SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
  ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
  if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
    return SDValue();

  SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
  ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
  if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
    return SDValue();

  SDLoc DL(Sel);
  EVT VT = BO->getValueType(0);

  SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
  if (SelOpNo == 1)
    std::swap(NewConstOps[0], NewConstOps[1]);

  SDValue NewConstOp =
      DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
  if (!NewConstOp)
    return SDValue();

  const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
  if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
    return SDValue();

  SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
  SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
  if (SelOpNo == 1)
    std::swap(NewNonConstOps[0], NewNonConstOps[1]);
  SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);

  SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
  SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
  return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
}

// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly by branches
// in the LoongArch ISA. May adjust compares to favor compare with 0 over
// compare with 1/-1.
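// For example, (setlt x, 1) becomes (setge 0, x), and a single-bit test such
// as (seteq (and x, 0x1000), 0) becomes a sign test on (shl x, GRLen - 13).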
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
                                    ISD::CondCode &CC, SelectionDAG &DAG) {
  // If this is a single bit test that can't be handled by ANDI, shift the
  // bit to be tested to the MSB and perform a signed compare with 0.
  if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
      LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
      isa<ConstantSDNode>(LHS.getOperand(1))) {
    uint64_t Mask = LHS.getConstantOperandVal(1);
    if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
      unsigned ShAmt = 0;
      if (isPowerOf2_64(Mask)) {
        CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
        ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
      } else {
        ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
      }

      LHS = LHS.getOperand(0);
      if (ShAmt != 0)
        LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
                          DAG.getConstant(ShAmt, DL, LHS.getValueType()));
      return;
    }
  }

  if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
    int64_t C = RHSC->getSExtValue();
    switch (CC) {
    default:
      break;
    case ISD::SETGT:
      // Convert X > -1 to X >= 0.
      if (C == -1) {
        RHS = DAG.getConstant(0, DL, RHS.getValueType());
        CC = ISD::SETGE;
        return;
      }
      break;
    case ISD::SETLT:
      // Convert X < 1 to 0 >= X.
      if (C == 1) {
        RHS = LHS;
        LHS = DAG.getConstant(0, DL, RHS.getValueType());
        CC = ISD::SETGE;
        return;
      }
      break;
    }
  }

  switch (CC) {
  default:
    break;
  case ISD::SETGT:
  case ISD::SETLE:
  case ISD::SETUGT:
  case ISD::SETULE:
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
    break;
  }
}

SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(0);
  SDValue TrueV = Op.getOperand(1);
  SDValue FalseV = Op.getOperand(2);
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  MVT GRLenVT = Subtarget.getGRLenVT();

  if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
    return V;

  if (Op.hasOneUse()) {
    unsigned UseOpc = Op->user_begin()->getOpcode();
    if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
      SDNode *BinOp = *Op->user_begin();
      if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
                                                           DAG, Subtarget)) {
        DAG.ReplaceAllUsesWith(BinOp, &NewSel);
        // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
        // may return a constant node and cause a crash in lowerSELECT.
        if (NewSel.getOpcode() == ISD::SELECT)
          return lowerSELECT(NewSel, DAG);
        return NewSel;
      }
    }
  }

  // If the condition is not an integer SETCC which operates on GRLenVT, we
  // need to emit a LoongArchISD::SELECT_CC comparing the condition to zero.
  // i.e.:
  // (select condv, truev, falsev)
  // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
  if (CondV.getOpcode() != ISD::SETCC ||
      CondV.getOperand(0).getSimpleValueType() != GRLenVT) {
    SDValue Zero = DAG.getConstant(0, DL, GRLenVT);
    SDValue SetNE = DAG.getCondCode(ISD::SETNE);

    SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};

    return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
  }

  // If the CondV is the output of a SETCC node which operates on GRLenVT
  // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
  // to take advantage of the integer compare+branch instructions. i.e.:
  // (select (setcc lhs, rhs, cc), truev, falsev)
  // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
  SDValue LHS = CondV.getOperand(0);
  SDValue RHS = CondV.getOperand(1);
  ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();

  // Special case for a select of 2 constants that have a difference of 1.
  // Normally this is done by DAGCombine, but if the select is introduced by
  // type legalization or op legalization, we miss it. Restricting to SETLT
  // case for now because that is what signed saturating add/sub need.
  // FIXME: We don't need the condition to be SETLT or even a SETCC,
  // but we would probably want to swap the true/false values if the condition
  // is SETGE/SETLE to avoid an XORI.
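  // For example, (select (setlt x, y), 10, 9) becomes
  // (add (setlt x, y), 9).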
  if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
      CCVal == ISD::SETLT) {
    const APInt &TrueVal = TrueV->getAsAPIntVal();
    const APInt &FalseVal = FalseV->getAsAPIntVal();
    if (TrueVal - 1 == FalseVal)
      return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
    if (TrueVal + 1 == FalseVal)
      return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
  }

  translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
  // 1 < x ? x : 1 -> 0 < x ? x : 1
  if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
      RHS == TrueV && LHS == FalseV) {
    LHS = DAG.getConstant(0, DL, VT);
    // 0 <u x is the same as x != 0.
    if (CCVal == ISD::SETULT) {
      std::swap(LHS, RHS);
      CCVal = ISD::SETNE;
    }
  }

  // x <s -1 ? x : -1 -> x <s 0 ? x : -1
  if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
      RHS == FalseV) {
    RHS = DAG.getConstant(0, DL, VT);
  }

  SDValue TargetCC = DAG.getCondCode(CCVal);

  if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
    // (select (setcc lhs, rhs, CC), constant, falsev)
    // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
    std::swap(TrueV, FalseV);
    TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
  }

  SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
  return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
}

SDValue
LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT OpVT = Op.getSimpleValueType();

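  // Lower as an insert of the scalar into lane 0 of an undef vector; later
  // instruction selection can match the insert to a single move-to-element.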
  SDValue Vector = DAG.getUNDEF(OpVT);
  SDValue Val = Op.getOperand(0);
  SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());

  return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
}

SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  EVT ResTy = Op->getValueType(0);
  SDValue Src = Op->getOperand(0);
  SDLoc DL(Op);

  EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
  unsigned int OrigEltNum = ResTy.getVectorNumElements();
  unsigned int NewEltNum = NewVT.getVectorNumElements();

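  // Strategy: bitcast to a vector of i64, bit-reverse each 64-bit element
  // (BITREV_8B only reverses the bits within each byte for byte vectors),
  // then shuffle the narrower elements back into their original positions.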
  SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);

  SmallVector<SDValue, 8> Ops;
  for (unsigned int i = 0; i < NewEltNum; i++) {
    SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
                             DAG.getConstant(i, DL, MVT::i64));
    unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
                         ? (unsigned)LoongArchISD::BITREV_8B
                         : (unsigned)ISD::BITREVERSE;
    Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
  }
  SDValue Res =
      DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));

  switch (ResTy.getSimpleVT().SimpleTy) {
  default:
    return SDValue();
  case MVT::v16i8:
  case MVT::v32i8:
    return Res;
  case MVT::v8i16:
  case MVT::v16i16:
  case MVT::v4i32:
  case MVT::v8i32: {
    SmallVector<int, 32> Mask;
    for (unsigned int i = 0; i < NewEltNum; i++)
      for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
        Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
    return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
  }
  }
}

// Widen element type to get a new mask value (if possible).
// For example:
// shufflevector <4 x i32> %a, <4 x i32> %b,
//               <4 x i32> <i32 6, i32 7, i32 2, i32 3>
// is equivalent to:
// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
// can be lowered to:
// VPACKOD_D vr0, vr0, vr1
static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
                                SDValue V1, SDValue V2, SelectionDAG &DAG) {
  unsigned EltBits = VT.getScalarSizeInBits();

  if (EltBits > 32 || EltBits == 1)
    return SDValue();

  SmallVector<int, 8> NewMask;
  if (widenShuffleMaskElts(Mask, NewMask)) {
    MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
                                        : MVT::getIntegerVT(EltBits * 2);
    MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
    if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
      SDValue NewV1 = DAG.getBitcast(NewVT, V1);
      SDValue NewV2 = DAG.getBitcast(NewVT, V2);
      return DAG.getBitcast(
          VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
    }
  }

  return SDValue();
}

/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
/// instructions.
// The function matches elements from one of the input vectors shuffled to the
// left or right with zeroable elements 'shifted in'. It handles both the
// strictly bit-wise element shifts and the byte shift across an entire 128-bit
// lane.
// Mostly copied from X86.
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
                               unsigned ScalarSizeInBits, ArrayRef<int> Mask,
                               int MaskOffset, const APInt &Zeroable) {
  int Size = Mask.size();
  unsigned SizeInBits = Size * ScalarSizeInBits;

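  // Check that every position which would be 'shifted in' within each
  // Scale-sized group of mask elements is zeroable.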
  auto CheckZeros = [&](int Shift, int Scale, bool Left) {
    for (int i = 0; i < Size; i += Scale)
      for (int j = 0; j < Shift; ++j)
        if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
          return false;

    return true;
  };

  auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
                                        int Step = 1) {
    for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
      if (!(Mask[i] == -1 || Mask[i] == Low))
        return false;
    return true;
  };

  auto MatchShift = [&](int Shift, int Scale, bool Left) {
    for (int i = 0; i != Size; i += Scale) {
      unsigned Pos = Left ? i + Shift : i;
      unsigned Low = Left ? i : i + Shift;
      unsigned Len = Scale - Shift;
      if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
        return -1;
    }

    int ShiftEltBits = ScalarSizeInBits * Scale;
    bool ByteShift = ShiftEltBits > 64;
    Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
                  : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
    int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);

    // Normalize the scale for byte shifts to still produce an i64 element
    // type.
    Scale = ByteShift ? Scale / 2 : Scale;

    // We need to round trip through the appropriate type for the shift.
    MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
    ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
                        : MVT::getVectorVT(ShiftSVT, Size / Scale);
    return (int)ShiftAmt;
  };

  unsigned MaxWidth = 128;
  for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
    for (int Shift = 1; Shift != Scale; ++Shift)
      for (bool Left : {true, false})
        if (CheckZeros(Shift, Scale, Left)) {
          int ShiftAmt = MatchShift(Shift, Scale, Left);
          if (0 < ShiftAmt)
            return ShiftAmt;
        }

  // no match
  return -1;
}

/// Lower VECTOR_SHUFFLE as shift (if possible).
///
/// For example:
/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
///                    <4 x i32> <i32 4, i32 0, i32 1, i32 2>
/// is lowered to:
/// (VBSLL_V $v0, $v0, 4)
///
/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
///                    <4 x i32> <i32 4, i32 0, i32 4, i32 2>
/// is lowered to:
/// (VSLLI_D $v0, $v0, 32)
static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef<int> Mask,
                                          MVT VT, SDValue V1, SDValue V2,
                                          SelectionDAG &DAG,
                                          const APInt &Zeroable) {
  int Size = Mask.size();
  assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");

  MVT ShiftVT;
  SDValue V = V1;
  unsigned Opcode;

  // Try to match shuffle against V1 shift.
  int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
                                     Mask, 0, Zeroable);

  // If V1 failed, try to match shuffle against V2 shift.
  if (ShiftAmt < 0) {
    ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
                                   Mask, Size, Zeroable);
    V = V2;
  }

  if (ShiftAmt < 0)
    return SDValue();

  assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
         "Illegal integer vector type");
  V = DAG.getBitcast(ShiftVT, V);
  V = DAG.getNode(Opcode, DL, ShiftVT, V,
                  DAG.getConstant(ShiftAmt, DL, MVT::i64));
  return DAG.getBitcast(VT, V);
}

/// Determine whether a range fits a regular pattern of values.
/// This function accounts for the possibility of jumping over the End iterator.
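/// For example, the mask range <0, -1, 2, -1> with CheckStride = 1,
/// ExpectedIndex = 0 and ExpectedIndexStride = 1 fits, since undef (-1)
/// entries act as wildcards.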
template <typename ValType>
static bool
fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
                   unsigned CheckStride,
                   typename SmallVectorImpl<ValType>::const_iterator End,
                   ValType ExpectedIndex, unsigned ExpectedIndexStride) {
  auto &I = Begin;

  while (I != End) {
    if (*I != -1 && *I != ExpectedIndex)
      return false;
    ExpectedIndex += ExpectedIndexStride;

    // Incrementing past End is undefined behaviour so we must increment one
    // step at a time and check for End at each step.
    for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
      ; // Empty loop body.
  }
  return true;
}

/// Compute whether each element of a shuffle is zeroable.
///
/// A "zeroable" vector shuffle element is one which can be lowered to zero.
static void computeZeroableShuffleElements(ArrayRef<int> Mask, SDValue V1,
                                           SDValue V2, APInt &KnownUndef,
                                           APInt &KnownZero) {
  int Size = Mask.size();
  KnownUndef = KnownZero = APInt::getZero(Size);

  V1 = peekThroughBitcasts(V1);
  V2 = peekThroughBitcasts(V2);

  bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
  bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());

  int VectorSizeInBits = V1.getValueSizeInBits();
  int ScalarSizeInBits = VectorSizeInBits / Size;
  assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
  (void)ScalarSizeInBits;

  for (int i = 0; i < Size; ++i) {
    int M = Mask[i];
    if (M < 0) {
      KnownUndef.setBit(i);
      continue;
    }
    if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
      KnownZero.setBit(i);
      continue;
    }
  }
}

/// Test whether a shuffle mask is equivalent within each sub-lane.
///
/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
/// non-trivial to compute in the face of undef lanes. The representation is
/// suitable for use with existing 128-bit shuffles as entries from the second
/// vector have been remapped to [LaneSize, 2*LaneSize).
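/// For example, the v8i32 mask <0, 9, 2, 11, 4, 13, 6, 15> repeats the
/// per-128-bit-lane mask <0, 5, 2, 7>.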
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
                                  ArrayRef<int> Mask,
                                  SmallVectorImpl<int> &RepeatedMask) {
  auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
  RepeatedMask.assign(LaneSize, -1);
  int Size = Mask.size();
  for (int i = 0; i < Size; ++i) {
    assert(Mask[i] == -1 || Mask[i] >= 0);
    if (Mask[i] < 0)
      continue;
    if ((Mask[i] % Size) / LaneSize != i / LaneSize)
      // This entry crosses lanes, so there is no way to model this shuffle.
      return false;

    // Ok, handle the in-lane shuffles by detecting if and when they repeat.
    // Adjust second vector indices to start at LaneSize instead of Size.
    int LocalM =
        Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
    if (RepeatedMask[i % LaneSize] < 0)
      // This is the first non-undef entry in this slot of a 128-bit lane.
      RepeatedMask[i % LaneSize] = LocalM;
    else if (RepeatedMask[i % LaneSize] != LocalM)
      // Found a mismatch with the repeated mask.
      return false;
  }
  return true;
}

/// Attempts to match vector shuffle as byte rotation.
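/// For example, the v2i64 mask <3, 0> matches a byte rotation of 8, with V1
/// supplying the low half of the result and V2 the high half.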
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
                                    ArrayRef<int> Mask) {

  SDValue Lo, Hi;
  SmallVector<int, 16> RepeatedMask;

  if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
    return -1;

  int NumElts = RepeatedMask.size();
  int Rotation = 0;
  int Scale = 16 / NumElts;

  for (int i = 0; i < NumElts; ++i) {
    int M = RepeatedMask[i];
    assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
           "Unexpected mask index.");
    if (M < 0)
      continue;

    // Determine where a rotated vector would have started.
    int StartIdx = i - (M % NumElts);
    if (StartIdx == 0)
      return -1;

    // If we found the tail of a vector the rotation must be the missing
    // front. If we found the head of a vector, it must be how much of the
    // head.
    int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;

    if (Rotation == 0)
      Rotation = CandidateRotation;
    else if (Rotation != CandidateRotation)
      return -1;

    // Compute which value this mask is pointing at.
    SDValue MaskV = M < NumElts ? V1 : V2;

    // Compute which of the two target values this index should be assigned
    // to. This reflects whether the high elements are remaining or the low
    // elements are remaining.
    SDValue &TargetV = StartIdx < 0 ? Hi : Lo;

    // Either set up this value if we've not encountered it before, or check
    // that it remains consistent.
    if (!TargetV)
      TargetV = MaskV;
    else if (TargetV != MaskV)
      return -1;
  }

  // Check that we successfully analyzed the mask, and normalize the results.
  assert(Rotation != 0 && "Failed to locate a viable rotation!");
  assert((Lo || Hi) && "Failed to find a rotated input vector!");
  if (!Lo)
    Lo = Hi;
  else if (!Hi)
    Hi = Lo;

  V1 = Lo;
  V2 = Hi;

  return Rotation * Scale;
}

/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
///
/// For example:
/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
///                          <2 x i32> <i32 3, i32 0>
/// is lowered to:
/// (VBSRL_V $v1, $v1, 8)
/// (VBSLL_V $v0, $v0, 8)
/// (VOR_V $v0, $v0, $v1)
static SDValue lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL,
                                               ArrayRef<int> Mask, MVT VT,
                                               SDValue V1, SDValue V2,
                                               SelectionDAG &DAG) {

  SDValue Lo = V1, Hi = V2;
  int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
  if (ByteRotation <= 0)
    return SDValue();

  MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
  Lo = DAG.getBitcast(ByteVT, Lo);
  Hi = DAG.getBitcast(ByteVT, Hi);

  int LoByteShift = 16 - ByteRotation;
  int HiByteShift = ByteRotation;

  SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
                                DAG.getConstant(LoByteShift, DL, MVT::i64));
  SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
                                DAG.getConstant(HiByteShift, DL, MVT::i64));
  return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
}

1246 | /// Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible). |
1247 | /// |
1248 | /// For example: |
1249 | /// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer, |
1250 | /// <4 x i32> <i32 0, i32 4, i32 1, i32 4> |
1251 | /// %3 = bitcast <4 x i32> %2 to <2 x i64> |
1252 | /// is lowered to: |
1253 | /// (VREPLI $v1, 0) |
1254 | /// (VILVL $v0, $v1, $v0) |
1255 | static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL, |
1256 | ArrayRef<int> Mask, MVT VT, |
1257 | SDValue V1, SDValue V2, |
1258 | SelectionDAG &DAG, |
1259 | const APInt &Zeroable) { |
1260 | int Bits = VT.getSizeInBits(); |
1261 | int EltBits = VT.getScalarSizeInBits(); |
1262 | int NumElements = VT.getVectorNumElements(); |
1263 | |
1264 | if (Zeroable.isAllOnes()) |
1265 | return DAG.getConstant(Val: 0, DL, VT); |
1266 | |
1267 | // Define a helper function to check a particular ext-scale and lower to it if |
1268 | // valid. |
1269 | auto Lower = [&](int Scale) -> SDValue { |
1270 | SDValue InputV; |
1271 | bool AnyExt = true; |
1272 | int Offset = 0; |
1273 | for (int i = 0; i < NumElements; i++) { |
1274 | int M = Mask[i]; |
1275 | if (M < 0) |
1276 | continue; |
1277 | if (i % Scale != 0) { |
        // Each of the extended elements needs to be zeroable.
1279 | if (!Zeroable[i]) |
1280 | return SDValue(); |
1281 | |
1282 | AnyExt = false; |
1283 | continue; |
1284 | } |
1285 | |
      // The base elements need to be consecutive indices into the same
      // input vector.
1288 | SDValue V = M < NumElements ? V1 : V2; |
1289 | M = M % NumElements; |
1290 | if (!InputV) { |
1291 | InputV = V; |
1292 | Offset = M - (i / Scale); |
1293 | |
        // These offsets can't be handled.
1295 | if (Offset % (NumElements / Scale)) |
1296 | return SDValue(); |
1297 | } else if (InputV != V) |
1298 | return SDValue(); |
1299 | |
1300 | if (M != (Offset + (i / Scale))) |
1301 | return SDValue(); // Non-consecutive strided elements. |
1302 | } |
1303 | |
1304 | // If we fail to find an input, we have a zero-shuffle which should always |
1305 | // have already been handled. |
1306 | if (!InputV) |
1307 | return SDValue(); |
1308 | |
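    // Repeatedly interleave with a zero vector (or a frozen copy of the
    // input for any-extend), doubling the element width each step until the
    // requested extension scale is reached.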
1309 | do { |
1310 | unsigned VilVLoHi = LoongArchISD::VILVL; |
1311 | if (Offset >= (NumElements / 2)) { |
1312 | VilVLoHi = LoongArchISD::VILVH; |
1313 | Offset -= (NumElements / 2); |
1314 | } |
1315 | |
1316 | MVT InputVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: EltBits), NumElements); |
1317 | SDValue Ext = |
1318 | AnyExt ? DAG.getFreeze(V: InputV) : DAG.getConstant(Val: 0, DL, VT: InputVT); |
1319 | InputV = DAG.getBitcast(VT: InputVT, V: InputV); |
1320 | InputV = DAG.getNode(Opcode: VilVLoHi, DL, VT: InputVT, N1: Ext, N2: InputV); |
1321 | Scale /= 2; |
1322 | EltBits *= 2; |
1323 | NumElements /= 2; |
1324 | } while (Scale > 1); |
1325 | return DAG.getBitcast(VT, V: InputV); |
1326 | }; |
1327 | |
1328 | // Each iteration, try extending the elements half as much, but into twice as |
1329 | // many elements. |
1330 | for (int NumExtElements = Bits / 64; NumExtElements < NumElements; |
1331 | NumExtElements *= 2) { |
1332 | if (SDValue V = Lower(NumElements / NumExtElements)) |
1333 | return V; |
1334 | } |
1335 | return SDValue(); |
1336 | } |
1337 | |
1338 | /// Lower VECTOR_SHUFFLE into VREPLVEI (if possible). |
1339 | /// |
1340 | /// VREPLVEI performs vector broadcast based on an element specified by an |
1341 | /// integer immediate, with its mask being similar to: |
1342 | /// <x, x, x, ...> |
1343 | /// where x is any valid index. |
1344 | /// |
/// When undefs appear in the mask, they are treated as if they were whatever
/// value is necessary in order to fit the above form.
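///
/// For example:
///   %2 = shufflevector <4 x i32> %0, <4 x i32> undef,
///                      <4 x i32> <i32 1, i32 1, i32 1, i32 1>
/// is lowered to:
///   (VREPLVEI_W $v0, $v0, 1)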
1347 | static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, |
1348 | MVT VT, SDValue V1, SDValue V2, |
1349 | SelectionDAG &DAG) { |
1350 | int SplatIndex = -1; |
1351 | for (const auto &M : Mask) { |
1352 | if (M != -1) { |
1353 | SplatIndex = M; |
1354 | break; |
1355 | } |
1356 | } |
1357 | |
1358 | if (SplatIndex == -1) |
1359 | return DAG.getUNDEF(VT); |
1360 | |
1361 | assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index" ); |
1362 | if (fitsRegularPattern<int>(Begin: Mask.begin(), CheckStride: 1, End: Mask.end(), ExpectedIndex: SplatIndex, ExpectedIndexStride: 0)) { |
1363 | APInt Imm(64, SplatIndex); |
1364 | return DAG.getNode(Opcode: LoongArchISD::VREPLVEI, DL, VT, N1: V1, |
1365 | N2: DAG.getConstant(Val: Imm, DL, VT: MVT::i64)); |
1366 | } |
1367 | |
1368 | return SDValue(); |
1369 | } |
1370 | |
1371 | /// Lower VECTOR_SHUFFLE into VSHUF4I (if possible). |
1372 | /// |
1373 | /// VSHUF4I splits the vector into blocks of four elements, then shuffles these |
1374 | /// elements according to a <4 x i2> constant (encoded as an integer immediate). |
1375 | /// |
1376 | /// It is therefore possible to lower into VSHUF4I when the mask takes the form: |
1377 | /// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...> |
/// When undefs appear, they are treated as if they were whatever value is
/// necessary in order to fit the above forms.
1380 | /// |
1381 | /// For example: |
1382 | /// %2 = shufflevector <8 x i16> %0, <8 x i16> undef, |
1383 | /// <8 x i32> <i32 3, i32 2, i32 1, i32 0, |
1384 | /// i32 7, i32 6, i32 5, i32 4> |
1385 | /// is lowered to: |
1386 | /// (VSHUF4I_H $v0, $v1, 27) |
1387 | /// where the 27 comes from: |
1388 | /// 3 + (2 << 2) + (1 << 4) + (0 << 6) |
1389 | static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, |
1390 | MVT VT, SDValue V1, SDValue V2, |
1391 | SelectionDAG &DAG) { |
1392 | |
1393 | unsigned SubVecSize = 4; |
1394 | if (VT == MVT::v2f64 || VT == MVT::v2i64) |
1395 | SubVecSize = 2; |
1396 | |
1397 | int SubMask[4] = {-1, -1, -1, -1}; |
1398 | for (unsigned i = 0; i < SubVecSize; ++i) { |
1399 | for (unsigned j = i; j < Mask.size(); j += SubVecSize) { |
1400 | int M = Mask[j]; |
1401 | |
      // Convert from vector index to 4-element subvector index.
      // If an index refers to an element outside of the subvector, give up.
1404 | if (M != -1) { |
1405 | M -= 4 * (j / SubVecSize); |
1406 | if (M < 0 || M >= 4) |
1407 | return SDValue(); |
1408 | } |
1409 | |
1410 | // If the mask has an undef, replace it with the current index. |
      // Note that it might still be undef if the current index is also undef.
1412 | if (SubMask[i] == -1) |
1413 | SubMask[i] = M; |
1414 | // Check that non-undef values are the same as in the mask. If they |
1415 | // aren't then give up |
1416 | else if (M != -1 && M != SubMask[i]) |
1417 | return SDValue(); |
1418 | } |
1419 | } |
1420 | |
  // Calculate the immediate. Replace any remaining undefs with zero.
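  // For example, SubMask <3, 2, 1, 0> yields Imm = 0b00011011 = 27.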
1422 | APInt Imm(64, 0); |
1423 | for (int i = SubVecSize - 1; i >= 0; --i) { |
1424 | int M = SubMask[i]; |
1425 | |
1426 | if (M == -1) |
1427 | M = 0; |
1428 | |
1429 | Imm <<= 2; |
1430 | Imm |= M & 0x3; |
1431 | } |
1432 | |
  // For v2i64/v2f64, return vshuf4i.d, which shuffles both input vectors.
1434 | if (VT == MVT::v2f64 || VT == MVT::v2i64) |
1435 | return DAG.getNode(Opcode: LoongArchISD::VSHUF4I, DL, VT, N1: V1, N2: V2, |
1436 | N3: DAG.getConstant(Val: Imm, DL, VT: MVT::i64)); |
1437 | |
1438 | return DAG.getNode(Opcode: LoongArchISD::VSHUF4I, DL, VT, N1: V1, |
1439 | N2: DAG.getConstant(Val: Imm, DL, VT: MVT::i64)); |
1440 | } |
1441 | |
1442 | /// Lower VECTOR_SHUFFLE into VPACKEV (if possible). |
1443 | /// |
1444 | /// VPACKEV interleaves the even elements from each vector. |
1445 | /// |
1446 | /// It is possible to lower into VPACKEV when the mask consists of two of the |
1447 | /// following forms interleaved: |
1448 | /// <0, 2, 4, ...> |
1449 | /// <n, n+2, n+4, ...> |
1450 | /// where n is the number of elements in the vector. |
1451 | /// For example: |
1452 | /// <0, 0, 2, 2, 4, 4, ...> |
1453 | /// <0, n, 2, n+2, 4, n+4, ...> |
1454 | /// |
/// When undefs appear in the mask, they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
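///
/// For example:
///   %2 = shufflevector <4 x i32> %0, <4 x i32> %1,
///                      <4 x i32> <i32 0, i32 4, i32 2, i32 6>
/// is lowered to:
///   (VPACKEV_W $v0, $v1, $v0)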
1457 | static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef<int> Mask, |
1458 | MVT VT, SDValue V1, SDValue V2, |
1459 | SelectionDAG &DAG) { |
1460 | |
1461 | const auto &Begin = Mask.begin(); |
1462 | const auto &End = Mask.end(); |
1463 | SDValue OriV1 = V1, OriV2 = V2; |
1464 | |
1465 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 2)) |
1466 | V1 = OriV1; |
1467 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2)) |
1468 | V1 = OriV2; |
1469 | else |
1470 | return SDValue(); |
1471 | |
1472 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 2)) |
1473 | V2 = OriV1; |
1474 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2)) |
1475 | V2 = OriV2; |
1476 | else |
1477 | return SDValue(); |
1478 | |
1479 | return DAG.getNode(Opcode: LoongArchISD::VPACKEV, DL, VT, N1: V2, N2: V1); |
1480 | } |
1481 | |
1482 | /// Lower VECTOR_SHUFFLE into VPACKOD (if possible). |
1483 | /// |
1484 | /// VPACKOD interleaves the odd elements from each vector. |
1485 | /// |
1486 | /// It is possible to lower into VPACKOD when the mask consists of two of the |
1487 | /// following forms interleaved: |
1488 | /// <1, 3, 5, ...> |
1489 | /// <n+1, n+3, n+5, ...> |
1490 | /// where n is the number of elements in the vector. |
1491 | /// For example: |
1492 | /// <1, 1, 3, 3, 5, 5, ...> |
1493 | /// <1, n+1, 3, n+3, 5, n+5, ...> |
1494 | /// |
/// When undefs appear in the mask, they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
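///
/// For example:
///   %2 = shufflevector <4 x i32> %0, <4 x i32> %1,
///                      <4 x i32> <i32 1, i32 5, i32 3, i32 7>
/// is lowered to:
///   (VPACKOD_W $v0, $v1, $v0)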
1497 | static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef<int> Mask, |
1498 | MVT VT, SDValue V1, SDValue V2, |
1499 | SelectionDAG &DAG) { |
1500 | |
1501 | const auto &Begin = Mask.begin(); |
1502 | const auto &End = Mask.end(); |
1503 | SDValue OriV1 = V1, OriV2 = V2; |
1504 | |
1505 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 1, ExpectedIndexStride: 2)) |
1506 | V1 = OriV1; |
1507 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2)) |
1508 | V1 = OriV2; |
1509 | else |
1510 | return SDValue(); |
1511 | |
1512 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 1, ExpectedIndexStride: 2)) |
1513 | V2 = OriV1; |
1514 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2)) |
1515 | V2 = OriV2; |
1516 | else |
1517 | return SDValue(); |
1518 | |
1519 | return DAG.getNode(Opcode: LoongArchISD::VPACKOD, DL, VT, N1: V2, N2: V1); |
1520 | } |
1521 | |
1522 | /// Lower VECTOR_SHUFFLE into VILVH (if possible). |
1523 | /// |
1524 | /// VILVH interleaves consecutive elements from the left (highest-indexed) half |
1525 | /// of each vector. |
1526 | /// |
1527 | /// It is possible to lower into VILVH when the mask consists of two of the |
1528 | /// following forms interleaved: |
1529 | /// <x, x+1, x+2, ...> |
1530 | /// <n+x, n+x+1, n+x+2, ...> |
1531 | /// where n is the number of elements in the vector and x is half n. |
1532 | /// For example: |
1533 | /// <x, x, x+1, x+1, x+2, x+2, ...> |
1534 | /// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...> |
1535 | /// |
/// When undefs appear in the mask, they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
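///
/// For example:
///   %2 = shufflevector <4 x i32> %0, <4 x i32> %1,
///                      <4 x i32> <i32 2, i32 6, i32 3, i32 7>
/// is lowered to:
///   (VILVH_W $v0, $v1, $v0)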
1538 | static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef<int> Mask, |
1539 | MVT VT, SDValue V1, SDValue V2, |
1540 | SelectionDAG &DAG) { |
1541 | |
1542 | const auto &Begin = Mask.begin(); |
1543 | const auto &End = Mask.end(); |
1544 | unsigned HalfSize = Mask.size() / 2; |
1545 | SDValue OriV1 = V1, OriV2 = V2; |
1546 | |
1547 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1)) |
1548 | V1 = OriV1; |
1549 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 1)) |
1550 | V1 = OriV2; |
1551 | else |
1552 | return SDValue(); |
1553 | |
1554 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1)) |
1555 | V2 = OriV1; |
1556 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size() + HalfSize, |
1557 | ExpectedIndexStride: 1)) |
1558 | V2 = OriV2; |
1559 | else |
1560 | return SDValue(); |
1561 | |
1562 | return DAG.getNode(Opcode: LoongArchISD::VILVH, DL, VT, N1: V2, N2: V1); |
1563 | } |
1564 | |
1565 | /// Lower VECTOR_SHUFFLE into VILVL (if possible). |
1566 | /// |
1567 | /// VILVL interleaves consecutive elements from the right (lowest-indexed) half |
1568 | /// of each vector. |
1569 | /// |
1570 | /// It is possible to lower into VILVL when the mask consists of two of the |
1571 | /// following forms interleaved: |
1572 | /// <0, 1, 2, ...> |
1573 | /// <n, n+1, n+2, ...> |
1574 | /// where n is the number of elements in the vector. |
1575 | /// For example: |
1576 | /// <0, 0, 1, 1, 2, 2, ...> |
1577 | /// <0, n, 1, n+1, 2, n+2, ...> |
1578 | /// |
/// When undefs appear in the mask, they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
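///
/// For example:
///   %2 = shufflevector <4 x i32> %0, <4 x i32> %1,
///                      <4 x i32> <i32 0, i32 4, i32 1, i32 5>
/// is lowered to:
///   (VILVL_W $v0, $v1, $v0)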
1581 | static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask, |
1582 | MVT VT, SDValue V1, SDValue V2, |
1583 | SelectionDAG &DAG) { |
1584 | |
1585 | const auto &Begin = Mask.begin(); |
1586 | const auto &End = Mask.end(); |
1587 | SDValue OriV1 = V1, OriV2 = V2; |
1588 | |
1589 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 1)) |
1590 | V1 = OriV1; |
1591 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 1)) |
1592 | V1 = OriV2; |
1593 | else |
1594 | return SDValue(); |
1595 | |
1596 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 1)) |
1597 | V2 = OriV1; |
1598 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 1)) |
1599 | V2 = OriV2; |
1600 | else |
1601 | return SDValue(); |
1602 | |
1603 | return DAG.getNode(Opcode: LoongArchISD::VILVL, DL, VT, N1: V2, N2: V1); |
1604 | } |
1605 | |
1606 | /// Lower VECTOR_SHUFFLE into VPICKEV (if possible). |
1607 | /// |
1608 | /// VPICKEV copies the even elements of each vector into the result vector. |
1609 | /// |
1610 | /// It is possible to lower into VPICKEV when the mask consists of two of the |
1611 | /// following forms concatenated: |
1612 | /// <0, 2, 4, ...> |
1613 | /// <n, n+2, n+4, ...> |
1614 | /// where n is the number of elements in the vector. |
1615 | /// For example: |
1616 | /// <0, 2, 4, ..., 0, 2, 4, ...> |
1617 | /// <0, 2, 4, ..., n, n+2, n+4, ...> |
1618 | /// |
/// When undefs appear in the mask, they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
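///
/// For example:
///   %2 = shufflevector <4 x i32> %0, <4 x i32> %1,
///                      <4 x i32> <i32 0, i32 2, i32 4, i32 6>
/// is lowered to:
///   (VPICKEV_W $v0, $v1, $v0)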
1621 | static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask, |
1622 | MVT VT, SDValue V1, SDValue V2, |
1623 | SelectionDAG &DAG) { |
1624 | |
1625 | const auto &Begin = Mask.begin(); |
1626 | const auto &Mid = Mask.begin() + Mask.size() / 2; |
1627 | const auto &End = Mask.end(); |
1628 | SDValue OriV1 = V1, OriV2 = V2; |
1629 | |
1630 | if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: 0, ExpectedIndexStride: 2)) |
1631 | V1 = OriV1; |
1632 | else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2)) |
1633 | V1 = OriV2; |
1634 | else |
1635 | return SDValue(); |
1636 | |
1637 | if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: 0, ExpectedIndexStride: 2)) |
1638 | V2 = OriV1; |
1639 | else if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2)) |
1640 | V2 = OriV2; |
1641 | |
1642 | else |
1643 | return SDValue(); |
1644 | |
1645 | return DAG.getNode(Opcode: LoongArchISD::VPICKEV, DL, VT, N1: V2, N2: V1); |
1646 | } |
1647 | |
1648 | /// Lower VECTOR_SHUFFLE into VPICKOD (if possible). |
1649 | /// |
1650 | /// VPICKOD copies the odd elements of each vector into the result vector. |
1651 | /// |
1652 | /// It is possible to lower into VPICKOD when the mask consists of two of the |
1653 | /// following forms concatenated: |
1654 | /// <1, 3, 5, ...> |
1655 | /// <n+1, n+3, n+5, ...> |
1656 | /// where n is the number of elements in the vector. |
1657 | /// For example: |
1658 | /// <1, 3, 5, ..., 1, 3, 5, ...> |
1659 | /// <1, 3, 5, ..., n+1, n+3, n+5, ...> |
1660 | /// |
/// When undefs appear in the mask, they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
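///
/// For example:
///   %2 = shufflevector <4 x i32> %0, <4 x i32> %1,
///                      <4 x i32> <i32 1, i32 3, i32 5, i32 7>
/// is lowered to:
///   (VPICKOD_W $v0, $v1, $v0)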
1663 | static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask, |
1664 | MVT VT, SDValue V1, SDValue V2, |
1665 | SelectionDAG &DAG) { |
1666 | |
1667 | const auto &Begin = Mask.begin(); |
1668 | const auto &Mid = Mask.begin() + Mask.size() / 2; |
1669 | const auto &End = Mask.end(); |
1670 | SDValue OriV1 = V1, OriV2 = V2; |
1671 | |
1672 | if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: 1, ExpectedIndexStride: 2)) |
1673 | V1 = OriV1; |
1674 | else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2)) |
1675 | V1 = OriV2; |
1676 | else |
1677 | return SDValue(); |
1678 | |
1679 | if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: 1, ExpectedIndexStride: 2)) |
1680 | V2 = OriV1; |
1681 | else if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2)) |
1682 | V2 = OriV2; |
1683 | else |
1684 | return SDValue(); |
1685 | |
1686 | return DAG.getNode(Opcode: LoongArchISD::VPICKOD, DL, VT, N1: V2, N2: V1); |
1687 | } |
1688 | |
1689 | /// Lower VECTOR_SHUFFLE into VSHUF. |
1690 | /// |
1691 | /// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and |
1692 | /// adding it as an operand to the resulting VSHUF. |
1693 | static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask, |
1694 | MVT VT, SDValue V1, SDValue V2, |
1695 | SelectionDAG &DAG) { |
1696 | |
1697 | SmallVector<SDValue, 16> Ops; |
1698 | for (auto M : Mask) |
1699 | Ops.push_back(Elt: DAG.getConstant(Val: M, DL, VT: MVT::i64)); |
1700 | |
1701 | EVT MaskVecTy = VT.changeVectorElementTypeToInteger(); |
1702 | SDValue MaskVec = DAG.getBuildVector(VT: MaskVecTy, DL, Ops); |
1703 | |
  // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
1705 | // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11> |
1706 | // VSHF concatenates the vectors in a bitwise fashion: |
1707 | // <0b00, 0b01> + <0b10, 0b11> -> |
1708 | // 0b0100 + 0b1110 -> 0b01001110 |
1709 | // <0b10, 0b11, 0b00, 0b01> |
1710 | // We must therefore swap the operands to get the correct result. |
1711 | return DAG.getNode(Opcode: LoongArchISD::VSHUF, DL, VT, N1: MaskVec, N2: V2, N3: V1); |
1712 | } |
1713 | |
1714 | /// Dispatching routine to lower various 128-bit LoongArch vector shuffles. |
1715 | /// |
1716 | /// This routine breaks down the specific type of 128-bit shuffle and |
1717 | /// dispatches to the lowering routines accordingly. |
1718 | static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT, |
1719 | SDValue V1, SDValue V2, SelectionDAG &DAG) { |
1720 | assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 || |
1721 | VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 || |
1722 | VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) && |
1723 | "Vector type is unsupported for lsx!" ); |
1724 | assert(V1.getSimpleValueType() == V2.getSimpleValueType() && |
1725 | "Two operands have different types!" ); |
1726 | assert(VT.getVectorNumElements() == Mask.size() && |
1727 | "Unexpected mask size for shuffle!" ); |
1728 | assert(Mask.size() % 2 == 0 && "Expected even mask size." ); |
1729 | |
1730 | APInt KnownUndef, KnownZero; |
1731 | computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero); |
1732 | APInt Zeroable = KnownUndef | KnownZero; |
1733 | |
1734 | SDValue Result; |
1735 | // TODO: Add more comparison patterns. |
1736 | if (V2.isUndef()) { |
1737 | if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG))) |
1738 | return Result; |
1739 | if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG))) |
1740 | return Result; |
1741 | |
    // TODO: The commented-out code below may be enabled in the future to
    // better match the pattern for instruction selection.
    /* V2 = V1; */
1745 | } |
1746 | |
  // The order of these pattern comparisons is tuned for performance;
  // it is best left unchanged.
1749 | if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG))) |
1750 | return Result; |
1751 | if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG))) |
1752 | return Result; |
1753 | if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG))) |
1754 | return Result; |
1755 | if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG))) |
1756 | return Result; |
1757 | if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG))) |
1758 | return Result; |
1759 | if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG))) |
1760 | return Result; |
1761 | if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) && |
1762 | (Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG))) |
1763 | return Result; |
1764 | if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG, |
1765 | Zeroable))) |
1766 | return Result; |
1767 | if ((Result = |
1768 | lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Zeroable))) |
1769 | return Result; |
1770 | if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG))) |
1771 | return Result; |
1772 | if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG)) |
1773 | return NewShuffle; |
1774 | if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG))) |
1775 | return Result; |
1776 | return SDValue(); |
1777 | } |
1778 | |
1779 | /// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible). |
1780 | /// |
/// It is an XVREPLVEI when the mask is:
///   <x, x, x, ..., x+n, x+n, x+n, ...>
/// where the number of x's is equal to n and n is half the length of the
/// vector.
1784 | /// |
/// When undefs appear in the mask, they are treated as if they were whatever
/// value is necessary in order to fit the above form.
1787 | static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, |
1788 | ArrayRef<int> Mask, MVT VT, |
1789 | SDValue V1, SDValue V2, |
1790 | SelectionDAG &DAG) { |
1791 | int SplatIndex = -1; |
1792 | for (const auto &M : Mask) { |
1793 | if (M != -1) { |
1794 | SplatIndex = M; |
1795 | break; |
1796 | } |
1797 | } |
1798 | |
1799 | if (SplatIndex == -1) |
1800 | return DAG.getUNDEF(VT); |
1801 | |
1802 | const auto &Begin = Mask.begin(); |
1803 | const auto &End = Mask.end(); |
1804 | unsigned HalfSize = Mask.size() / 2; |
1805 | |
1806 | assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index" ); |
1807 | if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: End - HalfSize, ExpectedIndex: SplatIndex, ExpectedIndexStride: 0) && |
1808 | fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 1, End, ExpectedIndex: SplatIndex + HalfSize, |
1809 | ExpectedIndexStride: 0)) { |
1810 | APInt Imm(64, SplatIndex); |
1811 | return DAG.getNode(Opcode: LoongArchISD::VREPLVEI, DL, VT, N1: V1, |
1812 | N2: DAG.getConstant(Val: Imm, DL, VT: MVT::i64)); |
1813 | } |
1814 | |
1815 | return SDValue(); |
1816 | } |
1817 | |
1818 | /// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible). |
1819 | static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, |
1820 | MVT VT, SDValue V1, SDValue V2, |
1821 | SelectionDAG &DAG) { |
  // When the size is less than or equal to 4, lower-cost instructions may be
  // used.
1824 | if (Mask.size() <= 4) |
1825 | return SDValue(); |
1826 | return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG); |
1827 | } |
1828 | |
1829 | /// Lower VECTOR_SHUFFLE into XVPACKEV (if possible). |
1830 | static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask, |
1831 | MVT VT, SDValue V1, SDValue V2, |
1832 | SelectionDAG &DAG) { |
1833 | return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG); |
1834 | } |
1835 | |
1836 | /// Lower VECTOR_SHUFFLE into XVPACKOD (if possible). |
1837 | static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask, |
1838 | MVT VT, SDValue V1, SDValue V2, |
1839 | SelectionDAG &DAG) { |
1840 | return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG); |
1841 | } |
1842 | |
1843 | /// Lower VECTOR_SHUFFLE into XVILVH (if possible). |
1844 | static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask, |
1845 | MVT VT, SDValue V1, SDValue V2, |
1846 | SelectionDAG &DAG) { |
1847 | |
1848 | const auto &Begin = Mask.begin(); |
1849 | const auto &End = Mask.end(); |
1850 | unsigned HalfSize = Mask.size() / 2; |
1851 | unsigned LeftSize = HalfSize / 2; |
1852 | SDValue OriV1 = V1, OriV2 = V2; |
1853 | |
1854 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize, ExpectedIndex: HalfSize - LeftSize, |
1855 | ExpectedIndexStride: 1) && |
1856 | fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize + LeftSize, ExpectedIndexStride: 1)) |
1857 | V1 = OriV1; |
1858 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize, |
1859 | ExpectedIndex: Mask.size() + HalfSize - LeftSize, ExpectedIndexStride: 1) && |
1860 | fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End, |
1861 | ExpectedIndex: Mask.size() + HalfSize + LeftSize, ExpectedIndexStride: 1)) |
1862 | V1 = OriV2; |
1863 | else |
1864 | return SDValue(); |
1865 | |
1866 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize, ExpectedIndex: HalfSize - LeftSize, |
1867 | ExpectedIndexStride: 1) && |
1868 | fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize + LeftSize, |
1869 | ExpectedIndexStride: 1)) |
1870 | V2 = OriV1; |
1871 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize, |
1872 | ExpectedIndex: Mask.size() + HalfSize - LeftSize, ExpectedIndexStride: 1) && |
1873 | fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End, |
1874 | ExpectedIndex: Mask.size() + HalfSize + LeftSize, ExpectedIndexStride: 1)) |
1875 | V2 = OriV2; |
1876 | else |
1877 | return SDValue(); |
1878 | |
1879 | return DAG.getNode(Opcode: LoongArchISD::VILVH, DL, VT, N1: V2, N2: V1); |
1880 | } |
1881 | |
1882 | /// Lower VECTOR_SHUFFLE into XVILVL (if possible). |
1883 | static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask, |
1884 | MVT VT, SDValue V1, SDValue V2, |
1885 | SelectionDAG &DAG) { |
1886 | |
1887 | const auto &Begin = Mask.begin(); |
1888 | const auto &End = Mask.end(); |
1889 | unsigned HalfSize = Mask.size() / 2; |
1890 | SDValue OriV1 = V1, OriV2 = V2; |
1891 | |
1892 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize, ExpectedIndex: 0, ExpectedIndexStride: 1) && |
1893 | fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1)) |
1894 | V1 = OriV1; |
1895 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize, ExpectedIndex: Mask.size(), ExpectedIndexStride: 1) && |
1896 | fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End, |
1897 | ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 1)) |
1898 | V1 = OriV2; |
1899 | else |
1900 | return SDValue(); |
1901 | |
1902 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize, ExpectedIndex: 0, ExpectedIndexStride: 1) && |
1903 | fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1)) |
1904 | V2 = OriV1; |
1905 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize, ExpectedIndex: Mask.size(), |
1906 | ExpectedIndexStride: 1) && |
1907 | fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End, |
1908 | ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 1)) |
1909 | V2 = OriV2; |
1910 | else |
1911 | return SDValue(); |
1912 | |
1913 | return DAG.getNode(Opcode: LoongArchISD::VILVL, DL, VT, N1: V2, N2: V1); |
1914 | } |
1915 | |
1916 | /// Lower VECTOR_SHUFFLE into XVPICKEV (if possible). |
1917 | static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask, |
1918 | MVT VT, SDValue V1, SDValue V2, |
1919 | SelectionDAG &DAG) { |
1920 | |
1921 | const auto &Begin = Mask.begin(); |
1922 | const auto &LeftMid = Mask.begin() + Mask.size() / 4; |
1923 | const auto &Mid = Mask.begin() + Mask.size() / 2; |
1924 | const auto &RightMid = Mask.end() - Mask.size() / 4; |
1925 | const auto &End = Mask.end(); |
1926 | unsigned HalfSize = Mask.size() / 2; |
1927 | SDValue OriV1 = V1, OriV2 = V2; |
1928 | |
1929 | if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: 0, ExpectedIndexStride: 2) && |
1930 | fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: HalfSize, ExpectedIndexStride: 2)) |
1931 | V1 = OriV1; |
1932 | else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2) && |
1933 | fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 2)) |
1934 | V1 = OriV2; |
1935 | else |
1936 | return SDValue(); |
1937 | |
1938 | if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: 0, ExpectedIndexStride: 2) && |
1939 | fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 2)) |
1940 | V2 = OriV1; |
1941 | else if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2) && |
1942 | fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 2)) |
1943 | V2 = OriV2; |
1944 | |
1945 | else |
1946 | return SDValue(); |
1947 | |
1948 | return DAG.getNode(Opcode: LoongArchISD::VPICKEV, DL, VT, N1: V2, N2: V1); |
1949 | } |
1950 | |
1951 | /// Lower VECTOR_SHUFFLE into XVPICKOD (if possible). |
1952 | static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask, |
1953 | MVT VT, SDValue V1, SDValue V2, |
1954 | SelectionDAG &DAG) { |
1955 | |
1956 | const auto &Begin = Mask.begin(); |
1957 | const auto &LeftMid = Mask.begin() + Mask.size() / 4; |
1958 | const auto &Mid = Mask.begin() + Mask.size() / 2; |
1959 | const auto &RightMid = Mask.end() - Mask.size() / 4; |
1960 | const auto &End = Mask.end(); |
1961 | unsigned HalfSize = Mask.size() / 2; |
1962 | SDValue OriV1 = V1, OriV2 = V2; |
1963 | |
1964 | if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: 1, ExpectedIndexStride: 2) && |
1965 | fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: HalfSize + 1, ExpectedIndexStride: 2)) |
1966 | V1 = OriV1; |
1967 | else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2) && |
1968 | fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: Mask.size() + HalfSize + 1, |
1969 | ExpectedIndexStride: 2)) |
1970 | V1 = OriV2; |
1971 | else |
1972 | return SDValue(); |
1973 | |
1974 | if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: 1, ExpectedIndexStride: 2) && |
1975 | fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: HalfSize + 1, ExpectedIndexStride: 2)) |
1976 | V2 = OriV1; |
1977 | else if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2) && |
1978 | fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: Mask.size() + HalfSize + 1, |
1979 | ExpectedIndexStride: 2)) |
1980 | V2 = OriV2; |
1981 | else |
1982 | return SDValue(); |
1983 | |
1984 | return DAG.getNode(Opcode: LoongArchISD::VPICKOD, DL, VT, N1: V2, N2: V1); |
1985 | } |
1986 | |
1987 | /// Lower VECTOR_SHUFFLE into XVSHUF (if possible). |
1988 | static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask, |
1989 | MVT VT, SDValue V1, SDValue V2, |
1990 | SelectionDAG &DAG) { |
1991 | |
1992 | int MaskSize = Mask.size(); |
1993 | int HalfSize = Mask.size() / 2; |
1994 | const auto &Begin = Mask.begin(); |
1995 | const auto &Mid = Mask.begin() + HalfSize; |
1996 | const auto &End = Mask.end(); |
1997 | |
1998 | // VECTOR_SHUFFLE concatenates the vectors: |
1999 | // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15> |
2000 | // shuffling -> |
2001 | // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15> |
2002 | // |
2003 | // XVSHUF concatenates the vectors: |
2004 | // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7> |
2005 | // shuffling -> |
2006 | // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7> |
2007 | SmallVector<SDValue, 8> MaskAlloc; |
2008 | for (auto it = Begin; it < Mid; it++) { |
2009 | if (*it < 0) // UNDEF |
2010 | MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: 0, DL, VT: MVT::i64)); |
2011 | else if ((*it >= 0 && *it < HalfSize) || |
2012 | (*it >= MaskSize && *it < MaskSize + HalfSize)) { |
2013 | int M = *it < HalfSize ? *it : *it - HalfSize; |
2014 | MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: M, DL, VT: MVT::i64)); |
2015 | } else |
2016 | return SDValue(); |
2017 | } |
2018 | assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!" ); |
2019 | |
2020 | for (auto it = Mid; it < End; it++) { |
2021 | if (*it < 0) // UNDEF |
2022 | MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: 0, DL, VT: MVT::i64)); |
2023 | else if ((*it >= HalfSize && *it < MaskSize) || |
2024 | (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) { |
2025 | int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize; |
2026 | MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: M, DL, VT: MVT::i64)); |
2027 | } else |
2028 | return SDValue(); |
2029 | } |
2030 | assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!" ); |
2031 | |
2032 | EVT MaskVecTy = VT.changeVectorElementTypeToInteger(); |
2033 | SDValue MaskVec = DAG.getBuildVector(VT: MaskVecTy, DL, Ops: MaskAlloc); |
2034 | return DAG.getNode(Opcode: LoongArchISD::VSHUF, DL, VT, N1: MaskVec, N2: V2, N3: V1); |
2035 | } |
2036 | |
2037 | /// Shuffle vectors by lane to generate more optimized instructions. |
/// 256-bit shuffles are always treated as 2-lane 128-bit shuffles.
2039 | /// |
/// Therefore, all cases other than the following four are regarded as
/// cross-lane shuffles, for which optimization is relatively limited.
2042 | /// |
/// - Shuffle high, low lanes of the two input vectors
///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
/// - Shuffle low, high lanes of the two input vectors
///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
/// - Shuffle low, low lanes of the two input vectors
///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
/// - Shuffle high, high lanes of the two input vectors
///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
2051 | /// |
2052 | /// The first case is the closest to LoongArch instructions and the other |
2053 | /// cases need to be converted to it for processing. |
2054 | /// |
/// This function may modify V1, V2 and Mask.
2056 | static void canonicalizeShuffleVectorByLane(const SDLoc &DL, |
2057 | MutableArrayRef<int> Mask, MVT VT, |
2058 | SDValue &V1, SDValue &V2, |
2059 | SelectionDAG &DAG) { |
2060 | |
2061 | enum HalfMaskType { HighLaneTy, LowLaneTy, None }; |
2062 | |
2063 | int MaskSize = Mask.size(); |
2064 | int HalfSize = Mask.size() / 2; |
2065 | |
2066 | HalfMaskType preMask = None, postMask = None; |
2067 | |
2068 | if (std::all_of(first: Mask.begin(), last: Mask.begin() + HalfSize, pred: [&](int M) { |
2069 | return M < 0 || (M >= 0 && M < HalfSize) || |
2070 | (M >= MaskSize && M < MaskSize + HalfSize); |
2071 | })) |
2072 | preMask = HighLaneTy; |
2073 | else if (std::all_of(first: Mask.begin(), last: Mask.begin() + HalfSize, pred: [&](int M) { |
2074 | return M < 0 || (M >= HalfSize && M < MaskSize) || |
2075 | (M >= MaskSize + HalfSize && M < MaskSize * 2); |
2076 | })) |
2077 | preMask = LowLaneTy; |
2078 | |
2079 | if (std::all_of(first: Mask.begin() + HalfSize, last: Mask.end(), pred: [&](int M) { |
2080 | return M < 0 || (M >= 0 && M < HalfSize) || |
2081 | (M >= MaskSize && M < MaskSize + HalfSize); |
2082 | })) |
2083 | postMask = HighLaneTy; |
2084 | else if (std::all_of(first: Mask.begin() + HalfSize, last: Mask.end(), pred: [&](int M) { |
2085 | return M < 0 || (M >= HalfSize && M < MaskSize) || |
2086 | (M >= MaskSize + HalfSize && M < MaskSize * 2); |
2087 | })) |
2088 | postMask = LowLaneTy; |
2089 | |
  // The first half of the mask is high-lane type and the second half is
  // low-lane type, which is the form closest to the LoongArch instructions.
  //
  // Note: In the LoongArch architecture, the high lane of the mask
  // corresponds to the lower 128 bits of the vector register, and the low
  // lane of the mask corresponds to the higher 128 bits.
2096 | if (preMask == HighLaneTy && postMask == LowLaneTy) { |
2097 | return; |
2098 | } |
2099 | if (preMask == LowLaneTy && postMask == HighLaneTy) { |
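    // Swap the two 128-bit lanes: xvpermi.d with immediate 0b01001110
    // selects the 64-bit elements <2, 3, 0, 1>.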
2100 | V1 = DAG.getBitcast(VT: MVT::v4i64, V: V1); |
2101 | V1 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V1, |
2102 | N2: DAG.getConstant(Val: 0b01001110, DL, VT: MVT::i64)); |
2103 | V1 = DAG.getBitcast(VT, V: V1); |
2104 | |
2105 | if (!V2.isUndef()) { |
2106 | V2 = DAG.getBitcast(VT: MVT::v4i64, V: V2); |
2107 | V2 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V2, |
2108 | N2: DAG.getConstant(Val: 0b01001110, DL, VT: MVT::i64)); |
2109 | V2 = DAG.getBitcast(VT, V: V2); |
2110 | } |
2111 | |
2112 | for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) { |
2113 | *it = *it < 0 ? *it : *it - HalfSize; |
2114 | } |
2115 | for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) { |
2116 | *it = *it < 0 ? *it : *it + HalfSize; |
2117 | } |
2118 | } else if (preMask == LowLaneTy && postMask == LowLaneTy) { |
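    // Broadcast the high 128-bit lane: xvpermi.d with immediate 0b11101110
    // selects the 64-bit elements <2, 3, 2, 3>.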
2119 | V1 = DAG.getBitcast(VT: MVT::v4i64, V: V1); |
2120 | V1 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V1, |
2121 | N2: DAG.getConstant(Val: 0b11101110, DL, VT: MVT::i64)); |
2122 | V1 = DAG.getBitcast(VT, V: V1); |
2123 | |
2124 | if (!V2.isUndef()) { |
2125 | V2 = DAG.getBitcast(VT: MVT::v4i64, V: V2); |
2126 | V2 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V2, |
2127 | N2: DAG.getConstant(Val: 0b11101110, DL, VT: MVT::i64)); |
2128 | V2 = DAG.getBitcast(VT, V: V2); |
2129 | } |
2130 | |
2131 | for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) { |
2132 | *it = *it < 0 ? *it : *it - HalfSize; |
2133 | } |
2134 | } else if (preMask == HighLaneTy && postMask == HighLaneTy) { |
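    // Broadcast the low 128-bit lane: xvpermi.d with immediate 0b01000100
    // selects the 64-bit elements <0, 1, 0, 1>.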
2135 | V1 = DAG.getBitcast(VT: MVT::v4i64, V: V1); |
2136 | V1 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V1, |
2137 | N2: DAG.getConstant(Val: 0b01000100, DL, VT: MVT::i64)); |
2138 | V1 = DAG.getBitcast(VT, V: V1); |
2139 | |
2140 | if (!V2.isUndef()) { |
2141 | V2 = DAG.getBitcast(VT: MVT::v4i64, V: V2); |
2142 | V2 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V2, |
2143 | N2: DAG.getConstant(Val: 0b01000100, DL, VT: MVT::i64)); |
2144 | V2 = DAG.getBitcast(VT, V: V2); |
2145 | } |
2146 | |
2147 | for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) { |
2148 | *it = *it < 0 ? *it : *it + HalfSize; |
2149 | } |
2150 | } else { // cross-lane |
2151 | return; |
2152 | } |
2153 | } |
2154 | |
2155 | /// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible). |
2156 | /// Only for 256-bit vector. |
2157 | /// |
2158 | /// For example: |
///   %2 = shufflevector <4 x i64> %0, <4 x i64> poison,
///                      <4 x i32> <i32 0, i32 3, i32 2, i32 0>
/// is lowered to:
2162 | /// (XVPERMI $xr2, $xr0, 78) |
2163 | /// (XVSHUF $xr1, $xr2, $xr0) |
2164 | /// (XVORI $xr0, $xr1, 0) |
2165 | static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL, |
2166 | ArrayRef<int> Mask, |
2167 | MVT VT, SDValue V1, |
2168 | SDValue V2, |
2169 | SelectionDAG &DAG) { |
2170 | assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!" ); |
2171 | int Size = Mask.size(); |
2172 | int LaneSize = Size / 2; |
2173 | |
2174 | bool LaneCrossing[2] = {false, false}; |
2175 | for (int i = 0; i < Size; ++i) |
2176 | if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize)) |
2177 | LaneCrossing[(Mask[i] % Size) / LaneSize] = true; |
2178 | |
  // Bail out early if no lane crossing occurs.
2180 | if (!LaneCrossing[0] && !LaneCrossing[1]) |
2181 | return SDValue(); |
2182 | |
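  // Rewrite the mask so that any element taken from the other lane is taken
  // instead from the lane-swapped copy of V1 (the second shuffle operand,
  // hence the +Size offset).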
2183 | SmallVector<int> InLaneMask; |
2184 | InLaneMask.assign(in_start: Mask.begin(), in_end: Mask.end()); |
2185 | for (int i = 0; i < Size; ++i) { |
2186 | int &M = InLaneMask[i]; |
2187 | if (M < 0) |
2188 | continue; |
2189 | if (((M % Size) / LaneSize) != (i / LaneSize)) |
2190 | M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size; |
2191 | } |
2192 | |
2193 | SDValue Flipped = DAG.getBitcast(VT: MVT::v4i64, V: V1); |
2194 | Flipped = DAG.getVectorShuffle(VT: MVT::v4i64, dl: DL, N1: Flipped, |
2195 | N2: DAG.getUNDEF(VT: MVT::v4i64), Mask: {2, 3, 0, 1}); |
2196 | Flipped = DAG.getBitcast(VT, V: Flipped); |
2197 | return DAG.getVectorShuffle(VT, dl: DL, N1: V1, N2: Flipped, Mask: InLaneMask); |
2198 | } |
2199 | |
2200 | /// Dispatching routine to lower various 256-bit LoongArch vector shuffles. |
2201 | /// |
2202 | /// This routine breaks down the specific type of 256-bit shuffle and |
2203 | /// dispatches to the lowering routines accordingly. |
2204 | static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT, |
2205 | SDValue V1, SDValue V2, SelectionDAG &DAG) { |
2206 | assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 || |
2207 | VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 || |
2208 | VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) && |
2209 | "Vector type is unsupported for lasx!" ); |
2210 | assert(V1.getSimpleValueType() == V2.getSimpleValueType() && |
2211 | "Two operands have different types!" ); |
2212 | assert(VT.getVectorNumElements() == Mask.size() && |
2213 | "Unexpected mask size for shuffle!" ); |
2214 | assert(Mask.size() % 2 == 0 && "Expected even mask size." ); |
2215 | assert(Mask.size() >= 4 && "Mask size is less than 4." ); |
2216 | |
  // Canonicalize non-cross-lane shuffle vectors.
2218 | SmallVector<int> NewMask(Mask); |
2219 | canonicalizeShuffleVectorByLane(DL, Mask: NewMask, VT, V1, V2, DAG); |
2220 | |
2221 | APInt KnownUndef, KnownZero; |
2222 | computeZeroableShuffleElements(Mask: NewMask, V1, V2, KnownUndef, KnownZero); |
2223 | APInt Zeroable = KnownUndef | KnownZero; |
2224 | |
2225 | SDValue Result; |
2226 | // TODO: Add more comparison patterns. |
2227 | if (V2.isUndef()) { |
2228 | if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask: NewMask, VT, V1, V2, DAG))) |
2229 | return Result; |
2230 | if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask: NewMask, VT, V1, V2, DAG))) |
2231 | return Result; |
2232 | if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, Mask: NewMask, VT, |
2233 | V1, V2, DAG))) |
2234 | return Result; |
2235 | |
    // TODO: The commented-out code below may be enabled in the future to
    // better match the pattern for instruction selection.
    /* V2 = V1; */
2239 | } |
2240 | |
  // The order of these pattern comparisons is tuned for performance;
  // it is best left unchanged.
2243 | if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask: NewMask, VT, V1, V2, DAG))) |
2244 | return Result; |
2245 | if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask: NewMask, VT, V1, V2, DAG))) |
2246 | return Result; |
2247 | if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask: NewMask, VT, V1, V2, DAG))) |
2248 | return Result; |
2249 | if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask: NewMask, VT, V1, V2, DAG))) |
2250 | return Result; |
2251 | if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask: NewMask, VT, V1, V2, DAG))) |
2252 | return Result; |
2253 | if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask: NewMask, VT, V1, V2, DAG))) |
2254 | return Result; |
2255 | if ((Result = |
2256 | lowerVECTOR_SHUFFLEAsShift(DL, Mask: NewMask, VT, V1, V2, DAG, Zeroable))) |
2257 | return Result; |
2258 | if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask: NewMask, VT, V1, V2, DAG))) |
2259 | return Result; |
2260 | if (SDValue NewShuffle = widenShuffleMask(DL, Mask: NewMask, VT, V1, V2, DAG)) |
2261 | return NewShuffle; |
2262 | if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, Mask: NewMask, VT, V1, V2, DAG))) |
2263 | return Result; |
2264 | |
2265 | return SDValue(); |
2266 | } |
2267 | |
2268 | SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, |
2269 | SelectionDAG &DAG) const { |
2270 | ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Val&: Op); |
2271 | ArrayRef<int> OrigMask = SVOp->getMask(); |
2272 | SDValue V1 = Op.getOperand(i: 0); |
2273 | SDValue V2 = Op.getOperand(i: 1); |
2274 | MVT VT = Op.getSimpleValueType(); |
2275 | int NumElements = VT.getVectorNumElements(); |
2276 | SDLoc DL(Op); |
2277 | |
2278 | bool V1IsUndef = V1.isUndef(); |
2279 | bool V2IsUndef = V2.isUndef(); |
2280 | if (V1IsUndef && V2IsUndef) |
2281 | return DAG.getUNDEF(VT); |
2282 | |
  // When we create a shuffle node we put the UNDEF node as the second
  // operand, but in some cases the first operand may be transformed to
  // UNDEF. In this case we should just commute the node.
2286 | if (V1IsUndef) |
2287 | return DAG.getCommutedVectorShuffle(SV: *SVOp); |
2288 | |
2289 | // Check for non-undef masks pointing at an undef vector and make the masks |
2290 | // undef as well. This makes it easier to match the shuffle based solely on |
2291 | // the mask. |
2292 | if (V2IsUndef && |
2293 | any_of(Range&: OrigMask, P: [NumElements](int M) { return M >= NumElements; })) { |
2294 | SmallVector<int, 8> NewMask(OrigMask); |
2295 | for (int &M : NewMask) |
2296 | if (M >= NumElements) |
2297 | M = -1; |
2298 | return DAG.getVectorShuffle(VT, dl: DL, N1: V1, N2: V2, Mask: NewMask); |
2299 | } |
2300 | |
2301 | // Check for illegal shuffle mask element index values. |
2302 | int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2); |
2303 | (void)MaskUpperLimit; |
2304 | assert(llvm::all_of(OrigMask, |
2305 | [&](int M) { return -1 <= M && M < MaskUpperLimit; }) && |
2306 | "Out of bounds shuffle index" ); |
2307 | |
2308 | // For each vector width, delegate to a specialized lowering routine. |
2309 | if (VT.is128BitVector()) |
2310 | return lower128BitShuffle(DL, Mask: OrigMask, VT, V1, V2, DAG); |
2311 | |
2312 | if (VT.is256BitVector()) |
2313 | return lower256BitShuffle(DL, Mask: OrigMask, VT, V1, V2, DAG); |
2314 | |
2315 | return SDValue(); |
2316 | } |
2317 | |
2318 | SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op, |
2319 | SelectionDAG &DAG) const { |
2320 | // Custom lower to ensure the libcall return is passed in an FPR on hard |
2321 | // float ABIs. |
2322 | SDLoc DL(Op); |
2323 | MakeLibCallOptions CallOptions; |
2324 | SDValue Op0 = Op.getOperand(i: 0); |
2325 | SDValue Chain = SDValue(); |
2326 | RTLIB::Libcall LC = RTLIB::getFPROUND(OpVT: Op0.getValueType(), RetVT: MVT::f16); |
2327 | SDValue Res; |
2328 | std::tie(args&: Res, args&: Chain) = |
2329 | makeLibCall(DAG, LC, RetVT: MVT::f32, Ops: Op0, CallOptions, dl: DL, Chain); |
2330 | if (Subtarget.is64Bit()) |
2331 | return DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Res); |
2332 | return DAG.getBitcast(VT: MVT::i32, V: Res); |
2333 | } |
2334 | |
2335 | SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op, |
2336 | SelectionDAG &DAG) const { |
2337 | // Custom lower to ensure the libcall argument is passed in an FPR on hard |
2338 | // float ABIs. |
2339 | SDLoc DL(Op); |
2340 | MakeLibCallOptions CallOptions; |
2341 | SDValue Op0 = Op.getOperand(i: 0); |
2342 | SDValue Chain = SDValue(); |
2343 | SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64, |
2344 | DL, VT: MVT::f32, Operand: Op0) |
2345 | : DAG.getBitcast(VT: MVT::f32, V: Op0); |
2346 | SDValue Res; |
2347 | std::tie(args&: Res, args&: Chain) = makeLibCall(DAG, LC: RTLIB::FPEXT_F16_F32, RetVT: MVT::f32, Ops: Arg, |
2348 | CallOptions, dl: DL, Chain); |
2349 | return Res; |
2350 | } |
2351 | |
2352 | SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op, |
2353 | SelectionDAG &DAG) const { |
2354 | assert(Subtarget.hasBasicF() && "Unexpected custom legalization" ); |
2355 | SDLoc DL(Op); |
2356 | MakeLibCallOptions CallOptions; |
2357 | RTLIB::Libcall LC = |
2358 | RTLIB::getFPROUND(OpVT: Op.getOperand(i: 0).getValueType(), RetVT: MVT::bf16); |
2359 | SDValue Res = |
2360 | makeLibCall(DAG, LC, RetVT: MVT::f32, Ops: Op.getOperand(i: 0), CallOptions, dl: DL).first; |
2361 | if (Subtarget.is64Bit()) |
2362 | return DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Res); |
2363 | return DAG.getBitcast(VT: MVT::i32, V: Res); |
2364 | } |
2365 | |
2366 | SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op, |
2367 | SelectionDAG &DAG) const { |
2368 | assert(Subtarget.hasBasicF() && "Unexpected custom legalization" ); |
2369 | MVT VT = Op.getSimpleValueType(); |
2370 | SDLoc DL(Op); |
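  // A bf16 value occupies the high 16 bits of an f32, so extending is a
  // 16-bit left shift of the raw bits followed by a move into an FPR.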
2371 | Op = DAG.getNode( |
2372 | Opcode: ISD::SHL, DL, VT: Op.getOperand(i: 0).getValueType(), N1: Op.getOperand(i: 0), |
2373 | N2: DAG.getShiftAmountConstant(Val: 16, VT: Op.getOperand(i: 0).getValueType(), DL)); |
2374 | SDValue Res = Subtarget.is64Bit() ? DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64, |
2375 | DL, VT: MVT::f32, Operand: Op) |
2376 | : DAG.getBitcast(VT: MVT::f32, V: Op); |
2377 | if (VT != MVT::f32) |
2378 | return DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT, Operand: Res); |
2379 | return Res; |
2380 | } |
2381 | |
2382 | static bool isConstantOrUndef(const SDValue Op) { |
2383 | if (Op->isUndef()) |
2384 | return true; |
2385 | if (isa<ConstantSDNode>(Val: Op)) |
2386 | return true; |
2387 | if (isa<ConstantFPSDNode>(Val: Op)) |
2388 | return true; |
2389 | return false; |
2390 | } |
2391 | |
2392 | static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) { |
2393 | for (unsigned i = 0; i < Op->getNumOperands(); ++i) |
2394 | if (isConstantOrUndef(Op: Op->getOperand(Num: i))) |
2395 | return true; |
2396 | return false; |
2397 | } |
2398 | |
2399 | // Lower BUILD_VECTOR as broadcast load (if possible). |
2400 | // For example: |
2401 | // %a = load i8, ptr %ptr |
2402 | // %b = build_vector %a, %a, %a, %a |
// is lowered to:
2404 | // (VLDREPL_B $a0, 0) |
2405 | static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp, |
2406 | const SDLoc &DL, |
2407 | SelectionDAG &DAG) { |
2408 | MVT VT = BVOp->getSimpleValueType(ResNo: 0); |
2409 | int NumOps = BVOp->getNumOperands(); |
2410 | |
2411 | assert((VT.is128BitVector() || VT.is256BitVector()) && |
2412 | "Unsupported vector type for broadcast." ); |
2413 | |
2414 | SDValue IdentitySrc; |
  bool IsIdentity = true;
2416 | |
2417 | for (int i = 0; i != NumOps; i++) { |
2418 | SDValue Op = BVOp->getOperand(Num: i); |
2419 | if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) { |
      IsIdentity = false;
2421 | break; |
2422 | } |
2423 | IdentitySrc = BVOp->getOperand(Num: 0); |
2424 | } |
2425 | |
  // Make sure that this load is valid and only has one user.
2427 | if (!IdentitySrc || !BVOp->isOnlyUserOf(N: IdentitySrc.getNode())) |
2428 | return SDValue(); |
2429 | |
  if (IsIdentity) {
2431 | auto *LN = cast<LoadSDNode>(Val&: IdentitySrc); |
2432 | SDVTList Tys = |
2433 | LN->isIndexed() |
2434 | ? DAG.getVTList(VT1: VT, VT2: LN->getBasePtr().getValueType(), VT3: MVT::Other) |
2435 | : DAG.getVTList(VT1: VT, VT2: MVT::Other); |
2436 | SDValue Ops[] = {LN->getChain(), LN->getBasePtr(), LN->getOffset()}; |
2437 | SDValue BCast = DAG.getNode(Opcode: LoongArchISD::VLDREPL, DL, VTList: Tys, Ops); |
2438 | DAG.ReplaceAllUsesOfValueWith(From: SDValue(LN, 1), To: BCast.getValue(R: 1)); |
2439 | return BCast; |
2440 | } |
2441 | return SDValue(); |
2442 | } |
2443 | |
2444 | SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op, |
2445 | SelectionDAG &DAG) const { |
2446 | BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Val&: Op); |
2447 | EVT ResTy = Op->getValueType(ResNo: 0); |
2448 | SDLoc DL(Op); |
2449 | APInt SplatValue, SplatUndef; |
2450 | unsigned SplatBitSize; |
2451 | bool HasAnyUndefs; |
2452 | bool Is128Vec = ResTy.is128BitVector(); |
2453 | bool Is256Vec = ResTy.is256BitVector(); |
2454 | |
2455 | if ((!Subtarget.hasExtLSX() || !Is128Vec) && |
2456 | (!Subtarget.hasExtLASX() || !Is256Vec)) |
2457 | return SDValue(); |
2458 | |
2459 | if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(BVOp: Node, DL, DAG)) |
2460 | return Result; |
2461 | |
2462 | if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, |
2463 | /*MinSplatBits=*/8) && |
2464 | SplatBitSize <= 64) { |
2465 | // We can only cope with 8, 16, 32, or 64-bit elements. |
2466 | if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 && |
2467 | SplatBitSize != 64) |
2468 | return SDValue(); |
2469 | |
2470 | EVT ViaVecTy; |
2471 | |
2472 | switch (SplatBitSize) { |
2473 | default: |
2474 | return SDValue(); |
2475 | case 8: |
2476 | ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8; |
2477 | break; |
2478 | case 16: |
2479 | ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16; |
2480 | break; |
2481 | case 32: |
2482 | ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32; |
2483 | break; |
2484 | case 64: |
2485 | ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64; |
2486 | break; |
2487 | } |
2488 | |
2489 | // SelectionDAG::getConstant will promote SplatValue appropriately. |
2490 | SDValue Result = DAG.getConstant(Val: SplatValue, DL, VT: ViaVecTy); |
2491 | |
2492 | // Bitcast to the type we originally wanted. |
2493 | if (ViaVecTy != ResTy) |
2494 | Result = DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(Node), VT: ResTy, Operand: Result); |
2495 | |
2496 | return Result; |
2497 | } |
2498 | |
2499 | if (DAG.isSplatValue(V: Op, /*AllowUndefs=*/false)) |
2500 | return Op; |
2501 | |
2502 | if (!isConstantOrUndefBUILD_VECTOR(Op: Node)) { |
2503 | // Use INSERT_VECTOR_ELT operations rather than expand to stores. |
2504 | // The resulting code is the same length as the expansion, but it doesn't |
2505 | // use memory operations. |
2506 | EVT ResTy = Node->getValueType(ResNo: 0); |
2507 | |
2508 | assert(ResTy.isVector()); |
2509 | |
2510 | unsigned NumElts = ResTy.getVectorNumElements(); |
2511 | SDValue Vector = DAG.getUNDEF(VT: ResTy); |
2512 | for (unsigned i = 0; i < NumElts; ++i) { |
2513 | Vector = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: ResTy, N1: Vector, |
2514 | N2: Node->getOperand(Num: i), |
2515 | N3: DAG.getConstant(Val: i, DL, VT: Subtarget.getGRLenVT())); |
2516 | } |
2517 | return Vector; |
2518 | } |
2519 | |
2520 | return SDValue(); |
2521 | } |
2522 | |
2523 | SDValue |
LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
2525 | SelectionDAG &DAG) const { |
2526 | EVT VecTy = Op->getOperand(Num: 0)->getValueType(ResNo: 0); |
2527 | SDValue Idx = Op->getOperand(Num: 1); |
2528 | EVT EltTy = VecTy.getVectorElementType(); |
2529 | unsigned NumElts = VecTy.getVectorNumElements(); |
2530 | |
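  // Keep the extraction legal when the index is constant and either the
  // element is 32/64-bit or the index falls in the lower half of the
  // vector; other cases fall back to default expansion.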
2531 | if (isa<ConstantSDNode>(Val: Idx) && |
2532 | (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 || |
2533 | EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2)) |
2534 | return Op; |
2535 | |
2536 | return SDValue(); |
2537 | } |
2538 | |
2539 | SDValue |
2540 | LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, |
2541 | SelectionDAG &DAG) const { |
2542 | if (isa<ConstantSDNode>(Val: Op->getOperand(Num: 2))) |
2543 | return Op; |
2544 | return SDValue(); |
2545 | } |
2546 | |
2547 | SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op, |
2548 | SelectionDAG &DAG) const { |
2549 | SDLoc DL(Op); |
2550 | SyncScope::ID FenceSSID = |
2551 | static_cast<SyncScope::ID>(Op.getConstantOperandVal(i: 2)); |
2552 | |
2553 | // singlethread fences only synchronize with signal handlers on the same |
2554 | // thread and thus only need to preserve instruction order, not actually |
2555 | // enforce memory ordering. |
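  // For example, an IR `fence syncscope("singlethread") seq_cst` reaches here
  // with FenceSSID == SyncScope::SingleThread and is lowered to MEMBARRIER,
  // which emits no machine instruction.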
2556 | if (FenceSSID == SyncScope::SingleThread) |
2557 | // MEMBARRIER is a compiler barrier; it codegens to a no-op. |
2558 | return DAG.getNode(Opcode: ISD::MEMBARRIER, DL, VT: MVT::Other, Operand: Op.getOperand(i: 0)); |
2559 | |
2560 | return Op; |
2561 | } |
2562 | |
2563 | SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op, |
2564 | SelectionDAG &DAG) const { |
2565 | |
2566 | if (Subtarget.is64Bit() && Op.getOperand(i: 2).getValueType() == MVT::i32) { |
2567 | DAG.getContext()->emitError( |
2568 | ErrorStr: "On LA64, only 64-bit registers can be written." ); |
2569 | return Op.getOperand(i: 0); |
2570 | } |
2571 | |
2572 | if (!Subtarget.is64Bit() && Op.getOperand(i: 2).getValueType() == MVT::i64) { |
2573 | DAG.getContext()->emitError( |
2574 | ErrorStr: "On LA32, only 32-bit registers can be written." ); |
2575 | return Op.getOperand(i: 0); |
2576 | } |
2577 | |
2578 | return Op; |
2579 | } |
2580 | |
2581 | SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op, |
2582 | SelectionDAG &DAG) const { |
2583 | if (!isa<ConstantSDNode>(Val: Op.getOperand(i: 0))) { |
2584 | DAG.getContext()->emitError(ErrorStr: "argument to '__builtin_frame_address' must " |
2585 | "be a constant integer" ); |
2586 | return SDValue(); |
2587 | } |
2588 | |
2589 | MachineFunction &MF = DAG.getMachineFunction(); |
2590 | MF.getFrameInfo().setFrameAddressIsTaken(true); |
2591 | Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF); |
2592 | EVT VT = Op.getValueType(); |
2593 | SDLoc DL(Op); |
2594 | SDValue FrameAddr = DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: DL, Reg: FrameReg, VT); |
2595 | unsigned Depth = Op.getConstantOperandVal(i: 0); |
2596 | int GRLenInBytes = Subtarget.getGRLen() / 8; |
2597 | |
2598 | while (Depth--) { |
2599 | int Offset = -(GRLenInBytes * 2); |
2600 | SDValue Ptr = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: FrameAddr, |
2601 | N2: DAG.getSignedConstant(Val: Offset, DL, VT)); |
2602 | FrameAddr = |
2603 | DAG.getLoad(VT, dl: DL, Chain: DAG.getEntryNode(), Ptr, PtrInfo: MachinePointerInfo()); |
2604 | } |
2605 | return FrameAddr; |
2606 | } |
2607 | |
2608 | SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op, |
2609 | SelectionDAG &DAG) const { |
2610 | if (verifyReturnAddressArgumentIsConstant(Op, DAG)) |
2611 | return SDValue(); |
2612 | |
2613 | // Currently only support lowering return address for current frame. |
2614 | if (Op.getConstantOperandVal(i: 0) != 0) { |
2615 | DAG.getContext()->emitError( |
2616 | ErrorStr: "return address can only be determined for the current frame" ); |
2617 | return SDValue(); |
2618 | } |
2619 | |
2620 | MachineFunction &MF = DAG.getMachineFunction(); |
2621 | MF.getFrameInfo().setReturnAddressIsTaken(true); |
2622 | MVT GRLenVT = Subtarget.getGRLenVT(); |
2623 | |
2624 | // Return the value of the return address register, marking it an implicit |
2625 | // live-in. |
2626 | Register Reg = MF.addLiveIn(PReg: Subtarget.getRegisterInfo()->getRARegister(), |
2627 | RC: getRegClassFor(VT: GRLenVT)); |
2628 | return DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: SDLoc(Op), Reg, VT: GRLenVT); |
2629 | } |
2630 | |
2631 | SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op, |
2632 | SelectionDAG &DAG) const { |
2633 | MachineFunction &MF = DAG.getMachineFunction(); |
2634 | auto Size = Subtarget.getGRLen() / 8; |
2635 | auto FI = MF.getFrameInfo().CreateFixedObject(Size, SPOffset: 0, IsImmutable: false); |
2636 | return DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout())); |
2637 | } |
2638 | |
2639 | SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op, |
2640 | SelectionDAG &DAG) const { |
2641 | MachineFunction &MF = DAG.getMachineFunction(); |
2642 | auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>(); |
2643 | |
2644 | SDLoc DL(Op); |
2645 | SDValue FI = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(), |
2646 | VT: getPointerTy(DL: MF.getDataLayout())); |
2647 | |
2648 | // vastart just stores the address of the VarArgsFrameIndex slot into the |
2649 | // memory location argument. |
2650 | const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue(); |
2651 | return DAG.getStore(Chain: Op.getOperand(i: 0), dl: DL, Val: FI, Ptr: Op.getOperand(i: 1), |
2652 | PtrInfo: MachinePointerInfo(SV)); |
2653 | } |
2654 | |
2655 | SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op, |
2656 | SelectionDAG &DAG) const { |
2657 | assert(Subtarget.is64Bit() && Subtarget.hasBasicF() && |
         !Subtarget.hasBasicD() && "unexpected target features");
2659 | |
2660 | SDLoc DL(Op); |
2661 | SDValue Op0 = Op.getOperand(i: 0); |
2662 | if (Op0->getOpcode() == ISD::AND) { |
2663 | auto *C = dyn_cast<ConstantSDNode>(Val: Op0.getOperand(i: 1)); |
2664 | if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF)) |
2665 | return Op; |
2666 | } |
2667 | |
2668 | if (Op0->getOpcode() == LoongArchISD::BSTRPICK && |
2669 | Op0.getConstantOperandVal(i: 1) < UINT64_C(0X1F) && |
2670 | Op0.getConstantOperandVal(i: 2) == UINT64_C(0)) |
2671 | return Op; |
2672 | |
2673 | if (Op0.getOpcode() == ISD::AssertZext && |
2674 | dyn_cast<VTSDNode>(Val: Op0.getOperand(i: 1))->getVT().bitsLT(VT: MVT::i32)) |
2675 | return Op; |
2676 | |
2677 | EVT OpVT = Op0.getValueType(); |
2678 | EVT RetVT = Op.getValueType(); |
2679 | RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT); |
2680 | MakeLibCallOptions CallOptions; |
2681 | CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT, Value: true); |
2682 | SDValue Chain = SDValue(); |
2683 | SDValue Result; |
2684 | std::tie(args&: Result, args&: Chain) = |
2685 | makeLibCall(DAG, LC, RetVT: Op.getValueType(), Ops: Op0, CallOptions, dl: DL, Chain); |
2686 | return Result; |
2687 | } |
2688 | |
2689 | SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op, |
2690 | SelectionDAG &DAG) const { |
2691 | assert(Subtarget.is64Bit() && Subtarget.hasBasicF() && |
         !Subtarget.hasBasicD() && "unexpected target features");
2693 | |
2694 | SDLoc DL(Op); |
2695 | SDValue Op0 = Op.getOperand(i: 0); |
2696 | |
2697 | if ((Op0.getOpcode() == ISD::AssertSext || |
2698 | Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) && |
2699 | dyn_cast<VTSDNode>(Val: Op0.getOperand(i: 1))->getVT().bitsLE(VT: MVT::i32)) |
2700 | return Op; |
2701 | |
2702 | EVT OpVT = Op0.getValueType(); |
2703 | EVT RetVT = Op.getValueType(); |
2704 | RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT); |
2705 | MakeLibCallOptions CallOptions; |
2706 | CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT, Value: true); |
2707 | SDValue Chain = SDValue(); |
2708 | SDValue Result; |
2709 | std::tie(args&: Result, args&: Chain) = |
2710 | makeLibCall(DAG, LC, RetVT: Op.getValueType(), Ops: Op0, CallOptions, dl: DL, Chain); |
2711 | return Result; |
2712 | } |
2713 | |
2714 | SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op, |
2715 | SelectionDAG &DAG) const { |
2716 | |
2717 | SDLoc DL(Op); |
2718 | EVT VT = Op.getValueType(); |
2719 | SDValue Op0 = Op.getOperand(i: 0); |
2720 | EVT Op0VT = Op0.getValueType(); |
2721 | |
2722 | if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 && |
2723 | Subtarget.is64Bit() && Subtarget.hasBasicF()) { |
2724 | SDValue NewOp0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op0); |
2725 | return DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64, DL, VT: MVT::f32, Operand: NewOp0); |
2726 | } |
2727 | if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) { |
2728 | SDValue Lo, Hi; |
2729 | std::tie(args&: Lo, args&: Hi) = DAG.SplitScalar(N: Op0, DL, LoVT: MVT::i32, HiVT: MVT::i32); |
2730 | return DAG.getNode(Opcode: LoongArchISD::BUILD_PAIR_F64, DL, VT: MVT::f64, N1: Lo, N2: Hi); |
2731 | } |
2732 | return Op; |
2733 | } |
2734 | |
2735 | SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op, |
2736 | SelectionDAG &DAG) const { |
2737 | |
2738 | SDLoc DL(Op); |
2739 | SDValue Op0 = Op.getOperand(i: 0); |
2740 | |
2741 | if (Op0.getValueType() == MVT::f16) |
2742 | Op0 = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: MVT::f32, Operand: Op0); |
2743 | |
2744 | if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() && |
2745 | !Subtarget.hasBasicD()) { |
2746 | SDValue Dst = DAG.getNode(Opcode: LoongArchISD::FTINT, DL, VT: MVT::f32, Operand: Op0); |
2747 | return DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Dst); |
2748 | } |
2749 | |
2750 | EVT FPTy = EVT::getFloatingPointVT(BitWidth: Op.getValueSizeInBits()); |
2751 | SDValue Trunc = DAG.getNode(Opcode: LoongArchISD::FTINT, DL, VT: FPTy, Operand: Op0); |
2752 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: Op.getValueType(), Operand: Trunc); |
2753 | } |
2754 | |
2755 | static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty, |
2756 | SelectionDAG &DAG, unsigned Flags) { |
2757 | return DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: Flags); |
2758 | } |
2759 | |
2760 | static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty, |
2761 | SelectionDAG &DAG, unsigned Flags) { |
2762 | return DAG.getTargetBlockAddress(BA: N->getBlockAddress(), VT: Ty, Offset: N->getOffset(), |
2763 | TargetFlags: Flags); |
2764 | } |
2765 | |
2766 | static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty, |
2767 | SelectionDAG &DAG, unsigned Flags) { |
2768 | return DAG.getTargetConstantPool(C: N->getConstVal(), VT: Ty, Align: N->getAlign(), |
2769 | Offset: N->getOffset(), TargetFlags: Flags); |
2770 | } |
2771 | |
2772 | static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty, |
2773 | SelectionDAG &DAG, unsigned Flags) { |
2774 | return DAG.getTargetJumpTable(JTI: N->getIndex(), VT: Ty, TargetFlags: Flags); |
2775 | } |
2776 | |
2777 | template <class NodeTy> |
2778 | SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, |
2779 | CodeModel::Model M, |
2780 | bool IsLocal) const { |
2781 | SDLoc DL(N); |
2782 | EVT Ty = getPointerTy(DL: DAG.getDataLayout()); |
2783 | SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); |
2784 | SDValue Load; |
2785 | |
2786 | switch (M) { |
2787 | default: |
2788 | report_fatal_error(reason: "Unsupported code model" ); |
2789 | |
2790 | case CodeModel::Large: { |
    assert(Subtarget.is64Bit() && "Large code model requires LA64");
2792 | |
2793 | // This is not actually used, but is necessary for successfully matching |
2794 | // the PseudoLA_*_LARGE nodes. |
2795 | SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty); |
2796 | if (IsLocal) { |
2797 | // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that |
2798 | // eventually becomes the desired 5-insn code sequence. |
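      // Roughly, the expanded 5-insn sequence is:
      //   pcalau12i $rd, %pc_hi20(sym)
      //   addi.d    $rt, $zero, %pc_lo12(sym)
      //   lu32i.d   $rt, %pc64_lo20(sym)
      //   lu52i.d   $rt, %pc64_hi12(sym)
      //   add.d     $rd, $rd, $rt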
2799 | Load = SDValue(DAG.getMachineNode(Opcode: LoongArch::PseudoLA_PCREL_LARGE, dl: DL, VT: Ty, |
2800 | Op1: Tmp, Op2: Addr), |
2801 | 0); |
2802 | } else { |
2803 | // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that |
2804 | // eventually becomes the desired 5-insn code sequence. |
2805 | Load = SDValue( |
2806 | DAG.getMachineNode(Opcode: LoongArch::PseudoLA_GOT_LARGE, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), |
2807 | 0); |
2808 | } |
2809 | break; |
2810 | } |
2811 | |
2812 | case CodeModel::Small: |
2813 | case CodeModel::Medium: |
2814 | if (IsLocal) { |
2815 | // This generates the pattern (PseudoLA_PCREL sym), which expands to |
2816 | // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)). |
2817 | Load = SDValue( |
2818 | DAG.getMachineNode(Opcode: LoongArch::PseudoLA_PCREL, dl: DL, VT: Ty, Op1: Addr), 0); |
2819 | } else { |
2820 | // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d |
2821 | // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)). |
2822 | Load = |
2823 | SDValue(DAG.getMachineNode(Opcode: LoongArch::PseudoLA_GOT, dl: DL, VT: Ty, Op1: Addr), 0); |
2824 | } |
2825 | } |
2826 | |
2827 | if (!IsLocal) { |
2828 | // Mark the load instruction as invariant to enable hoisting in MachineLICM. |
2829 | MachineFunction &MF = DAG.getMachineFunction(); |
2830 | MachineMemOperand *MemOp = MF.getMachineMemOperand( |
2831 | PtrInfo: MachinePointerInfo::getGOT(MF), |
2832 | f: MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | |
2833 | MachineMemOperand::MOInvariant, |
2834 | MemTy: LLT(Ty.getSimpleVT()), base_alignment: Align(Ty.getFixedSizeInBits() / 8)); |
2835 | DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Load.getNode()), NewMemRefs: {MemOp}); |
2836 | } |
2837 | |
2838 | return Load; |
2839 | } |
2840 | |
2841 | SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op, |
2842 | SelectionDAG &DAG) const { |
2843 | return getAddr(N: cast<BlockAddressSDNode>(Val&: Op), DAG, |
2844 | M: DAG.getTarget().getCodeModel()); |
2845 | } |
2846 | |
2847 | SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op, |
2848 | SelectionDAG &DAG) const { |
2849 | return getAddr(N: cast<JumpTableSDNode>(Val&: Op), DAG, |
2850 | M: DAG.getTarget().getCodeModel()); |
2851 | } |
2852 | |
2853 | SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op, |
2854 | SelectionDAG &DAG) const { |
2855 | return getAddr(N: cast<ConstantPoolSDNode>(Val&: Op), DAG, |
2856 | M: DAG.getTarget().getCodeModel()); |
2857 | } |
2858 | |
2859 | SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op, |
2860 | SelectionDAG &DAG) const { |
2861 | GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Val&: Op); |
  assert(N->getOffset() == 0 && "unexpected offset in global node");
2863 | auto CM = DAG.getTarget().getCodeModel(); |
2864 | const GlobalValue *GV = N->getGlobal(); |
2865 | |
2866 | if (GV->isDSOLocal() && isa<GlobalVariable>(Val: GV)) { |
2867 | if (auto GCM = dyn_cast<GlobalVariable>(Val: GV)->getCodeModel()) |
2868 | CM = *GCM; |
2869 | } |
2870 | |
2871 | return getAddr(N, DAG, M: CM, IsLocal: GV->isDSOLocal()); |
2872 | } |
2873 | |
2874 | SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, |
2875 | SelectionDAG &DAG, |
2876 | unsigned Opc, bool UseGOT, |
2877 | bool Large) const { |
2878 | SDLoc DL(N); |
2879 | EVT Ty = getPointerTy(DL: DAG.getDataLayout()); |
2880 | MVT GRLenVT = Subtarget.getGRLenVT(); |
2881 | |
2882 | // This is not actually used, but is necessary for successfully matching the |
2883 | // PseudoLA_*_LARGE nodes. |
2884 | SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty); |
2885 | SDValue Addr = DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: 0); |
2886 | |
2887 | // Only IE needs an extra argument for large code model. |
2888 | SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE |
2889 | ? SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), 0) |
2890 | : SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Addr), 0); |
2891 | |
  // If this is LE for the normal/medium code model, the add-tp operation is
  // performed later, during pseudo-instruction expansion.
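  // For reference, the LE expansion for the normal code model is roughly:
  //   lu12i.w $rd, %le_hi20(sym)
  //   ori     $rd, $rd, %le_lo12(sym)
  //   add.w/d $rd, $rd, $tp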
2894 | if (Opc == LoongArch::PseudoLA_TLS_LE && !Large) |
2895 | return Offset; |
2896 | |
2897 | if (UseGOT) { |
2898 | // Mark the load instruction as invariant to enable hoisting in MachineLICM. |
2899 | MachineFunction &MF = DAG.getMachineFunction(); |
2900 | MachineMemOperand *MemOp = MF.getMachineMemOperand( |
2901 | PtrInfo: MachinePointerInfo::getGOT(MF), |
2902 | f: MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | |
2903 | MachineMemOperand::MOInvariant, |
2904 | MemTy: LLT(Ty.getSimpleVT()), base_alignment: Align(Ty.getFixedSizeInBits() / 8)); |
2905 | DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Offset.getNode()), NewMemRefs: {MemOp}); |
2906 | } |
2907 | |
2908 | // Add the thread pointer. |
2909 | return DAG.getNode(Opcode: ISD::ADD, DL, VT: Ty, N1: Offset, |
2910 | N2: DAG.getRegister(Reg: LoongArch::R2, VT: GRLenVT)); |
2911 | } |
2912 | |
2913 | SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, |
2914 | SelectionDAG &DAG, |
2915 | unsigned Opc, |
2916 | bool Large) const { |
2917 | SDLoc DL(N); |
2918 | EVT Ty = getPointerTy(DL: DAG.getDataLayout()); |
2919 | IntegerType *CallTy = Type::getIntNTy(C&: *DAG.getContext(), N: Ty.getSizeInBits()); |
2920 | |
2921 | // This is not actually used, but is necessary for successfully matching the |
2922 | // PseudoLA_*_LARGE nodes. |
2923 | SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty); |
2924 | |
2925 | // Use a PC-relative addressing mode to access the dynamic GOT address. |
2926 | SDValue Addr = DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: 0); |
2927 | SDValue Load = Large ? SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), 0) |
2928 | : SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Addr), 0); |
2929 | |
2930 | // Prepare argument list to generate call. |
2931 | ArgListTy Args; |
2932 | ArgListEntry Entry; |
2933 | Entry.Node = Load; |
2934 | Entry.Ty = CallTy; |
2935 | Args.push_back(x: Entry); |
2936 | |
2937 | // Setup call to __tls_get_addr. |
2938 | TargetLowering::CallLoweringInfo CLI(DAG); |
2939 | CLI.setDebugLoc(DL) |
2940 | .setChain(DAG.getEntryNode()) |
2941 | .setLibCallee(CC: CallingConv::C, ResultType: CallTy, |
                    Target: DAG.getExternalSymbol(Sym: "__tls_get_addr", VT: Ty),
2943 | ArgsList: std::move(Args)); |
2944 | |
2945 | return LowerCallTo(CLI).first; |
2946 | } |
2947 | |
2948 | SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N, |
2949 | SelectionDAG &DAG, unsigned Opc, |
2950 | bool Large) const { |
2951 | SDLoc DL(N); |
2952 | EVT Ty = getPointerTy(DL: DAG.getDataLayout()); |
2953 | const GlobalValue *GV = N->getGlobal(); |
2954 | |
2955 | // This is not actually used, but is necessary for successfully matching the |
2956 | // PseudoLA_*_LARGE nodes. |
2957 | SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty); |
2958 | |
2959 | // Use a PC-relative addressing mode to access the global dynamic GOT address. |
2960 | // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym). |
2961 | SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: 0); |
2962 | return Large ? SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), 0) |
2963 | : SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Addr), 0); |
2964 | } |
2965 | |
2966 | SDValue |
2967 | LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op, |
2968 | SelectionDAG &DAG) const { |
2969 | if (DAG.getMachineFunction().getFunction().getCallingConv() == |
2970 | CallingConv::GHC) |
2971 | report_fatal_error(reason: "In GHC calling convention TLS is not supported" ); |
2972 | |
2973 | bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large; |
  assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
2975 | |
2976 | GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Val&: Op); |
  assert(N->getOffset() == 0 && "unexpected offset in global node");
2978 | |
2979 | if (DAG.getTarget().useEmulatedTLS()) |
    reportFatalUsageError(reason: "the emulated TLS is prohibited");
2981 | |
2982 | bool IsDesc = DAG.getTarget().useTLSDESC(); |
2983 | |
2984 | switch (getTargetMachine().getTLSModel(GV: N->getGlobal())) { |
2985 | case TLSModel::GeneralDynamic: |
2986 | // In this model, application code calls the dynamic linker function |
2987 | // __tls_get_addr to locate TLS offsets into the dynamic thread vector at |
2988 | // runtime. |
2989 | if (!IsDesc) |
2990 | return getDynamicTLSAddr(N, DAG, |
2991 | Opc: Large ? LoongArch::PseudoLA_TLS_GD_LARGE |
2992 | : LoongArch::PseudoLA_TLS_GD, |
2993 | Large); |
2994 | break; |
2995 | case TLSModel::LocalDynamic: |
2996 | // Same as GeneralDynamic, except for assembly modifiers and relocation |
2997 | // records. |
2998 | if (!IsDesc) |
2999 | return getDynamicTLSAddr(N, DAG, |
3000 | Opc: Large ? LoongArch::PseudoLA_TLS_LD_LARGE |
3001 | : LoongArch::PseudoLA_TLS_LD, |
3002 | Large); |
3003 | break; |
3004 | case TLSModel::InitialExec: |
3005 | // This model uses the GOT to resolve TLS offsets. |
3006 | return getStaticTLSAddr(N, DAG, |
3007 | Opc: Large ? LoongArch::PseudoLA_TLS_IE_LARGE |
3008 | : LoongArch::PseudoLA_TLS_IE, |
3009 | /*UseGOT=*/true, Large); |
3010 | case TLSModel::LocalExec: |
3011 | // This model is used when static linking as the TLS offsets are resolved |
3012 | // during program linking. |
3013 | // |
3014 | // This node doesn't need an extra argument for the large code model. |
3015 | return getStaticTLSAddr(N, DAG, Opc: LoongArch::PseudoLA_TLS_LE, |
3016 | /*UseGOT=*/false, Large); |
3017 | } |
3018 | |
3019 | return getTLSDescAddr(N, DAG, |
3020 | Opc: Large ? LoongArch::PseudoLA_TLS_DESC_LARGE |
3021 | : LoongArch::PseudoLA_TLS_DESC, |
3022 | Large); |
3023 | } |
3024 | |
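// Checks that operand ImmOp of Op is a constant fitting in an N-bit immediate
// (signed when IsSigned). On failure this emits a diagnostic and returns an
// UNDEF of the result type; on success it returns an empty SDValue, meaning
// the immediate is in range and the caller can keep the node unchanged.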
3025 | template <unsigned N> |
3026 | static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, |
3027 | SelectionDAG &DAG, bool IsSigned = false) { |
3028 | auto *CImm = cast<ConstantSDNode>(Val: Op->getOperand(Num: ImmOp)); |
3029 | // Check the ImmArg. |
3030 | if ((IsSigned && !isInt<N>(CImm->getSExtValue())) || |
3031 | (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) { |
3032 | DAG.getContext()->emitError(ErrorStr: Op->getOperationName(G: 0) + |
3033 | ": argument out of range." ); |
3034 | return DAG.getNode(Opcode: ISD::UNDEF, DL: SDLoc(Op), VT: Op.getValueType()); |
3035 | } |
3036 | return SDValue(); |
3037 | } |
3038 | |
3039 | SDValue |
3040 | LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, |
3041 | SelectionDAG &DAG) const { |
3042 | switch (Op.getConstantOperandVal(i: 0)) { |
3043 | default: |
3044 | return SDValue(); // Don't custom lower most intrinsics. |
3045 | case Intrinsic::thread_pointer: { |
3046 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
3047 | return DAG.getRegister(Reg: LoongArch::R2, VT: PtrVT); |
3048 | } |
3049 | case Intrinsic::loongarch_lsx_vpickve2gr_d: |
3050 | case Intrinsic::loongarch_lsx_vpickve2gr_du: |
3051 | case Intrinsic::loongarch_lsx_vreplvei_d: |
3052 | case Intrinsic::loongarch_lasx_xvrepl128vei_d: |
3053 | return checkIntrinsicImmArg<1>(Op, ImmOp: 2, DAG); |
3054 | case Intrinsic::loongarch_lsx_vreplvei_w: |
3055 | case Intrinsic::loongarch_lasx_xvrepl128vei_w: |
3056 | case Intrinsic::loongarch_lasx_xvpickve2gr_d: |
3057 | case Intrinsic::loongarch_lasx_xvpickve2gr_du: |
3058 | case Intrinsic::loongarch_lasx_xvpickve_d: |
3059 | case Intrinsic::loongarch_lasx_xvpickve_d_f: |
3060 | return checkIntrinsicImmArg<2>(Op, ImmOp: 2, DAG); |
3061 | case Intrinsic::loongarch_lasx_xvinsve0_d: |
3062 | return checkIntrinsicImmArg<2>(Op, ImmOp: 3, DAG); |
3063 | case Intrinsic::loongarch_lsx_vsat_b: |
3064 | case Intrinsic::loongarch_lsx_vsat_bu: |
3065 | case Intrinsic::loongarch_lsx_vrotri_b: |
3066 | case Intrinsic::loongarch_lsx_vsllwil_h_b: |
3067 | case Intrinsic::loongarch_lsx_vsllwil_hu_bu: |
3068 | case Intrinsic::loongarch_lsx_vsrlri_b: |
3069 | case Intrinsic::loongarch_lsx_vsrari_b: |
3070 | case Intrinsic::loongarch_lsx_vreplvei_h: |
3071 | case Intrinsic::loongarch_lasx_xvsat_b: |
3072 | case Intrinsic::loongarch_lasx_xvsat_bu: |
3073 | case Intrinsic::loongarch_lasx_xvrotri_b: |
3074 | case Intrinsic::loongarch_lasx_xvsllwil_h_b: |
3075 | case Intrinsic::loongarch_lasx_xvsllwil_hu_bu: |
3076 | case Intrinsic::loongarch_lasx_xvsrlri_b: |
3077 | case Intrinsic::loongarch_lasx_xvsrari_b: |
3078 | case Intrinsic::loongarch_lasx_xvrepl128vei_h: |
3079 | case Intrinsic::loongarch_lasx_xvpickve_w: |
3080 | case Intrinsic::loongarch_lasx_xvpickve_w_f: |
3081 | return checkIntrinsicImmArg<3>(Op, ImmOp: 2, DAG); |
3082 | case Intrinsic::loongarch_lasx_xvinsve0_w: |
3083 | return checkIntrinsicImmArg<3>(Op, ImmOp: 3, DAG); |
3084 | case Intrinsic::loongarch_lsx_vsat_h: |
3085 | case Intrinsic::loongarch_lsx_vsat_hu: |
3086 | case Intrinsic::loongarch_lsx_vrotri_h: |
3087 | case Intrinsic::loongarch_lsx_vsllwil_w_h: |
3088 | case Intrinsic::loongarch_lsx_vsllwil_wu_hu: |
3089 | case Intrinsic::loongarch_lsx_vsrlri_h: |
3090 | case Intrinsic::loongarch_lsx_vsrari_h: |
3091 | case Intrinsic::loongarch_lsx_vreplvei_b: |
3092 | case Intrinsic::loongarch_lasx_xvsat_h: |
3093 | case Intrinsic::loongarch_lasx_xvsat_hu: |
3094 | case Intrinsic::loongarch_lasx_xvrotri_h: |
3095 | case Intrinsic::loongarch_lasx_xvsllwil_w_h: |
3096 | case Intrinsic::loongarch_lasx_xvsllwil_wu_hu: |
3097 | case Intrinsic::loongarch_lasx_xvsrlri_h: |
3098 | case Intrinsic::loongarch_lasx_xvsrari_h: |
3099 | case Intrinsic::loongarch_lasx_xvrepl128vei_b: |
3100 | return checkIntrinsicImmArg<4>(Op, ImmOp: 2, DAG); |
3101 | case Intrinsic::loongarch_lsx_vsrlni_b_h: |
3102 | case Intrinsic::loongarch_lsx_vsrani_b_h: |
3103 | case Intrinsic::loongarch_lsx_vsrlrni_b_h: |
3104 | case Intrinsic::loongarch_lsx_vsrarni_b_h: |
3105 | case Intrinsic::loongarch_lsx_vssrlni_b_h: |
3106 | case Intrinsic::loongarch_lsx_vssrani_b_h: |
3107 | case Intrinsic::loongarch_lsx_vssrlni_bu_h: |
3108 | case Intrinsic::loongarch_lsx_vssrani_bu_h: |
3109 | case Intrinsic::loongarch_lsx_vssrlrni_b_h: |
3110 | case Intrinsic::loongarch_lsx_vssrarni_b_h: |
3111 | case Intrinsic::loongarch_lsx_vssrlrni_bu_h: |
3112 | case Intrinsic::loongarch_lsx_vssrarni_bu_h: |
3113 | case Intrinsic::loongarch_lasx_xvsrlni_b_h: |
3114 | case Intrinsic::loongarch_lasx_xvsrani_b_h: |
3115 | case Intrinsic::loongarch_lasx_xvsrlrni_b_h: |
3116 | case Intrinsic::loongarch_lasx_xvsrarni_b_h: |
3117 | case Intrinsic::loongarch_lasx_xvssrlni_b_h: |
3118 | case Intrinsic::loongarch_lasx_xvssrani_b_h: |
3119 | case Intrinsic::loongarch_lasx_xvssrlni_bu_h: |
3120 | case Intrinsic::loongarch_lasx_xvssrani_bu_h: |
3121 | case Intrinsic::loongarch_lasx_xvssrlrni_b_h: |
3122 | case Intrinsic::loongarch_lasx_xvssrarni_b_h: |
3123 | case Intrinsic::loongarch_lasx_xvssrlrni_bu_h: |
3124 | case Intrinsic::loongarch_lasx_xvssrarni_bu_h: |
3125 | return checkIntrinsicImmArg<4>(Op, ImmOp: 3, DAG); |
3126 | case Intrinsic::loongarch_lsx_vsat_w: |
3127 | case Intrinsic::loongarch_lsx_vsat_wu: |
3128 | case Intrinsic::loongarch_lsx_vrotri_w: |
3129 | case Intrinsic::loongarch_lsx_vsllwil_d_w: |
3130 | case Intrinsic::loongarch_lsx_vsllwil_du_wu: |
3131 | case Intrinsic::loongarch_lsx_vsrlri_w: |
3132 | case Intrinsic::loongarch_lsx_vsrari_w: |
3133 | case Intrinsic::loongarch_lsx_vslei_bu: |
3134 | case Intrinsic::loongarch_lsx_vslei_hu: |
3135 | case Intrinsic::loongarch_lsx_vslei_wu: |
3136 | case Intrinsic::loongarch_lsx_vslei_du: |
3137 | case Intrinsic::loongarch_lsx_vslti_bu: |
3138 | case Intrinsic::loongarch_lsx_vslti_hu: |
3139 | case Intrinsic::loongarch_lsx_vslti_wu: |
3140 | case Intrinsic::loongarch_lsx_vslti_du: |
3141 | case Intrinsic::loongarch_lsx_vbsll_v: |
3142 | case Intrinsic::loongarch_lsx_vbsrl_v: |
3143 | case Intrinsic::loongarch_lasx_xvsat_w: |
3144 | case Intrinsic::loongarch_lasx_xvsat_wu: |
3145 | case Intrinsic::loongarch_lasx_xvrotri_w: |
3146 | case Intrinsic::loongarch_lasx_xvsllwil_d_w: |
3147 | case Intrinsic::loongarch_lasx_xvsllwil_du_wu: |
3148 | case Intrinsic::loongarch_lasx_xvsrlri_w: |
3149 | case Intrinsic::loongarch_lasx_xvsrari_w: |
3150 | case Intrinsic::loongarch_lasx_xvslei_bu: |
3151 | case Intrinsic::loongarch_lasx_xvslei_hu: |
3152 | case Intrinsic::loongarch_lasx_xvslei_wu: |
3153 | case Intrinsic::loongarch_lasx_xvslei_du: |
3154 | case Intrinsic::loongarch_lasx_xvslti_bu: |
3155 | case Intrinsic::loongarch_lasx_xvslti_hu: |
3156 | case Intrinsic::loongarch_lasx_xvslti_wu: |
3157 | case Intrinsic::loongarch_lasx_xvslti_du: |
3158 | case Intrinsic::loongarch_lasx_xvbsll_v: |
3159 | case Intrinsic::loongarch_lasx_xvbsrl_v: |
3160 | return checkIntrinsicImmArg<5>(Op, ImmOp: 2, DAG); |
3161 | case Intrinsic::loongarch_lsx_vseqi_b: |
3162 | case Intrinsic::loongarch_lsx_vseqi_h: |
3163 | case Intrinsic::loongarch_lsx_vseqi_w: |
3164 | case Intrinsic::loongarch_lsx_vseqi_d: |
3165 | case Intrinsic::loongarch_lsx_vslei_b: |
3166 | case Intrinsic::loongarch_lsx_vslei_h: |
3167 | case Intrinsic::loongarch_lsx_vslei_w: |
3168 | case Intrinsic::loongarch_lsx_vslei_d: |
3169 | case Intrinsic::loongarch_lsx_vslti_b: |
3170 | case Intrinsic::loongarch_lsx_vslti_h: |
3171 | case Intrinsic::loongarch_lsx_vslti_w: |
3172 | case Intrinsic::loongarch_lsx_vslti_d: |
3173 | case Intrinsic::loongarch_lasx_xvseqi_b: |
3174 | case Intrinsic::loongarch_lasx_xvseqi_h: |
3175 | case Intrinsic::loongarch_lasx_xvseqi_w: |
3176 | case Intrinsic::loongarch_lasx_xvseqi_d: |
3177 | case Intrinsic::loongarch_lasx_xvslei_b: |
3178 | case Intrinsic::loongarch_lasx_xvslei_h: |
3179 | case Intrinsic::loongarch_lasx_xvslei_w: |
3180 | case Intrinsic::loongarch_lasx_xvslei_d: |
3181 | case Intrinsic::loongarch_lasx_xvslti_b: |
3182 | case Intrinsic::loongarch_lasx_xvslti_h: |
3183 | case Intrinsic::loongarch_lasx_xvslti_w: |
3184 | case Intrinsic::loongarch_lasx_xvslti_d: |
3185 | return checkIntrinsicImmArg<5>(Op, ImmOp: 2, DAG, /*IsSigned=*/true); |
3186 | case Intrinsic::loongarch_lsx_vsrlni_h_w: |
3187 | case Intrinsic::loongarch_lsx_vsrani_h_w: |
3188 | case Intrinsic::loongarch_lsx_vsrlrni_h_w: |
3189 | case Intrinsic::loongarch_lsx_vsrarni_h_w: |
3190 | case Intrinsic::loongarch_lsx_vssrlni_h_w: |
3191 | case Intrinsic::loongarch_lsx_vssrani_h_w: |
3192 | case Intrinsic::loongarch_lsx_vssrlni_hu_w: |
3193 | case Intrinsic::loongarch_lsx_vssrani_hu_w: |
3194 | case Intrinsic::loongarch_lsx_vssrlrni_h_w: |
3195 | case Intrinsic::loongarch_lsx_vssrarni_h_w: |
3196 | case Intrinsic::loongarch_lsx_vssrlrni_hu_w: |
3197 | case Intrinsic::loongarch_lsx_vssrarni_hu_w: |
3198 | case Intrinsic::loongarch_lsx_vfrstpi_b: |
3199 | case Intrinsic::loongarch_lsx_vfrstpi_h: |
3200 | case Intrinsic::loongarch_lasx_xvsrlni_h_w: |
3201 | case Intrinsic::loongarch_lasx_xvsrani_h_w: |
3202 | case Intrinsic::loongarch_lasx_xvsrlrni_h_w: |
3203 | case Intrinsic::loongarch_lasx_xvsrarni_h_w: |
3204 | case Intrinsic::loongarch_lasx_xvssrlni_h_w: |
3205 | case Intrinsic::loongarch_lasx_xvssrani_h_w: |
3206 | case Intrinsic::loongarch_lasx_xvssrlni_hu_w: |
3207 | case Intrinsic::loongarch_lasx_xvssrani_hu_w: |
3208 | case Intrinsic::loongarch_lasx_xvssrlrni_h_w: |
3209 | case Intrinsic::loongarch_lasx_xvssrarni_h_w: |
3210 | case Intrinsic::loongarch_lasx_xvssrlrni_hu_w: |
3211 | case Intrinsic::loongarch_lasx_xvssrarni_hu_w: |
3212 | case Intrinsic::loongarch_lasx_xvfrstpi_b: |
3213 | case Intrinsic::loongarch_lasx_xvfrstpi_h: |
3214 | return checkIntrinsicImmArg<5>(Op, ImmOp: 3, DAG); |
3215 | case Intrinsic::loongarch_lsx_vsat_d: |
3216 | case Intrinsic::loongarch_lsx_vsat_du: |
3217 | case Intrinsic::loongarch_lsx_vrotri_d: |
3218 | case Intrinsic::loongarch_lsx_vsrlri_d: |
3219 | case Intrinsic::loongarch_lsx_vsrari_d: |
3220 | case Intrinsic::loongarch_lasx_xvsat_d: |
3221 | case Intrinsic::loongarch_lasx_xvsat_du: |
3222 | case Intrinsic::loongarch_lasx_xvrotri_d: |
3223 | case Intrinsic::loongarch_lasx_xvsrlri_d: |
3224 | case Intrinsic::loongarch_lasx_xvsrari_d: |
3225 | return checkIntrinsicImmArg<6>(Op, ImmOp: 2, DAG); |
3226 | case Intrinsic::loongarch_lsx_vsrlni_w_d: |
3227 | case Intrinsic::loongarch_lsx_vsrani_w_d: |
3228 | case Intrinsic::loongarch_lsx_vsrlrni_w_d: |
3229 | case Intrinsic::loongarch_lsx_vsrarni_w_d: |
3230 | case Intrinsic::loongarch_lsx_vssrlni_w_d: |
3231 | case Intrinsic::loongarch_lsx_vssrani_w_d: |
3232 | case Intrinsic::loongarch_lsx_vssrlni_wu_d: |
3233 | case Intrinsic::loongarch_lsx_vssrani_wu_d: |
3234 | case Intrinsic::loongarch_lsx_vssrlrni_w_d: |
3235 | case Intrinsic::loongarch_lsx_vssrarni_w_d: |
3236 | case Intrinsic::loongarch_lsx_vssrlrni_wu_d: |
3237 | case Intrinsic::loongarch_lsx_vssrarni_wu_d: |
3238 | case Intrinsic::loongarch_lasx_xvsrlni_w_d: |
3239 | case Intrinsic::loongarch_lasx_xvsrani_w_d: |
3240 | case Intrinsic::loongarch_lasx_xvsrlrni_w_d: |
3241 | case Intrinsic::loongarch_lasx_xvsrarni_w_d: |
3242 | case Intrinsic::loongarch_lasx_xvssrlni_w_d: |
3243 | case Intrinsic::loongarch_lasx_xvssrani_w_d: |
3244 | case Intrinsic::loongarch_lasx_xvssrlni_wu_d: |
3245 | case Intrinsic::loongarch_lasx_xvssrani_wu_d: |
3246 | case Intrinsic::loongarch_lasx_xvssrlrni_w_d: |
3247 | case Intrinsic::loongarch_lasx_xvssrarni_w_d: |
3248 | case Intrinsic::loongarch_lasx_xvssrlrni_wu_d: |
3249 | case Intrinsic::loongarch_lasx_xvssrarni_wu_d: |
3250 | return checkIntrinsicImmArg<6>(Op, ImmOp: 3, DAG); |
3251 | case Intrinsic::loongarch_lsx_vsrlni_d_q: |
3252 | case Intrinsic::loongarch_lsx_vsrani_d_q: |
3253 | case Intrinsic::loongarch_lsx_vsrlrni_d_q: |
3254 | case Intrinsic::loongarch_lsx_vsrarni_d_q: |
3255 | case Intrinsic::loongarch_lsx_vssrlni_d_q: |
3256 | case Intrinsic::loongarch_lsx_vssrani_d_q: |
3257 | case Intrinsic::loongarch_lsx_vssrlni_du_q: |
3258 | case Intrinsic::loongarch_lsx_vssrani_du_q: |
3259 | case Intrinsic::loongarch_lsx_vssrlrni_d_q: |
3260 | case Intrinsic::loongarch_lsx_vssrarni_d_q: |
3261 | case Intrinsic::loongarch_lsx_vssrlrni_du_q: |
3262 | case Intrinsic::loongarch_lsx_vssrarni_du_q: |
3263 | case Intrinsic::loongarch_lasx_xvsrlni_d_q: |
3264 | case Intrinsic::loongarch_lasx_xvsrani_d_q: |
3265 | case Intrinsic::loongarch_lasx_xvsrlrni_d_q: |
3266 | case Intrinsic::loongarch_lasx_xvsrarni_d_q: |
3267 | case Intrinsic::loongarch_lasx_xvssrlni_d_q: |
3268 | case Intrinsic::loongarch_lasx_xvssrani_d_q: |
3269 | case Intrinsic::loongarch_lasx_xvssrlni_du_q: |
3270 | case Intrinsic::loongarch_lasx_xvssrani_du_q: |
3271 | case Intrinsic::loongarch_lasx_xvssrlrni_d_q: |
3272 | case Intrinsic::loongarch_lasx_xvssrarni_d_q: |
3273 | case Intrinsic::loongarch_lasx_xvssrlrni_du_q: |
3274 | case Intrinsic::loongarch_lasx_xvssrarni_du_q: |
3275 | return checkIntrinsicImmArg<7>(Op, ImmOp: 3, DAG); |
3276 | case Intrinsic::loongarch_lsx_vnori_b: |
3277 | case Intrinsic::loongarch_lsx_vshuf4i_b: |
3278 | case Intrinsic::loongarch_lsx_vshuf4i_h: |
3279 | case Intrinsic::loongarch_lsx_vshuf4i_w: |
3280 | case Intrinsic::loongarch_lasx_xvnori_b: |
3281 | case Intrinsic::loongarch_lasx_xvshuf4i_b: |
3282 | case Intrinsic::loongarch_lasx_xvshuf4i_h: |
3283 | case Intrinsic::loongarch_lasx_xvshuf4i_w: |
3284 | case Intrinsic::loongarch_lasx_xvpermi_d: |
3285 | return checkIntrinsicImmArg<8>(Op, ImmOp: 2, DAG); |
3286 | case Intrinsic::loongarch_lsx_vshuf4i_d: |
3287 | case Intrinsic::loongarch_lsx_vpermi_w: |
3288 | case Intrinsic::loongarch_lsx_vbitseli_b: |
3289 | case Intrinsic::loongarch_lsx_vextrins_b: |
3290 | case Intrinsic::loongarch_lsx_vextrins_h: |
3291 | case Intrinsic::loongarch_lsx_vextrins_w: |
3292 | case Intrinsic::loongarch_lsx_vextrins_d: |
3293 | case Intrinsic::loongarch_lasx_xvshuf4i_d: |
3294 | case Intrinsic::loongarch_lasx_xvpermi_w: |
3295 | case Intrinsic::loongarch_lasx_xvpermi_q: |
3296 | case Intrinsic::loongarch_lasx_xvbitseli_b: |
3297 | case Intrinsic::loongarch_lasx_xvextrins_b: |
3298 | case Intrinsic::loongarch_lasx_xvextrins_h: |
3299 | case Intrinsic::loongarch_lasx_xvextrins_w: |
3300 | case Intrinsic::loongarch_lasx_xvextrins_d: |
3301 | return checkIntrinsicImmArg<8>(Op, ImmOp: 3, DAG); |
3302 | case Intrinsic::loongarch_lsx_vrepli_b: |
3303 | case Intrinsic::loongarch_lsx_vrepli_h: |
3304 | case Intrinsic::loongarch_lsx_vrepli_w: |
3305 | case Intrinsic::loongarch_lsx_vrepli_d: |
3306 | case Intrinsic::loongarch_lasx_xvrepli_b: |
3307 | case Intrinsic::loongarch_lasx_xvrepli_h: |
3308 | case Intrinsic::loongarch_lasx_xvrepli_w: |
3309 | case Intrinsic::loongarch_lasx_xvrepli_d: |
3310 | return checkIntrinsicImmArg<10>(Op, ImmOp: 1, DAG, /*IsSigned=*/true); |
3311 | case Intrinsic::loongarch_lsx_vldi: |
3312 | case Intrinsic::loongarch_lasx_xvldi: |
3313 | return checkIntrinsicImmArg<13>(Op, ImmOp: 1, DAG, /*IsSigned=*/true); |
3314 | } |
3315 | } |
3316 | |
// Helper function that emits an error message for intrinsics with a chain and
// returns the merge values of an UNDEF and the chain.
3319 | static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, |
3320 | StringRef ErrorMsg, |
3321 | SelectionDAG &DAG) { |
  DAG.getContext()->emitError(ErrorStr: Op->getOperationName(G: 0) + ": " + ErrorMsg + ".");
3323 | return DAG.getMergeValues(Ops: {DAG.getUNDEF(VT: Op.getValueType()), Op.getOperand(i: 0)}, |
3324 | dl: SDLoc(Op)); |
3325 | } |
3326 | |
3327 | SDValue |
3328 | LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, |
3329 | SelectionDAG &DAG) const { |
3330 | SDLoc DL(Op); |
3331 | MVT GRLenVT = Subtarget.getGRLenVT(); |
3332 | EVT VT = Op.getValueType(); |
3333 | SDValue Chain = Op.getOperand(i: 0); |
  const StringRef ErrorMsgOOR = "argument out of range";
  const StringRef ErrorMsgReqLA64 = "requires loongarch64";
  const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
3337 | |
3338 | switch (Op.getConstantOperandVal(i: 1)) { |
3339 | default: |
3340 | return Op; |
3341 | case Intrinsic::loongarch_crc_w_b_w: |
3342 | case Intrinsic::loongarch_crc_w_h_w: |
3343 | case Intrinsic::loongarch_crc_w_w_w: |
3344 | case Intrinsic::loongarch_crc_w_d_w: |
3345 | case Intrinsic::loongarch_crcc_w_b_w: |
3346 | case Intrinsic::loongarch_crcc_w_h_w: |
3347 | case Intrinsic::loongarch_crcc_w_w_w: |
3348 | case Intrinsic::loongarch_crcc_w_d_w: |
3349 | return emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG); |
3350 | case Intrinsic::loongarch_csrrd_w: |
3351 | case Intrinsic::loongarch_csrrd_d: { |
3352 | unsigned Imm = Op.getConstantOperandVal(i: 2); |
3353 | return !isUInt<14>(x: Imm) |
3354 | ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
3355 | : DAG.getNode(Opcode: LoongArchISD::CSRRD, DL, ResultTys: {GRLenVT, MVT::Other}, |
3356 | Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
3357 | } |
3358 | case Intrinsic::loongarch_csrwr_w: |
3359 | case Intrinsic::loongarch_csrwr_d: { |
3360 | unsigned Imm = Op.getConstantOperandVal(i: 3); |
3361 | return !isUInt<14>(x: Imm) |
3362 | ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
3363 | : DAG.getNode(Opcode: LoongArchISD::CSRWR, DL, ResultTys: {GRLenVT, MVT::Other}, |
3364 | Ops: {Chain, Op.getOperand(i: 2), |
3365 | DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
3366 | } |
3367 | case Intrinsic::loongarch_csrxchg_w: |
3368 | case Intrinsic::loongarch_csrxchg_d: { |
3369 | unsigned Imm = Op.getConstantOperandVal(i: 4); |
3370 | return !isUInt<14>(x: Imm) |
3371 | ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
3372 | : DAG.getNode(Opcode: LoongArchISD::CSRXCHG, DL, ResultTys: {GRLenVT, MVT::Other}, |
3373 | Ops: {Chain, Op.getOperand(i: 2), Op.getOperand(i: 3), |
3374 | DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
3375 | } |
3376 | case Intrinsic::loongarch_iocsrrd_d: { |
3377 | return DAG.getNode( |
3378 | Opcode: LoongArchISD::IOCSRRD_D, DL, ResultTys: {GRLenVT, MVT::Other}, |
3379 | Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op.getOperand(i: 2))}); |
3380 | } |
3381 | #define IOCSRRD_CASE(NAME, NODE) \ |
3382 | case Intrinsic::loongarch_##NAME: { \ |
3383 | return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \ |
3384 | {Chain, Op.getOperand(2)}); \ |
3385 | } |
3386 | IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B); |
3387 | IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H); |
3388 | IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W); |
3389 | #undef IOCSRRD_CASE |
3390 | case Intrinsic::loongarch_cpucfg: { |
3391 | return DAG.getNode(Opcode: LoongArchISD::CPUCFG, DL, ResultTys: {GRLenVT, MVT::Other}, |
3392 | Ops: {Chain, Op.getOperand(i: 2)}); |
3393 | } |
3394 | case Intrinsic::loongarch_lddir_d: { |
3395 | unsigned Imm = Op.getConstantOperandVal(i: 3); |
3396 | return !isUInt<8>(x: Imm) |
3397 | ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
3398 | : Op; |
3399 | } |
3400 | case Intrinsic::loongarch_movfcsr2gr: { |
3401 | if (!Subtarget.hasBasicF()) |
3402 | return emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgReqF, DAG); |
3403 | unsigned Imm = Op.getConstantOperandVal(i: 2); |
3404 | return !isUInt<2>(x: Imm) |
3405 | ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
3406 | : DAG.getNode(Opcode: LoongArchISD::MOVFCSR2GR, DL, ResultTys: {VT, MVT::Other}, |
3407 | Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
3408 | } |
3409 | case Intrinsic::loongarch_lsx_vld: |
3410 | case Intrinsic::loongarch_lsx_vldrepl_b: |
3411 | case Intrinsic::loongarch_lasx_xvld: |
3412 | case Intrinsic::loongarch_lasx_xvldrepl_b: |
3413 | return !isInt<12>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue()) |
3414 | ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
3415 | : SDValue(); |
3416 | case Intrinsic::loongarch_lsx_vldrepl_h: |
3417 | case Intrinsic::loongarch_lasx_xvldrepl_h: |
3418 | return !isShiftedInt<11, 1>( |
3419 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue()) |
3420 | ? emitIntrinsicWithChainErrorMessage( |
3421 | Op, ErrorMsg: "argument out of range or not a multiple of 2" , DAG) |
3422 | : SDValue(); |
3423 | case Intrinsic::loongarch_lsx_vldrepl_w: |
3424 | case Intrinsic::loongarch_lasx_xvldrepl_w: |
3425 | return !isShiftedInt<10, 2>( |
3426 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue()) |
3427 | ? emitIntrinsicWithChainErrorMessage( |
3428 | Op, ErrorMsg: "argument out of range or not a multiple of 4" , DAG) |
3429 | : SDValue(); |
3430 | case Intrinsic::loongarch_lsx_vldrepl_d: |
3431 | case Intrinsic::loongarch_lasx_xvldrepl_d: |
3432 | return !isShiftedInt<9, 3>( |
3433 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue()) |
3434 | ? emitIntrinsicWithChainErrorMessage( |
3435 | Op, ErrorMsg: "argument out of range or not a multiple of 8" , DAG) |
3436 | : SDValue(); |
3437 | } |
3438 | } |
3439 | |
// Helper function that emits an error message for intrinsics with a void
// return value and returns the chain.
3442 | static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, |
3443 | SelectionDAG &DAG) { |
3444 | |
  DAG.getContext()->emitError(ErrorStr: Op->getOperationName(G: 0) + ": " + ErrorMsg + ".");
3446 | return Op.getOperand(i: 0); |
3447 | } |
3448 | |
3449 | SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, |
3450 | SelectionDAG &DAG) const { |
3451 | SDLoc DL(Op); |
3452 | MVT GRLenVT = Subtarget.getGRLenVT(); |
3453 | SDValue Chain = Op.getOperand(i: 0); |
3454 | uint64_t IntrinsicEnum = Op.getConstantOperandVal(i: 1); |
3455 | SDValue Op2 = Op.getOperand(i: 2); |
  const StringRef ErrorMsgOOR = "argument out of range";
  const StringRef ErrorMsgReqLA64 = "requires loongarch64";
  const StringRef ErrorMsgReqLA32 = "requires loongarch32";
  const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
3460 | |
3461 | switch (IntrinsicEnum) { |
3462 | default: |
3463 | // TODO: Add more Intrinsics. |
3464 | return SDValue(); |
3465 | case Intrinsic::loongarch_cacop_d: |
3466 | case Intrinsic::loongarch_cacop_w: { |
3467 | if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit()) |
3468 | return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG); |
3469 | if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit()) |
3470 | return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA32, DAG); |
3471 | // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12) |
3472 | unsigned Imm1 = Op2->getAsZExtVal(); |
3473 | int Imm2 = cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue(); |
3474 | if (!isUInt<5>(x: Imm1) || !isInt<12>(x: Imm2)) |
3475 | return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG); |
3476 | return Op; |
3477 | } |
3478 | case Intrinsic::loongarch_dbar: { |
3479 | unsigned Imm = Op2->getAsZExtVal(); |
3480 | return !isUInt<15>(x: Imm) |
3481 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
3482 | : DAG.getNode(Opcode: LoongArchISD::DBAR, DL, VT: MVT::Other, N1: Chain, |
3483 | N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT)); |
3484 | } |
3485 | case Intrinsic::loongarch_ibar: { |
3486 | unsigned Imm = Op2->getAsZExtVal(); |
3487 | return !isUInt<15>(x: Imm) |
3488 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
3489 | : DAG.getNode(Opcode: LoongArchISD::IBAR, DL, VT: MVT::Other, N1: Chain, |
3490 | N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT)); |
3491 | } |
3492 | case Intrinsic::loongarch_break: { |
3493 | unsigned Imm = Op2->getAsZExtVal(); |
3494 | return !isUInt<15>(x: Imm) |
3495 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
3496 | : DAG.getNode(Opcode: LoongArchISD::BREAK, DL, VT: MVT::Other, N1: Chain, |
3497 | N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT)); |
3498 | } |
3499 | case Intrinsic::loongarch_movgr2fcsr: { |
3500 | if (!Subtarget.hasBasicF()) |
3501 | return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqF, DAG); |
3502 | unsigned Imm = Op2->getAsZExtVal(); |
3503 | return !isUInt<2>(x: Imm) |
3504 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
3505 | : DAG.getNode(Opcode: LoongArchISD::MOVGR2FCSR, DL, VT: MVT::Other, N1: Chain, |
3506 | N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT), |
3507 | N3: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: GRLenVT, |
3508 | Operand: Op.getOperand(i: 3))); |
3509 | } |
3510 | case Intrinsic::loongarch_syscall: { |
3511 | unsigned Imm = Op2->getAsZExtVal(); |
3512 | return !isUInt<15>(x: Imm) |
3513 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
3514 | : DAG.getNode(Opcode: LoongArchISD::SYSCALL, DL, VT: MVT::Other, N1: Chain, |
3515 | N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT)); |
3516 | } |
3517 | #define IOCSRWR_CASE(NAME, NODE) \ |
3518 | case Intrinsic::loongarch_##NAME: { \ |
3519 | SDValue Op3 = Op.getOperand(3); \ |
3520 | return Subtarget.is64Bit() \ |
3521 | ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \ |
3522 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \ |
3523 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \ |
3524 | : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \ |
3525 | Op3); \ |
3526 | } |
3527 | IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B); |
3528 | IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H); |
3529 | IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W); |
3530 | #undef IOCSRWR_CASE |
3531 | case Intrinsic::loongarch_iocsrwr_d: { |
3532 | return !Subtarget.is64Bit() |
3533 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG) |
3534 | : DAG.getNode(Opcode: LoongArchISD::IOCSRWR_D, DL, VT: MVT::Other, N1: Chain, |
3535 | N2: Op2, |
3536 | N3: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, |
3537 | Operand: Op.getOperand(i: 3))); |
3538 | } |
3539 | #define ASRT_LE_GT_CASE(NAME) \ |
3540 | case Intrinsic::loongarch_##NAME: { \ |
3541 | return !Subtarget.is64Bit() \ |
3542 | ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \ |
3543 | : Op; \ |
3544 | } |
3545 | ASRT_LE_GT_CASE(asrtle_d) |
3546 | ASRT_LE_GT_CASE(asrtgt_d) |
3547 | #undef ASRT_LE_GT_CASE |
3548 | case Intrinsic::loongarch_ldpte_d: { |
3549 | unsigned Imm = Op.getConstantOperandVal(i: 3); |
3550 | return !Subtarget.is64Bit() |
3551 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG) |
3552 | : !isUInt<8>(x: Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
3553 | : Op; |
3554 | } |
3555 | case Intrinsic::loongarch_lsx_vst: |
3556 | case Intrinsic::loongarch_lasx_xvst: |
3557 | return !isInt<12>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) |
3558 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
3559 | : SDValue(); |
3560 | case Intrinsic::loongarch_lasx_xvstelm_b: |
3561 | return (!isInt<8>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
3562 | !isUInt<5>(x: Op.getConstantOperandVal(i: 5))) |
3563 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
3564 | : SDValue(); |
3565 | case Intrinsic::loongarch_lsx_vstelm_b: |
3566 | return (!isInt<8>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
3567 | !isUInt<4>(x: Op.getConstantOperandVal(i: 5))) |
3568 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
3569 | : SDValue(); |
3570 | case Intrinsic::loongarch_lasx_xvstelm_h: |
3571 | return (!isShiftedInt<8, 1>( |
3572 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
3573 | !isUInt<4>(x: Op.getConstantOperandVal(i: 5))) |
3574 | ? emitIntrinsicErrorMessage( |
3575 | Op, ErrorMsg: "argument out of range or not a multiple of 2" , DAG) |
3576 | : SDValue(); |
3577 | case Intrinsic::loongarch_lsx_vstelm_h: |
3578 | return (!isShiftedInt<8, 1>( |
3579 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
3580 | !isUInt<3>(x: Op.getConstantOperandVal(i: 5))) |
3581 | ? emitIntrinsicErrorMessage( |
3582 | Op, ErrorMsg: "argument out of range or not a multiple of 2" , DAG) |
3583 | : SDValue(); |
3584 | case Intrinsic::loongarch_lasx_xvstelm_w: |
3585 | return (!isShiftedInt<8, 2>( |
3586 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
3587 | !isUInt<3>(x: Op.getConstantOperandVal(i: 5))) |
3588 | ? emitIntrinsicErrorMessage( |
3589 | Op, ErrorMsg: "argument out of range or not a multiple of 4" , DAG) |
3590 | : SDValue(); |
3591 | case Intrinsic::loongarch_lsx_vstelm_w: |
3592 | return (!isShiftedInt<8, 2>( |
3593 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
3594 | !isUInt<2>(x: Op.getConstantOperandVal(i: 5))) |
3595 | ? emitIntrinsicErrorMessage( |
3596 | Op, ErrorMsg: "argument out of range or not a multiple of 4" , DAG) |
3597 | : SDValue(); |
3598 | case Intrinsic::loongarch_lasx_xvstelm_d: |
3599 | return (!isShiftedInt<8, 3>( |
3600 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
3601 | !isUInt<2>(x: Op.getConstantOperandVal(i: 5))) |
3602 | ? emitIntrinsicErrorMessage( |
3603 | Op, ErrorMsg: "argument out of range or not a multiple of 8" , DAG) |
3604 | : SDValue(); |
3605 | case Intrinsic::loongarch_lsx_vstelm_d: |
3606 | return (!isShiftedInt<8, 3>( |
3607 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
3608 | !isUInt<1>(x: Op.getConstantOperandVal(i: 5))) |
3609 | ? emitIntrinsicErrorMessage( |
3610 | Op, ErrorMsg: "argument out of range or not a multiple of 8" , DAG) |
3611 | : SDValue(); |
3612 | } |
3613 | } |
3614 | |
3615 | SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op, |
3616 | SelectionDAG &DAG) const { |
3617 | SDLoc DL(Op); |
3618 | SDValue Lo = Op.getOperand(i: 0); |
3619 | SDValue Hi = Op.getOperand(i: 1); |
3620 | SDValue Shamt = Op.getOperand(i: 2); |
3621 | EVT VT = Lo.getValueType(); |
3622 | |
3623 | // if Shamt-GRLen < 0: // Shamt < GRLen |
3624 | // Lo = Lo << Shamt |
3625 | // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt)) |
3626 | // else: |
3627 | // Lo = 0 |
3628 | // Hi = Lo << (Shamt-GRLen) |
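  // For example, on LA32 (GRLen = 32) with Shamt = 4 this computes
  // Lo = Lo << 4 and Hi = (Hi << 4) | ((Lo >>u 1) >>u 27), moving the top
  // four bits of Lo into the low bits of Hi; with Shamt = 40 it computes
  // Lo = 0 and Hi = Lo << 8.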
3629 | |
3630 | SDValue Zero = DAG.getConstant(Val: 0, DL, VT); |
3631 | SDValue One = DAG.getConstant(Val: 1, DL, VT); |
3632 | SDValue MinusGRLen = |
3633 | DAG.getSignedConstant(Val: -(int)Subtarget.getGRLen(), DL, VT); |
3634 | SDValue GRLenMinus1 = DAG.getConstant(Val: Subtarget.getGRLen() - 1, DL, VT); |
3635 | SDValue ShamtMinusGRLen = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shamt, N2: MinusGRLen); |
3636 | SDValue GRLenMinus1Shamt = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Shamt, N2: GRLenMinus1); |
3637 | |
3638 | SDValue LoTrue = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Lo, N2: Shamt); |
3639 | SDValue ShiftRight1Lo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo, N2: One); |
3640 | SDValue ShiftRightLo = |
3641 | DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: ShiftRight1Lo, N2: GRLenMinus1Shamt); |
3642 | SDValue ShiftLeftHi = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi, N2: Shamt); |
3643 | SDValue HiTrue = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShiftLeftHi, N2: ShiftRightLo); |
3644 | SDValue HiFalse = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Lo, N2: ShamtMinusGRLen); |
3645 | |
3646 | SDValue CC = DAG.getSetCC(DL, VT, LHS: ShamtMinusGRLen, RHS: Zero, Cond: ISD::SETLT); |
3647 | |
3648 | Lo = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: LoTrue, N3: Zero); |
3649 | Hi = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: HiTrue, N3: HiFalse); |
3650 | |
3651 | SDValue Parts[2] = {Lo, Hi}; |
3652 | return DAG.getMergeValues(Ops: Parts, dl: DL); |
3653 | } |
3654 | |
3655 | SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op, |
3656 | SelectionDAG &DAG, |
3657 | bool IsSRA) const { |
3658 | SDLoc DL(Op); |
3659 | SDValue Lo = Op.getOperand(i: 0); |
3660 | SDValue Hi = Op.getOperand(i: 1); |
3661 | SDValue Shamt = Op.getOperand(i: 2); |
3662 | EVT VT = Lo.getValueType(); |
3663 | |
3664 | // SRA expansion: |
3665 | // if Shamt-GRLen < 0: // Shamt < GRLen |
3666 | // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1)) |
3667 | // Hi = Hi >>s Shamt |
3668 | // else: |
3669 | // Lo = Hi >>s (Shamt-GRLen); |
3670 | // Hi = Hi >>s (GRLen-1) |
3671 | // |
3672 | // SRL expansion: |
3673 | // if Shamt-GRLen < 0: // Shamt < GRLen |
3674 | // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1)) |
3675 | // Hi = Hi >>u Shamt |
3676 | // else: |
3677 | // Lo = Hi >>u (Shamt-GRLen); |
3678 | // Hi = 0; |
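  // For example, an SRL on LA32 (GRLen = 32) with Shamt = 4 computes
  // Lo = (Lo >>u 4) | ((Hi << 1) << 27) and Hi = Hi >>u 4, moving the low
  // four bits of Hi into the high bits of Lo; with Shamt = 40 it computes
  // Lo = Hi >>u 8 and Hi = 0.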
3679 | |
3680 | unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL; |
3681 | |
3682 | SDValue Zero = DAG.getConstant(Val: 0, DL, VT); |
3683 | SDValue One = DAG.getConstant(Val: 1, DL, VT); |
3684 | SDValue MinusGRLen = |
3685 | DAG.getSignedConstant(Val: -(int)Subtarget.getGRLen(), DL, VT); |
3686 | SDValue GRLenMinus1 = DAG.getConstant(Val: Subtarget.getGRLen() - 1, DL, VT); |
3687 | SDValue ShamtMinusGRLen = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shamt, N2: MinusGRLen); |
3688 | SDValue GRLenMinus1Shamt = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Shamt, N2: GRLenMinus1); |
3689 | |
3690 | SDValue ShiftRightLo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo, N2: Shamt); |
3691 | SDValue ShiftLeftHi1 = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi, N2: One); |
3692 | SDValue ShiftLeftHi = |
3693 | DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: ShiftLeftHi1, N2: GRLenMinus1Shamt); |
3694 | SDValue LoTrue = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShiftRightLo, N2: ShiftLeftHi); |
3695 | SDValue HiTrue = DAG.getNode(Opcode: ShiftRightOp, DL, VT, N1: Hi, N2: Shamt); |
3696 | SDValue LoFalse = DAG.getNode(Opcode: ShiftRightOp, DL, VT, N1: Hi, N2: ShamtMinusGRLen); |
3697 | SDValue HiFalse = |
3698 | IsSRA ? DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: Hi, N2: GRLenMinus1) : Zero; |
3699 | |
3700 | SDValue CC = DAG.getSetCC(DL, VT, LHS: ShamtMinusGRLen, RHS: Zero, Cond: ISD::SETLT); |
3701 | |
3702 | Lo = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: LoTrue, N3: LoFalse); |
3703 | Hi = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: HiTrue, N3: HiFalse); |
3704 | |
3705 | SDValue Parts[2] = {Lo, Hi}; |
3706 | return DAG.getMergeValues(Ops: Parts, dl: DL); |
3707 | } |
3708 | |
3709 | // Returns the opcode of the target-specific SDNode that implements the 32-bit |
3710 | // form of the given Opcode. |
3711 | static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) { |
3712 | switch (Opcode) { |
3713 | default: |
3714 | llvm_unreachable("Unexpected opcode" ); |
3715 | case ISD::SDIV: |
3716 | return LoongArchISD::DIV_W; |
3717 | case ISD::UDIV: |
3718 | return LoongArchISD::DIV_WU; |
3719 | case ISD::SREM: |
3720 | return LoongArchISD::MOD_W; |
3721 | case ISD::UREM: |
3722 | return LoongArchISD::MOD_WU; |
3723 | case ISD::SHL: |
3724 | return LoongArchISD::SLL_W; |
3725 | case ISD::SRA: |
3726 | return LoongArchISD::SRA_W; |
3727 | case ISD::SRL: |
3728 | return LoongArchISD::SRL_W; |
3729 | case ISD::ROTL: |
3730 | case ISD::ROTR: |
3731 | return LoongArchISD::ROTR_W; |
3732 | case ISD::CTTZ: |
3733 | return LoongArchISD::CTZ_W; |
3734 | case ISD::CTLZ: |
3735 | return LoongArchISD::CLZ_W; |
3736 | } |
3737 | } |
3738 | |
// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
// node. Because i8/i16/i32 isn't a legal type for LA64, these operations
// would otherwise be promoted to i64, making it difficult to select the
// SLL_W/.../*W instructions later on, because the fact that the operation
// was originally of type i8/i16/i32 is lost.
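// For example, an i32 (shl x, y) on LA64 is rewritten here as
// (trunc (SLL_W (any_extend x), (any_extend y))), which later selects sll.w
// instead of the i64 shift that promotion would otherwise produce.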
3744 | static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, |
3745 | unsigned ExtOpc = ISD::ANY_EXTEND) { |
3746 | SDLoc DL(N); |
3747 | LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(Opcode: N->getOpcode()); |
3748 | SDValue NewOp0, NewRes; |
3749 | |
3750 | switch (NumOp) { |
3751 | default: |
3752 | llvm_unreachable("Unexpected NumOp" ); |
3753 | case 1: { |
3754 | NewOp0 = DAG.getNode(Opcode: ExtOpc, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0)); |
3755 | NewRes = DAG.getNode(Opcode: WOpcode, DL, VT: MVT::i64, Operand: NewOp0); |
3756 | break; |
3757 | } |
3758 | case 2: { |
3759 | NewOp0 = DAG.getNode(Opcode: ExtOpc, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0)); |
3760 | SDValue NewOp1 = DAG.getNode(Opcode: ExtOpc, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1)); |
3761 | if (N->getOpcode() == ISD::ROTL) { |
3762 | SDValue TmpOp = DAG.getConstant(Val: 32, DL, VT: MVT::i64); |
3763 | NewOp1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: MVT::i64, N1: TmpOp, N2: NewOp1); |
3764 | } |
3765 | NewRes = DAG.getNode(Opcode: WOpcode, DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1); |
3766 | break; |
3767 | } |
// TODO: Handle more NumOp values.
3769 | } |
3770 | |
3771 | // ReplaceNodeResults requires we maintain the same type for the return |
3772 | // value. |
3773 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: NewRes); |
3774 | } |
3775 | |
// Converts the given 32-bit operation to an i64 operation with sign-extension
// semantics, reducing the number of sign-extension instructions needed.
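//
// For example, (add i32 %a, %b) becomes
//   (trunc i32 (sext_inreg (add i64 (any_ext %a), (any_ext %b)), i32))
// which typically selects to a single ADD.W with no extra extension.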
3778 | static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) { |
3779 | SDLoc DL(N); |
3780 | SDValue NewOp0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0)); |
3781 | SDValue NewOp1 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1)); |
3782 | SDValue NewWOp = DAG.getNode(Opcode: N->getOpcode(), DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1); |
3783 | SDValue NewRes = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: MVT::i64, N1: NewWOp, |
3784 | N2: DAG.getValueType(MVT::i32)); |
3785 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: NewRes); |
3786 | } |
3787 | |
// Helper function that emits an error message for intrinsics with/without a
// chain and returns UNDEF (plus the chain, if present) as the results.
3790 | static void emitErrorAndReplaceIntrinsicResults( |
3791 | SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG, |
3792 | StringRef ErrorMsg, bool WithChain = true) { |
3793 | DAG.getContext()->emitError(ErrorStr: N->getOperationName(G: 0) + ": " + ErrorMsg + "." ); |
3794 | Results.push_back(Elt: DAG.getUNDEF(VT: N->getValueType(ResNo: 0))); |
3795 | if (!WithChain) |
3796 | return; |
3797 | Results.push_back(Elt: N->getOperand(Num: 0)); |
3798 | } |
3799 | |
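// Replace a [x]vpickve2gr intrinsic with a VPICK_{SEXT,ZEXT}_ELT node that
// extracts element Imm of the vector and extends it to GRLenVT, then
// truncate back to the intrinsic's original result type. An immediate that
// does not fit in N bits is diagnosed as out of range.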
3800 | template <unsigned N> |
3801 | static void |
3802 | replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results, |
3803 | SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, |
3804 | unsigned ResOp) { |
3805 | const StringRef ErrorMsgOOR = "argument out of range" ; |
3806 | unsigned Imm = Node->getConstantOperandVal(Num: 2); |
3807 | if (!isUInt<N>(Imm)) { |
3808 | emitErrorAndReplaceIntrinsicResults(N: Node, Results, DAG, ErrorMsg: ErrorMsgOOR, |
3809 | /*WithChain=*/false); |
3810 | return; |
3811 | } |
3812 | SDLoc DL(Node); |
3813 | SDValue Vec = Node->getOperand(Num: 1); |
3814 | |
3815 | SDValue PickElt = |
3816 | DAG.getNode(Opcode: ResOp, DL, VT: Subtarget.getGRLenVT(), N1: Vec, |
3817 | N2: DAG.getConstant(Val: Imm, DL, VT: Subtarget.getGRLenVT()), |
3818 | N3: DAG.getValueType(Vec.getValueType().getVectorElementType())); |
3819 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: Node->getValueType(ResNo: 0), |
3820 | Operand: PickElt.getValue(R: 0))); |
3821 | } |
3822 | |
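// Replace a vector branch-condition intrinsic ([x]vb{z,nz}.{b,h,w,d,v}) with
// the corresponding VALL_*/VANY_* node producing a GRLenVT value, truncated
// back to the intrinsic's original result type.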
3823 | static void replaceVecCondBranchResults(SDNode *N, |
3824 | SmallVectorImpl<SDValue> &Results, |
3825 | SelectionDAG &DAG, |
3826 | const LoongArchSubtarget &Subtarget, |
3827 | unsigned ResOp) { |
3828 | SDLoc DL(N); |
3829 | SDValue Vec = N->getOperand(Num: 1); |
3830 | |
3831 | SDValue CB = DAG.getNode(Opcode: ResOp, DL, VT: Subtarget.getGRLenVT(), Operand: Vec); |
3832 | Results.push_back( |
3833 | Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: CB.getValue(R: 0))); |
3834 | } |
3835 | |
3836 | static void |
3837 | replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results, |
3838 | SelectionDAG &DAG, |
3839 | const LoongArchSubtarget &Subtarget) { |
3840 | switch (N->getConstantOperandVal(Num: 0)) { |
3841 | default: |
3842 | llvm_unreachable("Unexpected Intrinsic." ); |
3843 | case Intrinsic::loongarch_lsx_vpickve2gr_b: |
3844 | replaceVPICKVE2GRResults<4>(Node: N, Results, DAG, Subtarget, |
3845 | ResOp: LoongArchISD::VPICK_SEXT_ELT); |
3846 | break; |
3847 | case Intrinsic::loongarch_lsx_vpickve2gr_h: |
3848 | case Intrinsic::loongarch_lasx_xvpickve2gr_w: |
3849 | replaceVPICKVE2GRResults<3>(Node: N, Results, DAG, Subtarget, |
3850 | ResOp: LoongArchISD::VPICK_SEXT_ELT); |
3851 | break; |
3852 | case Intrinsic::loongarch_lsx_vpickve2gr_w: |
3853 | replaceVPICKVE2GRResults<2>(Node: N, Results, DAG, Subtarget, |
3854 | ResOp: LoongArchISD::VPICK_SEXT_ELT); |
3855 | break; |
3856 | case Intrinsic::loongarch_lsx_vpickve2gr_bu: |
3857 | replaceVPICKVE2GRResults<4>(Node: N, Results, DAG, Subtarget, |
3858 | ResOp: LoongArchISD::VPICK_ZEXT_ELT); |
3859 | break; |
3860 | case Intrinsic::loongarch_lsx_vpickve2gr_hu: |
3861 | case Intrinsic::loongarch_lasx_xvpickve2gr_wu: |
3862 | replaceVPICKVE2GRResults<3>(Node: N, Results, DAG, Subtarget, |
3863 | ResOp: LoongArchISD::VPICK_ZEXT_ELT); |
3864 | break; |
3865 | case Intrinsic::loongarch_lsx_vpickve2gr_wu: |
3866 | replaceVPICKVE2GRResults<2>(Node: N, Results, DAG, Subtarget, |
3867 | ResOp: LoongArchISD::VPICK_ZEXT_ELT); |
3868 | break; |
3869 | case Intrinsic::loongarch_lsx_bz_b: |
3870 | case Intrinsic::loongarch_lsx_bz_h: |
3871 | case Intrinsic::loongarch_lsx_bz_w: |
3872 | case Intrinsic::loongarch_lsx_bz_d: |
3873 | case Intrinsic::loongarch_lasx_xbz_b: |
3874 | case Intrinsic::loongarch_lasx_xbz_h: |
3875 | case Intrinsic::loongarch_lasx_xbz_w: |
3876 | case Intrinsic::loongarch_lasx_xbz_d: |
3877 | replaceVecCondBranchResults(N, Results, DAG, Subtarget, |
3878 | ResOp: LoongArchISD::VALL_ZERO); |
3879 | break; |
3880 | case Intrinsic::loongarch_lsx_bz_v: |
3881 | case Intrinsic::loongarch_lasx_xbz_v: |
3882 | replaceVecCondBranchResults(N, Results, DAG, Subtarget, |
3883 | ResOp: LoongArchISD::VANY_ZERO); |
3884 | break; |
3885 | case Intrinsic::loongarch_lsx_bnz_b: |
3886 | case Intrinsic::loongarch_lsx_bnz_h: |
3887 | case Intrinsic::loongarch_lsx_bnz_w: |
3888 | case Intrinsic::loongarch_lsx_bnz_d: |
3889 | case Intrinsic::loongarch_lasx_xbnz_b: |
3890 | case Intrinsic::loongarch_lasx_xbnz_h: |
3891 | case Intrinsic::loongarch_lasx_xbnz_w: |
3892 | case Intrinsic::loongarch_lasx_xbnz_d: |
3893 | replaceVecCondBranchResults(N, Results, DAG, Subtarget, |
3894 | ResOp: LoongArchISD::VALL_NONZERO); |
3895 | break; |
3896 | case Intrinsic::loongarch_lsx_bnz_v: |
3897 | case Intrinsic::loongarch_lasx_xbnz_v: |
3898 | replaceVecCondBranchResults(N, Results, DAG, Subtarget, |
3899 | ResOp: LoongArchISD::VANY_NONZERO); |
3900 | break; |
3901 | } |
3902 | } |
3903 | |
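// Expand a 128-bit atomic compare-and-swap into a PseudoCmpXchg128* machine
// node: the expected and new i128 values are split into i64 halves, and the
// two i64 result halves are recombined with BUILD_PAIR. Acquire-or-stronger
// orderings select the Acquire variant of the pseudo.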
3904 | static void replaceCMP_XCHG_128Results(SDNode *N, |
3905 | SmallVectorImpl<SDValue> &Results, |
3906 | SelectionDAG &DAG) { |
3907 | assert(N->getValueType(0) == MVT::i128 && |
3908 | "AtomicCmpSwap on types less than 128 should be legal" ); |
3909 | MachineMemOperand *MemOp = cast<MemSDNode>(Val: N)->getMemOperand(); |
3910 | |
3911 | unsigned Opcode; |
3912 | switch (MemOp->getMergedOrdering()) { |
3913 | case AtomicOrdering::Acquire: |
3914 | case AtomicOrdering::AcquireRelease: |
3915 | case AtomicOrdering::SequentiallyConsistent: |
3916 | Opcode = LoongArch::PseudoCmpXchg128Acquire; |
3917 | break; |
3918 | case AtomicOrdering::Monotonic: |
3919 | case AtomicOrdering::Release: |
3920 | Opcode = LoongArch::PseudoCmpXchg128; |
3921 | break; |
3922 | default: |
3923 | llvm_unreachable("Unexpected ordering!" ); |
3924 | } |
3925 | |
3926 | SDLoc DL(N); |
3927 | auto CmpVal = DAG.SplitScalar(N: N->getOperand(Num: 2), DL, LoVT: MVT::i64, HiVT: MVT::i64); |
3928 | auto NewVal = DAG.SplitScalar(N: N->getOperand(Num: 3), DL, LoVT: MVT::i64, HiVT: MVT::i64); |
3929 | SDValue Ops[] = {N->getOperand(Num: 1), CmpVal.first, CmpVal.second, |
3930 | NewVal.first, NewVal.second, N->getOperand(Num: 0)}; |
3931 | |
3932 | SDNode *CmpSwap = DAG.getMachineNode( |
3933 | Opcode, dl: SDLoc(N), VTs: DAG.getVTList(VT1: MVT::i64, VT2: MVT::i64, VT3: MVT::i64, VT4: MVT::Other), |
3934 | Ops); |
3935 | DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: CmpSwap), NewMemRefs: {MemOp}); |
3936 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: MVT::i128, |
3937 | N1: SDValue(CmpSwap, 0), N2: SDValue(CmpSwap, 1))); |
3938 | Results.push_back(Elt: SDValue(CmpSwap, 3)); |
3939 | } |
3940 | |
3941 | void LoongArchTargetLowering::ReplaceNodeResults( |
3942 | SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { |
3943 | SDLoc DL(N); |
3944 | EVT VT = N->getValueType(ResNo: 0); |
3945 | switch (N->getOpcode()) { |
3946 | default: |
3947 | llvm_unreachable("Don't know how to legalize this operation" ); |
3948 | case ISD::ADD: |
3949 | case ISD::SUB: |
3950 | assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && |
3951 | "Unexpected custom legalisation" ); |
3952 | Results.push_back(Elt: customLegalizeToWOpWithSExt(N, DAG)); |
3953 | break; |
3954 | case ISD::SDIV: |
3955 | case ISD::UDIV: |
3956 | case ISD::SREM: |
3957 | case ISD::UREM: |
3958 | assert(VT == MVT::i32 && Subtarget.is64Bit() && |
3959 | "Unexpected custom legalisation" ); |
3960 | Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 2, |
3961 | ExtOpc: Subtarget.hasDiv32() && VT == MVT::i32 |
3962 | ? ISD::ANY_EXTEND |
3963 | : ISD::SIGN_EXTEND)); |
3964 | break; |
3965 | case ISD::SHL: |
3966 | case ISD::SRA: |
3967 | case ISD::SRL: |
3968 | assert(VT == MVT::i32 && Subtarget.is64Bit() && |
3969 | "Unexpected custom legalisation" ); |
3970 | if (N->getOperand(Num: 1).getOpcode() != ISD::Constant) { |
3971 | Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 2)); |
3972 | break; |
3973 | } |
3974 | break; |
3975 | case ISD::ROTL: |
3976 | case ISD::ROTR: |
3977 | assert(VT == MVT::i32 && Subtarget.is64Bit() && |
3978 | "Unexpected custom legalisation" ); |
3979 | Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 2)); |
3980 | break; |
3981 | case ISD::FP_TO_SINT: { |
3982 | assert(VT == MVT::i32 && Subtarget.is64Bit() && |
3983 | "Unexpected custom legalisation" ); |
3984 | SDValue Src = N->getOperand(Num: 0); |
3985 | EVT FVT = EVT::getFloatingPointVT(BitWidth: N->getValueSizeInBits(ResNo: 0)); |
3986 | if (getTypeAction(Context&: *DAG.getContext(), VT: Src.getValueType()) != |
3987 | TargetLowering::TypeSoftenFloat) { |
3988 | if (!isTypeLegal(VT: Src.getValueType())) |
3989 | return; |
3990 | if (Src.getValueType() == MVT::f16) |
3991 | Src = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: MVT::f32, Operand: Src); |
3992 | SDValue Dst = DAG.getNode(Opcode: LoongArchISD::FTINT, DL, VT: FVT, Operand: Src); |
3993 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Dst)); |
3994 | return; |
3995 | } |
3996 | // If the FP type needs to be softened, emit a library call using the 'si' |
3997 | // version. If we left it to default legalization we'd end up with 'di'. |
3998 | RTLIB::Libcall LC; |
3999 | LC = RTLIB::getFPTOSINT(OpVT: Src.getValueType(), RetVT: VT); |
4000 | MakeLibCallOptions CallOptions; |
4001 | EVT OpVT = Src.getValueType(); |
4002 | CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT: VT, Value: true); |
4003 | SDValue Chain = SDValue(); |
4004 | SDValue Result; |
4005 | std::tie(args&: Result, args&: Chain) = |
4006 | makeLibCall(DAG, LC, RetVT: VT, Ops: Src, CallOptions, dl: DL, Chain); |
4007 | Results.push_back(Elt: Result); |
4008 | break; |
4009 | } |
4010 | case ISD::BITCAST: { |
4011 | SDValue Src = N->getOperand(Num: 0); |
4012 | EVT SrcVT = Src.getValueType(); |
4013 | if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() && |
4014 | Subtarget.hasBasicF()) { |
4015 | SDValue Dst = |
4016 | DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Src); |
4017 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Dst)); |
4018 | } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) { |
4019 | SDValue NewReg = DAG.getNode(Opcode: LoongArchISD::SPLIT_PAIR_F64, DL, |
4020 | VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32), N: Src); |
4021 | SDValue RetReg = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: MVT::i64, |
4022 | N1: NewReg.getValue(R: 0), N2: NewReg.getValue(R: 1)); |
4023 | Results.push_back(Elt: RetReg); |
4024 | } |
4025 | break; |
4026 | } |
4027 | case ISD::FP_TO_UINT: { |
4028 | assert(VT == MVT::i32 && Subtarget.is64Bit() && |
4029 | "Unexpected custom legalisation" ); |
4030 | auto &TLI = DAG.getTargetLoweringInfo(); |
4031 | SDValue Tmp1, Tmp2; |
4032 | TLI.expandFP_TO_UINT(N, Result&: Tmp1, Chain&: Tmp2, DAG); |
4033 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Tmp1)); |
4034 | break; |
4035 | } |
4036 | case ISD::BSWAP: { |
4037 | SDValue Src = N->getOperand(Num: 0); |
4038 | assert((VT == MVT::i16 || VT == MVT::i32) && |
4039 | "Unexpected custom legalization" ); |
4040 | MVT GRLenVT = Subtarget.getGRLenVT(); |
4041 | SDValue NewSrc = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: GRLenVT, Operand: Src); |
4042 | SDValue Tmp; |
4043 | switch (VT.getSizeInBits()) { |
4044 | default: |
4045 | llvm_unreachable("Unexpected operand width" ); |
4046 | case 16: |
4047 | Tmp = DAG.getNode(Opcode: LoongArchISD::REVB_2H, DL, VT: GRLenVT, Operand: NewSrc); |
4048 | break; |
4049 | case 32: |
// Only LA64 will get here, due to the size mismatch between VT and
// GRLenVT; LA32 lowering is defined directly in LoongArchInstrInfo.
4052 | Tmp = DAG.getNode(Opcode: LoongArchISD::REVB_2W, DL, VT: GRLenVT, Operand: NewSrc); |
4053 | break; |
4054 | } |
4055 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Tmp)); |
4056 | break; |
4057 | } |
4058 | case ISD::BITREVERSE: { |
4059 | SDValue Src = N->getOperand(Num: 0); |
4060 | assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) && |
4061 | "Unexpected custom legalization" ); |
4062 | MVT GRLenVT = Subtarget.getGRLenVT(); |
4063 | SDValue NewSrc = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: GRLenVT, Operand: Src); |
4064 | SDValue Tmp; |
4065 | switch (VT.getSizeInBits()) { |
4066 | default: |
4067 | llvm_unreachable("Unexpected operand width" ); |
4068 | case 8: |
4069 | Tmp = DAG.getNode(Opcode: LoongArchISD::BITREV_4B, DL, VT: GRLenVT, Operand: NewSrc); |
4070 | break; |
4071 | case 32: |
4072 | Tmp = DAG.getNode(Opcode: LoongArchISD::BITREV_W, DL, VT: GRLenVT, Operand: NewSrc); |
4073 | break; |
4074 | } |
4075 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Tmp)); |
4076 | break; |
4077 | } |
4078 | case ISD::CTLZ: |
4079 | case ISD::CTTZ: { |
4080 | assert(VT == MVT::i32 && Subtarget.is64Bit() && |
4081 | "Unexpected custom legalisation" ); |
4082 | Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 1)); |
4083 | break; |
4084 | } |
4085 | case ISD::INTRINSIC_W_CHAIN: { |
4086 | SDValue Chain = N->getOperand(Num: 0); |
4087 | SDValue Op2 = N->getOperand(Num: 2); |
4088 | MVT GRLenVT = Subtarget.getGRLenVT(); |
4089 | const StringRef ErrorMsgOOR = "argument out of range" ; |
4090 | const StringRef ErrorMsgReqLA64 = "requires loongarch64" ; |
4091 | const StringRef ErrorMsgReqF = "requires basic 'f' target feature" ; |
4092 | |
4093 | switch (N->getConstantOperandVal(Num: 1)) { |
4094 | default: |
4095 | llvm_unreachable("Unexpected Intrinsic." ); |
4096 | case Intrinsic::loongarch_movfcsr2gr: { |
4097 | if (!Subtarget.hasBasicF()) { |
4098 | emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgReqF); |
4099 | return; |
4100 | } |
4101 | unsigned Imm = Op2->getAsZExtVal(); |
4102 | if (!isUInt<2>(x: Imm)) { |
4103 | emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR); |
4104 | return; |
4105 | } |
4106 | SDValue MOVFCSR2GRResults = DAG.getNode( |
4107 | Opcode: LoongArchISD::MOVFCSR2GR, DL: SDLoc(N), ResultTys: {MVT::i64, MVT::Other}, |
4108 | Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
4109 | Results.push_back( |
4110 | Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: MOVFCSR2GRResults.getValue(R: 0))); |
4111 | Results.push_back(Elt: MOVFCSR2GRResults.getValue(R: 1)); |
4112 | break; |
4113 | } |
4114 | #define CRC_CASE_EXT_BINARYOP(NAME, NODE) \ |
4115 | case Intrinsic::loongarch_##NAME: { \ |
4116 | SDValue NODE = DAG.getNode( \ |
4117 | LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \ |
4118 | {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \ |
4119 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \ |
4120 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \ |
4121 | Results.push_back(NODE.getValue(1)); \ |
4122 | break; \ |
4123 | } |
4124 | CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W) |
4125 | CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W) |
4126 | CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W) |
4127 | CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W) |
4128 | CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W) |
4129 | CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W) |
4130 | #undef CRC_CASE_EXT_BINARYOP |
4131 | |
4132 | #define CRC_CASE_EXT_UNARYOP(NAME, NODE) \ |
4133 | case Intrinsic::loongarch_##NAME: { \ |
4134 | SDValue NODE = DAG.getNode( \ |
4135 | LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \ |
4136 | {Chain, Op2, \ |
4137 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \ |
4138 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \ |
4139 | Results.push_back(NODE.getValue(1)); \ |
4140 | break; \ |
4141 | } |
4142 | CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W) |
4143 | CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W) |
4144 | #undef CRC_CASE_EXT_UNARYOP |
4145 | #define CSR_CASE(ID) \ |
4146 | case Intrinsic::loongarch_##ID: { \ |
4147 | if (!Subtarget.is64Bit()) \ |
4148 | emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \ |
4149 | break; \ |
4150 | } |
4151 | CSR_CASE(csrrd_d); |
4152 | CSR_CASE(csrwr_d); |
4153 | CSR_CASE(csrxchg_d); |
4154 | CSR_CASE(iocsrrd_d); |
4155 | #undef CSR_CASE |
4156 | case Intrinsic::loongarch_csrrd_w: { |
4157 | unsigned Imm = Op2->getAsZExtVal(); |
4158 | if (!isUInt<14>(x: Imm)) { |
4159 | emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR); |
4160 | return; |
4161 | } |
4162 | SDValue CSRRDResults = |
4163 | DAG.getNode(Opcode: LoongArchISD::CSRRD, DL, ResultTys: {GRLenVT, MVT::Other}, |
4164 | Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
4165 | Results.push_back( |
4166 | Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRRDResults.getValue(R: 0))); |
4167 | Results.push_back(Elt: CSRRDResults.getValue(R: 1)); |
4168 | break; |
4169 | } |
4170 | case Intrinsic::loongarch_csrwr_w: { |
4171 | unsigned Imm = N->getConstantOperandVal(Num: 3); |
4172 | if (!isUInt<14>(x: Imm)) { |
4173 | emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR); |
4174 | return; |
4175 | } |
4176 | SDValue CSRWRResults = |
4177 | DAG.getNode(Opcode: LoongArchISD::CSRWR, DL, ResultTys: {GRLenVT, MVT::Other}, |
4178 | Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op2), |
4179 | DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
4180 | Results.push_back( |
4181 | Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRWRResults.getValue(R: 0))); |
4182 | Results.push_back(Elt: CSRWRResults.getValue(R: 1)); |
4183 | break; |
4184 | } |
4185 | case Intrinsic::loongarch_csrxchg_w: { |
4186 | unsigned Imm = N->getConstantOperandVal(Num: 4); |
4187 | if (!isUInt<14>(x: Imm)) { |
4188 | emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR); |
4189 | return; |
4190 | } |
4191 | SDValue CSRXCHGResults = DAG.getNode( |
4192 | Opcode: LoongArchISD::CSRXCHG, DL, ResultTys: {GRLenVT, MVT::Other}, |
4193 | Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op2), |
4194 | DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 3)), |
4195 | DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
4196 | Results.push_back( |
4197 | Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRXCHGResults.getValue(R: 0))); |
4198 | Results.push_back(Elt: CSRXCHGResults.getValue(R: 1)); |
4199 | break; |
4200 | } |
4201 | #define IOCSRRD_CASE(NAME, NODE) \ |
4202 | case Intrinsic::loongarch_##NAME: { \ |
4203 | SDValue IOCSRRDResults = \ |
4204 | DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \ |
4205 | {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \ |
4206 | Results.push_back( \ |
4207 | DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \ |
4208 | Results.push_back(IOCSRRDResults.getValue(1)); \ |
4209 | break; \ |
4210 | } |
4211 | IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B); |
4212 | IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H); |
4213 | IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W); |
4214 | #undef IOCSRRD_CASE |
4215 | case Intrinsic::loongarch_cpucfg: { |
4216 | SDValue CPUCFGResults = |
4217 | DAG.getNode(Opcode: LoongArchISD::CPUCFG, DL, ResultTys: {GRLenVT, MVT::Other}, |
4218 | Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op2)}); |
4219 | Results.push_back( |
4220 | Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CPUCFGResults.getValue(R: 0))); |
4221 | Results.push_back(Elt: CPUCFGResults.getValue(R: 1)); |
4222 | break; |
4223 | } |
4224 | case Intrinsic::loongarch_lddir_d: { |
4225 | if (!Subtarget.is64Bit()) { |
4226 | emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgReqLA64); |
4227 | return; |
4228 | } |
4229 | break; |
4230 | } |
4231 | } |
4232 | break; |
4233 | } |
4234 | case ISD::READ_REGISTER: { |
4235 | if (Subtarget.is64Bit()) |
4236 | DAG.getContext()->emitError( |
4237 | ErrorStr: "On LA64, only 64-bit registers can be read." ); |
4238 | else |
4239 | DAG.getContext()->emitError( |
4240 | ErrorStr: "On LA32, only 32-bit registers can be read." ); |
4241 | Results.push_back(Elt: DAG.getUNDEF(VT)); |
4242 | Results.push_back(Elt: N->getOperand(Num: 0)); |
4243 | break; |
4244 | } |
4245 | case ISD::INTRINSIC_WO_CHAIN: { |
4246 | replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget); |
4247 | break; |
4248 | } |
4249 | case ISD::LROUND: { |
4250 | SDValue Op0 = N->getOperand(Num: 0); |
4251 | EVT OpVT = Op0.getValueType(); |
4252 | RTLIB::Libcall LC = |
4253 | OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32; |
4254 | MakeLibCallOptions CallOptions; |
4255 | CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT: MVT::i64, Value: true); |
4256 | SDValue Result = makeLibCall(DAG, LC, RetVT: MVT::i64, Ops: Op0, CallOptions, dl: DL).first; |
4257 | Result = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Result); |
4258 | Results.push_back(Elt: Result); |
4259 | break; |
4260 | } |
4261 | case ISD::ATOMIC_CMP_SWAP: { |
4262 | replaceCMP_XCHG_128Results(N, Results, DAG); |
4263 | break; |
4264 | } |
4265 | case ISD::TRUNCATE: { |
4266 | MVT VT = N->getSimpleValueType(ResNo: 0); |
4267 | if (getTypeAction(Context&: *DAG.getContext(), VT) != TypeWidenVector) |
4268 | return; |
4269 | |
4270 | MVT WidenVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT).getSimpleVT(); |
4271 | SDValue In = N->getOperand(Num: 0); |
4272 | EVT InVT = In.getValueType(); |
4273 | EVT InEltVT = InVT.getVectorElementType(); |
4274 | EVT EltVT = VT.getVectorElementType(); |
4275 | unsigned MinElts = VT.getVectorNumElements(); |
4276 | unsigned WidenNumElts = WidenVT.getVectorNumElements(); |
4277 | unsigned InBits = InVT.getSizeInBits(); |
4278 | |
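// Widen the input to a 128-bit vector, bitcast it to the widened result
// type, and select every Scale-th element with a shuffle. For example,
// truncating v4i32 to v4i8 (widened to v16i8) shuffles with the byte mask
// {0, 4, 8, 12, -1, ...}, picking the low byte of each i32 element.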
4279 | if ((128 % InBits) == 0 && WidenVT.is128BitVector()) { |
4280 | if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) { |
4281 | int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits(); |
4282 | SmallVector<int, 16> TruncMask(WidenNumElts, -1); |
4283 | for (unsigned I = 0; I < MinElts; ++I) |
4284 | TruncMask[I] = Scale * I; |
4285 | |
unsigned InWidenNumElts = 128 / In.getScalarValueSizeInBits();
MVT SVT = In.getSimpleValueType().getScalarType();
MVT InWidenVT = MVT::getVectorVT(VT: SVT, NumElements: InWidenNumElts);
SDValue WidenIn =
DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: InWidenVT,
N1: DAG.getUNDEF(VT: InWidenVT), N2: In,
N3: DAG.getVectorIdxConstant(Val: 0, DL));
assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
"Illegal vector type in truncation" );
WidenIn = DAG.getBitcast(VT: WidenVT, V: WidenIn);
4295 | Results.push_back( |
4296 | Elt: DAG.getVectorShuffle(VT: WidenVT, dl: DL, N1: WidenIn, N2: WidenIn, Mask: TruncMask)); |
4297 | return; |
4298 | } |
4299 | } |
4300 | |
4301 | break; |
4302 | } |
4303 | } |
4304 | } |
4305 | |
4306 | static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, |
4307 | TargetLowering::DAGCombinerInfo &DCI, |
4308 | const LoongArchSubtarget &Subtarget) { |
4309 | if (DCI.isBeforeLegalizeOps()) |
4310 | return SDValue(); |
4311 | |
4312 | SDValue FirstOperand = N->getOperand(Num: 0); |
4313 | SDValue SecondOperand = N->getOperand(Num: 1); |
4314 | unsigned FirstOperandOpc = FirstOperand.getOpcode(); |
4315 | EVT ValTy = N->getValueType(ResNo: 0); |
4316 | SDLoc DL(N); |
4317 | uint64_t lsb, msb; |
4318 | unsigned SMIdx, SMLen; |
4319 | ConstantSDNode *CN; |
4320 | SDValue NewOperand; |
4321 | MVT GRLenVT = Subtarget.getGRLenVT(); |
4322 | |
4323 | // BSTRPICK requires the 32S feature. |
4324 | if (!Subtarget.has32S()) |
4325 | return SDValue(); |
4326 | |
4327 | // Op's second operand must be a shifted mask. |
4328 | if (!(CN = dyn_cast<ConstantSDNode>(Val&: SecondOperand)) || |
4329 | !isShiftedMask_64(Value: CN->getZExtValue(), MaskIdx&: SMIdx, MaskLen&: SMLen)) |
4330 | return SDValue(); |
4331 | |
4332 | if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) { |
4333 | // Pattern match BSTRPICK. |
4334 | // $dst = and ((sra or srl) $src , lsb), (2**len - 1) |
4335 | // => BSTRPICK $dst, $src, msb, lsb |
4336 | // where msb = lsb + len - 1 |
4337 | |
4338 | // The second operand of the shift must be an immediate. |
4339 | if (!(CN = dyn_cast<ConstantSDNode>(Val: FirstOperand.getOperand(i: 1)))) |
4340 | return SDValue(); |
4341 | |
4342 | lsb = CN->getZExtValue(); |
4343 | |
4344 | // Return if the shifted mask does not start at bit 0 or the sum of its |
4345 | // length and lsb exceeds the word's size. |
4346 | if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits()) |
4347 | return SDValue(); |
4348 | |
4349 | NewOperand = FirstOperand.getOperand(i: 0); |
4350 | } else { |
4351 | // Pattern match BSTRPICK. |
// $dst = and $src, (2**len - 1), if len > 12
4353 | // => BSTRPICK $dst, $src, msb, lsb |
4354 | // where lsb = 0 and msb = len - 1 |
4355 | |
4356 | // If the mask is <= 0xfff, andi can be used instead. |
4357 | if (CN->getZExtValue() <= 0xfff) |
4358 | return SDValue(); |
4359 | |
// Return if the MSB position exceeds the value's width.
4361 | if (SMIdx + SMLen > ValTy.getSizeInBits()) |
4362 | return SDValue(); |
4363 | |
4364 | if (SMIdx > 0) { |
// Omit if the constant has more than 2 uses. This is a conservative
// decision. Whether it is a win depends on the HW microarchitecture.
// However, it should always be better for 1 and 2 uses.
4368 | if (CN->use_size() > 2) |
4369 | return SDValue(); |
4370 | // Return if the constant can be composed by a single LU12I.W. |
4371 | if ((CN->getZExtValue() & 0xfff) == 0) |
4372 | return SDValue(); |
// Return if the constant can be composed by a single ADDI with
4374 | // the zero register. |
4375 | if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0) |
4376 | return SDValue(); |
4377 | } |
4378 | |
4379 | lsb = SMIdx; |
4380 | NewOperand = FirstOperand; |
4381 | } |
4382 | |
4383 | msb = lsb + SMLen - 1; |
4384 | SDValue NR0 = DAG.getNode(Opcode: LoongArchISD::BSTRPICK, DL, VT: ValTy, N1: NewOperand, |
4385 | N2: DAG.getConstant(Val: msb, DL, VT: GRLenVT), |
4386 | N3: DAG.getConstant(Val: lsb, DL, VT: GRLenVT)); |
4387 | if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0) |
4388 | return NR0; |
4389 | // Try to optimize to |
4390 | // bstrpick $Rd, $Rs, msb, lsb |
4391 | // slli $Rd, $Rd, lsb |
4392 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: ValTy, N1: NR0, |
4393 | N2: DAG.getConstant(Val: lsb, DL, VT: GRLenVT)); |
4394 | } |
4395 | |
4396 | static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, |
4397 | TargetLowering::DAGCombinerInfo &DCI, |
4398 | const LoongArchSubtarget &Subtarget) { |
4399 | // BSTRPICK requires the 32S feature. |
4400 | if (!Subtarget.has32S()) |
4401 | return SDValue(); |
4402 | |
4403 | if (DCI.isBeforeLegalizeOps()) |
4404 | return SDValue(); |
4405 | |
4406 | // $dst = srl (and $src, Mask), Shamt |
4407 | // => |
4408 | // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt |
4409 | // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1 |
4410 | // |
4411 | |
4412 | SDValue FirstOperand = N->getOperand(Num: 0); |
4413 | ConstantSDNode *CN; |
4414 | EVT ValTy = N->getValueType(ResNo: 0); |
4415 | SDLoc DL(N); |
4416 | MVT GRLenVT = Subtarget.getGRLenVT(); |
4417 | unsigned MaskIdx, MaskLen; |
4418 | uint64_t Shamt; |
4419 | |
4420 | // The first operand must be an AND and the second operand of the AND must be |
4421 | // a shifted mask. |
4422 | if (FirstOperand.getOpcode() != ISD::AND || |
4423 | !(CN = dyn_cast<ConstantSDNode>(Val: FirstOperand.getOperand(i: 1))) || |
4424 | !isShiftedMask_64(Value: CN->getZExtValue(), MaskIdx, MaskLen)) |
4425 | return SDValue(); |
4426 | |
4427 | // The second operand (shift amount) must be an immediate. |
4428 | if (!(CN = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1)))) |
4429 | return SDValue(); |
4430 | |
4431 | Shamt = CN->getZExtValue(); |
4432 | if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1) |
4433 | return DAG.getNode(Opcode: LoongArchISD::BSTRPICK, DL, VT: ValTy, |
4434 | N1: FirstOperand->getOperand(Num: 0), |
4435 | N2: DAG.getConstant(Val: MaskIdx + MaskLen - 1, DL, VT: GRLenVT), |
4436 | N3: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT)); |
4437 | |
4438 | return SDValue(); |
4439 | } |
4440 | |
// Helper to peek through bitops/trunc/setcc to determine the size of the
// source vector. Allows performBITCASTCombine to determine what size vector
// generated a <X x i1>.
4443 | static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size, |
4444 | unsigned Depth) { |
4445 | // Limit recursion. |
4446 | if (Depth >= SelectionDAG::MaxRecursionDepth) |
4447 | return false; |
4448 | switch (Src.getOpcode()) { |
4449 | case ISD::SETCC: |
4450 | case ISD::TRUNCATE: |
4451 | return Src.getOperand(i: 0).getValueSizeInBits() == Size; |
4452 | case ISD::FREEZE: |
4453 | return checkBitcastSrcVectorSize(Src: Src.getOperand(i: 0), Size, Depth: Depth + 1); |
4454 | case ISD::AND: |
4455 | case ISD::XOR: |
4456 | case ISD::OR: |
4457 | return checkBitcastSrcVectorSize(Src: Src.getOperand(i: 0), Size, Depth: Depth + 1) && |
4458 | checkBitcastSrcVectorSize(Src: Src.getOperand(i: 1), Size, Depth: Depth + 1); |
4459 | case ISD::SELECT: |
4460 | case ISD::VSELECT: |
4461 | return Src.getOperand(i: 0).getScalarValueSizeInBits() == 1 && |
4462 | checkBitcastSrcVectorSize(Src: Src.getOperand(i: 1), Size, Depth: Depth + 1) && |
4463 | checkBitcastSrcVectorSize(Src: Src.getOperand(i: 2), Size, Depth: Depth + 1); |
4464 | case ISD::BUILD_VECTOR: |
4465 | return ISD::isBuildVectorAllZeros(N: Src.getNode()) || |
4466 | ISD::isBuildVectorAllOnes(N: Src.getNode()); |
4467 | } |
4468 | return false; |
4469 | } |
4470 | |
4471 | // Helper to push sign extension of vXi1 SETCC result through bitops. |
4472 | static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, |
4473 | SDValue Src, const SDLoc &DL) { |
4474 | switch (Src.getOpcode()) { |
4475 | case ISD::SETCC: |
4476 | case ISD::FREEZE: |
4477 | case ISD::TRUNCATE: |
4478 | case ISD::BUILD_VECTOR: |
4479 | return DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: SExtVT, Operand: Src); |
4480 | case ISD::AND: |
4481 | case ISD::XOR: |
4482 | case ISD::OR: |
4483 | return DAG.getNode( |
4484 | Opcode: Src.getOpcode(), DL, VT: SExtVT, |
4485 | N1: signExtendBitcastSrcVector(DAG, SExtVT, Src: Src.getOperand(i: 0), DL), |
4486 | N2: signExtendBitcastSrcVector(DAG, SExtVT, Src: Src.getOperand(i: 1), DL)); |
4487 | case ISD::SELECT: |
4488 | case ISD::VSELECT: |
4489 | return DAG.getSelect( |
4490 | DL, VT: SExtVT, Cond: Src.getOperand(i: 0), |
4491 | LHS: signExtendBitcastSrcVector(DAG, SExtVT, Src: Src.getOperand(i: 1), DL), |
4492 | RHS: signExtendBitcastSrcVector(DAG, SExtVT, Src: Src.getOperand(i: 2), DL)); |
4493 | } |
4494 | llvm_unreachable("Unexpected node type for vXi1 sign extension" ); |
4495 | } |
4496 | |
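// Combine a bitcast of vXi1 into an integer mask value. Where possible the
// comparison feeding the bitcast is matched directly to a
// [X]VMSK{EQZ,GEZ,LTZ,NEZ} node; otherwise the vXi1 value is sign-extended
// to a legal vector type and [X]VMSKLTZ collects the sign bits. For example,
// a v8i1 produced by comparing two v8i32 vectors is sign-extended back to
// v8i32 so a single XVMSKLTZ can produce the 8-bit mask.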
4497 | static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG, |
4498 | TargetLowering::DAGCombinerInfo &DCI, |
4499 | const LoongArchSubtarget &Subtarget) { |
4500 | SDLoc DL(N); |
4501 | EVT VT = N->getValueType(ResNo: 0); |
4502 | SDValue Src = N->getOperand(Num: 0); |
4503 | EVT SrcVT = Src.getValueType(); |
4504 | |
4505 | if (!DCI.isBeforeLegalizeOps()) |
4506 | return SDValue(); |
4507 | |
4508 | if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1) |
4509 | return SDValue(); |
4510 | |
4511 | unsigned Opc = ISD::DELETED_NODE; |
// Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible.
4513 | if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) { |
4514 | bool UseLASX; |
4515 | EVT CmpVT = Src.getOperand(i: 0).getValueType(); |
4516 | EVT EltVT = CmpVT.getVectorElementType(); |
4517 | |
4518 | if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() <= 128) |
4519 | UseLASX = false; |
4520 | else if (Subtarget.has32S() && Subtarget.hasExtLASX() && |
4521 | CmpVT.getSizeInBits() <= 256) |
4522 | UseLASX = true; |
4523 | else |
4524 | return SDValue(); |
4525 | |
4526 | SDValue SrcN1 = Src.getOperand(i: 1); |
4527 | switch (cast<CondCodeSDNode>(Val: Src.getOperand(i: 2))->get()) { |
4528 | default: |
4529 | break; |
4530 | case ISD::SETEQ: |
4531 | // x == 0 => not (vmsknez.b x) |
4532 | if (ISD::isBuildVectorAllZeros(N: SrcN1.getNode()) && EltVT == MVT::i8) |
4533 | Opc = UseLASX ? LoongArchISD::XVMSKEQZ : LoongArchISD::VMSKEQZ; |
4534 | break; |
4535 | case ISD::SETGT: |
4536 | // x > -1 => vmskgez.b x |
4537 | if (ISD::isBuildVectorAllOnes(N: SrcN1.getNode()) && EltVT == MVT::i8) |
4538 | Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ; |
4539 | break; |
4540 | case ISD::SETGE: |
4541 | // x >= 0 => vmskgez.b x |
4542 | if (ISD::isBuildVectorAllZeros(N: SrcN1.getNode()) && EltVT == MVT::i8) |
4543 | Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ; |
4544 | break; |
4545 | case ISD::SETLT: |
4546 | // x < 0 => vmskltz.{b,h,w,d} x |
4547 | if (ISD::isBuildVectorAllZeros(N: SrcN1.getNode()) && |
4548 | (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 || |
4549 | EltVT == MVT::i64)) |
4550 | Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ; |
4551 | break; |
4552 | case ISD::SETLE: |
4553 | // x <= -1 => vmskltz.{b,h,w,d} x |
4554 | if (ISD::isBuildVectorAllOnes(N: SrcN1.getNode()) && |
4555 | (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 || |
4556 | EltVT == MVT::i64)) |
4557 | Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ; |
4558 | break; |
4559 | case ISD::SETNE: |
4560 | // x != 0 => vmsknez.b x |
4561 | if (ISD::isBuildVectorAllZeros(N: SrcN1.getNode()) && EltVT == MVT::i8) |
4562 | Opc = UseLASX ? LoongArchISD::XVMSKNEZ : LoongArchISD::VMSKNEZ; |
4563 | break; |
4564 | } |
4565 | } |
4566 | |
4567 | // Generate vXi1 using [X]VMSKLTZ |
4568 | if (Opc == ISD::DELETED_NODE) { |
4569 | MVT SExtVT; |
4570 | bool UseLASX = false; |
4571 | bool PropagateSExt = false; |
4572 | switch (SrcVT.getSimpleVT().SimpleTy) { |
4573 | default: |
4574 | return SDValue(); |
4575 | case MVT::v2i1: |
4576 | SExtVT = MVT::v2i64; |
4577 | break; |
4578 | case MVT::v4i1: |
4579 | SExtVT = MVT::v4i32; |
4580 | if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, Size: 256, Depth: 0)) { |
4581 | SExtVT = MVT::v4i64; |
4582 | UseLASX = true; |
4583 | PropagateSExt = true; |
4584 | } |
4585 | break; |
4586 | case MVT::v8i1: |
4587 | SExtVT = MVT::v8i16; |
4588 | if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, Size: 256, Depth: 0)) { |
4589 | SExtVT = MVT::v8i32; |
4590 | UseLASX = true; |
4591 | PropagateSExt = true; |
4592 | } |
4593 | break; |
4594 | case MVT::v16i1: |
4595 | SExtVT = MVT::v16i8; |
4596 | if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, Size: 256, Depth: 0)) { |
4597 | SExtVT = MVT::v16i16; |
4598 | UseLASX = true; |
4599 | PropagateSExt = true; |
4600 | } |
4601 | break; |
4602 | case MVT::v32i1: |
4603 | SExtVT = MVT::v32i8; |
4604 | UseLASX = true; |
4605 | break; |
4606 | }; |
if (UseLASX && !(Subtarget.has32S() && Subtarget.hasExtLASX()))
4608 | return SDValue(); |
4609 | Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL) |
4610 | : DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: SExtVT, Operand: Src); |
4611 | Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ; |
4612 | } else { |
4613 | Src = Src.getOperand(i: 0); |
4614 | } |
4615 | |
4616 | SDValue V = DAG.getNode(Opcode: Opc, DL, VT: MVT::i64, Operand: Src); |
4617 | EVT T = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: SrcVT.getVectorNumElements()); |
4618 | V = DAG.getZExtOrTrunc(Op: V, DL, VT: T); |
4619 | return DAG.getBitcast(VT, V); |
4620 | } |
4621 | |
4622 | static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, |
4623 | TargetLowering::DAGCombinerInfo &DCI, |
4624 | const LoongArchSubtarget &Subtarget) { |
4625 | MVT GRLenVT = Subtarget.getGRLenVT(); |
4626 | EVT ValTy = N->getValueType(ResNo: 0); |
4627 | SDValue N0 = N->getOperand(Num: 0), N1 = N->getOperand(Num: 1); |
4628 | ConstantSDNode *CN0, *CN1; |
4629 | SDLoc DL(N); |
4630 | unsigned ValBits = ValTy.getSizeInBits(); |
4631 | unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1; |
4632 | unsigned Shamt; |
4633 | bool SwapAndRetried = false; |
4634 | |
4635 | // BSTRPICK requires the 32S feature. |
4636 | if (!Subtarget.has32S()) |
4637 | return SDValue(); |
4638 | |
4639 | if (DCI.isBeforeLegalizeOps()) |
4640 | return SDValue(); |
4641 | |
4642 | if (ValBits != 32 && ValBits != 64) |
4643 | return SDValue(); |
4644 | |
4645 | Retry: |
4646 | // 1st pattern to match BSTRINS: |
4647 | // R = or (and X, mask0), (and (shl Y, lsb), mask1) |
4648 | // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1 |
4649 | // => |
4650 | // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1) |
4651 | if (N0.getOpcode() == ISD::AND && |
4652 | (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) && |
4653 | isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) && |
4654 | N1.getOpcode() == ISD::AND && N1.getOperand(i: 0).getOpcode() == ISD::SHL && |
4655 | (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) && |
4656 | isShiftedMask_64(Value: CN1->getZExtValue(), MaskIdx&: MaskIdx1, MaskLen&: MaskLen1) && |
4657 | MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 && |
4658 | (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) && |
4659 | (Shamt = CN1->getZExtValue()) == MaskIdx0 && |
4660 | (MaskIdx0 + MaskLen0 <= ValBits)) { |
4661 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n" ); |
4662 | return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0), |
4663 | N2: N1.getOperand(i: 0).getOperand(i: 0), |
4664 | N3: DAG.getConstant(Val: (MaskIdx0 + MaskLen0 - 1), DL, VT: GRLenVT), |
4665 | N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT)); |
4666 | } |
4667 | |
4668 | // 2nd pattern to match BSTRINS: |
4669 | // R = or (and X, mask0), (shl (and Y, mask1), lsb) |
4670 | // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb) |
4671 | // => |
4672 | // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1) |
4673 | if (N0.getOpcode() == ISD::AND && |
4674 | (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) && |
4675 | isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) && |
4676 | N1.getOpcode() == ISD::SHL && N1.getOperand(i: 0).getOpcode() == ISD::AND && |
4677 | (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) && |
4678 | (Shamt = CN1->getZExtValue()) == MaskIdx0 && |
4679 | (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) && |
4680 | isShiftedMask_64(Value: CN1->getZExtValue(), MaskIdx&: MaskIdx1, MaskLen&: MaskLen1) && |
4681 | MaskLen0 == MaskLen1 && MaskIdx1 == 0 && |
4682 | (MaskIdx0 + MaskLen0 <= ValBits)) { |
4683 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n" ); |
4684 | return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0), |
4685 | N2: N1.getOperand(i: 0).getOperand(i: 0), |
4686 | N3: DAG.getConstant(Val: (MaskIdx0 + MaskLen0 - 1), DL, VT: GRLenVT), |
4687 | N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT)); |
4688 | } |
4689 | |
4690 | // 3rd pattern to match BSTRINS: |
4691 | // R = or (and X, mask0), (and Y, mask1) |
4692 | // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0 |
4693 | // => |
4694 | // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb |
4695 | // where msb = lsb + size - 1 |
4696 | if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND && |
4697 | (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) && |
4698 | isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) && |
4699 | (MaskIdx0 + MaskLen0 <= 64) && |
4700 | (CN1 = dyn_cast<ConstantSDNode>(Val: N1->getOperand(Num: 1))) && |
4701 | (CN1->getSExtValue() & CN0->getSExtValue()) == 0) { |
4702 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n" ); |
4703 | return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0), |
4704 | N2: DAG.getNode(Opcode: ISD::SRL, DL, VT: N1->getValueType(ResNo: 0), N1, |
4705 | N2: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT)), |
4706 | N3: DAG.getConstant(Val: ValBits == 32 |
4707 | ? (MaskIdx0 + (MaskLen0 & 31) - 1) |
4708 | : (MaskIdx0 + MaskLen0 - 1), |
4709 | DL, VT: GRLenVT), |
4710 | N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT)); |
4711 | } |
4712 | |
4713 | // 4th pattern to match BSTRINS: |
4714 | // R = or (and X, mask), (shl Y, shamt) |
4715 | // where mask = (2**shamt - 1) |
4716 | // => |
4717 | // R = BSTRINS X, Y, ValBits - 1, shamt |
4718 | // where ValBits = 32 or 64 |
4719 | if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL && |
4720 | (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) && |
4721 | isShiftedMask_64(Value: CN0->getZExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) && |
4722 | MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) && |
4723 | (Shamt = CN1->getZExtValue()) == MaskLen0 && |
4724 | (MaskIdx0 + MaskLen0 <= ValBits)) { |
4725 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n" ); |
4726 | return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0), |
4727 | N2: N1.getOperand(i: 0), |
4728 | N3: DAG.getConstant(Val: (ValBits - 1), DL, VT: GRLenVT), |
4729 | N4: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT)); |
4730 | } |
4731 | |
4732 | // 5th pattern to match BSTRINS: |
4733 | // R = or (and X, mask), const |
4734 | // where ~mask = (2**size - 1) << lsb, mask & const = 0 |
4735 | // => |
4736 | // R = BSTRINS X, (const >> lsb), msb, lsb |
4737 | // where msb = lsb + size - 1 |
4738 | if (N0.getOpcode() == ISD::AND && |
4739 | (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) && |
4740 | isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) && |
4741 | (CN1 = dyn_cast<ConstantSDNode>(Val&: N1)) && |
4742 | (CN1->getSExtValue() & CN0->getSExtValue()) == 0) { |
4743 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n" ); |
4744 | return DAG.getNode( |
4745 | Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0), |
4746 | N2: DAG.getSignedConstant(Val: CN1->getSExtValue() >> MaskIdx0, DL, VT: ValTy), |
4747 | N3: DAG.getConstant(Val: ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1) |
4748 | : (MaskIdx0 + MaskLen0 - 1), |
4749 | DL, VT: GRLenVT), |
4750 | N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT)); |
4751 | } |
4752 | |
4753 | // 6th pattern. |
4754 | // a = b | ((c & mask) << shamt), where all positions in b to be overwritten |
4755 | // by the incoming bits are known to be zero. |
4756 | // => |
4757 | // a = BSTRINS b, c, shamt + MaskLen - 1, shamt |
4758 | // |
// Note that the 1st pattern is a special case of the 6th, i.e. the 6th
4760 | // pattern is more common than the 1st. So we put the 1st before the 6th in |
4761 | // order to match as many nodes as possible. |
4762 | ConstantSDNode *CNMask, *CNShamt; |
4763 | unsigned MaskIdx, MaskLen; |
4764 | if (N1.getOpcode() == ISD::SHL && N1.getOperand(i: 0).getOpcode() == ISD::AND && |
4765 | (CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) && |
4766 | isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen) && |
4767 | MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) && |
4768 | CNShamt->getZExtValue() + MaskLen <= ValBits) { |
4769 | Shamt = CNShamt->getZExtValue(); |
4770 | APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt); |
4771 | if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) { |
4772 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n" ); |
4773 | return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0, |
4774 | N2: N1.getOperand(i: 0).getOperand(i: 0), |
4775 | N3: DAG.getConstant(Val: Shamt + MaskLen - 1, DL, VT: GRLenVT), |
4776 | N4: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT)); |
4777 | } |
4778 | } |
4779 | |
4780 | // 7th pattern. |
4781 | // a = b | ((c << shamt) & shifted_mask), where all positions in b to be |
4782 | // overwritten by the incoming bits are known to be zero. |
4783 | // => |
4784 | // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx |
4785 | // |
4786 | // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd |
4787 | // before the 7th in order to match as many nodes as possible. |
4788 | if (N1.getOpcode() == ISD::AND && |
4789 | (CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) && |
4790 | isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen) && |
4791 | N1.getOperand(i: 0).getOpcode() == ISD::SHL && |
4792 | (CNShamt = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) && |
4793 | CNShamt->getZExtValue() == MaskIdx) { |
4794 | APInt ShMask(ValBits, CNMask->getZExtValue()); |
4795 | if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) { |
4796 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n" ); |
4797 | return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0, |
4798 | N2: N1.getOperand(i: 0).getOperand(i: 0), |
4799 | N3: DAG.getConstant(Val: MaskIdx + MaskLen - 1, DL, VT: GRLenVT), |
4800 | N4: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT)); |
4801 | } |
4802 | } |
4803 | |
4804 | // (or a, b) and (or b, a) are equivalent, so swap the operands and retry. |
4805 | if (!SwapAndRetried) { |
4806 | std::swap(a&: N0, b&: N1); |
4807 | SwapAndRetried = true; |
4808 | goto Retry; |
4809 | } |
4810 | |
4811 | SwapAndRetried = false; |
4812 | Retry2: |
4813 | // 8th pattern. |
4814 | // a = b | (c & shifted_mask), where all positions in b to be overwritten by |
4815 | // the incoming bits are known to be zero. |
4816 | // => |
4817 | // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx |
4818 | // |
4819 | // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So |
// we put it here in order to match as many nodes as possible or generate
// fewer instructions.
4822 | if (N1.getOpcode() == ISD::AND && |
4823 | (CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) && |
4824 | isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen)) { |
4825 | APInt ShMask(ValBits, CNMask->getZExtValue()); |
4826 | if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) { |
4827 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n" ); |
4828 | return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0, |
4829 | N2: DAG.getNode(Opcode: ISD::SRL, DL, VT: N1->getValueType(ResNo: 0), |
4830 | N1: N1->getOperand(Num: 0), |
4831 | N2: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT)), |
4832 | N3: DAG.getConstant(Val: MaskIdx + MaskLen - 1, DL, VT: GRLenVT), |
4833 | N4: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT)); |
4834 | } |
4835 | } |
4836 | // Swap N0/N1 and retry. |
4837 | if (!SwapAndRetried) { |
4838 | std::swap(a&: N0, b&: N1); |
4839 | SwapAndRetried = true; |
4840 | goto Retry2; |
4841 | } |
4842 | |
4843 | return SDValue(); |
4844 | } |
4845 | |
4846 | static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) { |
4847 | ExtType = ISD::NON_EXTLOAD; |
4848 | |
4849 | switch (V.getNode()->getOpcode()) { |
4850 | case ISD::LOAD: { |
4851 | LoadSDNode *LoadNode = cast<LoadSDNode>(Val: V.getNode()); |
4852 | if ((LoadNode->getMemoryVT() == MVT::i8) || |
4853 | (LoadNode->getMemoryVT() == MVT::i16)) { |
4854 | ExtType = LoadNode->getExtensionType(); |
4855 | return true; |
4856 | } |
4857 | return false; |
4858 | } |
4859 | case ISD::AssertSext: { |
4860 | VTSDNode *TypeNode = cast<VTSDNode>(Val: V.getNode()->getOperand(Num: 1)); |
4861 | if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) { |
4862 | ExtType = ISD::SEXTLOAD; |
4863 | return true; |
4864 | } |
4865 | return false; |
4866 | } |
4867 | case ISD::AssertZext: { |
4868 | VTSDNode *TypeNode = cast<VTSDNode>(Val: V.getNode()->getOperand(Num: 1)); |
4869 | if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) { |
4870 | ExtType = ISD::ZEXTLOAD; |
4871 | return true; |
4872 | } |
4873 | return false; |
4874 | } |
4875 | default: |
4876 | return false; |
4877 | } |
4878 | |
4879 | return false; |
4880 | } |
4881 | |
4882 | // Eliminate redundant truncation and zero-extension nodes. |
4883 | // * Case 1: |
4884 | // +------------+ +------------+ +------------+ |
4885 | // | Input1 | | Input2 | | CC | |
4886 | // +------------+ +------------+ +------------+ |
4887 | // | | | |
4888 | // V V +----+ |
4889 | // +------------+ +------------+ | |
4890 | // | TRUNCATE | | TRUNCATE | | |
4891 | // +------------+ +------------+ | |
4892 | // | | | |
4893 | // V V | |
4894 | // +------------+ +------------+ | |
4895 | // | ZERO_EXT | | ZERO_EXT | | |
4896 | // +------------+ +------------+ | |
4897 | // | | | |
4898 | // | +-------------+ | |
4899 | // V V | | |
4900 | // +----------------+ | | |
4901 | // | AND | | | |
4902 | // +----------------+ | | |
4903 | // | | | |
4904 | // +---------------+ | | |
4905 | // | | | |
4906 | // V V V |
4907 | // +-------------+ |
4908 | // | CMP | |
4909 | // +-------------+ |
4910 | // * Case 2: |
4911 | // +------------+ +------------+ +-------------+ +------------+ +------------+ |
4912 | // | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC | |
4913 | // +------------+ +------------+ +-------------+ +------------+ +------------+ |
4914 | // | | | | | |
4915 | // V | | | | |
4916 | // +------------+ | | | | |
4917 | // | XOR |<---------------------+ | | |
4918 | // +------------+ | | | |
4919 | // | | | | |
4920 | // V V +---------------+ | |
4921 | // +------------+ +------------+ | | |
4922 | // | TRUNCATE | | TRUNCATE | | +-------------------------+ |
4923 | // +------------+ +------------+ | | |
4924 | // | | | | |
4925 | // V V | | |
4926 | // +------------+ +------------+ | | |
4927 | // | ZERO_EXT | | ZERO_EXT | | | |
4928 | // +------------+ +------------+ | | |
4929 | // | | | | |
4930 | // V V | | |
4931 | // +----------------+ | | |
4932 | // | AND | | | |
4933 | // +----------------+ | | |
4934 | // | | | |
4935 | // +---------------+ | | |
4936 | // | | | |
4937 | // V V V |
4938 | // +-------------+ |
4939 | // | CMP | |
4940 | // +-------------+ |
4941 | static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG, |
4942 | TargetLowering::DAGCombinerInfo &DCI, |
4943 | const LoongArchSubtarget &Subtarget) { |
4944 | ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get(); |
4945 | |
4946 | SDNode *AndNode = N->getOperand(Num: 0).getNode(); |
4947 | if (AndNode->getOpcode() != ISD::AND) |
4948 | return SDValue(); |
4949 | |
4950 | SDValue AndInputValue2 = AndNode->getOperand(Num: 1); |
4951 | if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND) |
4952 | return SDValue(); |
4953 | |
4954 | SDValue CmpInputValue = N->getOperand(Num: 1); |
4955 | SDValue AndInputValue1 = AndNode->getOperand(Num: 0); |
4956 | if (AndInputValue1.getOpcode() == ISD::XOR) { |
4957 | if (CC != ISD::SETEQ && CC != ISD::SETNE) |
4958 | return SDValue(); |
4959 | ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val: AndInputValue1.getOperand(i: 1)); |
4960 | if (!CN || CN->getSExtValue() != -1) |
4961 | return SDValue(); |
4962 | CN = dyn_cast<ConstantSDNode>(Val&: CmpInputValue); |
4963 | if (!CN || CN->getSExtValue() != 0) |
4964 | return SDValue(); |
4965 | AndInputValue1 = AndInputValue1.getOperand(i: 0); |
4966 | if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND) |
4967 | return SDValue(); |
4968 | } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) { |
4969 | if (AndInputValue2 != CmpInputValue) |
4970 | return SDValue(); |
4971 | } else { |
4972 | return SDValue(); |
4973 | } |
4974 | |
4975 | SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(Num: 0); |
4976 | if (TruncValue1.getOpcode() != ISD::TRUNCATE) |
4977 | return SDValue(); |
4978 | |
4979 | SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(Num: 0); |
4980 | if (TruncValue2.getOpcode() != ISD::TRUNCATE) |
4981 | return SDValue(); |
4982 | |
4983 | SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(Num: 0); |
4984 | SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(Num: 0); |
4985 | ISD::LoadExtType ExtType1; |
4986 | ISD::LoadExtType ExtType2; |
4987 | |
4988 | if (!checkValueWidth(V: TruncInputValue1, ExtType&: ExtType1) || |
4989 | !checkValueWidth(V: TruncInputValue2, ExtType&: ExtType2)) |
4990 | return SDValue(); |
4991 | |
4992 | if (TruncInputValue1->getValueType(ResNo: 0) != TruncInputValue2->getValueType(ResNo: 0) || |
4993 | AndNode->getValueType(ResNo: 0) != TruncInputValue1->getValueType(ResNo: 0)) |
4994 | return SDValue(); |
4995 | |
4996 | if ((ExtType2 != ISD::ZEXTLOAD) && |
4997 | ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD))) |
4998 | return SDValue(); |
4999 | |
// These truncation and zero-extension nodes are not necessary; remove them.
5001 | SDValue NewAnd = DAG.getNode(Opcode: ISD::AND, DL: SDLoc(N), VT: AndNode->getValueType(ResNo: 0), |
5002 | N1: TruncInputValue1, N2: TruncInputValue2); |
5003 | SDValue NewSetCC = |
5004 | DAG.getSetCC(DL: SDLoc(N), VT: N->getValueType(ResNo: 0), LHS: NewAnd, RHS: TruncInputValue2, Cond: CC); |
5005 | DAG.ReplaceAllUsesWith(From: N, To: NewSetCC.getNode()); |
5006 | return SDValue(N, 0); |
5007 | } |
5008 | |
5009 | // Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b. |
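// This holds because byte-reversing a 32-bit word and then bit-reversing it
// leaves the byte order unchanged while reversing the bits within each byte,
// which is exactly what bitrev.4b computes in a single operation.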
5010 | static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, |
5011 | TargetLowering::DAGCombinerInfo &DCI, |
5012 | const LoongArchSubtarget &Subtarget) { |
5013 | if (DCI.isBeforeLegalizeOps()) |
5014 | return SDValue(); |
5015 | |
5016 | SDValue Src = N->getOperand(Num: 0); |
5017 | if (Src.getOpcode() != LoongArchISD::REVB_2W) |
5018 | return SDValue(); |
5019 | |
5020 | return DAG.getNode(Opcode: LoongArchISD::BITREV_4B, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), |
5021 | Operand: Src.getOperand(i: 0)); |
5022 | } |
5023 | |
5024 | template <unsigned N> |
5025 | static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, |
5026 | SelectionDAG &DAG, |
5027 | const LoongArchSubtarget &Subtarget, |
5028 | bool IsSigned = false) { |
5029 | SDLoc DL(Node); |
5030 | auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: ImmOp)); |
5031 | // Check the ImmArg. |
5032 | if ((IsSigned && !isInt<N>(CImm->getSExtValue())) || |
5033 | (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) { |
5034 | DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) + |
5035 | ": argument out of range." ); |
5036 | return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: Subtarget.getGRLenVT()); |
5037 | } |
5038 | return DAG.getConstant(Val: CImm->getZExtValue(), DL, VT: Subtarget.getGRLenVT()); |
5039 | } |
5040 | |
5041 | template <unsigned N> |
5042 | static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, |
5043 | SelectionDAG &DAG, bool IsSigned = false) { |
5044 | SDLoc DL(Node); |
5045 | EVT ResTy = Node->getValueType(ResNo: 0); |
5046 | auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: ImmOp)); |
5047 | |
5048 | // Check the ImmArg. |
5049 | if ((IsSigned && !isInt<N>(CImm->getSExtValue())) || |
5050 | (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) { |
5051 | DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) + |
5052 | ": argument out of range." ); |
5053 | return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy); |
5054 | } |
5055 | return DAG.getConstant( |
5056 | Val: APInt(ResTy.getScalarType().getSizeInBits(), |
5057 | IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned), |
5058 | DL, VT: ResTy); |
5059 | } |
5060 | |
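// Mask each element of the shift-amount vector to the element width of the
// result type, i.e. compute (vec & (EltBits - 1)), mirroring the modulo
// semantics of the vsll/vsrl/vsra family of instructions.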
5061 | static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) { |
5062 | SDLoc DL(Node); |
5063 | EVT ResTy = Node->getValueType(ResNo: 0); |
5064 | SDValue Vec = Node->getOperand(Num: 2); |
5065 | SDValue Mask = DAG.getConstant(Val: Vec.getScalarValueSizeInBits() - 1, DL, VT: ResTy); |
5066 | return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Vec, N2: Mask); |
5067 | } |
5068 | |
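// Lower a vbitclr-style intrinsic to generic nodes:
//   x & ~(1 << (n % EltBits))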
5069 | static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) { |
5070 | SDLoc DL(Node); |
5071 | EVT ResTy = Node->getValueType(ResNo: 0); |
5072 | SDValue One = DAG.getConstant(Val: 1, DL, VT: ResTy); |
5073 | SDValue Bit = |
5074 | DAG.getNode(Opcode: ISD::SHL, DL, VT: ResTy, N1: One, N2: truncateVecElts(Node, DAG)); |
5075 | |
5076 | return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Node->getOperand(Num: 1), |
5077 | N2: DAG.getNOT(DL, Val: Bit, VT: ResTy)); |
5078 | } |
5079 | |
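// Lower a vbitclri-style intrinsic (immediate bit index) to generic nodes:
//   x & ~(1 << imm)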
5080 | template <unsigned N> |
5081 | static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) { |
5082 | SDLoc DL(Node); |
5083 | EVT ResTy = Node->getValueType(ResNo: 0); |
5084 | auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2)); |
5085 | // Check the unsigned ImmArg. |
5086 | if (!isUInt<N>(CImm->getZExtValue())) { |
5087 | DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) + |
5088 | ": argument out of range." ); |
5089 | return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy); |
5090 | } |
5091 | |
5092 | APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); |
5093 | SDValue Mask = DAG.getConstant(Val: ~BitImm, DL, VT: ResTy); |
5094 | |
5095 | return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Node->getOperand(Num: 1), N2: Mask); |
5096 | } |
5097 | |
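// Lower a vbitseti-style intrinsic (immediate bit index) to generic nodes:
//   x | (1 << imm)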
5098 | template <unsigned N> |
5099 | static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) { |
5100 | SDLoc DL(Node); |
5101 | EVT ResTy = Node->getValueType(ResNo: 0); |
5102 | auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2)); |
5103 | // Check the unsigned ImmArg. |
5104 | if (!isUInt<N>(CImm->getZExtValue())) { |
5105 | DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) + |
5106 | ": argument out of range." ); |
5107 | return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy); |
5108 | } |
5109 | |
5110 | APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); |
5111 | SDValue BitImm = DAG.getConstant(Val: Imm, DL, VT: ResTy); |
5112 | return DAG.getNode(Opcode: ISD::OR, DL, VT: ResTy, N1: Node->getOperand(Num: 1), N2: BitImm); |
5113 | } |
5114 | |
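// Lower a vbitrevi-style intrinsic (immediate bit index) to generic nodes:
//   x ^ (1 << imm)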
5115 | template <unsigned N> |
5116 | static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) { |
5117 | SDLoc DL(Node); |
5118 | EVT ResTy = Node->getValueType(ResNo: 0); |
5119 | auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2)); |
5120 | // Check the unsigned ImmArg. |
5121 | if (!isUInt<N>(CImm->getZExtValue())) { |
5122 | DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) + |
5123 | ": argument out of range." ); |
5124 | return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy); |
5125 | } |
5126 | |
5127 | APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); |
5128 | SDValue BitImm = DAG.getConstant(Val: Imm, DL, VT: ResTy); |
5129 | return DAG.getNode(Opcode: ISD::XOR, DL, VT: ResTy, N1: Node->getOperand(Num: 1), N2: BitImm); |
5130 | } |
5131 | |
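// Replace element-wise LSX/LASX intrinsics with the equivalent generic ISD
// nodes so that later DAG combines and legalization can optimize them
// further.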
5132 | static SDValue |
5133 | performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, |
5134 | TargetLowering::DAGCombinerInfo &DCI, |
5135 | const LoongArchSubtarget &Subtarget) { |
5136 | SDLoc DL(N); |
5137 | switch (N->getConstantOperandVal(Num: 0)) { |
5138 | default: |
5139 | break; |
5140 | case Intrinsic::loongarch_lsx_vadd_b: |
5141 | case Intrinsic::loongarch_lsx_vadd_h: |
5142 | case Intrinsic::loongarch_lsx_vadd_w: |
5143 | case Intrinsic::loongarch_lsx_vadd_d: |
5144 | case Intrinsic::loongarch_lasx_xvadd_b: |
5145 | case Intrinsic::loongarch_lasx_xvadd_h: |
5146 | case Intrinsic::loongarch_lasx_xvadd_w: |
5147 | case Intrinsic::loongarch_lasx_xvadd_d: |
5148 | return DAG.getNode(Opcode: ISD::ADD, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5149 | N2: N->getOperand(Num: 2)); |
5150 | case Intrinsic::loongarch_lsx_vaddi_bu: |
5151 | case Intrinsic::loongarch_lsx_vaddi_hu: |
5152 | case Intrinsic::loongarch_lsx_vaddi_wu: |
5153 | case Intrinsic::loongarch_lsx_vaddi_du: |
5154 | case Intrinsic::loongarch_lasx_xvaddi_bu: |
5155 | case Intrinsic::loongarch_lasx_xvaddi_hu: |
5156 | case Intrinsic::loongarch_lasx_xvaddi_wu: |
5157 | case Intrinsic::loongarch_lasx_xvaddi_du: |
5158 | return DAG.getNode(Opcode: ISD::ADD, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5159 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
5160 | case Intrinsic::loongarch_lsx_vsub_b: |
5161 | case Intrinsic::loongarch_lsx_vsub_h: |
5162 | case Intrinsic::loongarch_lsx_vsub_w: |
5163 | case Intrinsic::loongarch_lsx_vsub_d: |
5164 | case Intrinsic::loongarch_lasx_xvsub_b: |
5165 | case Intrinsic::loongarch_lasx_xvsub_h: |
5166 | case Intrinsic::loongarch_lasx_xvsub_w: |
5167 | case Intrinsic::loongarch_lasx_xvsub_d: |
5168 | return DAG.getNode(Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5169 | N2: N->getOperand(Num: 2)); |
5170 | case Intrinsic::loongarch_lsx_vsubi_bu: |
5171 | case Intrinsic::loongarch_lsx_vsubi_hu: |
5172 | case Intrinsic::loongarch_lsx_vsubi_wu: |
5173 | case Intrinsic::loongarch_lsx_vsubi_du: |
5174 | case Intrinsic::loongarch_lasx_xvsubi_bu: |
5175 | case Intrinsic::loongarch_lasx_xvsubi_hu: |
5176 | case Intrinsic::loongarch_lasx_xvsubi_wu: |
5177 | case Intrinsic::loongarch_lasx_xvsubi_du: |
5178 | return DAG.getNode(Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5179 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
5180 | case Intrinsic::loongarch_lsx_vneg_b: |
5181 | case Intrinsic::loongarch_lsx_vneg_h: |
5182 | case Intrinsic::loongarch_lsx_vneg_w: |
5183 | case Intrinsic::loongarch_lsx_vneg_d: |
5184 | case Intrinsic::loongarch_lasx_xvneg_b: |
5185 | case Intrinsic::loongarch_lasx_xvneg_h: |
5186 | case Intrinsic::loongarch_lasx_xvneg_w: |
5187 | case Intrinsic::loongarch_lasx_xvneg_d: |
5188 | return DAG.getNode( |
5189 | Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: 0), |
5190 | N1: DAG.getConstant( |
5191 | Val: APInt(N->getValueType(ResNo: 0).getScalarType().getSizeInBits(), 0, |
5192 | /*isSigned=*/true), |
5193 | DL: SDLoc(N), VT: N->getValueType(ResNo: 0)), |
5194 | N2: N->getOperand(Num: 1)); |
5195 | case Intrinsic::loongarch_lsx_vmax_b: |
5196 | case Intrinsic::loongarch_lsx_vmax_h: |
5197 | case Intrinsic::loongarch_lsx_vmax_w: |
5198 | case Intrinsic::loongarch_lsx_vmax_d: |
5199 | case Intrinsic::loongarch_lasx_xvmax_b: |
5200 | case Intrinsic::loongarch_lasx_xvmax_h: |
5201 | case Intrinsic::loongarch_lasx_xvmax_w: |
5202 | case Intrinsic::loongarch_lasx_xvmax_d: |
5203 | return DAG.getNode(Opcode: ISD::SMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5204 | N2: N->getOperand(Num: 2)); |
5205 | case Intrinsic::loongarch_lsx_vmax_bu: |
5206 | case Intrinsic::loongarch_lsx_vmax_hu: |
5207 | case Intrinsic::loongarch_lsx_vmax_wu: |
5208 | case Intrinsic::loongarch_lsx_vmax_du: |
5209 | case Intrinsic::loongarch_lasx_xvmax_bu: |
5210 | case Intrinsic::loongarch_lasx_xvmax_hu: |
5211 | case Intrinsic::loongarch_lasx_xvmax_wu: |
5212 | case Intrinsic::loongarch_lasx_xvmax_du: |
5213 | return DAG.getNode(Opcode: ISD::UMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5214 | N2: N->getOperand(Num: 2)); |
5215 | case Intrinsic::loongarch_lsx_vmaxi_b: |
5216 | case Intrinsic::loongarch_lsx_vmaxi_h: |
5217 | case Intrinsic::loongarch_lsx_vmaxi_w: |
5218 | case Intrinsic::loongarch_lsx_vmaxi_d: |
5219 | case Intrinsic::loongarch_lasx_xvmaxi_b: |
5220 | case Intrinsic::loongarch_lasx_xvmaxi_h: |
5221 | case Intrinsic::loongarch_lasx_xvmaxi_w: |
5222 | case Intrinsic::loongarch_lasx_xvmaxi_d: |
5223 | return DAG.getNode(Opcode: ISD::SMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5224 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG, /*IsSigned=*/true)); |
5225 | case Intrinsic::loongarch_lsx_vmaxi_bu: |
5226 | case Intrinsic::loongarch_lsx_vmaxi_hu: |
5227 | case Intrinsic::loongarch_lsx_vmaxi_wu: |
5228 | case Intrinsic::loongarch_lsx_vmaxi_du: |
5229 | case Intrinsic::loongarch_lasx_xvmaxi_bu: |
5230 | case Intrinsic::loongarch_lasx_xvmaxi_hu: |
5231 | case Intrinsic::loongarch_lasx_xvmaxi_wu: |
5232 | case Intrinsic::loongarch_lasx_xvmaxi_du: |
5233 | return DAG.getNode(Opcode: ISD::UMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5234 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
5235 | case Intrinsic::loongarch_lsx_vmin_b: |
5236 | case Intrinsic::loongarch_lsx_vmin_h: |
5237 | case Intrinsic::loongarch_lsx_vmin_w: |
5238 | case Intrinsic::loongarch_lsx_vmin_d: |
5239 | case Intrinsic::loongarch_lasx_xvmin_b: |
5240 | case Intrinsic::loongarch_lasx_xvmin_h: |
5241 | case Intrinsic::loongarch_lasx_xvmin_w: |
5242 | case Intrinsic::loongarch_lasx_xvmin_d: |
5243 | return DAG.getNode(Opcode: ISD::SMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5244 | N2: N->getOperand(Num: 2)); |
5245 | case Intrinsic::loongarch_lsx_vmin_bu: |
5246 | case Intrinsic::loongarch_lsx_vmin_hu: |
5247 | case Intrinsic::loongarch_lsx_vmin_wu: |
5248 | case Intrinsic::loongarch_lsx_vmin_du: |
5249 | case Intrinsic::loongarch_lasx_xvmin_bu: |
5250 | case Intrinsic::loongarch_lasx_xvmin_hu: |
5251 | case Intrinsic::loongarch_lasx_xvmin_wu: |
5252 | case Intrinsic::loongarch_lasx_xvmin_du: |
5253 | return DAG.getNode(Opcode: ISD::UMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5254 | N2: N->getOperand(Num: 2)); |
5255 | case Intrinsic::loongarch_lsx_vmini_b: |
5256 | case Intrinsic::loongarch_lsx_vmini_h: |
5257 | case Intrinsic::loongarch_lsx_vmini_w: |
5258 | case Intrinsic::loongarch_lsx_vmini_d: |
5259 | case Intrinsic::loongarch_lasx_xvmini_b: |
5260 | case Intrinsic::loongarch_lasx_xvmini_h: |
5261 | case Intrinsic::loongarch_lasx_xvmini_w: |
5262 | case Intrinsic::loongarch_lasx_xvmini_d: |
5263 | return DAG.getNode(Opcode: ISD::SMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5264 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG, /*IsSigned=*/true)); |
5265 | case Intrinsic::loongarch_lsx_vmini_bu: |
5266 | case Intrinsic::loongarch_lsx_vmini_hu: |
5267 | case Intrinsic::loongarch_lsx_vmini_wu: |
5268 | case Intrinsic::loongarch_lsx_vmini_du: |
5269 | case Intrinsic::loongarch_lasx_xvmini_bu: |
5270 | case Intrinsic::loongarch_lasx_xvmini_hu: |
5271 | case Intrinsic::loongarch_lasx_xvmini_wu: |
5272 | case Intrinsic::loongarch_lasx_xvmini_du: |
5273 | return DAG.getNode(Opcode: ISD::UMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5274 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
5275 | case Intrinsic::loongarch_lsx_vmul_b: |
5276 | case Intrinsic::loongarch_lsx_vmul_h: |
5277 | case Intrinsic::loongarch_lsx_vmul_w: |
5278 | case Intrinsic::loongarch_lsx_vmul_d: |
5279 | case Intrinsic::loongarch_lasx_xvmul_b: |
5280 | case Intrinsic::loongarch_lasx_xvmul_h: |
5281 | case Intrinsic::loongarch_lasx_xvmul_w: |
5282 | case Intrinsic::loongarch_lasx_xvmul_d: |
5283 | return DAG.getNode(Opcode: ISD::MUL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5284 | N2: N->getOperand(Num: 2)); |
5285 | case Intrinsic::loongarch_lsx_vmadd_b: |
5286 | case Intrinsic::loongarch_lsx_vmadd_h: |
5287 | case Intrinsic::loongarch_lsx_vmadd_w: |
5288 | case Intrinsic::loongarch_lsx_vmadd_d: |
5289 | case Intrinsic::loongarch_lasx_xvmadd_b: |
5290 | case Intrinsic::loongarch_lasx_xvmadd_h: |
5291 | case Intrinsic::loongarch_lasx_xvmadd_w: |
5292 | case Intrinsic::loongarch_lasx_xvmadd_d: { |
5293 | EVT ResTy = N->getValueType(ResNo: 0); |
5294 | return DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 1), |
5295 | N2: DAG.getNode(Opcode: ISD::MUL, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 2), |
5296 | N2: N->getOperand(Num: 3))); |
5297 | } |
5298 | case Intrinsic::loongarch_lsx_vmsub_b: |
5299 | case Intrinsic::loongarch_lsx_vmsub_h: |
5300 | case Intrinsic::loongarch_lsx_vmsub_w: |
5301 | case Intrinsic::loongarch_lsx_vmsub_d: |
5302 | case Intrinsic::loongarch_lasx_xvmsub_b: |
5303 | case Intrinsic::loongarch_lasx_xvmsub_h: |
5304 | case Intrinsic::loongarch_lasx_xvmsub_w: |
5305 | case Intrinsic::loongarch_lasx_xvmsub_d: { |
5306 | EVT ResTy = N->getValueType(ResNo: 0); |
5307 | return DAG.getNode(Opcode: ISD::SUB, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 1), |
5308 | N2: DAG.getNode(Opcode: ISD::MUL, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 2), |
5309 | N2: N->getOperand(Num: 3))); |
5310 | } |
5311 | case Intrinsic::loongarch_lsx_vdiv_b: |
5312 | case Intrinsic::loongarch_lsx_vdiv_h: |
5313 | case Intrinsic::loongarch_lsx_vdiv_w: |
5314 | case Intrinsic::loongarch_lsx_vdiv_d: |
5315 | case Intrinsic::loongarch_lasx_xvdiv_b: |
5316 | case Intrinsic::loongarch_lasx_xvdiv_h: |
5317 | case Intrinsic::loongarch_lasx_xvdiv_w: |
5318 | case Intrinsic::loongarch_lasx_xvdiv_d: |
5319 | return DAG.getNode(Opcode: ISD::SDIV, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5320 | N2: N->getOperand(Num: 2)); |
5321 | case Intrinsic::loongarch_lsx_vdiv_bu: |
5322 | case Intrinsic::loongarch_lsx_vdiv_hu: |
5323 | case Intrinsic::loongarch_lsx_vdiv_wu: |
5324 | case Intrinsic::loongarch_lsx_vdiv_du: |
5325 | case Intrinsic::loongarch_lasx_xvdiv_bu: |
5326 | case Intrinsic::loongarch_lasx_xvdiv_hu: |
5327 | case Intrinsic::loongarch_lasx_xvdiv_wu: |
5328 | case Intrinsic::loongarch_lasx_xvdiv_du: |
5329 | return DAG.getNode(Opcode: ISD::UDIV, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5330 | N2: N->getOperand(Num: 2)); |
5331 | case Intrinsic::loongarch_lsx_vmod_b: |
5332 | case Intrinsic::loongarch_lsx_vmod_h: |
5333 | case Intrinsic::loongarch_lsx_vmod_w: |
5334 | case Intrinsic::loongarch_lsx_vmod_d: |
5335 | case Intrinsic::loongarch_lasx_xvmod_b: |
5336 | case Intrinsic::loongarch_lasx_xvmod_h: |
5337 | case Intrinsic::loongarch_lasx_xvmod_w: |
5338 | case Intrinsic::loongarch_lasx_xvmod_d: |
5339 | return DAG.getNode(Opcode: ISD::SREM, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5340 | N2: N->getOperand(Num: 2)); |
5341 | case Intrinsic::loongarch_lsx_vmod_bu: |
5342 | case Intrinsic::loongarch_lsx_vmod_hu: |
5343 | case Intrinsic::loongarch_lsx_vmod_wu: |
5344 | case Intrinsic::loongarch_lsx_vmod_du: |
5345 | case Intrinsic::loongarch_lasx_xvmod_bu: |
5346 | case Intrinsic::loongarch_lasx_xvmod_hu: |
5347 | case Intrinsic::loongarch_lasx_xvmod_wu: |
5348 | case Intrinsic::loongarch_lasx_xvmod_du: |
5349 | return DAG.getNode(Opcode: ISD::UREM, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5350 | N2: N->getOperand(Num: 2)); |
5351 | case Intrinsic::loongarch_lsx_vand_v: |
5352 | case Intrinsic::loongarch_lasx_xvand_v: |
5353 | return DAG.getNode(Opcode: ISD::AND, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5354 | N2: N->getOperand(Num: 2)); |
5355 | case Intrinsic::loongarch_lsx_vor_v: |
5356 | case Intrinsic::loongarch_lasx_xvor_v: |
5357 | return DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5358 | N2: N->getOperand(Num: 2)); |
5359 | case Intrinsic::loongarch_lsx_vxor_v: |
5360 | case Intrinsic::loongarch_lasx_xvxor_v: |
5361 | return DAG.getNode(Opcode: ISD::XOR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5362 | N2: N->getOperand(Num: 2)); |
5363 | case Intrinsic::loongarch_lsx_vnor_v: |
5364 | case Intrinsic::loongarch_lasx_xvnor_v: { |
5365 | SDValue Res = DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5366 | N2: N->getOperand(Num: 2)); |
5367 | return DAG.getNOT(DL, Val: Res, VT: Res->getValueType(ResNo: 0)); |
5368 | } |
5369 | case Intrinsic::loongarch_lsx_vandi_b: |
5370 | case Intrinsic::loongarch_lasx_xvandi_b: |
5371 | return DAG.getNode(Opcode: ISD::AND, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5372 | N2: lowerVectorSplatImm<8>(Node: N, ImmOp: 2, DAG)); |
5373 | case Intrinsic::loongarch_lsx_vori_b: |
5374 | case Intrinsic::loongarch_lasx_xvori_b: |
5375 | return DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5376 | N2: lowerVectorSplatImm<8>(Node: N, ImmOp: 2, DAG)); |
5377 | case Intrinsic::loongarch_lsx_vxori_b: |
5378 | case Intrinsic::loongarch_lasx_xvxori_b: |
5379 | return DAG.getNode(Opcode: ISD::XOR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5380 | N2: lowerVectorSplatImm<8>(Node: N, ImmOp: 2, DAG)); |
5381 | case Intrinsic::loongarch_lsx_vsll_b: |
5382 | case Intrinsic::loongarch_lsx_vsll_h: |
5383 | case Intrinsic::loongarch_lsx_vsll_w: |
5384 | case Intrinsic::loongarch_lsx_vsll_d: |
5385 | case Intrinsic::loongarch_lasx_xvsll_b: |
5386 | case Intrinsic::loongarch_lasx_xvsll_h: |
5387 | case Intrinsic::loongarch_lasx_xvsll_w: |
5388 | case Intrinsic::loongarch_lasx_xvsll_d: |
5389 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5390 | N2: truncateVecElts(Node: N, DAG)); |
5391 | case Intrinsic::loongarch_lsx_vslli_b: |
5392 | case Intrinsic::loongarch_lasx_xvslli_b: |
5393 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5394 | N2: lowerVectorSplatImm<3>(Node: N, ImmOp: 2, DAG)); |
5395 | case Intrinsic::loongarch_lsx_vslli_h: |
5396 | case Intrinsic::loongarch_lasx_xvslli_h: |
5397 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5398 | N2: lowerVectorSplatImm<4>(Node: N, ImmOp: 2, DAG)); |
5399 | case Intrinsic::loongarch_lsx_vslli_w: |
5400 | case Intrinsic::loongarch_lasx_xvslli_w: |
5401 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5402 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
5403 | case Intrinsic::loongarch_lsx_vslli_d: |
5404 | case Intrinsic::loongarch_lasx_xvslli_d: |
5405 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5406 | N2: lowerVectorSplatImm<6>(Node: N, ImmOp: 2, DAG)); |
5407 | case Intrinsic::loongarch_lsx_vsrl_b: |
5408 | case Intrinsic::loongarch_lsx_vsrl_h: |
5409 | case Intrinsic::loongarch_lsx_vsrl_w: |
5410 | case Intrinsic::loongarch_lsx_vsrl_d: |
5411 | case Intrinsic::loongarch_lasx_xvsrl_b: |
5412 | case Intrinsic::loongarch_lasx_xvsrl_h: |
5413 | case Intrinsic::loongarch_lasx_xvsrl_w: |
5414 | case Intrinsic::loongarch_lasx_xvsrl_d: |
5415 | return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5416 | N2: truncateVecElts(Node: N, DAG)); |
5417 | case Intrinsic::loongarch_lsx_vsrli_b: |
5418 | case Intrinsic::loongarch_lasx_xvsrli_b: |
5419 | return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5420 | N2: lowerVectorSplatImm<3>(Node: N, ImmOp: 2, DAG)); |
5421 | case Intrinsic::loongarch_lsx_vsrli_h: |
5422 | case Intrinsic::loongarch_lasx_xvsrli_h: |
5423 | return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5424 | N2: lowerVectorSplatImm<4>(Node: N, ImmOp: 2, DAG)); |
5425 | case Intrinsic::loongarch_lsx_vsrli_w: |
5426 | case Intrinsic::loongarch_lasx_xvsrli_w: |
5427 | return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5428 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
5429 | case Intrinsic::loongarch_lsx_vsrli_d: |
5430 | case Intrinsic::loongarch_lasx_xvsrli_d: |
5431 | return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5432 | N2: lowerVectorSplatImm<6>(Node: N, ImmOp: 2, DAG)); |
5433 | case Intrinsic::loongarch_lsx_vsra_b: |
5434 | case Intrinsic::loongarch_lsx_vsra_h: |
5435 | case Intrinsic::loongarch_lsx_vsra_w: |
5436 | case Intrinsic::loongarch_lsx_vsra_d: |
5437 | case Intrinsic::loongarch_lasx_xvsra_b: |
5438 | case Intrinsic::loongarch_lasx_xvsra_h: |
5439 | case Intrinsic::loongarch_lasx_xvsra_w: |
5440 | case Intrinsic::loongarch_lasx_xvsra_d: |
5441 | return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5442 | N2: truncateVecElts(Node: N, DAG)); |
5443 | case Intrinsic::loongarch_lsx_vsrai_b: |
5444 | case Intrinsic::loongarch_lasx_xvsrai_b: |
5445 | return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5446 | N2: lowerVectorSplatImm<3>(Node: N, ImmOp: 2, DAG)); |
5447 | case Intrinsic::loongarch_lsx_vsrai_h: |
5448 | case Intrinsic::loongarch_lasx_xvsrai_h: |
5449 | return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5450 | N2: lowerVectorSplatImm<4>(Node: N, ImmOp: 2, DAG)); |
5451 | case Intrinsic::loongarch_lsx_vsrai_w: |
5452 | case Intrinsic::loongarch_lasx_xvsrai_w: |
5453 | return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5454 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
5455 | case Intrinsic::loongarch_lsx_vsrai_d: |
5456 | case Intrinsic::loongarch_lasx_xvsrai_d: |
5457 | return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5458 | N2: lowerVectorSplatImm<6>(Node: N, ImmOp: 2, DAG)); |
5459 | case Intrinsic::loongarch_lsx_vclz_b: |
5460 | case Intrinsic::loongarch_lsx_vclz_h: |
5461 | case Intrinsic::loongarch_lsx_vclz_w: |
5462 | case Intrinsic::loongarch_lsx_vclz_d: |
5463 | case Intrinsic::loongarch_lasx_xvclz_b: |
5464 | case Intrinsic::loongarch_lasx_xvclz_h: |
5465 | case Intrinsic::loongarch_lasx_xvclz_w: |
5466 | case Intrinsic::loongarch_lasx_xvclz_d: |
5467 | return DAG.getNode(Opcode: ISD::CTLZ, DL, VT: N->getValueType(ResNo: 0), Operand: N->getOperand(Num: 1)); |
5468 | case Intrinsic::loongarch_lsx_vpcnt_b: |
5469 | case Intrinsic::loongarch_lsx_vpcnt_h: |
5470 | case Intrinsic::loongarch_lsx_vpcnt_w: |
5471 | case Intrinsic::loongarch_lsx_vpcnt_d: |
5472 | case Intrinsic::loongarch_lasx_xvpcnt_b: |
5473 | case Intrinsic::loongarch_lasx_xvpcnt_h: |
5474 | case Intrinsic::loongarch_lasx_xvpcnt_w: |
5475 | case Intrinsic::loongarch_lasx_xvpcnt_d: |
5476 | return DAG.getNode(Opcode: ISD::CTPOP, DL, VT: N->getValueType(ResNo: 0), Operand: N->getOperand(Num: 1)); |
5477 | case Intrinsic::loongarch_lsx_vbitclr_b: |
5478 | case Intrinsic::loongarch_lsx_vbitclr_h: |
5479 | case Intrinsic::loongarch_lsx_vbitclr_w: |
5480 | case Intrinsic::loongarch_lsx_vbitclr_d: |
5481 | case Intrinsic::loongarch_lasx_xvbitclr_b: |
5482 | case Intrinsic::loongarch_lasx_xvbitclr_h: |
5483 | case Intrinsic::loongarch_lasx_xvbitclr_w: |
5484 | case Intrinsic::loongarch_lasx_xvbitclr_d: |
5485 | return lowerVectorBitClear(Node: N, DAG); |
5486 | case Intrinsic::loongarch_lsx_vbitclri_b: |
5487 | case Intrinsic::loongarch_lasx_xvbitclri_b: |
5488 | return lowerVectorBitClearImm<3>(Node: N, DAG); |
5489 | case Intrinsic::loongarch_lsx_vbitclri_h: |
5490 | case Intrinsic::loongarch_lasx_xvbitclri_h: |
5491 | return lowerVectorBitClearImm<4>(Node: N, DAG); |
5492 | case Intrinsic::loongarch_lsx_vbitclri_w: |
5493 | case Intrinsic::loongarch_lasx_xvbitclri_w: |
5494 | return lowerVectorBitClearImm<5>(Node: N, DAG); |
5495 | case Intrinsic::loongarch_lsx_vbitclri_d: |
5496 | case Intrinsic::loongarch_lasx_xvbitclri_d: |
5497 | return lowerVectorBitClearImm<6>(Node: N, DAG); |
5498 | case Intrinsic::loongarch_lsx_vbitset_b: |
5499 | case Intrinsic::loongarch_lsx_vbitset_h: |
5500 | case Intrinsic::loongarch_lsx_vbitset_w: |
5501 | case Intrinsic::loongarch_lsx_vbitset_d: |
5502 | case Intrinsic::loongarch_lasx_xvbitset_b: |
5503 | case Intrinsic::loongarch_lasx_xvbitset_h: |
5504 | case Intrinsic::loongarch_lasx_xvbitset_w: |
5505 | case Intrinsic::loongarch_lasx_xvbitset_d: { |
5506 | EVT VecTy = N->getValueType(ResNo: 0); |
5507 | SDValue One = DAG.getConstant(Val: 1, DL, VT: VecTy); |
5508 | return DAG.getNode( |
5509 | Opcode: ISD::OR, DL, VT: VecTy, N1: N->getOperand(Num: 1), |
5510 | N2: DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: One, N2: truncateVecElts(Node: N, DAG))); |
5511 | } |
5512 | case Intrinsic::loongarch_lsx_vbitseti_b: |
5513 | case Intrinsic::loongarch_lasx_xvbitseti_b: |
5514 | return lowerVectorBitSetImm<3>(Node: N, DAG); |
5515 | case Intrinsic::loongarch_lsx_vbitseti_h: |
5516 | case Intrinsic::loongarch_lasx_xvbitseti_h: |
5517 | return lowerVectorBitSetImm<4>(Node: N, DAG); |
5518 | case Intrinsic::loongarch_lsx_vbitseti_w: |
5519 | case Intrinsic::loongarch_lasx_xvbitseti_w: |
5520 | return lowerVectorBitSetImm<5>(Node: N, DAG); |
5521 | case Intrinsic::loongarch_lsx_vbitseti_d: |
5522 | case Intrinsic::loongarch_lasx_xvbitseti_d: |
5523 | return lowerVectorBitSetImm<6>(Node: N, DAG); |
5524 | case Intrinsic::loongarch_lsx_vbitrev_b: |
5525 | case Intrinsic::loongarch_lsx_vbitrev_h: |
5526 | case Intrinsic::loongarch_lsx_vbitrev_w: |
5527 | case Intrinsic::loongarch_lsx_vbitrev_d: |
5528 | case Intrinsic::loongarch_lasx_xvbitrev_b: |
5529 | case Intrinsic::loongarch_lasx_xvbitrev_h: |
5530 | case Intrinsic::loongarch_lasx_xvbitrev_w: |
5531 | case Intrinsic::loongarch_lasx_xvbitrev_d: { |
5532 | EVT VecTy = N->getValueType(ResNo: 0); |
5533 | SDValue One = DAG.getConstant(Val: 1, DL, VT: VecTy); |
5534 | return DAG.getNode( |
5535 | Opcode: ISD::XOR, DL, VT: VecTy, N1: N->getOperand(Num: 1), |
5536 | N2: DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: One, N2: truncateVecElts(Node: N, DAG))); |
5537 | } |
5538 | case Intrinsic::loongarch_lsx_vbitrevi_b: |
5539 | case Intrinsic::loongarch_lasx_xvbitrevi_b: |
5540 | return lowerVectorBitRevImm<3>(Node: N, DAG); |
5541 | case Intrinsic::loongarch_lsx_vbitrevi_h: |
5542 | case Intrinsic::loongarch_lasx_xvbitrevi_h: |
5543 | return lowerVectorBitRevImm<4>(Node: N, DAG); |
5544 | case Intrinsic::loongarch_lsx_vbitrevi_w: |
5545 | case Intrinsic::loongarch_lasx_xvbitrevi_w: |
5546 | return lowerVectorBitRevImm<5>(Node: N, DAG); |
5547 | case Intrinsic::loongarch_lsx_vbitrevi_d: |
5548 | case Intrinsic::loongarch_lasx_xvbitrevi_d: |
5549 | return lowerVectorBitRevImm<6>(Node: N, DAG); |
5550 | case Intrinsic::loongarch_lsx_vfadd_s: |
5551 | case Intrinsic::loongarch_lsx_vfadd_d: |
5552 | case Intrinsic::loongarch_lasx_xvfadd_s: |
5553 | case Intrinsic::loongarch_lasx_xvfadd_d: |
5554 | return DAG.getNode(Opcode: ISD::FADD, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5555 | N2: N->getOperand(Num: 2)); |
5556 | case Intrinsic::loongarch_lsx_vfsub_s: |
5557 | case Intrinsic::loongarch_lsx_vfsub_d: |
5558 | case Intrinsic::loongarch_lasx_xvfsub_s: |
5559 | case Intrinsic::loongarch_lasx_xvfsub_d: |
5560 | return DAG.getNode(Opcode: ISD::FSUB, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5561 | N2: N->getOperand(Num: 2)); |
5562 | case Intrinsic::loongarch_lsx_vfmul_s: |
5563 | case Intrinsic::loongarch_lsx_vfmul_d: |
5564 | case Intrinsic::loongarch_lasx_xvfmul_s: |
5565 | case Intrinsic::loongarch_lasx_xvfmul_d: |
5566 | return DAG.getNode(Opcode: ISD::FMUL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5567 | N2: N->getOperand(Num: 2)); |
5568 | case Intrinsic::loongarch_lsx_vfdiv_s: |
5569 | case Intrinsic::loongarch_lsx_vfdiv_d: |
5570 | case Intrinsic::loongarch_lasx_xvfdiv_s: |
5571 | case Intrinsic::loongarch_lasx_xvfdiv_d: |
5572 | return DAG.getNode(Opcode: ISD::FDIV, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5573 | N2: N->getOperand(Num: 2)); |
5574 | case Intrinsic::loongarch_lsx_vfmadd_s: |
5575 | case Intrinsic::loongarch_lsx_vfmadd_d: |
5576 | case Intrinsic::loongarch_lasx_xvfmadd_s: |
5577 | case Intrinsic::loongarch_lasx_xvfmadd_d: |
5578 | return DAG.getNode(Opcode: ISD::FMA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5579 | N2: N->getOperand(Num: 2), N3: N->getOperand(Num: 3)); |
5580 | case Intrinsic::loongarch_lsx_vinsgr2vr_b: |
5581 | return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), |
5582 | N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2), |
5583 | N3: legalizeIntrinsicImmArg<4>(Node: N, ImmOp: 3, DAG, Subtarget)); |
5584 | case Intrinsic::loongarch_lsx_vinsgr2vr_h: |
5585 | case Intrinsic::loongarch_lasx_xvinsgr2vr_w: |
5586 | return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), |
5587 | N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2), |
5588 | N3: legalizeIntrinsicImmArg<3>(Node: N, ImmOp: 3, DAG, Subtarget)); |
5589 | case Intrinsic::loongarch_lsx_vinsgr2vr_w: |
5590 | case Intrinsic::loongarch_lasx_xvinsgr2vr_d: |
5591 | return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), |
5592 | N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2), |
5593 | N3: legalizeIntrinsicImmArg<2>(Node: N, ImmOp: 3, DAG, Subtarget)); |
5594 | case Intrinsic::loongarch_lsx_vinsgr2vr_d: |
5595 | return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), |
5596 | N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2), |
5597 | N3: legalizeIntrinsicImmArg<1>(Node: N, ImmOp: 3, DAG, Subtarget)); |
5598 | case Intrinsic::loongarch_lsx_vreplgr2vr_b: |
5599 | case Intrinsic::loongarch_lsx_vreplgr2vr_h: |
5600 | case Intrinsic::loongarch_lsx_vreplgr2vr_w: |
5601 | case Intrinsic::loongarch_lsx_vreplgr2vr_d: |
5602 | case Intrinsic::loongarch_lasx_xvreplgr2vr_b: |
5603 | case Intrinsic::loongarch_lasx_xvreplgr2vr_h: |
5604 | case Intrinsic::loongarch_lasx_xvreplgr2vr_w: |
5605 | case Intrinsic::loongarch_lasx_xvreplgr2vr_d: |
5606 | return DAG.getNode(Opcode: LoongArchISD::VREPLGR2VR, DL, VT: N->getValueType(ResNo: 0), |
5607 | Operand: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getGRLenVT(), |
5608 | Operand: N->getOperand(Num: 1))); |
5609 | case Intrinsic::loongarch_lsx_vreplve_b: |
5610 | case Intrinsic::loongarch_lsx_vreplve_h: |
5611 | case Intrinsic::loongarch_lsx_vreplve_w: |
5612 | case Intrinsic::loongarch_lsx_vreplve_d: |
5613 | case Intrinsic::loongarch_lasx_xvreplve_b: |
5614 | case Intrinsic::loongarch_lasx_xvreplve_h: |
5615 | case Intrinsic::loongarch_lasx_xvreplve_w: |
5616 | case Intrinsic::loongarch_lasx_xvreplve_d: |
5617 | return DAG.getNode(Opcode: LoongArchISD::VREPLVE, DL, VT: N->getValueType(ResNo: 0), |
5618 | N1: N->getOperand(Num: 1), |
5619 | N2: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getGRLenVT(), |
5620 | Operand: N->getOperand(Num: 2))); |
5621 | } |
5622 | return SDValue(); |
5623 | } |
5624 | |
5625 | static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG, |
5626 | TargetLowering::DAGCombinerInfo &DCI, |
5627 | const LoongArchSubtarget &Subtarget) { |
// If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 then the
// conversion is unnecessary and can be replaced with the MOVFR2GR_S_LA64
// operand.
5631 | SDValue Op0 = N->getOperand(Num: 0); |
5632 | if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64) |
5633 | return Op0.getOperand(i: 0); |
5634 | return SDValue(); |
5635 | } |
5636 | |
5637 | static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG, |
5638 | TargetLowering::DAGCombinerInfo &DCI, |
5639 | const LoongArchSubtarget &Subtarget) { |
5640 | // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the |
5641 | // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64 |
5642 | // operand. |
5643 | SDValue Op0 = N->getOperand(Num: 0); |
5644 | if (Op0->getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) { |
5645 | assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) && |
5646 | "Unexpected value type!" ); |
5647 | return Op0.getOperand(i: 0); |
5648 | } |
5649 | return SDValue(); |
5650 | } |
5651 | |
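// Try to simplify the operands of a [X]VMSKLTZ node via demanded-bits
// analysis: semantically only the sign bit of each input element affects
// the resulting mask.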
5652 | static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG, |
5653 | TargetLowering::DAGCombinerInfo &DCI, |
5654 | const LoongArchSubtarget &Subtarget) { |
5655 | MVT VT = N->getSimpleValueType(ResNo: 0); |
5656 | unsigned NumBits = VT.getScalarSizeInBits(); |
5657 | |
5658 | // Simplify the inputs. |
5659 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
5660 | APInt DemandedMask(APInt::getAllOnes(numBits: NumBits)); |
5661 | if (TLI.SimplifyDemandedBits(Op: SDValue(N, 0), DemandedBits: DemandedMask, DCI)) |
5662 | return SDValue(N, 0); |
5663 | |
5664 | return SDValue(); |
5665 | } |
5666 | |
5667 | static SDValue |
5668 | performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG, |
5669 | TargetLowering::DAGCombinerInfo &DCI, |
5670 | const LoongArchSubtarget &Subtarget) { |
5671 | SDValue Op0 = N->getOperand(Num: 0); |
5672 | SDLoc DL(N); |
5673 | |
5674 | // If the input to SplitPairF64 is just BuildPairF64 then the operation is |
5675 | // redundant. Instead, use BuildPairF64's operands directly. |
5676 | if (Op0->getOpcode() == LoongArchISD::BUILD_PAIR_F64) |
5677 | return DCI.CombineTo(N, Res0: Op0.getOperand(i: 0), Res1: Op0.getOperand(i: 1)); |
5678 | |
5679 | if (Op0->isUndef()) { |
5680 | SDValue Lo = DAG.getUNDEF(VT: MVT::i32); |
5681 | SDValue Hi = DAG.getUNDEF(VT: MVT::i32); |
5682 | return DCI.CombineTo(N, Res0: Lo, Res1: Hi); |
5683 | } |
5684 | |
5685 | // It's cheaper to materialise two 32-bit integers than to load a double |
5686 | // from the constant pool and transfer it to integer registers through the |
5687 | // stack. |
5688 | if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val&: Op0)) { |
5689 | APInt V = C->getValueAPF().bitcastToAPInt(); |
5690 | SDValue Lo = DAG.getConstant(Val: V.trunc(width: 32), DL, VT: MVT::i32); |
5691 | SDValue Hi = DAG.getConstant(Val: V.lshr(shiftAmt: 32).trunc(width: 32), DL, VT: MVT::i32); |
5692 | return DCI.CombineTo(N, Res0: Lo, Res1: Hi); |
5693 | } |
5694 | |
5695 | return SDValue(); |
5696 | } |
5697 | |
5698 | SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, |
5699 | DAGCombinerInfo &DCI) const { |
5700 | SelectionDAG &DAG = DCI.DAG; |
5701 | switch (N->getOpcode()) { |
5702 | default: |
5703 | break; |
5704 | case ISD::AND: |
5705 | return performANDCombine(N, DAG, DCI, Subtarget); |
5706 | case ISD::OR: |
5707 | return performORCombine(N, DAG, DCI, Subtarget); |
5708 | case ISD::SETCC: |
5709 | return performSETCCCombine(N, DAG, DCI, Subtarget); |
5710 | case ISD::SRL: |
5711 | return performSRLCombine(N, DAG, DCI, Subtarget); |
5712 | case ISD::BITCAST: |
5713 | return performBITCASTCombine(N, DAG, DCI, Subtarget); |
5714 | case LoongArchISD::BITREV_W: |
5715 | return performBITREV_WCombine(N, DAG, DCI, Subtarget); |
5716 | case ISD::INTRINSIC_WO_CHAIN: |
5717 | return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget); |
5718 | case LoongArchISD::MOVGR2FR_W_LA64: |
5719 | return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget); |
5720 | case LoongArchISD::MOVFR2GR_S_LA64: |
5721 | return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget); |
5722 | case LoongArchISD::VMSKLTZ: |
5723 | case LoongArchISD::XVMSKLTZ: |
5724 | return performVMSKLTZCombine(N, DAG, DCI, Subtarget); |
5725 | case LoongArchISD::SPLIT_PAIR_F64: |
5726 | return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget); |
5727 | } |
5728 | return SDValue(); |
5729 | } |
5730 | |
5731 | static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI, |
5732 | MachineBasicBlock *MBB) { |
5733 | if (!ZeroDivCheck) |
5734 | return MBB; |
5735 | |
5736 | // Build instructions: |
5737 | // MBB: |
5738 | // div(or mod) $dst, $dividend, $divisor |
5739 | // bne $divisor, $zero, SinkMBB |
5740 | // BreakMBB: |
5741 | // break 7 // BRK_DIVZERO |
5742 | // SinkMBB: |
5743 | // fallthrough |
5744 | const BasicBlock *LLVM_BB = MBB->getBasicBlock(); |
5745 | MachineFunction::iterator It = ++MBB->getIterator(); |
5746 | MachineFunction *MF = MBB->getParent(); |
5747 | auto BreakMBB = MF->CreateMachineBasicBlock(BB: LLVM_BB); |
5748 | auto SinkMBB = MF->CreateMachineBasicBlock(BB: LLVM_BB); |
5749 | MF->insert(MBBI: It, MBB: BreakMBB); |
5750 | MF->insert(MBBI: It, MBB: SinkMBB); |
5751 | |
5752 | // Transfer the remainder of MBB and its successor edges to SinkMBB. |
5753 | SinkMBB->splice(Where: SinkMBB->end(), Other: MBB, From: std::next(x: MI.getIterator()), To: MBB->end()); |
5754 | SinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB); |
5755 | |
5756 | const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo(); |
5757 | DebugLoc DL = MI.getDebugLoc(); |
5758 | MachineOperand &Divisor = MI.getOperand(i: 2); |
5759 | Register DivisorReg = Divisor.getReg(); |
5760 | |
5761 | // MBB: |
5762 | BuildMI(BB: MBB, MIMD: DL, MCID: TII.get(Opcode: LoongArch::BNE)) |
5763 | .addReg(RegNo: DivisorReg, flags: getKillRegState(B: Divisor.isKill())) |
5764 | .addReg(RegNo: LoongArch::R0) |
5765 | .addMBB(MBB: SinkMBB); |
5766 | MBB->addSuccessor(Succ: BreakMBB); |
5767 | MBB->addSuccessor(Succ: SinkMBB); |
5768 | |
5769 | // BreakMBB: |
5770 | // See linux header file arch/loongarch/include/uapi/asm/break.h for the |
5771 | // definition of BRK_DIVZERO. |
5772 | BuildMI(BB: BreakMBB, MIMD: DL, MCID: TII.get(Opcode: LoongArch::BREAK)).addImm(Val: 7 /*BRK_DIVZERO*/); |
5773 | BreakMBB->addSuccessor(Succ: SinkMBB); |
5774 | |
5775 | // Clear Divisor's kill flag. |
5776 | Divisor.setIsKill(false); |
5777 | |
5778 | return SinkMBB; |
5779 | } |
5780 | |
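// Expand a PseudoVBZ/PseudoVBNZ-style vector branch pseudo into a diamond
// that materializes the boolean result:
// BB:
// vset<cond> $fcc, $vr
// bcnez $fcc, TrueBB
// FalseBB:
// addi.w $rd1, $zero, 0
// b SinkBB
// TrueBB:
// addi.w $rd2, $zero, 1
// SinkBB:
// phi $rd, [$rd1, FalseBB], [$rd2, TrueBB]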
5781 | static MachineBasicBlock * |
5782 | emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, |
5783 | const LoongArchSubtarget &Subtarget) { |
5784 | unsigned CondOpc; |
5785 | switch (MI.getOpcode()) { |
5786 | default: |
5787 | llvm_unreachable("Unexpected opcode" ); |
5788 | case LoongArch::PseudoVBZ: |
5789 | CondOpc = LoongArch::VSETEQZ_V; |
5790 | break; |
5791 | case LoongArch::PseudoVBZ_B: |
5792 | CondOpc = LoongArch::VSETANYEQZ_B; |
5793 | break; |
5794 | case LoongArch::PseudoVBZ_H: |
5795 | CondOpc = LoongArch::VSETANYEQZ_H; |
5796 | break; |
5797 | case LoongArch::PseudoVBZ_W: |
5798 | CondOpc = LoongArch::VSETANYEQZ_W; |
5799 | break; |
5800 | case LoongArch::PseudoVBZ_D: |
5801 | CondOpc = LoongArch::VSETANYEQZ_D; |
5802 | break; |
5803 | case LoongArch::PseudoVBNZ: |
5804 | CondOpc = LoongArch::VSETNEZ_V; |
5805 | break; |
5806 | case LoongArch::PseudoVBNZ_B: |
5807 | CondOpc = LoongArch::VSETALLNEZ_B; |
5808 | break; |
5809 | case LoongArch::PseudoVBNZ_H: |
5810 | CondOpc = LoongArch::VSETALLNEZ_H; |
5811 | break; |
5812 | case LoongArch::PseudoVBNZ_W: |
5813 | CondOpc = LoongArch::VSETALLNEZ_W; |
5814 | break; |
5815 | case LoongArch::PseudoVBNZ_D: |
5816 | CondOpc = LoongArch::VSETALLNEZ_D; |
5817 | break; |
5818 | case LoongArch::PseudoXVBZ: |
5819 | CondOpc = LoongArch::XVSETEQZ_V; |
5820 | break; |
5821 | case LoongArch::PseudoXVBZ_B: |
5822 | CondOpc = LoongArch::XVSETANYEQZ_B; |
5823 | break; |
5824 | case LoongArch::PseudoXVBZ_H: |
5825 | CondOpc = LoongArch::XVSETANYEQZ_H; |
5826 | break; |
5827 | case LoongArch::PseudoXVBZ_W: |
5828 | CondOpc = LoongArch::XVSETANYEQZ_W; |
5829 | break; |
5830 | case LoongArch::PseudoXVBZ_D: |
5831 | CondOpc = LoongArch::XVSETANYEQZ_D; |
5832 | break; |
5833 | case LoongArch::PseudoXVBNZ: |
5834 | CondOpc = LoongArch::XVSETNEZ_V; |
5835 | break; |
5836 | case LoongArch::PseudoXVBNZ_B: |
5837 | CondOpc = LoongArch::XVSETALLNEZ_B; |
5838 | break; |
5839 | case LoongArch::PseudoXVBNZ_H: |
5840 | CondOpc = LoongArch::XVSETALLNEZ_H; |
5841 | break; |
5842 | case LoongArch::PseudoXVBNZ_W: |
5843 | CondOpc = LoongArch::XVSETALLNEZ_W; |
5844 | break; |
5845 | case LoongArch::PseudoXVBNZ_D: |
5846 | CondOpc = LoongArch::XVSETALLNEZ_D; |
5847 | break; |
5848 | } |
5849 | |
5850 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
5851 | const BasicBlock *LLVM_BB = BB->getBasicBlock(); |
5852 | DebugLoc DL = MI.getDebugLoc(); |
5853 | MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); |
5854 | MachineFunction::iterator It = ++BB->getIterator(); |
5855 | |
5856 | MachineFunction *F = BB->getParent(); |
5857 | MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(BB: LLVM_BB); |
5858 | MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(BB: LLVM_BB); |
5859 | MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(BB: LLVM_BB); |
5860 | |
5861 | F->insert(MBBI: It, MBB: FalseBB); |
5862 | F->insert(MBBI: It, MBB: TrueBB); |
5863 | F->insert(MBBI: It, MBB: SinkBB); |
5864 | |
5865 | // Transfer the remainder of MBB and its successor edges to Sink. |
5866 | SinkBB->splice(Where: SinkBB->end(), Other: BB, From: std::next(x: MI.getIterator()), To: BB->end()); |
5867 | SinkBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB); |
5868 | |
5869 | // Insert the real instruction to BB. |
5870 | Register FCC = MRI.createVirtualRegister(RegClass: &LoongArch::CFRRegClass); |
5871 | BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: CondOpc), DestReg: FCC).addReg(RegNo: MI.getOperand(i: 1).getReg()); |
5872 | |
5873 | // Insert branch. |
5874 | BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::BCNEZ)).addReg(RegNo: FCC).addMBB(MBB: TrueBB); |
5875 | BB->addSuccessor(Succ: FalseBB); |
5876 | BB->addSuccessor(Succ: TrueBB); |
5877 | |
5878 | // FalseBB. |
5879 | Register RD1 = MRI.createVirtualRegister(RegClass: &LoongArch::GPRRegClass); |
5880 | BuildMI(BB: FalseBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::ADDI_W), DestReg: RD1) |
5881 | .addReg(RegNo: LoongArch::R0) |
5882 | .addImm(Val: 0); |
5883 | BuildMI(BB: FalseBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::PseudoBR)).addMBB(MBB: SinkBB); |
5884 | FalseBB->addSuccessor(Succ: SinkBB); |
5885 | |
5886 | // TrueBB. |
5887 | Register RD2 = MRI.createVirtualRegister(RegClass: &LoongArch::GPRRegClass); |
5888 | BuildMI(BB: TrueBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::ADDI_W), DestReg: RD2) |
5889 | .addReg(RegNo: LoongArch::R0) |
5890 | .addImm(Val: 1); |
5891 | TrueBB->addSuccessor(Succ: SinkBB); |
5892 | |
5893 | // SinkBB: merge the results. |
5894 | BuildMI(BB&: *SinkBB, I: SinkBB->begin(), MIMD: DL, MCID: TII->get(Opcode: LoongArch::PHI), |
5895 | DestReg: MI.getOperand(i: 0).getReg()) |
5896 | .addReg(RegNo: RD1) |
5897 | .addMBB(MBB: FalseBB) |
5898 | .addReg(RegNo: RD2) |
5899 | .addMBB(MBB: TrueBB); |
5900 | |
5901 | // The pseudo instruction is gone now. |
5902 | MI.eraseFromParent(); |
5903 | return SinkBB; |
5904 | } |
5905 | |
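// Insert an element into a 256-bit LASX vector by reusing the 128-bit
// VINSGR2VR instruction: if the target lane lies in the high half, first
// move the high 128 bits down with XVPERMI_Q, insert into that half, then
// merge the updated half back into the destination register.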
5906 | static MachineBasicBlock * |
5907 | emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, |
5908 | const LoongArchSubtarget &Subtarget) { |
5909 | unsigned InsOp; |
5910 | unsigned HalfSize; |
5911 | switch (MI.getOpcode()) { |
5912 | default: |
5913 | llvm_unreachable("Unexpected opcode" ); |
5914 | case LoongArch::PseudoXVINSGR2VR_B: |
5915 | HalfSize = 16; |
5916 | InsOp = LoongArch::VINSGR2VR_B; |
5917 | break; |
5918 | case LoongArch::PseudoXVINSGR2VR_H: |
5919 | HalfSize = 8; |
5920 | InsOp = LoongArch::VINSGR2VR_H; |
5921 | break; |
5922 | } |
5923 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
5924 | const TargetRegisterClass *RC = &LoongArch::LASX256RegClass; |
5925 | const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass; |
5926 | DebugLoc DL = MI.getDebugLoc(); |
5927 | MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); |
5928 | // XDst = vector_insert XSrc, Elt, Idx |
5929 | Register XDst = MI.getOperand(i: 0).getReg(); |
5930 | Register XSrc = MI.getOperand(i: 1).getReg(); |
5931 | Register Elt = MI.getOperand(i: 2).getReg(); |
5932 | unsigned Idx = MI.getOperand(i: 3).getImm(); |
5933 | |
5934 | Register ScratchReg1 = XSrc; |
5935 | if (Idx >= HalfSize) { |
5936 | ScratchReg1 = MRI.createVirtualRegister(RegClass: RC); |
5937 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::XVPERMI_Q), DestReg: ScratchReg1) |
5938 | .addReg(RegNo: XSrc) |
5939 | .addReg(RegNo: XSrc) |
5940 | .addImm(Val: 1); |
5941 | } |
5942 | |
5943 | Register ScratchSubReg1 = MRI.createVirtualRegister(RegClass: SubRC); |
5944 | Register ScratchSubReg2 = MRI.createVirtualRegister(RegClass: SubRC); |
5945 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::COPY), DestReg: ScratchSubReg1) |
5946 | .addReg(RegNo: ScratchReg1, flags: 0, SubReg: LoongArch::sub_128); |
5947 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: InsOp), DestReg: ScratchSubReg2) |
5948 | .addReg(RegNo: ScratchSubReg1) |
5949 | .addReg(RegNo: Elt) |
5950 | .addImm(Val: Idx >= HalfSize ? Idx - HalfSize : Idx); |
5951 | |
5952 | Register ScratchReg2 = XDst; |
5953 | if (Idx >= HalfSize) |
5954 | ScratchReg2 = MRI.createVirtualRegister(RegClass: RC); |
5955 | |
5956 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::SUBREG_TO_REG), DestReg: ScratchReg2) |
5957 | .addImm(Val: 0) |
5958 | .addReg(RegNo: ScratchSubReg2) |
5959 | .addImm(Val: LoongArch::sub_128); |
5960 | |
5961 | if (Idx >= HalfSize) |
5962 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::XVPERMI_Q), DestReg: XDst) |
5963 | .addReg(RegNo: XSrc) |
5964 | .addReg(RegNo: ScratchReg2) |
5965 | .addImm(Val: 2); |
5966 | |
5967 | MI.eraseFromParent(); |
5968 | return BB; |
5969 | } |
5970 | |
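// Expand a scalar CTPOP pseudo using LSX: move the GPR into lane 0 of a
// zeroed vector, run VPCNT on it, and move the count back to a GPR.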
5971 | static MachineBasicBlock *emitPseudoCTPOP(MachineInstr &MI, |
5972 | MachineBasicBlock *BB, |
5973 | const LoongArchSubtarget &Subtarget) { |
5974 | assert(Subtarget.hasExtLSX()); |
5975 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
5976 | const TargetRegisterClass *RC = &LoongArch::LSX128RegClass; |
5977 | DebugLoc DL = MI.getDebugLoc(); |
5978 | MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); |
5979 | Register Dst = MI.getOperand(i: 0).getReg(); |
5980 | Register Src = MI.getOperand(i: 1).getReg(); |
5981 | Register ScratchReg1 = MRI.createVirtualRegister(RegClass: RC); |
5982 | Register ScratchReg2 = MRI.createVirtualRegister(RegClass: RC); |
5983 | Register ScratchReg3 = MRI.createVirtualRegister(RegClass: RC); |
5984 | |
5985 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::VLDI), DestReg: ScratchReg1).addImm(Val: 0); |
5986 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, |
5987 | MCID: TII->get(Opcode: Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D |
5988 | : LoongArch::VINSGR2VR_W), |
5989 | DestReg: ScratchReg2) |
5990 | .addReg(RegNo: ScratchReg1) |
5991 | .addReg(RegNo: Src) |
5992 | .addImm(Val: 0); |
5993 | BuildMI( |
5994 | BB&: *BB, I&: MI, MIMD: DL, |
5995 | MCID: TII->get(Opcode: Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W), |
5996 | DestReg: ScratchReg3) |
5997 | .addReg(RegNo: ScratchReg2); |
5998 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, |
5999 | MCID: TII->get(Opcode: Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D |
6000 | : LoongArch::VPICKVE2GR_W), |
6001 | DestReg: Dst) |
6002 | .addReg(RegNo: ScratchReg3) |
6003 | .addImm(Val: 0); |
6004 | |
6005 | MI.eraseFromParent(); |
6006 | return BB; |
6007 | } |
6008 | |
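// Expand the [X]VMSK* condition pseudos: run the mask instruction, invert
// the result with [X]VNOR when the pseudo tests for equality with zero, and
// copy the mask bits into a GPR (for LASX, combining the two 128-bit halves
// with BSTRINS).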
6009 | static MachineBasicBlock * |
6010 | emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB, |
6011 | const LoongArchSubtarget &Subtarget) { |
6012 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
6013 | const TargetRegisterClass *RC = &LoongArch::LSX128RegClass; |
6014 | const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
6015 | MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); |
6016 | Register Dst = MI.getOperand(i: 0).getReg(); |
6017 | Register Src = MI.getOperand(i: 1).getReg(); |
6018 | DebugLoc DL = MI.getDebugLoc(); |
6019 | unsigned EleBits = 8; |
6020 | unsigned NotOpc = 0; |
6021 | unsigned MskOpc; |
6022 | |
6023 | switch (MI.getOpcode()) { |
6024 | default: |
6025 | llvm_unreachable("Unexpected opcode" ); |
6026 | case LoongArch::PseudoVMSKLTZ_B: |
6027 | MskOpc = LoongArch::VMSKLTZ_B; |
6028 | break; |
6029 | case LoongArch::PseudoVMSKLTZ_H: |
6030 | MskOpc = LoongArch::VMSKLTZ_H; |
6031 | EleBits = 16; |
6032 | break; |
6033 | case LoongArch::PseudoVMSKLTZ_W: |
6034 | MskOpc = LoongArch::VMSKLTZ_W; |
6035 | EleBits = 32; |
6036 | break; |
6037 | case LoongArch::PseudoVMSKLTZ_D: |
6038 | MskOpc = LoongArch::VMSKLTZ_D; |
6039 | EleBits = 64; |
6040 | break; |
6041 | case LoongArch::PseudoVMSKGEZ_B: |
6042 | MskOpc = LoongArch::VMSKGEZ_B; |
6043 | break; |
6044 | case LoongArch::PseudoVMSKEQZ_B: |
6045 | MskOpc = LoongArch::VMSKNZ_B; |
6046 | NotOpc = LoongArch::VNOR_V; |
6047 | break; |
6048 | case LoongArch::PseudoVMSKNEZ_B: |
6049 | MskOpc = LoongArch::VMSKNZ_B; |
6050 | break; |
6051 | case LoongArch::PseudoXVMSKLTZ_B: |
6052 | MskOpc = LoongArch::XVMSKLTZ_B; |
6053 | RC = &LoongArch::LASX256RegClass; |
6054 | break; |
6055 | case LoongArch::PseudoXVMSKLTZ_H: |
6056 | MskOpc = LoongArch::XVMSKLTZ_H; |
6057 | RC = &LoongArch::LASX256RegClass; |
6058 | EleBits = 16; |
6059 | break; |
6060 | case LoongArch::PseudoXVMSKLTZ_W: |
6061 | MskOpc = LoongArch::XVMSKLTZ_W; |
6062 | RC = &LoongArch::LASX256RegClass; |
6063 | EleBits = 32; |
6064 | break; |
6065 | case LoongArch::PseudoXVMSKLTZ_D: |
6066 | MskOpc = LoongArch::XVMSKLTZ_D; |
6067 | RC = &LoongArch::LASX256RegClass; |
6068 | EleBits = 64; |
6069 | break; |
6070 | case LoongArch::PseudoXVMSKGEZ_B: |
6071 | MskOpc = LoongArch::XVMSKGEZ_B; |
6072 | RC = &LoongArch::LASX256RegClass; |
6073 | break; |
6074 | case LoongArch::PseudoXVMSKEQZ_B: |
6075 | MskOpc = LoongArch::XVMSKNZ_B; |
6076 | NotOpc = LoongArch::XVNOR_V; |
6077 | RC = &LoongArch::LASX256RegClass; |
6078 | break; |
6079 | case LoongArch::PseudoXVMSKNEZ_B: |
6080 | MskOpc = LoongArch::XVMSKNZ_B; |
6081 | RC = &LoongArch::LASX256RegClass; |
6082 | break; |
6083 | } |
6084 | |
6085 | Register Msk = MRI.createVirtualRegister(RegClass: RC); |
6086 | if (NotOpc) { |
6087 | Register Tmp = MRI.createVirtualRegister(RegClass: RC); |
6088 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: MskOpc), DestReg: Tmp).addReg(RegNo: Src); |
6089 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: NotOpc), DestReg: Msk) |
6090 | .addReg(RegNo: Tmp, flags: RegState::Kill) |
6091 | .addReg(RegNo: Tmp, flags: RegState::Kill); |
6092 | } else { |
6093 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: MskOpc), DestReg: Msk).addReg(RegNo: Src); |
6094 | } |
6095 | |
6096 | if (TRI->getRegSizeInBits(RC: *RC) > 128) { |
6097 | Register Lo = MRI.createVirtualRegister(RegClass: &LoongArch::GPRRegClass); |
6098 | Register Hi = MRI.createVirtualRegister(RegClass: &LoongArch::GPRRegClass); |
6099 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::XVPICKVE2GR_WU), DestReg: Lo) |
6100 | .addReg(RegNo: Msk) |
6101 | .addImm(Val: 0); |
6102 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::XVPICKVE2GR_WU), DestReg: Hi) |
6103 | .addReg(RegNo: Msk, flags: RegState::Kill) |
6104 | .addImm(Val: 4); |
6105 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, |
6106 | MCID: TII->get(Opcode: Subtarget.is64Bit() ? LoongArch::BSTRINS_D |
6107 | : LoongArch::BSTRINS_W), |
6108 | DestReg: Dst) |
6109 | .addReg(RegNo: Lo, flags: RegState::Kill) |
6110 | .addReg(RegNo: Hi, flags: RegState::Kill) |
6111 | .addImm(Val: 256 / EleBits - 1) |
6112 | .addImm(Val: 128 / EleBits); |
6113 | } else { |
6114 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::VPICKVE2GR_HU), DestReg: Dst) |
6115 | .addReg(RegNo: Msk, flags: RegState::Kill) |
6116 | .addImm(Val: 0); |
6117 | } |
6118 | |
6119 | MI.eraseFromParent(); |
6120 | return BB; |
6121 | } |
6122 | |
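// Split an f64 register pair: copy the low 32 bits of the source FPR to
// LoReg with MOVFR2GR_S_64 and the high 32 bits to HiReg with MOVFRH2GR_S.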
6123 | static MachineBasicBlock * |
6124 | emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, |
6125 | const LoongArchSubtarget &Subtarget) { |
6126 | assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo && |
6127 | "Unexpected instruction" ); |
6128 | |
6129 | MachineFunction &MF = *BB->getParent(); |
6130 | DebugLoc DL = MI.getDebugLoc(); |
6131 | const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); |
6132 | Register LoReg = MI.getOperand(i: 0).getReg(); |
6133 | Register HiReg = MI.getOperand(i: 1).getReg(); |
6134 | Register SrcReg = MI.getOperand(i: 2).getReg(); |
6135 | |
6136 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: LoongArch::MOVFR2GR_S_64), DestReg: LoReg).addReg(RegNo: SrcReg); |
6137 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: LoongArch::MOVFRH2GR_S), DestReg: HiReg) |
6138 | .addReg(RegNo: SrcReg, flags: getKillRegState(B: MI.getOperand(i: 2).isKill())); |
6139 | MI.eraseFromParent(); // The pseudo instruction is gone now. |
6140 | return BB; |
6141 | } |
6142 | |
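// Build an f64 from a register pair: write the low GPR half into the FPR
// with MOVGR2FR_W_64, then the high half with MOVGR2FRH_W.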
6143 | static MachineBasicBlock * |
6144 | emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, |
6145 | const LoongArchSubtarget &Subtarget) { |
6146 | assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo && |
6147 | "Unexpected instruction" ); |
6148 | |
6149 | MachineFunction &MF = *BB->getParent(); |
6150 | DebugLoc DL = MI.getDebugLoc(); |
6151 | const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); |
6152 | MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); |
6153 | Register TmpReg = MRI.createVirtualRegister(RegClass: &LoongArch::FPR64RegClass); |
6154 | Register DstReg = MI.getOperand(i: 0).getReg(); |
6155 | Register LoReg = MI.getOperand(i: 1).getReg(); |
6156 | Register HiReg = MI.getOperand(i: 2).getReg(); |
6157 | |
6158 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: LoongArch::MOVGR2FR_W_64), DestReg: TmpReg) |
6159 | .addReg(RegNo: LoReg, flags: getKillRegState(B: MI.getOperand(i: 1).isKill())); |
6160 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: LoongArch::MOVGR2FRH_W), DestReg: DstReg) |
6161 | .addReg(RegNo: TmpReg, flags: RegState::Kill) |
6162 | .addReg(RegNo: HiReg, flags: getKillRegState(B: MI.getOperand(i: 2).isKill())); |
6163 | MI.eraseFromParent(); // The pseudo instruction is gone now. |
6164 | return BB; |
6165 | } |
6166 | |
6167 | static bool isSelectPseudo(MachineInstr &MI) { |
6168 | switch (MI.getOpcode()) { |
6169 | default: |
6170 | return false; |
6171 | case LoongArch::Select_GPR_Using_CC_GPR: |
6172 | return true; |
6173 | } |
6174 | } |
6175 | |
6176 | static MachineBasicBlock * |
6177 | emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, |
6178 | const LoongArchSubtarget &Subtarget) { |
6179 | // To "insert" Select_* instructions, we actually have to insert the triangle |
6180 | // control-flow pattern. The incoming instructions know the destination vreg |
6181 | // to set, the condition code register to branch on, the true/false values to |
6182 | // select between, and the condcode to use to select the appropriate branch. |
6183 | // |
6184 | // We produce the following control flow: |
6185 | // HeadMBB |
6186 | // | \ |
6187 | // | IfFalseMBB |
6188 | // | / |
6189 | // TailMBB |
6190 | // |
6191 | // When we find a sequence of selects we attempt to optimize their emission |
6192 | // by sharing the control flow. Currently we only handle cases where we have |
6193 | // multiple selects with the exact same condition (same LHS, RHS and CC). |
6194 | // The selects may be interleaved with other instructions if the other |
6195 | // instructions meet some requirements we deem safe: |
6196 | // - They are not pseudo instructions. |
// - They are debug instructions, or otherwise they do not have
//   side-effects, do not access memory, and their inputs do not depend on
//   the results of the select pseudo-instructions.
6200 | // The TrueV/FalseV operands of the selects cannot depend on the result of |
6201 | // previous selects in the sequence. |
6202 | // These conditions could be further relaxed. See the X86 target for a |
6203 | // related approach and more information. |
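//
// As a minimal sketch (names illustrative, not authoritative), a single
//   %res = Select_GPR_Using_CC_GPR %lhs, %rhs, cc, %tval, %fval
// is rewritten into:
//   HeadMBB:    Bcc %lhs, %rhs, TailMBB   // branch if the condition holds
//   IfFalseMBB: (empty, falls through)
//   TailMBB:    %res = PHI [ %tval, HeadMBB ], [ %fval, IfFalseMBB ]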
6204 | |
6205 | Register LHS = MI.getOperand(i: 1).getReg(); |
6206 | Register RHS; |
6207 | if (MI.getOperand(i: 2).isReg()) |
6208 | RHS = MI.getOperand(i: 2).getReg(); |
6209 | auto CC = static_cast<unsigned>(MI.getOperand(i: 3).getImm()); |
6210 | |
6211 | SmallVector<MachineInstr *, 4> SelectDebugValues; |
6212 | SmallSet<Register, 4> SelectDests; |
6213 | SelectDests.insert(V: MI.getOperand(i: 0).getReg()); |
6214 | |
6215 | MachineInstr *LastSelectPseudo = &MI; |
6216 | for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI); |
6217 | SequenceMBBI != E; ++SequenceMBBI) { |
6218 | if (SequenceMBBI->isDebugInstr()) |
6219 | continue; |
6220 | if (isSelectPseudo(MI&: *SequenceMBBI)) { |
6221 | if (SequenceMBBI->getOperand(i: 1).getReg() != LHS || |
6222 | !SequenceMBBI->getOperand(i: 2).isReg() || |
6223 | SequenceMBBI->getOperand(i: 2).getReg() != RHS || |
6224 | SequenceMBBI->getOperand(i: 3).getImm() != CC || |
6225 | SelectDests.count(V: SequenceMBBI->getOperand(i: 4).getReg()) || |
6226 | SelectDests.count(V: SequenceMBBI->getOperand(i: 5).getReg())) |
6227 | break; |
6228 | LastSelectPseudo = &*SequenceMBBI; |
6229 | SequenceMBBI->collectDebugValues(DbgValues&: SelectDebugValues); |
6230 | SelectDests.insert(V: SequenceMBBI->getOperand(i: 0).getReg()); |
6231 | continue; |
6232 | } |
6233 | if (SequenceMBBI->hasUnmodeledSideEffects() || |
6234 | SequenceMBBI->mayLoadOrStore() || |
6235 | SequenceMBBI->usesCustomInsertionHook()) |
6236 | break; |
6237 | if (llvm::any_of(Range: SequenceMBBI->operands(), P: [&](MachineOperand &MO) { |
6238 | return MO.isReg() && MO.isUse() && SelectDests.count(V: MO.getReg()); |
6239 | })) |
6240 | break; |
6241 | } |
6242 | |
6243 | const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo(); |
6244 | const BasicBlock *LLVM_BB = BB->getBasicBlock(); |
6245 | DebugLoc DL = MI.getDebugLoc(); |
6246 | MachineFunction::iterator I = ++BB->getIterator(); |
6247 | |
6248 | MachineBasicBlock *HeadMBB = BB; |
6249 | MachineFunction *F = BB->getParent(); |
6250 | MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(BB: LLVM_BB); |
6251 | MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(BB: LLVM_BB); |
6252 | |
6253 | F->insert(MBBI: I, MBB: IfFalseMBB); |
6254 | F->insert(MBBI: I, MBB: TailMBB); |
6255 | |
6256 | // Set the call frame size on entry to the new basic blocks. |
6257 | unsigned CallFrameSize = TII.getCallFrameSizeAt(MI&: *LastSelectPseudo); |
6258 | IfFalseMBB->setCallFrameSize(CallFrameSize); |
6259 | TailMBB->setCallFrameSize(CallFrameSize); |
6260 | |
6261 | // Transfer debug instructions associated with the selects to TailMBB. |
6262 | for (MachineInstr *DebugInstr : SelectDebugValues) { |
6263 | TailMBB->push_back(MI: DebugInstr->removeFromParent()); |
6264 | } |
6265 | |
6266 | // Move all instructions after the sequence to TailMBB. |
6267 | TailMBB->splice(Where: TailMBB->end(), Other: HeadMBB, |
6268 | From: std::next(x: LastSelectPseudo->getIterator()), To: HeadMBB->end()); |
6269 | // Update machine-CFG edges by transferring all successors of the current |
6270 | // block to the new block which will contain the Phi nodes for the selects. |
6271 | TailMBB->transferSuccessorsAndUpdatePHIs(FromMBB: HeadMBB); |
6272 | // Set the successors for HeadMBB. |
6273 | HeadMBB->addSuccessor(Succ: IfFalseMBB); |
6274 | HeadMBB->addSuccessor(Succ: TailMBB); |
6275 | |
6276 | // Insert appropriate branch. |
6277 | if (MI.getOperand(i: 2).isImm()) |
6278 | BuildMI(BB: HeadMBB, MIMD: DL, MCID: TII.get(Opcode: CC)) |
6279 | .addReg(RegNo: LHS) |
6280 | .addImm(Val: MI.getOperand(i: 2).getImm()) |
6281 | .addMBB(MBB: TailMBB); |
6282 | else |
6283 | BuildMI(BB: HeadMBB, MIMD: DL, MCID: TII.get(Opcode: CC)).addReg(RegNo: LHS).addReg(RegNo: RHS).addMBB(MBB: TailMBB); |
6284 | |
6285 | // IfFalseMBB just falls through to TailMBB. |
6286 | IfFalseMBB->addSuccessor(Succ: TailMBB); |
6287 | |
6288 | // Create PHIs for all of the select pseudo-instructions. |
6289 | auto SelectMBBI = MI.getIterator(); |
6290 | auto SelectEnd = std::next(x: LastSelectPseudo->getIterator()); |
6291 | auto InsertionPoint = TailMBB->begin(); |
6292 | while (SelectMBBI != SelectEnd) { |
6293 | auto Next = std::next(x: SelectMBBI); |
6294 | if (isSelectPseudo(MI&: *SelectMBBI)) { |
6295 | // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ] |
6296 | BuildMI(BB&: *TailMBB, I: InsertionPoint, MIMD: SelectMBBI->getDebugLoc(), |
6297 | MCID: TII.get(Opcode: LoongArch::PHI), DestReg: SelectMBBI->getOperand(i: 0).getReg()) |
6298 | .addReg(RegNo: SelectMBBI->getOperand(i: 4).getReg()) |
6299 | .addMBB(MBB: HeadMBB) |
6300 | .addReg(RegNo: SelectMBBI->getOperand(i: 5).getReg()) |
6301 | .addMBB(MBB: IfFalseMBB); |
6302 | SelectMBBI->eraseFromParent(); |
6303 | } |
6304 | SelectMBBI = Next; |
6305 | } |
6306 | |
6307 | F->getProperties().resetNoPHIs(); |
6308 | return TailMBB; |
6309 | } |
6310 | |
6311 | MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( |
6312 | MachineInstr &MI, MachineBasicBlock *BB) const { |
6313 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
6314 | DebugLoc DL = MI.getDebugLoc(); |
6315 | |
6316 | switch (MI.getOpcode()) { |
6317 | default: |
6318 | llvm_unreachable("Unexpected instr type to insert" ); |
6319 | case LoongArch::DIV_W: |
6320 | case LoongArch::DIV_WU: |
6321 | case LoongArch::MOD_W: |
6322 | case LoongArch::MOD_WU: |
6323 | case LoongArch::DIV_D: |
6324 | case LoongArch::DIV_DU: |
6325 | case LoongArch::MOD_D: |
6326 | case LoongArch::MOD_DU: |
6327 | return insertDivByZeroTrap(MI, MBB: BB); |
6329 | case LoongArch::WRFCSR: { |
6330 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::MOVGR2FCSR), |
6331 | DestReg: LoongArch::FCSR0 + MI.getOperand(i: 0).getImm()) |
6332 | .addReg(RegNo: MI.getOperand(i: 1).getReg()); |
6333 | MI.eraseFromParent(); |
6334 | return BB; |
6335 | } |
6336 | case LoongArch::RDFCSR: { |
6337 | MachineInstr *ReadFCSR = |
6338 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::MOVFCSR2GR), |
6339 | DestReg: MI.getOperand(i: 0).getReg()) |
6340 | .addReg(RegNo: LoongArch::FCSR0 + MI.getOperand(i: 1).getImm()); |
6341 | ReadFCSR->getOperand(i: 1).setIsUndef(); |
6342 | MI.eraseFromParent(); |
6343 | return BB; |
6344 | } |
6345 | case LoongArch::Select_GPR_Using_CC_GPR: |
6346 | return emitSelectPseudo(MI, BB, Subtarget); |
6347 | case LoongArch::BuildPairF64Pseudo: |
6348 | return emitBuildPairF64Pseudo(MI, BB, Subtarget); |
6349 | case LoongArch::SplitPairF64Pseudo: |
6350 | return emitSplitPairF64Pseudo(MI, BB, Subtarget); |
6351 | case LoongArch::PseudoVBZ: |
6352 | case LoongArch::PseudoVBZ_B: |
6353 | case LoongArch::PseudoVBZ_H: |
6354 | case LoongArch::PseudoVBZ_W: |
6355 | case LoongArch::PseudoVBZ_D: |
6356 | case LoongArch::PseudoVBNZ: |
6357 | case LoongArch::PseudoVBNZ_B: |
6358 | case LoongArch::PseudoVBNZ_H: |
6359 | case LoongArch::PseudoVBNZ_W: |
6360 | case LoongArch::PseudoVBNZ_D: |
6361 | case LoongArch::PseudoXVBZ: |
6362 | case LoongArch::PseudoXVBZ_B: |
6363 | case LoongArch::PseudoXVBZ_H: |
6364 | case LoongArch::PseudoXVBZ_W: |
6365 | case LoongArch::PseudoXVBZ_D: |
6366 | case LoongArch::PseudoXVBNZ: |
6367 | case LoongArch::PseudoXVBNZ_B: |
6368 | case LoongArch::PseudoXVBNZ_H: |
6369 | case LoongArch::PseudoXVBNZ_W: |
6370 | case LoongArch::PseudoXVBNZ_D: |
6371 | return emitVecCondBranchPseudo(MI, BB, Subtarget); |
6372 | case LoongArch::PseudoXVINSGR2VR_B: |
6373 | case LoongArch::PseudoXVINSGR2VR_H: |
6374 | return emitPseudoXVINSGR2VR(MI, BB, Subtarget); |
6375 | case LoongArch::PseudoCTPOP: |
6376 | return emitPseudoCTPOP(MI, BB, Subtarget); |
6377 | case LoongArch::PseudoVMSKLTZ_B: |
6378 | case LoongArch::PseudoVMSKLTZ_H: |
6379 | case LoongArch::PseudoVMSKLTZ_W: |
6380 | case LoongArch::PseudoVMSKLTZ_D: |
6381 | case LoongArch::PseudoVMSKGEZ_B: |
6382 | case LoongArch::PseudoVMSKEQZ_B: |
6383 | case LoongArch::PseudoVMSKNEZ_B: |
6384 | case LoongArch::PseudoXVMSKLTZ_B: |
6385 | case LoongArch::PseudoXVMSKLTZ_H: |
6386 | case LoongArch::PseudoXVMSKLTZ_W: |
6387 | case LoongArch::PseudoXVMSKLTZ_D: |
6388 | case LoongArch::PseudoXVMSKGEZ_B: |
6389 | case LoongArch::PseudoXVMSKEQZ_B: |
6390 | case LoongArch::PseudoXVMSKNEZ_B: |
6391 | return emitPseudoVMSKCOND(MI, BB, Subtarget); |
6392 | case TargetOpcode::STATEPOINT: |
// STATEPOINT is a pseudo instruction which has no implicit defs/uses,
// while the BL call instruction (to which the statepoint is lowered at
// the end) has an implicit def of R1. This def is early-clobber, as it is
// set at the moment of the call, before any use is read.
// Add this implicit dead def here as a workaround.
6398 | MI.addOperand(MF&: *MI.getMF(), |
6399 | Op: MachineOperand::CreateReg( |
6400 | Reg: LoongArch::R1, /*isDef*/ true, |
6401 | /*isImp*/ true, /*isKill*/ false, /*isDead*/ true, |
6402 | /*isUndef*/ false, /*isEarlyClobber*/ true)); |
6403 | if (!Subtarget.is64Bit()) |
6404 | report_fatal_error(reason: "STATEPOINT is only supported on 64-bit targets" ); |
6405 | return emitPatchPoint(MI, MBB: BB); |
6406 | } |
6407 | } |
6408 | |
6409 | bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses( |
6410 | EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, |
6411 | unsigned *Fast) const { |
6412 | if (!Subtarget.hasUAL()) |
6413 | return false; |
6414 | |
6415 | // TODO: set reasonable speed number. |
6416 | if (Fast) |
6417 | *Fast = 1; |
6418 | return true; |
6419 | } |
6420 | |
6421 | const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { |
6422 | switch ((LoongArchISD::NodeType)Opcode) { |
6423 | case LoongArchISD::FIRST_NUMBER: |
6424 | break; |
6425 | |
6426 | #define NODE_NAME_CASE(node) \ |
6427 | case LoongArchISD::node: \ |
6428 | return "LoongArchISD::" #node; |
6429 | |
6430 | // TODO: Add more target-dependent nodes later. |
6431 | NODE_NAME_CASE(CALL) |
6432 | NODE_NAME_CASE(CALL_MEDIUM) |
6433 | NODE_NAME_CASE(CALL_LARGE) |
6434 | NODE_NAME_CASE(RET) |
6435 | NODE_NAME_CASE(TAIL) |
6436 | NODE_NAME_CASE(TAIL_MEDIUM) |
6437 | NODE_NAME_CASE(TAIL_LARGE) |
6438 | NODE_NAME_CASE(SELECT_CC) |
6439 | NODE_NAME_CASE(SLL_W) |
6440 | NODE_NAME_CASE(SRA_W) |
6441 | NODE_NAME_CASE(SRL_W) |
6442 | NODE_NAME_CASE(BSTRINS) |
6443 | NODE_NAME_CASE(BSTRPICK) |
6444 | NODE_NAME_CASE(MOVGR2FR_W_LA64) |
6445 | NODE_NAME_CASE(MOVFR2GR_S_LA64) |
6446 | NODE_NAME_CASE(FTINT) |
6447 | NODE_NAME_CASE(BUILD_PAIR_F64) |
6448 | NODE_NAME_CASE(SPLIT_PAIR_F64) |
6449 | NODE_NAME_CASE(REVB_2H) |
6450 | NODE_NAME_CASE(REVB_2W) |
6451 | NODE_NAME_CASE(BITREV_4B) |
6452 | NODE_NAME_CASE(BITREV_8B) |
6453 | NODE_NAME_CASE(BITREV_W) |
6454 | NODE_NAME_CASE(ROTR_W) |
6455 | NODE_NAME_CASE(ROTL_W) |
6456 | NODE_NAME_CASE(DIV_W) |
6457 | NODE_NAME_CASE(DIV_WU) |
6458 | NODE_NAME_CASE(MOD_W) |
6459 | NODE_NAME_CASE(MOD_WU) |
6460 | NODE_NAME_CASE(CLZ_W) |
6461 | NODE_NAME_CASE(CTZ_W) |
6462 | NODE_NAME_CASE(DBAR) |
6463 | NODE_NAME_CASE(IBAR) |
6464 | NODE_NAME_CASE(BREAK) |
6465 | NODE_NAME_CASE(SYSCALL) |
6466 | NODE_NAME_CASE(CRC_W_B_W) |
6467 | NODE_NAME_CASE(CRC_W_H_W) |
6468 | NODE_NAME_CASE(CRC_W_W_W) |
6469 | NODE_NAME_CASE(CRC_W_D_W) |
6470 | NODE_NAME_CASE(CRCC_W_B_W) |
6471 | NODE_NAME_CASE(CRCC_W_H_W) |
6472 | NODE_NAME_CASE(CRCC_W_W_W) |
6473 | NODE_NAME_CASE(CRCC_W_D_W) |
6474 | NODE_NAME_CASE(CSRRD) |
6475 | NODE_NAME_CASE(CSRWR) |
6476 | NODE_NAME_CASE(CSRXCHG) |
6477 | NODE_NAME_CASE(IOCSRRD_B) |
6478 | NODE_NAME_CASE(IOCSRRD_H) |
6479 | NODE_NAME_CASE(IOCSRRD_W) |
6480 | NODE_NAME_CASE(IOCSRRD_D) |
6481 | NODE_NAME_CASE(IOCSRWR_B) |
6482 | NODE_NAME_CASE(IOCSRWR_H) |
6483 | NODE_NAME_CASE(IOCSRWR_W) |
6484 | NODE_NAME_CASE(IOCSRWR_D) |
6485 | NODE_NAME_CASE(CPUCFG) |
6486 | NODE_NAME_CASE(MOVGR2FCSR) |
6487 | NODE_NAME_CASE(MOVFCSR2GR) |
6488 | NODE_NAME_CASE(CACOP_D) |
6489 | NODE_NAME_CASE(CACOP_W) |
6490 | NODE_NAME_CASE(VSHUF) |
6491 | NODE_NAME_CASE(VPICKEV) |
6492 | NODE_NAME_CASE(VPICKOD) |
6493 | NODE_NAME_CASE(VPACKEV) |
6494 | NODE_NAME_CASE(VPACKOD) |
6495 | NODE_NAME_CASE(VILVL) |
6496 | NODE_NAME_CASE(VILVH) |
6497 | NODE_NAME_CASE(VSHUF4I) |
6498 | NODE_NAME_CASE(VREPLVEI) |
6499 | NODE_NAME_CASE(VREPLGR2VR) |
6500 | NODE_NAME_CASE(XVPERMI) |
6501 | NODE_NAME_CASE(VPICK_SEXT_ELT) |
6502 | NODE_NAME_CASE(VPICK_ZEXT_ELT) |
6503 | NODE_NAME_CASE(VREPLVE) |
6504 | NODE_NAME_CASE(VALL_ZERO) |
6505 | NODE_NAME_CASE(VANY_ZERO) |
6506 | NODE_NAME_CASE(VALL_NONZERO) |
6507 | NODE_NAME_CASE(VANY_NONZERO) |
6508 | NODE_NAME_CASE(FRECIPE) |
6509 | NODE_NAME_CASE(FRSQRTE) |
6510 | NODE_NAME_CASE(VSLLI) |
6511 | NODE_NAME_CASE(VSRLI) |
6512 | NODE_NAME_CASE(VBSLL) |
6513 | NODE_NAME_CASE(VBSRL) |
6514 | NODE_NAME_CASE(VLDREPL) |
6515 | NODE_NAME_CASE(VMSKLTZ) |
6516 | NODE_NAME_CASE(VMSKGEZ) |
6517 | NODE_NAME_CASE(VMSKEQZ) |
6518 | NODE_NAME_CASE(VMSKNEZ) |
6519 | NODE_NAME_CASE(XVMSKLTZ) |
6520 | NODE_NAME_CASE(XVMSKGEZ) |
6521 | NODE_NAME_CASE(XVMSKEQZ) |
6522 | NODE_NAME_CASE(XVMSKNEZ) |
6523 | } |
6524 | #undef NODE_NAME_CASE |
6525 | return nullptr; |
6526 | } |
6527 | |
6528 | //===----------------------------------------------------------------------===// |
6529 | // Calling Convention Implementation |
6530 | //===----------------------------------------------------------------------===// |
6531 | |
// Eight general-purpose registers a0-a7 are used for passing integer
// arguments, with a0-a1 reused to return values. Generally, the GPRs are used
// to pass fixed-point arguments, and floating-point arguments when no FPR is
// available or with a soft-float ABI.
6536 | const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6, |
6537 | LoongArch::R7, LoongArch::R8, LoongArch::R9, |
6538 | LoongArch::R10, LoongArch::R11}; |
// Eight floating-point registers fa0-fa7 are used for passing floating-point
// arguments, and fa0-fa1 are also used to return values.
6541 | const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2, |
6542 | LoongArch::F3, LoongArch::F4, LoongArch::F5, |
6543 | LoongArch::F6, LoongArch::F7}; |
6544 | // FPR32 and FPR64 alias each other. |
6545 | const MCPhysReg ArgFPR64s[] = { |
6546 | LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64, |
6547 | LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64}; |
6548 | |
6549 | const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2, |
6550 | LoongArch::VR3, LoongArch::VR4, LoongArch::VR5, |
6551 | LoongArch::VR6, LoongArch::VR7}; |
6552 | |
6553 | const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2, |
6554 | LoongArch::XR3, LoongArch::XR4, LoongArch::XR5, |
6555 | LoongArch::XR6, LoongArch::XR7}; |
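
// For example (illustrative, LP64D ABI): for f(i64 %a, double %b, i64 %c),
// %a is assigned a0, %b is assigned fa0 and %c is assigned a1, while LSX and
// LASX vector arguments would start at vr0 and xr0 respectively.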
6556 | |
6557 | // Pass a 2*GRLen argument that has been split into two GRLen values through |
6558 | // registers or the stack as necessary. |
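// For example (illustrative), on LA32 an i64 split into two i32 halves may
// end up in (a0, a1), split between a7 and the stack when only one GPR
// remains, or entirely on the stack.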
6559 | static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, |
6560 | CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, |
6561 | unsigned ValNo2, MVT ValVT2, MVT LocVT2, |
6562 | ISD::ArgFlagsTy ArgFlags2) { |
6563 | unsigned GRLenInBytes = GRLen / 8; |
6564 | if (Register Reg = State.AllocateReg(Regs: ArgGPRs)) { |
6565 | // At least one half can be passed via register. |
6566 | State.addLoc(V: CCValAssign::getReg(ValNo: VA1.getValNo(), ValVT: VA1.getValVT(), Reg, |
6567 | LocVT: VA1.getLocVT(), HTP: CCValAssign::Full)); |
6568 | } else { |
6569 | // Both halves must be passed on the stack, with proper alignment. |
6570 | Align StackAlign = |
6571 | std::max(a: Align(GRLenInBytes), b: ArgFlags1.getNonZeroOrigAlign()); |
6572 | State.addLoc( |
6573 | V: CCValAssign::getMem(ValNo: VA1.getValNo(), ValVT: VA1.getValVT(), |
6574 | Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: StackAlign), |
6575 | LocVT: VA1.getLocVT(), HTP: CCValAssign::Full)); |
6576 | State.addLoc(V: CCValAssign::getMem( |
6577 | ValNo: ValNo2, ValVT: ValVT2, Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: Align(GRLenInBytes)), |
6578 | LocVT: LocVT2, HTP: CCValAssign::Full)); |
6579 | return false; |
6580 | } |
6581 | if (Register Reg = State.AllocateReg(Regs: ArgGPRs)) { |
6582 | // The second half can also be passed via register. |
6583 | State.addLoc( |
6584 | V: CCValAssign::getReg(ValNo: ValNo2, ValVT: ValVT2, Reg, LocVT: LocVT2, HTP: CCValAssign::Full)); |
6585 | } else { |
6586 | // The second half is passed via the stack, without additional alignment. |
6587 | State.addLoc(V: CCValAssign::getMem( |
6588 | ValNo: ValNo2, ValVT: ValVT2, Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: Align(GRLenInBytes)), |
6589 | LocVT: LocVT2, HTP: CCValAssign::Full)); |
6590 | } |
6591 | return false; |
6592 | } |
6593 | |
6594 | // Implements the LoongArch calling convention. Returns true upon failure. |
6595 | static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, |
6596 | unsigned ValNo, MVT ValVT, |
6597 | CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, |
6598 | CCState &State, bool IsFixed, bool IsRet, |
6599 | Type *OrigTy) { |
6600 | unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits(); |
assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen" );
6602 | MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64; |
6603 | MVT LocVT = ValVT; |
6604 | |
6605 | // Any return value split into more than two values can't be returned |
6606 | // directly. |
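// (For example, an i128 return on LA32 would be split into four i32 values;
// reporting failure here is expected to make the generic code demote such a
// return to an sret-style hidden pointer instead.)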
6607 | if (IsRet && ValNo > 1) |
6608 | return true; |
6609 | |
// GPRs are used for floats when targeting a soft-float ABI, when passing a
// variadic argument, or when no FPR is available.
bool UseGPRForFloat = true;
6612 | |
6613 | switch (ABI) { |
6614 | default: |
6615 | llvm_unreachable("Unexpected ABI" ); |
6616 | break; |
6617 | case LoongArchABI::ABI_ILP32F: |
6618 | case LoongArchABI::ABI_LP64F: |
6619 | case LoongArchABI::ABI_ILP32D: |
6620 | case LoongArchABI::ABI_LP64D: |
6621 | UseGPRForFloat = !IsFixed; |
6622 | break; |
6623 | case LoongArchABI::ABI_ILP32S: |
6624 | case LoongArchABI::ABI_LP64S: |
6625 | break; |
6626 | } |
6627 | |
6628 | // If this is a variadic argument, the LoongArch calling convention requires |
6629 | // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8 |
6630 | // byte alignment. An aligned register should be used regardless of whether |
6631 | // the original argument was split during legalisation or not. The argument |
6632 | // will not be passed by registers if the original type is larger than |
6633 | // 2*GRLen, so the register alignment rule does not apply. |
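// For example (illustrative): on LA32, a variadic double (size and alignment
// both 2*GRLen/8 = 8 bytes) that would otherwise start at a5 skips a5 and is
// assigned the aligned pair (a6, a7) instead.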
6634 | unsigned TwoGRLenInBytes = (2 * GRLen) / 8; |
6635 | if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes && |
6636 | DL.getTypeAllocSize(Ty: OrigTy) == TwoGRLenInBytes) { |
6637 | unsigned RegIdx = State.getFirstUnallocated(Regs: ArgGPRs); |
6638 | // Skip 'odd' register if necessary. |
6639 | if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1) |
6640 | State.AllocateReg(Regs: ArgGPRs); |
6641 | } |
6642 | |
6643 | SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs(); |
6644 | SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags = |
6645 | State.getPendingArgFlags(); |
6646 | |
6647 | assert(PendingLocs.size() == PendingArgFlags.size() && |
6648 | "PendingLocs and PendingArgFlags out of sync" ); |
6649 | |
6650 | // FPR32 and FPR64 alias each other. |
6651 | if (State.getFirstUnallocated(Regs: ArgFPR32s) == std::size(ArgFPR32s)) |
6652 | UseGPRForFloat = true; |
6653 | |
6654 | if (UseGPRForFloat && ValVT == MVT::f32) { |
6655 | LocVT = GRLenVT; |
6656 | LocInfo = CCValAssign::BCvt; |
6657 | } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) { |
6658 | LocVT = MVT::i64; |
6659 | LocInfo = CCValAssign::BCvt; |
6660 | } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) { |
6661 | // Handle passing f64 on LA32D with a soft float ABI or when floating point |
6662 | // registers are exhausted. |
6663 | assert(PendingLocs.empty() && "Can't lower f64 if it is split" ); |
// Depending on available argument GPRs, f64 may be passed in a pair of
6665 | // GPRs, split between a GPR and the stack, or passed completely on the |
6666 | // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these |
6667 | // cases. |
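// For example (illustrative): with a0-a6 already allocated, the low half is
// assigned a7 and the high half is placed on the stack.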
6668 | MCRegister Reg = State.AllocateReg(Regs: ArgGPRs); |
6669 | if (!Reg) { |
6670 | int64_t StackOffset = State.AllocateStack(Size: 8, Alignment: Align(8)); |
6671 | State.addLoc( |
6672 | V: CCValAssign::getMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo)); |
6673 | return false; |
6674 | } |
6675 | LocVT = MVT::i32; |
6676 | State.addLoc(V: CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, HTP: LocInfo)); |
6677 | MCRegister HiReg = State.AllocateReg(Regs: ArgGPRs); |
6678 | if (HiReg) { |
6679 | State.addLoc( |
6680 | V: CCValAssign::getCustomReg(ValNo, ValVT, Reg: HiReg, LocVT, HTP: LocInfo)); |
6681 | } else { |
6682 | int64_t StackOffset = State.AllocateStack(Size: 4, Alignment: Align(4)); |
6683 | State.addLoc( |
6684 | V: CCValAssign::getCustomMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo)); |
6685 | } |
6686 | return false; |
6687 | } |
6688 | |
6689 | // Split arguments might be passed indirectly, so keep track of the pending |
6690 | // values. |
6691 | if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) { |
6692 | LocVT = GRLenVT; |
6693 | LocInfo = CCValAssign::Indirect; |
6694 | PendingLocs.push_back( |
6695 | Elt: CCValAssign::getPending(ValNo, ValVT, LocVT, HTP: LocInfo)); |
6696 | PendingArgFlags.push_back(Elt: ArgFlags); |
6697 | if (!ArgFlags.isSplitEnd()) { |
6698 | return false; |
6699 | } |
6700 | } |
6701 | |
6702 | // If the split argument only had two elements, it should be passed directly |
6703 | // in registers or on the stack. |
6704 | if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() && |
6705 | PendingLocs.size() <= 2) { |
6706 | assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()" ); |
6707 | // Apply the normal calling convention rules to the first half of the |
6708 | // split argument. |
6709 | CCValAssign VA = PendingLocs[0]; |
6710 | ISD::ArgFlagsTy AF = PendingArgFlags[0]; |
6711 | PendingLocs.clear(); |
6712 | PendingArgFlags.clear(); |
6713 | return CC_LoongArchAssign2GRLen(GRLen, State, VA1: VA, ArgFlags1: AF, ValNo2: ValNo, ValVT2: ValVT, LocVT2: LocVT, |
6714 | ArgFlags2: ArgFlags); |
6715 | } |
6716 | |
6717 | // Allocate to a register if possible, or else a stack slot. |
6718 | Register Reg; |
6719 | unsigned StoreSizeBytes = GRLen / 8; |
6720 | Align StackAlign = Align(GRLen / 8); |
6721 | |
6722 | if (ValVT == MVT::f32 && !UseGPRForFloat) |
6723 | Reg = State.AllocateReg(Regs: ArgFPR32s); |
6724 | else if (ValVT == MVT::f64 && !UseGPRForFloat) |
6725 | Reg = State.AllocateReg(Regs: ArgFPR64s); |
6726 | else if (ValVT.is128BitVector()) |
6727 | Reg = State.AllocateReg(Regs: ArgVRs); |
6728 | else if (ValVT.is256BitVector()) |
6729 | Reg = State.AllocateReg(Regs: ArgXRs); |
6730 | else |
6731 | Reg = State.AllocateReg(Regs: ArgGPRs); |
6732 | |
6733 | unsigned StackOffset = |
6734 | Reg ? 0 : State.AllocateStack(Size: StoreSizeBytes, Alignment: StackAlign); |
6735 | |
6736 | // If we reach this point and PendingLocs is non-empty, we must be at the |
6737 | // end of a split argument that must be passed indirectly. |
6738 | if (!PendingLocs.empty()) { |
6739 | assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()" ); |
6740 | assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()" ); |
6741 | for (auto &It : PendingLocs) { |
6742 | if (Reg) |
6743 | It.convertToReg(Reg); |
6744 | else |
6745 | It.convertToMem(Offset: StackOffset); |
6746 | State.addLoc(V: It); |
6747 | } |
6748 | PendingLocs.clear(); |
6749 | PendingArgFlags.clear(); |
6750 | return false; |
6751 | } |
6752 | assert((!UseGPRForFloat || LocVT == GRLenVT) && |
6753 | "Expected an GRLenVT at this stage" ); |
6754 | |
6755 | if (Reg) { |
6756 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, HTP: LocInfo)); |
6757 | return false; |
6758 | } |
6759 | |
6760 | // When a floating-point value is passed on the stack, no bit-cast is needed. |
6761 | if (ValVT.isFloatingPoint()) { |
6762 | LocVT = ValVT; |
6763 | LocInfo = CCValAssign::Full; |
6764 | } |
6765 | |
6766 | State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo)); |
6767 | return false; |
6768 | } |
6769 | |
6770 | void LoongArchTargetLowering::analyzeInputArgs( |
6771 | MachineFunction &MF, CCState &CCInfo, |
6772 | const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet, |
6773 | LoongArchCCAssignFn Fn) const { |
6774 | FunctionType *FType = MF.getFunction().getFunctionType(); |
6775 | for (unsigned i = 0, e = Ins.size(); i != e; ++i) { |
6776 | MVT ArgVT = Ins[i].VT; |
6777 | Type *ArgTy = nullptr; |
6778 | if (IsRet) |
6779 | ArgTy = FType->getReturnType(); |
6780 | else if (Ins[i].isOrigArg()) |
6781 | ArgTy = FType->getParamType(i: Ins[i].getOrigArgIndex()); |
6782 | LoongArchABI::ABI ABI = |
6783 | MF.getSubtarget<LoongArchSubtarget>().getTargetABI(); |
6784 | if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags, |
6785 | CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) { |
6786 | LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT |
6787 | << '\n'); |
6788 | llvm_unreachable("" ); |
6789 | } |
6790 | } |
6791 | } |
6792 | |
6793 | void LoongArchTargetLowering::analyzeOutputArgs( |
6794 | MachineFunction &MF, CCState &CCInfo, |
6795 | const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet, |
6796 | CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const { |
6797 | for (unsigned i = 0, e = Outs.size(); i != e; ++i) { |
6798 | MVT ArgVT = Outs[i].VT; |
6799 | Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; |
6800 | LoongArchABI::ABI ABI = |
6801 | MF.getSubtarget<LoongArchSubtarget>().getTargetABI(); |
6802 | if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags, |
6803 | CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) { |
6804 | LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT |
6805 | << "\n" ); |
6806 | llvm_unreachable("" ); |
6807 | } |
6808 | } |
6809 | } |
6810 | |
6811 | // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect |
6812 | // values. |
6813 | static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, |
6814 | const CCValAssign &VA, const SDLoc &DL) { |
6815 | switch (VA.getLocInfo()) { |
6816 | default: |
6817 | llvm_unreachable("Unexpected CCValAssign::LocInfo" ); |
6818 | case CCValAssign::Full: |
6819 | case CCValAssign::Indirect: |
6820 | break; |
6821 | case CCValAssign::BCvt: |
6822 | if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) |
6823 | Val = DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64, DL, VT: MVT::f32, Operand: Val); |
6824 | else |
6825 | Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: VA.getValVT(), Operand: Val); |
6826 | break; |
6827 | } |
6828 | return Val; |
6829 | } |
6830 | |
6831 | static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, |
6832 | const CCValAssign &VA, const SDLoc &DL, |
6833 | const ISD::InputArg &In, |
6834 | const LoongArchTargetLowering &TLI) { |
6835 | MachineFunction &MF = DAG.getMachineFunction(); |
6836 | MachineRegisterInfo &RegInfo = MF.getRegInfo(); |
6837 | EVT LocVT = VA.getLocVT(); |
6838 | SDValue Val; |
6839 | const TargetRegisterClass *RC = TLI.getRegClassFor(VT: LocVT.getSimpleVT()); |
6840 | Register VReg = RegInfo.createVirtualRegister(RegClass: RC); |
6841 | RegInfo.addLiveIn(Reg: VA.getLocReg(), vreg: VReg); |
6842 | Val = DAG.getCopyFromReg(Chain, dl: DL, Reg: VReg, VT: LocVT); |
6843 | |
6844 | // If input is sign extended from 32 bits, note it for the OptW pass. |
6845 | if (In.isOrigArg()) { |
6846 | Argument *OrigArg = MF.getFunction().getArg(i: In.getOrigArgIndex()); |
6847 | if (OrigArg->getType()->isIntegerTy()) { |
6848 | unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth(); |
// An input zero extended from a width below 32 (e.g. i31) can also be
// considered sign extended.
6850 | if ((BitWidth <= 32 && In.Flags.isSExt()) || |
6851 | (BitWidth < 32 && In.Flags.isZExt())) { |
6852 | LoongArchMachineFunctionInfo *LAFI = |
6853 | MF.getInfo<LoongArchMachineFunctionInfo>(); |
6854 | LAFI->addSExt32Register(Reg: VReg); |
6855 | } |
6856 | } |
6857 | } |
6858 | |
6859 | return convertLocVTToValVT(DAG, Val, VA, DL); |
6860 | } |
6861 | |
6862 | // The caller is responsible for loading the full value if the argument is |
6863 | // passed with CCValAssign::Indirect. |
6864 | static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, |
6865 | const CCValAssign &VA, const SDLoc &DL) { |
6866 | MachineFunction &MF = DAG.getMachineFunction(); |
6867 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
6868 | EVT ValVT = VA.getValVT(); |
6869 | int FI = MFI.CreateFixedObject(Size: ValVT.getStoreSize(), SPOffset: VA.getLocMemOffset(), |
6870 | /*IsImmutable=*/true); |
6871 | SDValue FIN = DAG.getFrameIndex( |
6872 | FI, VT: MVT::getIntegerVT(BitWidth: DAG.getDataLayout().getPointerSizeInBits(AS: 0))); |
6873 | |
6874 | ISD::LoadExtType ExtType; |
6875 | switch (VA.getLocInfo()) { |
6876 | default: |
6877 | llvm_unreachable("Unexpected CCValAssign::LocInfo" ); |
6878 | case CCValAssign::Full: |
6879 | case CCValAssign::Indirect: |
6880 | case CCValAssign::BCvt: |
6881 | ExtType = ISD::NON_EXTLOAD; |
6882 | break; |
6883 | } |
6884 | return DAG.getExtLoad( |
6885 | ExtType, dl: DL, VT: VA.getLocVT(), Chain, Ptr: FIN, |
6886 | PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI), MemVT: ValVT); |
6887 | } |
6888 | |
6889 | static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain, |
6890 | const CCValAssign &VA, |
6891 | const CCValAssign &HiVA, |
6892 | const SDLoc &DL) { |
6893 | assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 && |
6894 | "Unexpected VA" ); |
6895 | MachineFunction &MF = DAG.getMachineFunction(); |
6896 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
6897 | MachineRegisterInfo &RegInfo = MF.getRegInfo(); |
6898 | |
6899 | assert(VA.isRegLoc() && "Expected register VA assignment" ); |
6900 | |
6901 | Register LoVReg = RegInfo.createVirtualRegister(RegClass: &LoongArch::GPRRegClass); |
6902 | RegInfo.addLiveIn(Reg: VA.getLocReg(), vreg: LoVReg); |
6903 | SDValue Lo = DAG.getCopyFromReg(Chain, dl: DL, Reg: LoVReg, VT: MVT::i32); |
6904 | SDValue Hi; |
6905 | if (HiVA.isMemLoc()) { |
6906 | // Second half of f64 is passed on the stack. |
6907 | int FI = MFI.CreateFixedObject(Size: 4, SPOffset: HiVA.getLocMemOffset(), |
6908 | /*IsImmutable=*/true); |
6909 | SDValue FIN = DAG.getFrameIndex(FI, VT: MVT::i32); |
6910 | Hi = DAG.getLoad(VT: MVT::i32, dl: DL, Chain, Ptr: FIN, |
6911 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)); |
6912 | } else { |
6913 | // Second half of f64 is passed in another GPR. |
6914 | Register HiVReg = RegInfo.createVirtualRegister(RegClass: &LoongArch::GPRRegClass); |
6915 | RegInfo.addLiveIn(Reg: HiVA.getLocReg(), vreg: HiVReg); |
6916 | Hi = DAG.getCopyFromReg(Chain, dl: DL, Reg: HiVReg, VT: MVT::i32); |
6917 | } |
6918 | return DAG.getNode(Opcode: LoongArchISD::BUILD_PAIR_F64, DL, VT: MVT::f64, N1: Lo, N2: Hi); |
6919 | } |
6920 | |
6921 | static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, |
6922 | const CCValAssign &VA, const SDLoc &DL) { |
6923 | EVT LocVT = VA.getLocVT(); |
6924 | |
6925 | switch (VA.getLocInfo()) { |
6926 | default: |
6927 | llvm_unreachable("Unexpected CCValAssign::LocInfo" ); |
6928 | case CCValAssign::Full: |
6929 | break; |
6930 | case CCValAssign::BCvt: |
6931 | if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) |
6932 | Val = DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Val); |
6933 | else |
6934 | Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: LocVT, Operand: Val); |
6935 | break; |
6936 | } |
6937 | return Val; |
6938 | } |
6939 | |
6940 | static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, |
6941 | CCValAssign::LocInfo LocInfo, |
6942 | ISD::ArgFlagsTy ArgFlags, CCState &State) { |
6943 | if (LocVT == MVT::i32 || LocVT == MVT::i64) { |
6944 | // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim |
6945 | // s0 s1 s2 s3 s4 s5 s6 s7 s8 |
6946 | static const MCPhysReg GPRList[] = { |
6947 | LoongArch::R23, LoongArch::R24, LoongArch::R25, |
6948 | LoongArch::R26, LoongArch::R27, LoongArch::R28, |
6949 | LoongArch::R29, LoongArch::R30, LoongArch::R31}; |
6950 | if (MCRegister Reg = State.AllocateReg(Regs: GPRList)) { |
6951 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, HTP: LocInfo)); |
6952 | return false; |
6953 | } |
6954 | } |
6955 | |
6956 | if (LocVT == MVT::f32) { |
6957 | // Pass in STG registers: F1, F2, F3, F4 |
6958 | // fs0,fs1,fs2,fs3 |
6959 | static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25, |
6960 | LoongArch::F26, LoongArch::F27}; |
6961 | if (MCRegister Reg = State.AllocateReg(Regs: FPR32List)) { |
6962 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, HTP: LocInfo)); |
6963 | return false; |
6964 | } |
6965 | } |
6966 | |
6967 | if (LocVT == MVT::f64) { |
6968 | // Pass in STG registers: D1, D2, D3, D4 |
6969 | // fs4,fs5,fs6,fs7 |
6970 | static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64, |
6971 | LoongArch::F30_64, LoongArch::F31_64}; |
6972 | if (MCRegister Reg = State.AllocateReg(Regs: FPR64List)) { |
6973 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, HTP: LocInfo)); |
6974 | return false; |
6975 | } |
6976 | } |
6977 | |
6978 | report_fatal_error(reason: "No registers left in GHC calling convention" ); |
6979 | return true; |
6980 | } |
6981 | |
6982 | // Transform physical registers into virtual registers. |
6983 | SDValue LoongArchTargetLowering::LowerFormalArguments( |
6984 | SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, |
6985 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, |
6986 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { |
6987 | |
6988 | MachineFunction &MF = DAG.getMachineFunction(); |
6989 | |
6990 | switch (CallConv) { |
6991 | default: |
6992 | llvm_unreachable("Unsupported calling convention" ); |
6993 | case CallingConv::C: |
6994 | case CallingConv::Fast: |
6995 | break; |
6996 | case CallingConv::GHC: |
6997 | if (!MF.getSubtarget().hasFeature(Feature: LoongArch::FeatureBasicF) || |
6998 | !MF.getSubtarget().hasFeature(Feature: LoongArch::FeatureBasicD)) |
6999 | report_fatal_error( |
7000 | reason: "GHC calling convention requires the F and D extensions" ); |
7001 | } |
7002 | |
7003 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
7004 | MVT GRLenVT = Subtarget.getGRLenVT(); |
7005 | unsigned GRLenInBytes = Subtarget.getGRLen() / 8; |
// Used with varargs to accumulate store chains.
7007 | std::vector<SDValue> OutChains; |
7008 | |
7009 | // Assign locations to all of the incoming arguments. |
7010 | SmallVector<CCValAssign> ArgLocs; |
7011 | CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); |
7012 | |
7013 | if (CallConv == CallingConv::GHC) |
7014 | CCInfo.AnalyzeFormalArguments(Ins, Fn: CC_LoongArch_GHC); |
7015 | else |
7016 | analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, Fn: CC_LoongArch); |
7017 | |
7018 | for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) { |
7019 | CCValAssign &VA = ArgLocs[i]; |
7020 | SDValue ArgValue; |
7021 | // Passing f64 on LA32D with a soft float ABI must be handled as a special |
7022 | // case. |
7023 | if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { |
7024 | assert(VA.needsCustom()); |
7025 | ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, HiVA: ArgLocs[++i], DL); |
7026 | } else if (VA.isRegLoc()) |
7027 | ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, In: Ins[InsIdx], TLI: *this); |
7028 | else |
7029 | ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL); |
7030 | if (VA.getLocInfo() == CCValAssign::Indirect) { |
7031 | // If the original argument was split and passed by reference, we need to |
7032 | // load all parts of it here (using the same address). |
7033 | InVals.push_back(Elt: DAG.getLoad(VT: VA.getValVT(), dl: DL, Chain, Ptr: ArgValue, |
7034 | PtrInfo: MachinePointerInfo())); |
7035 | unsigned ArgIndex = Ins[InsIdx].OrigArgIndex; |
7036 | unsigned ArgPartOffset = Ins[InsIdx].PartOffset; |
7037 | assert(ArgPartOffset == 0); |
7038 | while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) { |
7039 | CCValAssign &PartVA = ArgLocs[i + 1]; |
7040 | unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset; |
7041 | SDValue Offset = DAG.getIntPtrConstant(Val: PartOffset, DL); |
7042 | SDValue Address = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: ArgValue, N2: Offset); |
7043 | InVals.push_back(Elt: DAG.getLoad(VT: PartVA.getValVT(), dl: DL, Chain, Ptr: Address, |
7044 | PtrInfo: MachinePointerInfo())); |
7045 | ++i; |
7046 | ++InsIdx; |
7047 | } |
7048 | continue; |
7049 | } |
7050 | InVals.push_back(Elt: ArgValue); |
7051 | } |
7052 | |
7053 | if (IsVarArg) { |
7054 | ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs); |
7055 | unsigned Idx = CCInfo.getFirstUnallocated(Regs: ArgRegs); |
7056 | const TargetRegisterClass *RC = &LoongArch::GPRRegClass; |
7057 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
7058 | MachineRegisterInfo &RegInfo = MF.getRegInfo(); |
7059 | auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>(); |
7060 | |
7061 | // Offset of the first variable argument from stack pointer, and size of |
7062 | // the vararg save area. For now, the varargs save area is either zero or |
7063 | // large enough to hold a0-a7. |
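// For example (illustrative): on LA64, a variadic function with two named
// GPR arguments saves a2-a7, so VarArgsSaveSize = 6 * 8 = 48 bytes.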
7064 | int VaArgOffset, VarArgsSaveSize; |
7065 | |
7066 | // If all registers are allocated, then all varargs must be passed on the |
7067 | // stack and we don't need to save any argregs. |
7068 | if (ArgRegs.size() == Idx) { |
7069 | VaArgOffset = CCInfo.getStackSize(); |
7070 | VarArgsSaveSize = 0; |
7071 | } else { |
7072 | VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx); |
7073 | VaArgOffset = -VarArgsSaveSize; |
7074 | } |
7075 | |
// Record the frame index of the first variable argument,
// which is a value needed to lower VASTART.
7078 | int FI = MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset, IsImmutable: true); |
7079 | LoongArchFI->setVarArgsFrameIndex(FI); |
7080 | |
7081 | // If saving an odd number of registers then create an extra stack slot to |
7082 | // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures |
// offsets to even-numbered registers remain 2*GRLen-aligned.
7084 | if (Idx % 2) { |
7085 | MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset - (int)GRLenInBytes, |
7086 | IsImmutable: true); |
7087 | VarArgsSaveSize += GRLenInBytes; |
7088 | } |
7089 | |
7090 | // Copy the integer registers that may have been used for passing varargs |
7091 | // to the vararg save area. |
7092 | for (unsigned I = Idx; I < ArgRegs.size(); |
7093 | ++I, VaArgOffset += GRLenInBytes) { |
7094 | const Register Reg = RegInfo.createVirtualRegister(RegClass: RC); |
7095 | RegInfo.addLiveIn(Reg: ArgRegs[I], vreg: Reg); |
7096 | SDValue ArgValue = DAG.getCopyFromReg(Chain, dl: DL, Reg, VT: GRLenVT); |
7097 | FI = MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset, IsImmutable: true); |
7098 | SDValue PtrOff = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout())); |
7099 | SDValue Store = DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: PtrOff, |
7100 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)); |
7101 | cast<StoreSDNode>(Val: Store.getNode()) |
7102 | ->getMemOperand() |
7103 | ->setValue((Value *)nullptr); |
7104 | OutChains.push_back(x: Store); |
7105 | } |
7106 | LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize); |
7107 | } |
7108 | |
7109 | // All stores are grouped in one node to allow the matching between |
7110 | // the size of Ins and InVals. This only happens for vararg functions. |
7111 | if (!OutChains.empty()) { |
7112 | OutChains.push_back(x: Chain); |
7113 | Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains); |
7114 | } |
7115 | |
7116 | return Chain; |
7117 | } |
7118 | |
7119 | bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { |
7120 | return CI->isTailCall(); |
7121 | } |
7122 | |
// Check whether the return value is used only as a return value, as
// otherwise we can't perform a tail call.
7125 | bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N, |
7126 | SDValue &Chain) const { |
7127 | if (N->getNumValues() != 1) |
7128 | return false; |
7129 | if (!N->hasNUsesOfValue(NUses: 1, Value: 0)) |
7130 | return false; |
7131 | |
7132 | SDNode *Copy = *N->user_begin(); |
7133 | if (Copy->getOpcode() != ISD::CopyToReg) |
7134 | return false; |
7135 | |
7136 | // If the ISD::CopyToReg has a glue operand, we conservatively assume it |
7137 | // isn't safe to perform a tail call. |
7138 | if (Copy->getGluedNode()) |
7139 | return false; |
7140 | |
7141 | // The copy must be used by a LoongArchISD::RET, and nothing else. |
7142 | bool HasRet = false; |
7143 | for (SDNode *Node : Copy->users()) { |
7144 | if (Node->getOpcode() != LoongArchISD::RET) |
7145 | return false; |
7146 | HasRet = true; |
7147 | } |
7148 | |
7149 | if (!HasRet) |
7150 | return false; |
7151 | |
7152 | Chain = Copy->getOperand(Num: 0); |
7153 | return true; |
7154 | } |
7155 | |
7156 | // Check whether the call is eligible for tail call optimization. |
7157 | bool LoongArchTargetLowering::isEligibleForTailCallOptimization( |
7158 | CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, |
7159 | const SmallVectorImpl<CCValAssign> &ArgLocs) const { |
7160 | |
7161 | auto CalleeCC = CLI.CallConv; |
7162 | auto &Outs = CLI.Outs; |
7163 | auto &Caller = MF.getFunction(); |
7164 | auto CallerCC = Caller.getCallingConv(); |
7165 | |
7166 | // Do not tail call opt if the stack is used to pass parameters. |
7167 | if (CCInfo.getStackSize() != 0) |
7168 | return false; |
7169 | |
7170 | // Do not tail call opt if any parameters need to be passed indirectly. |
7171 | for (auto &VA : ArgLocs) |
7172 | if (VA.getLocInfo() == CCValAssign::Indirect) |
7173 | return false; |
7174 | |
7175 | // Do not tail call opt if either caller or callee uses struct return |
7176 | // semantics. |
7177 | auto IsCallerStructRet = Caller.hasStructRetAttr(); |
7178 | auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); |
7179 | if (IsCallerStructRet || IsCalleeStructRet) |
7180 | return false; |
7181 | |
7182 | // Do not tail call opt if either the callee or caller has a byval argument. |
7183 | for (auto &Arg : Outs) |
7184 | if (Arg.Flags.isByVal()) |
7185 | return false; |
7186 | |
7187 | // The callee has to preserve all registers the caller needs to preserve. |
7188 | const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
7189 | const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); |
7190 | if (CalleeCC != CallerCC) { |
7191 | const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); |
7192 | if (!TRI->regmaskSubsetEqual(mask0: CallerPreserved, mask1: CalleePreserved)) |
7193 | return false; |
7194 | } |
7195 | return true; |
7196 | } |
7197 | |
7198 | static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) { |
7199 | return DAG.getDataLayout().getPrefTypeAlign( |
7200 | Ty: VT.getTypeForEVT(Context&: *DAG.getContext())); |
7201 | } |
7202 | |
7203 | // Lower a call to a callseq_start + CALL + callseq_end chain, and add input |
7204 | // and output parameter nodes. |
7205 | SDValue |
7206 | LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI, |
7207 | SmallVectorImpl<SDValue> &InVals) const { |
7208 | SelectionDAG &DAG = CLI.DAG; |
7209 | SDLoc &DL = CLI.DL; |
7210 | SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; |
7211 | SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; |
7212 | SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; |
7213 | SDValue Chain = CLI.Chain; |
7214 | SDValue Callee = CLI.Callee; |
7215 | CallingConv::ID CallConv = CLI.CallConv; |
7216 | bool IsVarArg = CLI.IsVarArg; |
7217 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
7218 | MVT GRLenVT = Subtarget.getGRLenVT(); |
7219 | bool &IsTailCall = CLI.IsTailCall; |
7220 | |
7221 | MachineFunction &MF = DAG.getMachineFunction(); |
7222 | |
7223 | // Analyze the operands of the call, assigning locations to each operand. |
7224 | SmallVector<CCValAssign> ArgLocs; |
7225 | CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); |
7226 | |
7227 | if (CallConv == CallingConv::GHC) |
7228 | ArgCCInfo.AnalyzeCallOperands(Outs, Fn: CC_LoongArch_GHC); |
7229 | else |
7230 | analyzeOutputArgs(MF, CCInfo&: ArgCCInfo, Outs, /*IsRet=*/false, CLI: &CLI, Fn: CC_LoongArch); |
7231 | |
7232 | // Check if it's really possible to do a tail call. |
7233 | if (IsTailCall) |
7234 | IsTailCall = isEligibleForTailCallOptimization(CCInfo&: ArgCCInfo, CLI, MF, ArgLocs); |
7235 | |
7236 | if (IsTailCall) |
7237 | ++NumTailCalls; |
7238 | else if (CLI.CB && CLI.CB->isMustTailCall()) |
7239 | report_fatal_error(reason: "failed to perform tail call elimination on a call " |
7240 | "site marked musttail" ); |
7241 | |
7242 | // Get a count of how many bytes are to be pushed on the stack. |
7243 | unsigned NumBytes = ArgCCInfo.getStackSize(); |
7244 | |
7245 | // Create local copies for byval args. |
7246 | SmallVector<SDValue> ByValArgs; |
7247 | for (unsigned i = 0, e = Outs.size(); i != e; ++i) { |
7248 | ISD::ArgFlagsTy Flags = Outs[i].Flags; |
7249 | if (!Flags.isByVal()) |
7250 | continue; |
7251 | |
7252 | SDValue Arg = OutVals[i]; |
7253 | unsigned Size = Flags.getByValSize(); |
7254 | Align Alignment = Flags.getNonZeroByValAlign(); |
7255 | |
7256 | int FI = |
7257 | MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/isSpillSlot: false); |
7258 | SDValue FIPtr = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout())); |
7259 | SDValue SizeNode = DAG.getConstant(Val: Size, DL, VT: GRLenVT); |
7260 | |
7261 | Chain = DAG.getMemcpy(Chain, dl: DL, Dst: FIPtr, Src: Arg, Size: SizeNode, Alignment, |
7262 | /*IsVolatile=*/isVol: false, |
7263 | /*AlwaysInline=*/false, /*CI=*/nullptr, OverrideTailCall: std::nullopt, |
7264 | DstPtrInfo: MachinePointerInfo(), SrcPtrInfo: MachinePointerInfo()); |
7265 | ByValArgs.push_back(Elt: FIPtr); |
7266 | } |
7267 | |
7268 | if (!IsTailCall) |
7269 | Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: 0, DL: CLI.DL); |
7270 | |
7271 | // Copy argument values to their designated locations. |
7272 | SmallVector<std::pair<Register, SDValue>> RegsToPass; |
7273 | SmallVector<SDValue> MemOpChains; |
7274 | SDValue StackPtr; |
7275 | for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e; |
7276 | ++i, ++OutIdx) { |
7277 | CCValAssign &VA = ArgLocs[i]; |
7278 | SDValue ArgValue = OutVals[OutIdx]; |
7279 | ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags; |
7280 | |
7281 | // Handle passing f64 on LA32D with a soft float ABI as a special case. |
7282 | if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { |
7283 | assert(VA.isRegLoc() && "Expected register VA assignment" ); |
7284 | assert(VA.needsCustom()); |
7285 | SDValue SplitF64 = |
7286 | DAG.getNode(Opcode: LoongArchISD::SPLIT_PAIR_F64, DL, |
7287 | VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32), N: ArgValue); |
7288 | SDValue Lo = SplitF64.getValue(R: 0); |
7289 | SDValue Hi = SplitF64.getValue(R: 1); |
7290 | |
7291 | Register RegLo = VA.getLocReg(); |
7292 | RegsToPass.push_back(Elt: std::make_pair(x&: RegLo, y&: Lo)); |
7293 | |
7294 | // Get the CCValAssign for the Hi part. |
7295 | CCValAssign &HiVA = ArgLocs[++i]; |
7296 | |
7297 | if (HiVA.isMemLoc()) { |
7298 | // Second half of f64 is passed on the stack. |
7299 | if (!StackPtr.getNode()) |
7300 | StackPtr = DAG.getCopyFromReg(Chain, dl: DL, Reg: LoongArch::R3, VT: PtrVT); |
7301 | SDValue Address = |
7302 | DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr, |
7303 | N2: DAG.getIntPtrConstant(Val: HiVA.getLocMemOffset(), DL)); |
7304 | // Emit the store. |
7305 | MemOpChains.push_back(Elt: DAG.getStore( |
7306 | Chain, dl: DL, Val: Hi, Ptr: Address, |
7307 | PtrInfo: MachinePointerInfo::getStack(MF, Offset: HiVA.getLocMemOffset()))); |
7308 | } else { |
7309 | // Second half of f64 is passed in another GPR. |
7310 | Register RegHigh = HiVA.getLocReg(); |
7311 | RegsToPass.push_back(Elt: std::make_pair(x&: RegHigh, y&: Hi)); |
7312 | } |
7313 | continue; |
7314 | } |
7315 | |
7316 | // Promote the value if needed. |
7317 | // For now, only handle fully promoted and indirect arguments. |
7318 | if (VA.getLocInfo() == CCValAssign::Indirect) { |
7319 | // Store the argument in a stack slot and pass its address. |
7320 | Align StackAlign = |
7321 | std::max(a: getPrefTypeAlign(VT: Outs[OutIdx].ArgVT, DAG), |
7322 | b: getPrefTypeAlign(VT: ArgValue.getValueType(), DAG)); |
7323 | TypeSize StoredSize = ArgValue.getValueType().getStoreSize(); |
7324 | // If the original argument was split and passed by reference, we need to |
7325 | // store the required parts of it here (and pass just one address). |
7326 | unsigned ArgIndex = Outs[OutIdx].OrigArgIndex; |
7327 | unsigned ArgPartOffset = Outs[OutIdx].PartOffset; |
7328 | assert(ArgPartOffset == 0); |
// Calculate the total size to store. We don't know the full size up front,
// so walk the remaining parts, accumulating their sizes and alignments.
7332 | SmallVector<std::pair<SDValue, SDValue>> Parts; |
7333 | while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) { |
7334 | SDValue PartValue = OutVals[OutIdx + 1]; |
7335 | unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset; |
7336 | SDValue Offset = DAG.getIntPtrConstant(Val: PartOffset, DL); |
7337 | EVT PartVT = PartValue.getValueType(); |
7338 | |
7339 | StoredSize += PartVT.getStoreSize(); |
7340 | StackAlign = std::max(a: StackAlign, b: getPrefTypeAlign(VT: PartVT, DAG)); |
7341 | Parts.push_back(Elt: std::make_pair(x&: PartValue, y&: Offset)); |
7342 | ++i; |
7343 | ++OutIdx; |
7344 | } |
7345 | SDValue SpillSlot = DAG.CreateStackTemporary(Bytes: StoredSize, Alignment: StackAlign); |
7346 | int FI = cast<FrameIndexSDNode>(Val&: SpillSlot)->getIndex(); |
7347 | MemOpChains.push_back( |
7348 | Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: SpillSlot, |
7349 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI))); |
7350 | for (const auto &Part : Parts) { |
7351 | SDValue PartValue = Part.first; |
7352 | SDValue PartOffset = Part.second; |
7353 | SDValue Address = |
7354 | DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: SpillSlot, N2: PartOffset); |
7355 | MemOpChains.push_back( |
7356 | Elt: DAG.getStore(Chain, dl: DL, Val: PartValue, Ptr: Address, |
7357 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI))); |
7358 | } |
7359 | ArgValue = SpillSlot; |
7360 | } else { |
7361 | ArgValue = convertValVTToLocVT(DAG, Val: ArgValue, VA, DL); |
7362 | } |
7363 | |
7364 | // Use local copy if it is a byval arg. |
7365 | if (Flags.isByVal()) |
7366 | ArgValue = ByValArgs[j++]; |
7367 | |
7368 | if (VA.isRegLoc()) { |
7369 | // Queue up the argument copies and emit them at the end. |
7370 | RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: ArgValue)); |
7371 | } else { |
7372 | assert(VA.isMemLoc() && "Argument not register or memory" ); |
7373 | assert(!IsTailCall && "Tail call not allowed if stack is used " |
7374 | "for passing parameters" ); |
7375 | |
7376 | // Work out the address of the stack slot. |
7377 | if (!StackPtr.getNode()) |
7378 | StackPtr = DAG.getCopyFromReg(Chain, dl: DL, Reg: LoongArch::R3, VT: PtrVT); |
7379 | SDValue Address = |
7380 | DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr, |
7381 | N2: DAG.getIntPtrConstant(Val: VA.getLocMemOffset(), DL)); |
7382 | |
7383 | // Emit the store. |
7384 | MemOpChains.push_back( |
7385 | Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: Address, PtrInfo: MachinePointerInfo())); |
7386 | } |
7387 | } |
7388 | |
7389 | // Join the stores, which are independent of one another. |
7390 | if (!MemOpChains.empty()) |
7391 | Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: MemOpChains); |
7392 | |
7393 | SDValue Glue; |
7394 | |
7395 | // Build a sequence of copy-to-reg nodes, chained and glued together. |
7396 | for (auto &Reg : RegsToPass) { |
7397 | Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: Reg.first, N: Reg.second, Glue); |
7398 | Glue = Chain.getValue(R: 1); |
7399 | } |
7400 | |
7401 | // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a |
7402 | // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't |
7403 | // split it and then direct call can be matched by PseudoCALL. |
7404 | if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Val&: Callee)) { |
7405 | const GlobalValue *GV = S->getGlobal(); |
7406 | unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV) |
7407 | ? LoongArchII::MO_CALL |
7408 | : LoongArchII::MO_CALL_PLT; |
7409 | Callee = DAG.getTargetGlobalAddress(GV: S->getGlobal(), DL, VT: PtrVT, offset: 0, TargetFlags: OpFlags); |
7410 | } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Val&: Callee)) { |
7411 | unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV: nullptr) |
7412 | ? LoongArchII::MO_CALL |
7413 | : LoongArchII::MO_CALL_PLT; |
7414 | Callee = DAG.getTargetExternalSymbol(Sym: S->getSymbol(), VT: PtrVT, TargetFlags: OpFlags); |
7415 | } |
7416 | |
7417 | // The first call operand is the chain and the second is the target address. |
7418 | SmallVector<SDValue> Ops; |
7419 | Ops.push_back(Elt: Chain); |
7420 | Ops.push_back(Elt: Callee); |
7421 | |
7422 | // Add argument registers to the end of the list so that they are |
7423 | // known live into the call. |
7424 | for (auto &Reg : RegsToPass) |
7425 | Ops.push_back(Elt: DAG.getRegister(Reg: Reg.first, VT: Reg.second.getValueType())); |
7426 | |
7427 | if (!IsTailCall) { |
7428 | // Add a register mask operand representing the call-preserved registers. |
7429 | const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
7430 | const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); |
7431 | assert(Mask && "Missing call preserved mask for calling convention" ); |
7432 | Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask)); |
7433 | } |
7434 | |
7435 | // Glue the call to the argument copies, if any. |
7436 | if (Glue.getNode()) |
7437 | Ops.push_back(Elt: Glue); |
7438 | |
7439 | // Emit the call. |
7440 | SDVTList NodeTys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue); |
7441 | unsigned Op; |
7442 | switch (DAG.getTarget().getCodeModel()) { |
7443 | default: |
7444 | report_fatal_error(reason: "Unsupported code model" ); |
7445 | case CodeModel::Small: |
7446 | Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL; |
7447 | break; |
7448 | case CodeModel::Medium: |
7449 | assert(Subtarget.is64Bit() && "Medium code model requires LA64" ); |
7450 | Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM; |
7451 | break; |
7452 | case CodeModel::Large: |
7453 | assert(Subtarget.is64Bit() && "Large code model requires LA64" ); |
7454 | Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE; |
7455 | break; |
7456 | } |
7457 | |
7458 | if (IsTailCall) { |
7459 | MF.getFrameInfo().setHasTailCall(); |
7460 | SDValue Ret = DAG.getNode(Opcode: Op, DL, VTList: NodeTys, Ops); |
7461 | DAG.addNoMergeSiteInfo(Node: Ret.getNode(), NoMerge: CLI.NoMerge); |
7462 | return Ret; |
7463 | } |
7464 | |
7465 | Chain = DAG.getNode(Opcode: Op, DL, VTList: NodeTys, Ops); |
7466 | DAG.addNoMergeSiteInfo(Node: Chain.getNode(), NoMerge: CLI.NoMerge); |
7467 | Glue = Chain.getValue(R: 1); |
7468 | |
7469 | // Mark the end of the call, which is glued to the call itself. |
7470 | Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytes, Size2: 0, Glue, DL); |
7471 | Glue = Chain.getValue(R: 1); |
7472 | |
7473 | // Assign locations to each value returned by this call. |
7474 | SmallVector<CCValAssign> RVLocs; |
7475 | CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); |
7476 | analyzeInputArgs(MF, CCInfo&: RetCCInfo, Ins, /*IsRet=*/true, Fn: CC_LoongArch); |
7477 | |
7478 | // Copy all of the result registers out of their specified physreg. |
7479 | for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { |
7480 | auto &VA = RVLocs[i]; |
7481 | // Copy the value out. |
7482 | SDValue RetValue = |
7483 | DAG.getCopyFromReg(Chain, dl: DL, Reg: VA.getLocReg(), VT: VA.getLocVT(), Glue); |
7484 | // Glue the RetValue to the end of the call sequence. |
7485 | Chain = RetValue.getValue(R: 1); |
7486 | Glue = RetValue.getValue(R: 2); |
7487 | |
7488 | if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { |
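// The f64 result arrives split across a pair of i32 GPRs (returning f64
// on LA32D with a soft float ABI); reassemble it from the two halves.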
7489 | assert(VA.needsCustom()); |
7490 | SDValue RetValue2 = DAG.getCopyFromReg(Chain, dl: DL, Reg: RVLocs[++i].getLocReg(), |
7491 | VT: MVT::i32, Glue); |
7492 | Chain = RetValue2.getValue(R: 1); |
7493 | Glue = RetValue2.getValue(R: 2); |
7494 | RetValue = DAG.getNode(Opcode: LoongArchISD::BUILD_PAIR_F64, DL, VT: MVT::f64, |
7495 | N1: RetValue, N2: RetValue2); |
7496 | } else |
7497 | RetValue = convertLocVTToValVT(DAG, Val: RetValue, VA, DL); |
7498 | |
7499 | InVals.push_back(Elt: RetValue); |
7500 | } |
7501 | |
7502 | return Chain; |
7503 | } |
7504 | |
7505 | bool LoongArchTargetLowering::CanLowerReturn( |
7506 | CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, |
7507 | const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context, |
7508 | const Type *RetTy) const { |
7509 | SmallVector<CCValAssign> RVLocs; |
7510 | CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); |
7511 | |
7512 | for (unsigned i = 0, e = Outs.size(); i != e; ++i) { |
7513 | LoongArchABI::ABI ABI = |
7514 | MF.getSubtarget<LoongArchSubtarget>().getTargetABI(); |
7515 | if (CC_LoongArch(DL: MF.getDataLayout(), ABI, ValNo: i, ValVT: Outs[i].VT, LocInfo: CCValAssign::Full, |
7516 | ArgFlags: Outs[i].Flags, State&: CCInfo, /*IsFixed=*/true, /*IsRet=*/true, |
7517 | OrigTy: nullptr)) |
7518 | return false; |
7519 | } |
7520 | return true; |
7521 | } |
7522 | |
7523 | SDValue LoongArchTargetLowering::LowerReturn( |
7524 | SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, |
7525 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
7526 | const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, |
7527 | SelectionDAG &DAG) const { |
7528 | // Stores the assignment of the return value to a location. |
7529 | SmallVector<CCValAssign> RVLocs; |
7530 | |
7531 | // Info about the registers and stack slot. |
7532 | CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, |
7533 | *DAG.getContext()); |
7534 | |
7535 | analyzeOutputArgs(MF&: DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true, |
7536 | CLI: nullptr, Fn: CC_LoongArch); |
7537 | if (CallConv == CallingConv::GHC && !RVLocs.empty()) |
7538 | report_fatal_error(reason: "GHC functions return void only" ); |
7539 | SDValue Glue; |
7540 | SmallVector<SDValue, 4> RetOps(1, Chain); |
7541 | |
7542 | // Copy the result values into the output registers. |
7543 | for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) { |
7544 | SDValue Val = OutVals[OutIdx]; |
7545 | CCValAssign &VA = RVLocs[i]; |
7546 | assert(VA.isRegLoc() && "Can only return in registers!" ); |
7547 | |
7548 | if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { |
7549 | // Handle returning f64 on LA32D with a soft float ABI. |
7550 | assert(VA.isRegLoc() && "Expected return via registers" ); |
7551 | assert(VA.needsCustom()); |
7552 | SDValue SplitF64 = DAG.getNode(Opcode: LoongArchISD::SPLIT_PAIR_F64, DL, |
7553 | VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32), N: Val); |
7554 | SDValue Lo = SplitF64.getValue(R: 0); |
7555 | SDValue Hi = SplitF64.getValue(R: 1); |
7556 | Register RegLo = VA.getLocReg(); |
7557 | Register RegHi = RVLocs[++i].getLocReg(); |
7558 | |
7559 | Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: RegLo, N: Lo, Glue); |
7560 | Glue = Chain.getValue(R: 1); |
7561 | RetOps.push_back(Elt: DAG.getRegister(Reg: RegLo, VT: MVT::i32)); |
7562 | Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: RegHi, N: Hi, Glue); |
7563 | Glue = Chain.getValue(R: 1); |
7564 | RetOps.push_back(Elt: DAG.getRegister(Reg: RegHi, VT: MVT::i32)); |
7565 | } else { |
7566 | // Handle a 'normal' return. |
7567 | Val = convertValVTToLocVT(DAG, Val, VA, DL); |
7568 | Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: VA.getLocReg(), N: Val, Glue); |
7569 | |
7570 | // Guarantee that all emitted copies are stuck together. |
7571 | Glue = Chain.getValue(R: 1); |
7572 | RetOps.push_back(Elt: DAG.getRegister(Reg: VA.getLocReg(), VT: VA.getLocVT())); |
7573 | } |
7574 | } |
7575 | |
7576 | RetOps[0] = Chain; // Update chain. |
7577 | |
7578 | // Add the glue node if we have it. |
7579 | if (Glue.getNode()) |
7580 | RetOps.push_back(Elt: Glue); |
7581 | |
7582 | return DAG.getNode(Opcode: LoongArchISD::RET, DL, VT: MVT::Other, Ops: RetOps); |
7583 | } |
7584 | |
7585 | bool LoongArchTargetLowering::isFPImmVLDILegal(const APFloat &Imm, |
7586 | EVT VT) const { |
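// A single vldi can materialize only a restricted set of FP immediates;
// informally, the masked comparisons below accept exactly the
// exponent/mantissa bit patterns that its immediate encoding can produce.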
7587 | if (!Subtarget.hasExtLSX()) |
7588 | return false; |
7589 | |
7590 | if (VT == MVT::f32) { |
7591 | uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff; |
7592 | return (masked == 0x3e000000 || masked == 0x40000000); |
7593 | } |
7594 | |
7595 | if (VT == MVT::f64) { |
7596 | uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff; |
7597 | return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000); |
7598 | } |
7599 | |
7600 | return false; |
7601 | } |
7602 | |
7603 | bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, |
7604 | bool ForCodeSize) const { |
// TODO: Maybe more checks are needed here after the vector extension is
// supported.
7606 | if (VT == MVT::f32 && !Subtarget.hasBasicF()) |
7607 | return false; |
7608 | if (VT == MVT::f64 && !Subtarget.hasBasicD()) |
7609 | return false; |
7610 | return (Imm.isZero() || Imm.isExactlyValue(V: 1.0) || isFPImmVLDILegal(Imm, VT)); |
7611 | } |
7612 | |
7613 | bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const { |
7614 | return true; |
7615 | } |
7616 | |
7617 | bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const { |
7618 | return true; |
7619 | } |
7620 | |
7621 | bool LoongArchTargetLowering::shouldInsertFencesForAtomic( |
7622 | const Instruction *I) const { |
7623 | if (!Subtarget.is64Bit()) |
7624 | return isa<LoadInst>(Val: I) || isa<StoreInst>(Val: I); |
7625 | |
7626 | if (isa<LoadInst>(Val: I)) |
7627 | return true; |
7628 | |
// On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
// require fences because we can use amswap_db.[w/d].
7631 | Type *Ty = I->getOperand(i: 0)->getType(); |
7632 | if (isa<StoreInst>(Val: I) && Ty->isIntegerTy()) { |
7633 | unsigned Size = Ty->getIntegerBitWidth(); |
7634 | return (Size == 8 || Size == 16); |
7635 | } |
7636 | |
7637 | return false; |
7638 | } |
7639 | |
7640 | EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL, |
7641 | LLVMContext &Context, |
7642 | EVT VT) const { |
7643 | if (!VT.isVector()) |
7644 | return getPointerTy(DL); |
7645 | return VT.changeVectorElementTypeToInteger(); |
7646 | } |
7647 | |
7648 | bool LoongArchTargetLowering::hasAndNot(SDValue Y) const { |
7649 | // TODO: Support vectors. |
7650 | return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Val: Y); |
7651 | } |
7652 | |
7653 | bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, |
7654 | const CallInst &I, |
7655 | MachineFunction &MF, |
7656 | unsigned Intrinsic) const { |
7657 | switch (Intrinsic) { |
7658 | default: |
7659 | return false; |
7660 | case Intrinsic::loongarch_masked_atomicrmw_xchg_i32: |
7661 | case Intrinsic::loongarch_masked_atomicrmw_add_i32: |
7662 | case Intrinsic::loongarch_masked_atomicrmw_sub_i32: |
7663 | case Intrinsic::loongarch_masked_atomicrmw_nand_i32: |
7664 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
7665 | Info.memVT = MVT::i32; |
7666 | Info.ptrVal = I.getArgOperand(i: 0); |
7667 | Info.offset = 0; |
7668 | Info.align = Align(4); |
7669 | Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore | |
7670 | MachineMemOperand::MOVolatile; |
7671 | return true; |
7672 | // TODO: Add more Intrinsics later. |
7673 | } |
7674 | } |
7675 | |
// When -mlamcas is enabled, MinCmpXchgSizeInBits is set to 8, so atomicrmw
// and/or/xor operations with operands narrower than 32 bits cannot be
// expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent a
// regression, we implement the expansion manually here.
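// For example, `atomicrmw or ptr %p, i8 %v` is roughly rewritten as follows
// (an illustrative IR sketch; actual value names differ):
//   %AlignedAddr = llvm.ptrmask(%p, ~3)
//   %ShiftAmt    = (ptrtoint %p & 3) << 3
//   %Mask        = 0xff << %ShiftAmt
//   %NewOperand  = (zext i8 %v to i32) << %ShiftAmt
//   %Old         = atomicrmw or ptr %AlignedAddr, i32 %NewOperand
//   %Res         = trunc i32 (%Old >> %ShiftAmt) to i8
// For `and`, %NewOperand is additionally or'ed with ~%Mask so the
// neighbouring bytes within the word are preserved.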
7680 | void LoongArchTargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const { |
7681 | AtomicRMWInst::BinOp Op = AI->getOperation(); |
7682 | |
7683 | assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor || |
7684 | Op == AtomicRMWInst::And) && |
7685 | "Unable to expand" ); |
7686 | unsigned MinWordSize = 4; |
7687 | |
7688 | IRBuilder<> Builder(AI); |
7689 | LLVMContext &Ctx = Builder.getContext(); |
7690 | const DataLayout &DL = AI->getDataLayout(); |
7691 | Type *ValueType = AI->getType(); |
7692 | Type *WordType = Type::getIntNTy(C&: Ctx, N: MinWordSize * 8); |
7693 | |
7694 | Value *Addr = AI->getPointerOperand(); |
7695 | PointerType *PtrTy = cast<PointerType>(Val: Addr->getType()); |
7696 | IntegerType *IntTy = DL.getIndexType(C&: Ctx, AddressSpace: PtrTy->getAddressSpace()); |
7697 | |
7698 | Value *AlignedAddr = Builder.CreateIntrinsic( |
7699 | ID: Intrinsic::ptrmask, Types: {PtrTy, IntTy}, |
7700 | Args: {Addr, ConstantInt::get(Ty: IntTy, V: ~(uint64_t)(MinWordSize - 1))}, FMFSource: nullptr, |
7701 | Name: "AlignedAddr" ); |
7702 | |
7703 | Value *AddrInt = Builder.CreatePtrToInt(V: Addr, DestTy: IntTy); |
7704 | Value *PtrLSB = Builder.CreateAnd(LHS: AddrInt, RHS: MinWordSize - 1, Name: "PtrLSB" ); |
7705 | Value *ShiftAmt = Builder.CreateShl(LHS: PtrLSB, RHS: 3); |
7706 | ShiftAmt = Builder.CreateTrunc(V: ShiftAmt, DestTy: WordType, Name: "ShiftAmt" ); |
7707 | Value *Mask = Builder.CreateShl( |
7708 | LHS: ConstantInt::get(Ty: WordType, |
7709 | V: (1 << (DL.getTypeStoreSize(Ty: ValueType) * 8)) - 1), |
7710 | RHS: ShiftAmt, Name: "Mask" ); |
7711 | Value *Inv_Mask = Builder.CreateNot(V: Mask, Name: "Inv_Mask" ); |
7712 | Value *ValOperand_Shifted = |
7713 | Builder.CreateShl(LHS: Builder.CreateZExt(V: AI->getValOperand(), DestTy: WordType), |
7714 | RHS: ShiftAmt, Name: "ValOperand_Shifted" ); |
7715 | Value *NewOperand; |
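// For AND, the bits outside the target field must be 1 so that the wide
// atomic AND leaves the neighbouring bytes unchanged; for OR/XOR, the zero
// bits outside the field are already neutral.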
7716 | if (Op == AtomicRMWInst::And) |
7717 | NewOperand = Builder.CreateOr(LHS: ValOperand_Shifted, RHS: Inv_Mask, Name: "AndOperand" ); |
7718 | else |
7719 | NewOperand = ValOperand_Shifted; |
7720 | |
7721 | AtomicRMWInst *NewAI = |
7722 | Builder.CreateAtomicRMW(Op, Ptr: AlignedAddr, Val: NewOperand, Align: Align(MinWordSize), |
7723 | Ordering: AI->getOrdering(), SSID: AI->getSyncScopeID()); |
7724 | |
7725 | Value *Shift = Builder.CreateLShr(LHS: NewAI, RHS: ShiftAmt, Name: "shifted" ); |
7726 | Value *Trunc = Builder.CreateTrunc(V: Shift, DestTy: ValueType, Name: "extracted" ); |
7727 | Value *FinalOldResult = Builder.CreateBitCast(V: Trunc, DestTy: ValueType); |
7728 | AI->replaceAllUsesWith(V: FinalOldResult); |
7729 | AI->eraseFromParent(); |
7730 | } |
7731 | |
7732 | TargetLowering::AtomicExpansionKind |
7733 | LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { |
7734 | // TODO: Add more AtomicRMWInst that needs to be extended. |
7735 | |
// Since floating-point operations require a non-trivial set of data
// operations, use CmpXChg to expand.
7738 | if (AI->isFloatingPointOperation() || |
7739 | AI->getOperation() == AtomicRMWInst::UIncWrap || |
7740 | AI->getOperation() == AtomicRMWInst::UDecWrap || |
7741 | AI->getOperation() == AtomicRMWInst::USubCond || |
7742 | AI->getOperation() == AtomicRMWInst::USubSat) |
7743 | return AtomicExpansionKind::CmpXChg; |
7744 | |
7745 | if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() && |
7746 | (AI->getOperation() == AtomicRMWInst::Xchg || |
7747 | AI->getOperation() == AtomicRMWInst::Add || |
7748 | AI->getOperation() == AtomicRMWInst::Sub)) { |
7749 | return AtomicExpansionKind::None; |
7750 | } |
7751 | |
7752 | unsigned Size = AI->getType()->getPrimitiveSizeInBits(); |
7753 | if (Subtarget.hasLAMCAS()) { |
7754 | if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And || |
7755 | AI->getOperation() == AtomicRMWInst::Or || |
7756 | AI->getOperation() == AtomicRMWInst::Xor)) |
7757 | return AtomicExpansionKind::Expand; |
7758 | if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32) |
7759 | return AtomicExpansionKind::CmpXChg; |
7760 | } |
7761 | |
7762 | if (Size == 8 || Size == 16) |
7763 | return AtomicExpansionKind::MaskedIntrinsic; |
7764 | return AtomicExpansionKind::None; |
7765 | } |
7766 | |
7767 | static Intrinsic::ID |
7768 | getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, |
7769 | AtomicRMWInst::BinOp BinOp) { |
7770 | if (GRLen == 64) { |
7771 | switch (BinOp) { |
7772 | default: |
7773 | llvm_unreachable("Unexpected AtomicRMW BinOp" ); |
7774 | case AtomicRMWInst::Xchg: |
7775 | return Intrinsic::loongarch_masked_atomicrmw_xchg_i64; |
7776 | case AtomicRMWInst::Add: |
7777 | return Intrinsic::loongarch_masked_atomicrmw_add_i64; |
7778 | case AtomicRMWInst::Sub: |
7779 | return Intrinsic::loongarch_masked_atomicrmw_sub_i64; |
7780 | case AtomicRMWInst::Nand: |
7781 | return Intrinsic::loongarch_masked_atomicrmw_nand_i64; |
7782 | case AtomicRMWInst::UMax: |
7783 | return Intrinsic::loongarch_masked_atomicrmw_umax_i64; |
7784 | case AtomicRMWInst::UMin: |
7785 | return Intrinsic::loongarch_masked_atomicrmw_umin_i64; |
7786 | case AtomicRMWInst::Max: |
7787 | return Intrinsic::loongarch_masked_atomicrmw_max_i64; |
7788 | case AtomicRMWInst::Min: |
7789 | return Intrinsic::loongarch_masked_atomicrmw_min_i64; |
7790 | // TODO: support other AtomicRMWInst. |
7791 | } |
7792 | } |
7793 | |
7794 | if (GRLen == 32) { |
7795 | switch (BinOp) { |
7796 | default: |
7797 | llvm_unreachable("Unexpected AtomicRMW BinOp" ); |
7798 | case AtomicRMWInst::Xchg: |
7799 | return Intrinsic::loongarch_masked_atomicrmw_xchg_i32; |
7800 | case AtomicRMWInst::Add: |
7801 | return Intrinsic::loongarch_masked_atomicrmw_add_i32; |
7802 | case AtomicRMWInst::Sub: |
7803 | return Intrinsic::loongarch_masked_atomicrmw_sub_i32; |
7804 | case AtomicRMWInst::Nand: |
7805 | return Intrinsic::loongarch_masked_atomicrmw_nand_i32; |
7806 | case AtomicRMWInst::UMax: |
7807 | return Intrinsic::loongarch_masked_atomicrmw_umax_i32; |
7808 | case AtomicRMWInst::UMin: |
7809 | return Intrinsic::loongarch_masked_atomicrmw_umin_i32; |
7810 | case AtomicRMWInst::Max: |
7811 | return Intrinsic::loongarch_masked_atomicrmw_max_i32; |
7812 | case AtomicRMWInst::Min: |
7813 | return Intrinsic::loongarch_masked_atomicrmw_min_i32; |
7814 | // TODO: support other AtomicRMWInst. |
7815 | } |
7816 | } |
7817 | |
7818 | llvm_unreachable("Unexpected GRLen\n" ); |
7819 | } |
7820 | |
7821 | TargetLowering::AtomicExpansionKind |
7822 | LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR( |
7823 | AtomicCmpXchgInst *CI) const { |
7824 | |
7825 | if (Subtarget.hasLAMCAS()) |
7826 | return AtomicExpansionKind::None; |
7827 | |
7828 | unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits(); |
7829 | if (Size == 8 || Size == 16) |
7830 | return AtomicExpansionKind::MaskedIntrinsic; |
7831 | return AtomicExpansionKind::None; |
7832 | } |
7833 | |
7834 | Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( |
7835 | IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, |
7836 | Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { |
7837 | unsigned GRLen = Subtarget.getGRLen(); |
7838 | AtomicOrdering FailOrd = CI->getFailureOrdering(); |
7839 | Value *FailureOrdering = |
7840 | Builder.getIntN(N: Subtarget.getGRLen(), C: static_cast<uint64_t>(FailOrd)); |
7841 | Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32; |
7842 | if (GRLen == 64) { |
7843 | CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64; |
7844 | CmpVal = Builder.CreateSExt(V: CmpVal, DestTy: Builder.getInt64Ty()); |
7845 | NewVal = Builder.CreateSExt(V: NewVal, DestTy: Builder.getInt64Ty()); |
7846 | Mask = Builder.CreateSExt(V: Mask, DestTy: Builder.getInt64Ty()); |
7847 | } |
7848 | Type *Tys[] = {AlignedAddr->getType()}; |
7849 | Value *Result = Builder.CreateIntrinsic( |
7850 | ID: CmpXchgIntrID, Types: Tys, Args: {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering}); |
7851 | if (GRLen == 64) |
7852 | Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty()); |
7853 | return Result; |
7854 | } |
7855 | |
7856 | Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic( |
7857 | IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, |
7858 | Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const { |
7859 | // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace |
7860 | // the atomic instruction with an AtomicRMWInst::And/Or with appropriate |
7861 | // mask, as this produces better code than the LL/SC loop emitted by |
7862 | // int_loongarch_masked_atomicrmw_xchg. |
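// For example (illustrative): `atomicrmw xchg ptr %p, i8 0` becomes an
// `atomicrmw and` on the aligned word with the inverted mask, clearing just
// the addressed byte; `i8 -1` becomes an `atomicrmw or` with the mask.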
7863 | if (AI->getOperation() == AtomicRMWInst::Xchg && |
7864 | isa<ConstantInt>(Val: AI->getValOperand())) { |
7865 | ConstantInt *CVal = cast<ConstantInt>(Val: AI->getValOperand()); |
7866 | if (CVal->isZero()) |
7867 | return Builder.CreateAtomicRMW(Op: AtomicRMWInst::And, Ptr: AlignedAddr, |
7868 | Val: Builder.CreateNot(V: Mask, Name: "Inv_Mask" ), |
7869 | Align: AI->getAlign(), Ordering: Ord); |
7870 | if (CVal->isMinusOne()) |
7871 | return Builder.CreateAtomicRMW(Op: AtomicRMWInst::Or, Ptr: AlignedAddr, Val: Mask, |
7872 | Align: AI->getAlign(), Ordering: Ord); |
7873 | } |
7874 | |
7875 | unsigned GRLen = Subtarget.getGRLen(); |
7876 | Value *Ordering = |
7877 | Builder.getIntN(N: GRLen, C: static_cast<uint64_t>(AI->getOrdering())); |
7878 | Type *Tys[] = {AlignedAddr->getType()}; |
7879 | Function *LlwOpScwLoop = Intrinsic::getOrInsertDeclaration( |
7880 | M: AI->getModule(), |
7881 | id: getIntrinsicForMaskedAtomicRMWBinOp(GRLen, BinOp: AI->getOperation()), Tys); |
7882 | |
7883 | if (GRLen == 64) { |
7884 | Incr = Builder.CreateSExt(V: Incr, DestTy: Builder.getInt64Ty()); |
7885 | Mask = Builder.CreateSExt(V: Mask, DestTy: Builder.getInt64Ty()); |
7886 | ShiftAmt = Builder.CreateSExt(V: ShiftAmt, DestTy: Builder.getInt64Ty()); |
7887 | } |
7888 | |
7889 | Value *Result; |
7890 | |
7891 | // Must pass the shift amount needed to sign extend the loaded value prior |
7892 | // to performing a signed comparison for min/max. ShiftAmt is the number of |
7893 | // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which |
7894 | // is the number of bits to left+right shift the value in order to |
7895 | // sign-extend. |
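// For example (illustrative): with GRLen == 64, an i8 value at byte offset
// 1 has ShiftAmt == 8 and ValWidth == 8, so SextShamt == 64 - 8 - 8 == 48;
// shifting left and then arithmetic-right by 48 sign-extends the loaded
// field in place before the signed comparison.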
7896 | if (AI->getOperation() == AtomicRMWInst::Min || |
7897 | AI->getOperation() == AtomicRMWInst::Max) { |
7898 | const DataLayout &DL = AI->getDataLayout(); |
7899 | unsigned ValWidth = |
7900 | DL.getTypeStoreSizeInBits(Ty: AI->getValOperand()->getType()); |
7901 | Value *SextShamt = |
7902 | Builder.CreateSub(LHS: Builder.getIntN(N: GRLen, C: GRLen - ValWidth), RHS: ShiftAmt); |
7903 | Result = Builder.CreateCall(Callee: LlwOpScwLoop, |
7904 | Args: {AlignedAddr, Incr, Mask, SextShamt, Ordering}); |
7905 | } else { |
7906 | Result = |
7907 | Builder.CreateCall(Callee: LlwOpScwLoop, Args: {AlignedAddr, Incr, Mask, Ordering}); |
7908 | } |
7909 | |
7910 | if (GRLen == 64) |
7911 | Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty()); |
7912 | return Result; |
7913 | } |
7914 | |
7915 | bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd( |
7916 | const MachineFunction &MF, EVT VT) const { |
7917 | VT = VT.getScalarType(); |
7918 | |
7919 | if (!VT.isSimple()) |
7920 | return false; |
7921 | |
7922 | switch (VT.getSimpleVT().SimpleTy) { |
7923 | case MVT::f32: |
7924 | case MVT::f64: |
7925 | return true; |
7926 | default: |
7927 | break; |
7928 | } |
7929 | |
7930 | return false; |
7931 | } |
7932 | |
7933 | Register LoongArchTargetLowering::getExceptionPointerRegister( |
7934 | const Constant *PersonalityFn) const { |
7935 | return LoongArch::R4; |
7936 | } |
7937 | |
7938 | Register LoongArchTargetLowering::getExceptionSelectorRegister( |
7939 | const Constant *PersonalityFn) const { |
7940 | return LoongArch::R5; |
7941 | } |
7942 | |
7943 | //===----------------------------------------------------------------------===// |
7944 | // Target Optimization Hooks |
7945 | //===----------------------------------------------------------------------===// |
7946 | |
7947 | static int getEstimateRefinementSteps(EVT VT, |
7948 | const LoongArchSubtarget &Subtarget) { |
// The FRECIPE family of instructions has a relative accuracy of 2^-14.
// IEEE single precision has 23 significand bits and double has 52; each
// Newton-Raphson step roughly doubles the number of accurate bits, so one
// step suffices for f32 and two steps for f64.
7951 | int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1; |
7952 | return RefinementSteps; |
7953 | } |
7954 | |
7955 | SDValue LoongArchTargetLowering::getSqrtEstimate(SDValue Operand, |
7956 | SelectionDAG &DAG, int Enabled, |
7957 | int &RefinementSteps, |
7958 | bool &UseOneConstNR, |
7959 | bool Reciprocal) const { |
7960 | if (Subtarget.hasFrecipe()) { |
7961 | SDLoc DL(Operand); |
7962 | EVT VT = Operand.getValueType(); |
7963 | |
7964 | if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) || |
7965 | (VT == MVT::v4f32 && Subtarget.hasExtLSX()) || |
7966 | (VT == MVT::v2f64 && Subtarget.hasExtLSX()) || |
7967 | (VT == MVT::v8f32 && Subtarget.hasExtLASX()) || |
7968 | (VT == MVT::v4f64 && Subtarget.hasExtLASX())) { |
7969 | |
7970 | if (RefinementSteps == ReciprocalEstimate::Unspecified) |
7971 | RefinementSteps = getEstimateRefinementSteps(VT, Subtarget); |
7972 | |
7973 | SDValue Estimate = DAG.getNode(Opcode: LoongArchISD::FRSQRTE, DL, VT, Operand); |
7974 | if (Reciprocal) |
7975 | Estimate = DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: Operand, N2: Estimate); |
7976 | |
7977 | return Estimate; |
7978 | } |
7979 | } |
7980 | |
7981 | return SDValue(); |
7982 | } |
7983 | |
7984 | SDValue LoongArchTargetLowering::getRecipEstimate(SDValue Operand, |
7985 | SelectionDAG &DAG, |
7986 | int Enabled, |
7987 | int &RefinementSteps) const { |
7988 | if (Subtarget.hasFrecipe()) { |
7989 | SDLoc DL(Operand); |
7990 | EVT VT = Operand.getValueType(); |
7991 | |
7992 | if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) || |
7993 | (VT == MVT::v4f32 && Subtarget.hasExtLSX()) || |
7994 | (VT == MVT::v2f64 && Subtarget.hasExtLSX()) || |
7995 | (VT == MVT::v8f32 && Subtarget.hasExtLASX()) || |
7996 | (VT == MVT::v4f64 && Subtarget.hasExtLASX())) { |
7997 | |
7998 | if (RefinementSteps == ReciprocalEstimate::Unspecified) |
7999 | RefinementSteps = getEstimateRefinementSteps(VT, Subtarget); |
8000 | |
8001 | return DAG.getNode(Opcode: LoongArchISD::FRECIPE, DL, VT, Operand); |
8002 | } |
8003 | } |
8004 | |
8005 | return SDValue(); |
8006 | } |
8007 | |
8008 | //===----------------------------------------------------------------------===// |
8009 | // LoongArch Inline Assembly Support |
8010 | //===----------------------------------------------------------------------===// |
8011 | |
8012 | LoongArchTargetLowering::ConstraintType |
8013 | LoongArchTargetLowering::getConstraintType(StringRef Constraint) const { |
8014 | // LoongArch specific constraints in GCC: config/loongarch/constraints.md |
8015 | // |
8016 | // 'f': A floating-point register (if available). |
8017 | // 'k': A memory operand whose address is formed by a base register and |
8018 | // (optionally scaled) index register. |
8019 | // 'l': A signed 16-bit constant. |
8020 | // 'm': A memory operand whose address is formed by a base register and |
8021 | // offset that is suitable for use in instructions with the same |
8022 | // addressing mode as st.w and ld.w. |
8023 | // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg |
8024 | // instruction) |
8025 | // 'I': A signed 12-bit constant (for arithmetic instructions). |
8026 | // 'J': Integer zero. |
8027 | // 'K': An unsigned 12-bit constant (for logic instructions). |
8028 | // "ZB": An address that is held in a general-purpose register. The offset is |
8029 | // zero. |
8030 | // "ZC": A memory operand whose address is formed by a base register and |
8031 | // offset that is suitable for use in instructions with the same |
8032 | // addressing mode as ll.w and sc.w. |
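// Illustrative uses (assumed examples, not exhaustive):
//   asm ("csrxchg %0, %1, 0" : "+r"(val) : "q"(mask)); // 'q': not $r0/$r1
//   asm ("ll.w %0, %1" : "=r"(v) : "ZC"(*ptr));        // ll.w-style mem op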
8033 | if (Constraint.size() == 1) { |
8034 | switch (Constraint[0]) { |
8035 | default: |
8036 | break; |
8037 | case 'f': |
8038 | case 'q': |
8039 | return C_RegisterClass; |
8040 | case 'l': |
8041 | case 'I': |
8042 | case 'J': |
8043 | case 'K': |
8044 | return C_Immediate; |
8045 | case 'k': |
8046 | return C_Memory; |
8047 | } |
8048 | } |
8049 | |
8050 | if (Constraint == "ZC" || Constraint == "ZB" ) |
8051 | return C_Memory; |
8052 | |
8053 | // 'm' is handled here. |
8054 | return TargetLowering::getConstraintType(Constraint); |
8055 | } |
8056 | |
8057 | InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint( |
8058 | StringRef ConstraintCode) const { |
8059 | return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode) |
8060 | .Case(S: "k" , Value: InlineAsm::ConstraintCode::k) |
8061 | .Case(S: "ZB" , Value: InlineAsm::ConstraintCode::ZB) |
8062 | .Case(S: "ZC" , Value: InlineAsm::ConstraintCode::ZC) |
8063 | .Default(Value: TargetLowering::getInlineAsmMemConstraint(ConstraintCode)); |
8064 | } |
8065 | |
8066 | std::pair<unsigned, const TargetRegisterClass *> |
8067 | LoongArchTargetLowering::getRegForInlineAsmConstraint( |
8068 | const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { |
8069 | // First, see if this is a constraint that directly corresponds to a LoongArch |
8070 | // register class. |
8071 | if (Constraint.size() == 1) { |
8072 | switch (Constraint[0]) { |
8073 | case 'r': |
8074 | // TODO: Support fixed vectors up to GRLen? |
8075 | if (VT.isVector()) |
8076 | break; |
8077 | return std::make_pair(x: 0U, y: &LoongArch::GPRRegClass); |
8078 | case 'q': |
8079 | return std::make_pair(x: 0U, y: &LoongArch::GPRNoR0R1RegClass); |
8080 | case 'f': |
8081 | if (Subtarget.hasBasicF() && VT == MVT::f32) |
8082 | return std::make_pair(x: 0U, y: &LoongArch::FPR32RegClass); |
8083 | if (Subtarget.hasBasicD() && VT == MVT::f64) |
8084 | return std::make_pair(x: 0U, y: &LoongArch::FPR64RegClass); |
8085 | if (Subtarget.hasExtLSX() && |
8086 | TRI->isTypeLegalForClass(RC: LoongArch::LSX128RegClass, T: VT)) |
8087 | return std::make_pair(x: 0U, y: &LoongArch::LSX128RegClass); |
8088 | if (Subtarget.hasExtLASX() && |
8089 | TRI->isTypeLegalForClass(RC: LoongArch::LASX256RegClass, T: VT)) |
8090 | return std::make_pair(x: 0U, y: &LoongArch::LASX256RegClass); |
8091 | break; |
8092 | default: |
8093 | break; |
8094 | } |
8095 | } |
8096 | |
8097 | // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen |
8098 | // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm |
8099 | // constraints while the official register name is prefixed with a '$'. So we |
// clip the '$' from the original constraint string (e.g. {$r0} to {r0})
// before it is parsed. TargetLowering::getRegForInlineAsmConstraint is also
// case insensitive, so there is no need to convert the constraint to upper
// case here.
8103 | // |
// For now, there is no need to support ABI names (e.g. `$a0`), as clang
// correctly decodes register name aliases into their official names, and
// AFAIK the not-yet-upstreamed `rustc` for LoongArch will always use
// official register names.
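// For example (illustrative): the constraint "{$f0}" with an f64 operand
// resolves to F0_64 in FPR64RegClass rather than the f32 register F0.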
8108 | if (Constraint.starts_with(Prefix: "{$r" ) || Constraint.starts_with(Prefix: "{$f" ) || |
8109 | Constraint.starts_with(Prefix: "{$vr" ) || Constraint.starts_with(Prefix: "{$xr" )) { |
8110 | bool IsFP = Constraint[2] == 'f'; |
8111 | std::pair<StringRef, StringRef> Temp = Constraint.split(Separator: '$'); |
8112 | std::pair<unsigned, const TargetRegisterClass *> R; |
8113 | R = TargetLowering::getRegForInlineAsmConstraint( |
8114 | TRI, Constraint: join_items(Separator: "" , Items&: Temp.first, Items&: Temp.second), VT); |
8115 | // Match those names to the widest floating point register type available. |
8116 | if (IsFP) { |
8117 | unsigned RegNo = R.first; |
8118 | if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) { |
8119 | if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) { |
8120 | unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64; |
8121 | return std::make_pair(x&: DReg, y: &LoongArch::FPR64RegClass); |
8122 | } |
8123 | } |
8124 | } |
8125 | return R; |
8126 | } |
8127 | |
8128 | return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); |
8129 | } |
8130 | |
8131 | void LoongArchTargetLowering::LowerAsmOperandForConstraint( |
8132 | SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops, |
8133 | SelectionDAG &DAG) const { |
// Currently we only support length-1 constraints.
8135 | if (Constraint.size() == 1) { |
8136 | switch (Constraint[0]) { |
8137 | case 'l': |
8138 | // Validate & create a 16-bit signed immediate operand. |
8139 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) { |
8140 | uint64_t CVal = C->getSExtValue(); |
8141 | if (isInt<16>(x: CVal)) |
8142 | Ops.push_back(x: DAG.getSignedTargetConstant(Val: CVal, DL: SDLoc(Op), |
8143 | VT: Subtarget.getGRLenVT())); |
8144 | } |
8145 | return; |
8146 | case 'I': |
8147 | // Validate & create a 12-bit signed immediate operand. |
8148 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) { |
8149 | uint64_t CVal = C->getSExtValue(); |
8150 | if (isInt<12>(x: CVal)) |
8151 | Ops.push_back(x: DAG.getSignedTargetConstant(Val: CVal, DL: SDLoc(Op), |
8152 | VT: Subtarget.getGRLenVT())); |
8153 | } |
8154 | return; |
8155 | case 'J': |
8156 | // Validate & create an integer zero operand. |
8157 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) |
8158 | if (C->getZExtValue() == 0) |
8159 | Ops.push_back( |
8160 | x: DAG.getTargetConstant(Val: 0, DL: SDLoc(Op), VT: Subtarget.getGRLenVT())); |
8161 | return; |
8162 | case 'K': |
8163 | // Validate & create a 12-bit unsigned immediate operand. |
8164 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) { |
8165 | uint64_t CVal = C->getZExtValue(); |
8166 | if (isUInt<12>(x: CVal)) |
8167 | Ops.push_back( |
8168 | x: DAG.getTargetConstant(Val: CVal, DL: SDLoc(Op), VT: Subtarget.getGRLenVT())); |
8169 | } |
8170 | return; |
8171 | default: |
8172 | break; |
8173 | } |
8174 | } |
8175 | TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); |
8176 | } |
8177 | |
8178 | #define GET_REGISTER_MATCHER |
8179 | #include "LoongArchGenAsmMatcher.inc" |
8180 | |
8181 | Register |
8182 | LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT, |
8183 | const MachineFunction &MF) const { |
8184 | std::pair<StringRef, StringRef> Name = StringRef(RegName).split(Separator: '$'); |
8185 | std::string NewRegName = Name.second.str(); |
8186 | Register Reg = MatchRegisterAltName(Name: NewRegName); |
8187 | if (!Reg) |
8188 | Reg = MatchRegisterName(Name: NewRegName); |
8189 | if (!Reg) |
8190 | return Reg; |
8191 | BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF); |
8192 | if (!ReservedRegs.test(Idx: Reg)) |
8193 | report_fatal_error(reason: Twine("Trying to obtain non-reserved register \"" + |
8194 | StringRef(RegName) + "\"." )); |
8195 | return Reg; |
8196 | } |
8197 | |
8198 | bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context, |
8199 | EVT VT, SDValue C) const { |
8200 | // TODO: Support vectors. |
8201 | if (!VT.isScalarInteger()) |
8202 | return false; |
8203 | |
8204 | // Omit the optimization if the data size exceeds GRLen. |
8205 | if (VT.getSizeInBits() > Subtarget.getGRLen()) |
8206 | return false; |
8207 | |
8208 | if (auto *ConstNode = dyn_cast<ConstantSDNode>(Val: C.getNode())) { |
8209 | const APInt &Imm = ConstNode->getAPIntValue(); |
8210 | // Break MUL into (SLLI + ADD/SUB) or ALSL. |
8211 | if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() || |
8212 | (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2()) |
8213 | return true; |
8214 | // Break MUL into (ALSL x, (SLLI x, imm0), imm1). |
8215 | if (ConstNode->hasOneUse() && |
8216 | ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() || |
8217 | (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2())) |
8218 | return true; |
// Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)) when the
// immediate has exactly two set bits, or into
// (SUB (SLLI x, s0), (SLLI x, s1)) when the immediate equals
// (1 << s0) - (1 << s1).
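// For example (illustrative): Imm == 4112 == (1 << 12) + (1 << 4) becomes
// (ADD (SLLI x, 12), (SLLI x, 4)), and Imm == 8176 == (1 << 13) - (1 << 4)
// becomes (SUB (SLLI x, 13), (SLLI x, 4)).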
8223 | if (ConstNode->hasOneUse() && !(Imm.sge(RHS: -2048) && Imm.sle(RHS: 4095))) { |
8224 | unsigned Shifts = Imm.countr_zero(); |
8225 | // Reject immediates which can be composed via a single LUI. |
8226 | if (Shifts >= 12) |
8227 | return false; |
// Reject multiplications that can be optimized to
// (SLLI (ALSL x, x, 1/2/3/4), s).
8230 | APInt ImmPop = Imm.ashr(ShiftAmt: Shifts); |
8231 | if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17) |
8232 | return false; |
// We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`, since it
// needs one more instruction than the other three cases.
8235 | APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true); |
8236 | if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() || |
8237 | (ImmSmall - Imm).isPowerOf2()) |
8238 | return true; |
8239 | } |
8240 | } |
8241 | |
8242 | return false; |
8243 | } |
8244 | |
8245 | bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL, |
8246 | const AddrMode &AM, |
8247 | Type *Ty, unsigned AS, |
8248 | Instruction *I) const { |
8249 | // LoongArch has four basic addressing modes: |
8250 | // 1. reg |
8251 | // 2. reg + 12-bit signed offset |
8252 | // 3. reg + 14-bit signed offset left-shifted by 2 |
8253 | // 4. reg1 + reg2 |
// TODO: Add more checks after supporting the vector extension.
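// Illustrative instructions for modes 2-4 (assembly sketch):
//   ld.w    $a0, $a1, 12   # reg + 12-bit signed offset
//   ldptr.w $a0, $a1, 8    # reg + 14-bit signed offset << 2
//   ldx.w   $a0, $a1, $a2  # reg1 + reg2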
8255 | |
8256 | // No global is ever allowed as a base. |
8257 | if (AM.BaseGV) |
8258 | return false; |
8259 | |
8260 | // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2 |
8261 | // with `UAL` feature. |
8262 | if (!isInt<12>(x: AM.BaseOffs) && |
8263 | !(isShiftedInt<14, 2>(x: AM.BaseOffs) && Subtarget.hasUAL())) |
8264 | return false; |
8265 | |
8266 | switch (AM.Scale) { |
8267 | case 0: |
8268 | // "r+i" or just "i", depending on HasBaseReg. |
8269 | break; |
8270 | case 1: |
8271 | // "r+r+i" is not allowed. |
8272 | if (AM.HasBaseReg && AM.BaseOffs) |
8273 | return false; |
8274 | // Otherwise we have "r+r" or "r+i". |
8275 | break; |
8276 | case 2: |
8277 | // "2*r+r" or "2*r+i" is not allowed. |
8278 | if (AM.HasBaseReg || AM.BaseOffs) |
8279 | return false; |
8280 | // Allow "2*r" as "r+r". |
8281 | break; |
8282 | default: |
8283 | return false; |
8284 | } |
8285 | |
8286 | return true; |
8287 | } |
8288 | |
8289 | bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const { |
8290 | return isInt<12>(x: Imm); |
8291 | } |
8292 | |
8293 | bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const { |
8294 | return isInt<12>(x: Imm); |
8295 | } |
8296 | |
8297 | bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { |
8298 | // Zexts are free if they can be combined with a load. |
8299 | // Don't advertise i32->i64 zextload as being free for LA64. It interacts |
8300 | // poorly with type legalization of compares preferring sext. |
8301 | if (auto *LD = dyn_cast<LoadSDNode>(Val)) { |
8302 | EVT MemVT = LD->getMemoryVT(); |
8303 | if ((MemVT == MVT::i8 || MemVT == MVT::i16) && |
8304 | (LD->getExtensionType() == ISD::NON_EXTLOAD || |
8305 | LD->getExtensionType() == ISD::ZEXTLOAD)) |
8306 | return true; |
8307 | } |
8308 | |
8309 | return TargetLowering::isZExtFree(Val, VT2); |
8310 | } |
8311 | |
8312 | bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, |
8313 | EVT DstVT) const { |
8314 | return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64; |
8315 | } |
8316 | |
8317 | bool LoongArchTargetLowering::signExtendConstant(const ConstantInt *CI) const { |
8318 | return Subtarget.is64Bit() && CI->getType()->isIntegerTy(Bitwidth: 32); |
8319 | } |
8320 | |
8321 | bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const { |
8322 | // TODO: Support vectors. |
8323 | if (Y.getValueType().isVector()) |
8324 | return false; |
8325 | |
8326 | return !isa<ConstantSDNode>(Val: Y); |
8327 | } |
8328 | |
8329 | ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const { |
8330 | // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension. |
8331 | return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND; |
8332 | } |
8333 | |
8334 | bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall( |
8335 | Type *Ty, bool IsSigned) const { |
8336 | if (Subtarget.is64Bit() && Ty->isIntegerTy(Bitwidth: 32)) |
8337 | return true; |
8338 | |
8339 | return IsSigned; |
8340 | } |
8341 | |
8342 | bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const { |
// Return false to suppress unnecessary extensions if the libcall arguments
// or return value is a float narrower than GRLen on a soft FP ABI.
8345 | if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() && |
8346 | Type.getSizeInBits() < Subtarget.getGRLen())) |
8347 | return false; |
8348 | return true; |
8349 | } |
8350 | |
// memcpy and other memory intrinsics typically try to use wider load/store
// operations when the source/dest is aligned and the copy size is large
// enough. We therefore want to align objects passed to memory intrinsics.
8354 | bool LoongArchTargetLowering::shouldAlignPointerArgs(CallInst *CI, |
8355 | unsigned &MinSize, |
8356 | Align &PrefAlign) const { |
8357 | if (!isa<MemIntrinsic>(Val: CI)) |
8358 | return false; |
8359 | |
8360 | if (Subtarget.is64Bit()) { |
8361 | MinSize = 8; |
8362 | PrefAlign = Align(8); |
8363 | } else { |
8364 | MinSize = 4; |
8365 | PrefAlign = Align(4); |
8366 | } |
8367 | |
8368 | return true; |
8369 | } |
8370 | |
8371 | TargetLoweringBase::LegalizeTypeAction |
8372 | LoongArchTargetLowering::getPreferredVectorAction(MVT VT) const { |
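// Favor widening over splitting/scalarizing for short non-i1 vectors;
// e.g. (illustrative) v3i32 is widened to the legal v4i32 under LSX.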
8373 | if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 && |
8374 | VT.getVectorElementType() != MVT::i1) |
8375 | return TypeWidenVector; |
8376 | |
8377 | return TargetLoweringBase::getPreferredVectorAction(VT); |
8378 | } |
8379 | |
8380 | bool LoongArchTargetLowering::splitValueIntoRegisterParts( |
8381 | SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, |
8382 | unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const { |
8383 | bool IsABIRegCopy = CC.has_value(); |
8384 | EVT ValueVT = Val.getValueType(); |
8385 | |
8386 | if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) && |
8387 | PartVT == MVT::f32) { |
// Cast the [b]f16 to i16, extend to i32, pad the high half with ones to
// make an f32 NaN, and cast to f32.
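// For example (illustrative): the f16 bit pattern 0x3C00 (1.0) becomes the
// f32 bit pattern 0xFFFF3C00, a NaN whose low 16 bits carry the original
// half value.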
8390 | Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::i16, Operand: Val); |
8391 | Val = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i32, Operand: Val); |
8392 | Val = DAG.getNode(Opcode: ISD::OR, DL, VT: MVT::i32, N1: Val, |
8393 | N2: DAG.getConstant(Val: 0xFFFF0000, DL, VT: MVT::i32)); |
8394 | Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::f32, Operand: Val); |
8395 | Parts[0] = Val; |
8396 | return true; |
8397 | } |
8398 | |
8399 | return false; |
8400 | } |
8401 | |
8402 | SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue( |
8403 | SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, |
8404 | MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const { |
8405 | bool IsABIRegCopy = CC.has_value(); |
8406 | |
8407 | if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) && |
8408 | PartVT == MVT::f32) { |
8409 | SDValue Val = Parts[0]; |
8410 | |
8411 | // Cast the f32 to i32, truncate to i16, and cast back to [b]f16. |
8412 | Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::i32, Operand: Val); |
8413 | Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i16, Operand: Val); |
8414 | Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ValueVT, Operand: Val); |
8415 | return Val; |
8416 | } |
8417 | |
8418 | return SDValue(); |
8419 | } |
8420 | |
8421 | MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, |
8422 | CallingConv::ID CC, |
8423 | EVT VT) const { |
8424 | // Use f32 to pass f16. |
8425 | if (VT == MVT::f16 && Subtarget.hasBasicF()) |
8426 | return MVT::f32; |
8427 | |
8428 | return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); |
8429 | } |
8430 | |
8431 | unsigned LoongArchTargetLowering::getNumRegistersForCallingConv( |
8432 | LLVMContext &Context, CallingConv::ID CC, EVT VT) const { |
8433 | // Use f32 to pass f16. |
8434 | if (VT == MVT::f16 && Subtarget.hasBasicF()) |
8435 | return 1; |
8436 | |
8437 | return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); |
8438 | } |
8439 | |
8440 | bool LoongArchTargetLowering::SimplifyDemandedBitsForTargetNode( |
8441 | SDValue Op, const APInt &OriginalDemandedBits, |
8442 | const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, |
8443 | unsigned Depth) const { |
8444 | EVT VT = Op.getValueType(); |
8445 | unsigned BitWidth = OriginalDemandedBits.getBitWidth(); |
8446 | unsigned Opc = Op.getOpcode(); |
8447 | switch (Opc) { |
8448 | default: |
8449 | break; |
8450 | case LoongArchISD::VMSKLTZ: |
8451 | case LoongArchISD::XVMSKLTZ: { |
8452 | SDValue Src = Op.getOperand(i: 0); |
8453 | MVT SrcVT = Src.getSimpleValueType(); |
8454 | unsigned SrcBits = SrcVT.getScalarSizeInBits(); |
8455 | unsigned NumElts = SrcVT.getVectorNumElements(); |
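// e.g. for v16i8 (illustrative), [X]VMSKLTZ produces one mask bit per
// element, so only the low NumElts bits of the result can be nonzero.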
8456 | |
// If we don't need the sign bits at all, just return zero.
8458 | if (OriginalDemandedBits.countr_zero() >= NumElts) |
8459 | return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: 0, DL: SDLoc(Op), VT)); |
8460 | |
8461 | // Only demand the vector elements of the sign bits we need. |
8462 | APInt KnownUndef, KnownZero; |
8463 | APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(width: NumElts); |
8464 | if (SimplifyDemandedVectorElts(Op: Src, DemandedEltMask: DemandedElts, KnownUndef, KnownZero, |
8465 | TLO, Depth: Depth + 1)) |
8466 | return true; |
8467 | |
8468 | Known.Zero = KnownZero.zext(width: BitWidth); |
8469 | Known.Zero.setHighBits(BitWidth - NumElts); |
8470 | |
8471 | // [X]VMSKLTZ only uses the MSB from each vector element. |
8472 | KnownBits KnownSrc; |
8473 | APInt DemandedSrcBits = APInt::getSignMask(BitWidth: SrcBits); |
8474 | if (SimplifyDemandedBits(Op: Src, DemandedBits: DemandedSrcBits, DemandedElts, Known&: KnownSrc, TLO, |
8475 | Depth: Depth + 1)) |
8476 | return true; |
8477 | |
8478 | if (KnownSrc.One[SrcBits - 1]) |
8479 | Known.One.setLowBits(NumElts); |
8480 | else if (KnownSrc.Zero[SrcBits - 1]) |
8481 | Known.Zero.setLowBits(NumElts); |
8482 | |
8483 | // Attempt to avoid multi-use ops if we don't need anything from it. |
8484 | if (SDValue NewSrc = SimplifyMultipleUseDemandedBits( |
8485 | Op: Src, DemandedBits: DemandedSrcBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1)) |
8486 | return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: SDLoc(Op), VT, Operand: NewSrc)); |
8487 | return false; |
8488 | } |
8489 | } |
8490 | |
8491 | return TargetLowering::SimplifyDemandedBitsForTargetNode( |
8492 | Op, DemandedBits: OriginalDemandedBits, DemandedElts: OriginalDemandedElts, Known, TLO, Depth); |
8493 | } |
8494 | |