//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that LoongArch uses to lower LLVM code into
// a selection DAG.
//
//===----------------------------------------------------------------------===//
13
14#include "LoongArchISelLowering.h"
15#include "LoongArch.h"
16#include "LoongArchMachineFunctionInfo.h"
17#include "LoongArchRegisterInfo.h"
18#include "LoongArchSelectionDAGInfo.h"
19#include "LoongArchSubtarget.h"
20#include "MCTargetDesc/LoongArchBaseInfo.h"
21#include "MCTargetDesc/LoongArchMCTargetDesc.h"
22#include "MCTargetDesc/LoongArchMatInt.h"
23#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/Statistic.h"
25#include "llvm/ADT/StringExtras.h"
26#include "llvm/CodeGen/ISDOpcodes.h"
27#include "llvm/CodeGen/MachineInstrBuilder.h"
28#include "llvm/CodeGen/RuntimeLibcallUtil.h"
29#include "llvm/CodeGen/SelectionDAGNodes.h"
30#include "llvm/IR/IRBuilder.h"
31#include "llvm/IR/IntrinsicInst.h"
32#include "llvm/IR/IntrinsicsLoongArch.h"
33#include "llvm/Support/CodeGen.h"
34#include "llvm/Support/Debug.h"
35#include "llvm/Support/ErrorHandling.h"
36#include "llvm/Support/KnownBits.h"
37#include "llvm/Support/MathExtras.h"
38#include <llvm/Analysis/VectorUtils.h>
39
40using namespace llvm;
41
42#define DEBUG_TYPE "loongarch-isel-lowering"
43
44STATISTIC(NumTailCalls, "Number of tail calls");
45
/// Value type of the -loongarch-materialize-float-imm option.
///
/// Each non-zero enumerator equals the instruction count it names; zero is
/// described by the option as "Use constant pool".
enum MaterializeFPImm {
  NoMaterializeFPImm = 0,
  MaterializeFPImm2Ins = 2,
  MaterializeFPImm3Ins = 3,
  MaterializeFPImm4Ins = 4,
  MaterializeFPImm5Ins = 5,
  MaterializeFPImm6Ins = 6
};
54
55static cl::opt<MaterializeFPImm> MaterializeFPImmInsNum(
56 "loongarch-materialize-float-imm", cl::Hidden,
57 cl::desc("Maximum number of instructions used (including code sequence "
58 "to generate the value and moving the value to FPR) when "
59 "materializing floating-point immediates (default = 3)"),
60 cl::init(Val: MaterializeFPImm3Ins),
61 cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"),
62 clEnumValN(MaterializeFPImm2Ins, "2",
63 "Materialize FP immediate within 2 instructions"),
64 clEnumValN(MaterializeFPImm3Ins, "3",
65 "Materialize FP immediate within 3 instructions"),
66 clEnumValN(MaterializeFPImm4Ins, "4",
67 "Materialize FP immediate within 4 instructions"),
68 clEnumValN(MaterializeFPImm5Ins, "5",
69 "Materialize FP immediate within 5 instructions"),
70 clEnumValN(MaterializeFPImm6Ins, "6",
71 "Materialize FP immediate within 6 instructions "
72 "(behaves same as 5 on loongarch64)")));
73
74static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
75 cl::desc("Trap on integer division by zero."),
76 cl::init(Val: false));
77
78LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
79 const LoongArchSubtarget &STI)
80 : TargetLowering(TM, STI), Subtarget(STI) {
81
82 MVT GRLenVT = Subtarget.getGRLenVT();
83
84 // Set up the register classes.
85
86 addRegisterClass(VT: GRLenVT, RC: &LoongArch::GPRRegClass);
87 if (Subtarget.hasBasicF())
88 addRegisterClass(VT: MVT::f32, RC: &LoongArch::FPR32RegClass);
89 if (Subtarget.hasBasicD())
90 addRegisterClass(VT: MVT::f64, RC: &LoongArch::FPR64RegClass);
91
92 static const MVT::SimpleValueType LSXVTs[] = {
93 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
94 static const MVT::SimpleValueType LASXVTs[] = {
95 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
96
97 if (Subtarget.hasExtLSX())
98 for (MVT VT : LSXVTs)
99 addRegisterClass(VT, RC: &LoongArch::LSX128RegClass);
100
101 if (Subtarget.hasExtLASX())
102 for (MVT VT : LASXVTs)
103 addRegisterClass(VT, RC: &LoongArch::LASX256RegClass);
104
105 // Set operations for LA32 and LA64.
106
107 setLoadExtAction(ExtTypes: {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, ValVT: GRLenVT,
108 MemVT: MVT::i1, Action: Promote);
109
110 setOperationAction(Op: ISD::SHL_PARTS, VT: GRLenVT, Action: Custom);
111 setOperationAction(Op: ISD::SRA_PARTS, VT: GRLenVT, Action: Custom);
112 setOperationAction(Op: ISD::SRL_PARTS, VT: GRLenVT, Action: Custom);
113 setOperationAction(Op: ISD::FP_TO_SINT, VT: GRLenVT, Action: Custom);
114 setOperationAction(Op: ISD::ROTL, VT: GRLenVT, Action: Expand);
115 setOperationAction(Op: ISD::CTPOP, VT: GRLenVT, Action: Expand);
116
117 setOperationAction(Ops: {ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
118 ISD::JumpTable, ISD::GlobalTLSAddress},
119 VT: GRLenVT, Action: Custom);
120
121 setOperationAction(Op: ISD::EH_DWARF_CFA, VT: GRLenVT, Action: Custom);
122
123 setOperationAction(Op: ISD::DYNAMIC_STACKALLOC, VT: GRLenVT, Action: Custom);
124 setOperationAction(Ops: {ISD::STACKSAVE, ISD::STACKRESTORE}, VT: MVT::Other, Action: Expand);
125 setOperationAction(Op: ISD::VASTART, VT: MVT::Other, Action: Custom);
126 setOperationAction(Ops: {ISD::VAARG, ISD::VACOPY, ISD::VAEND}, VT: MVT::Other, Action: Expand);
127
128 setOperationAction(Op: ISD::DEBUGTRAP, VT: MVT::Other, Action: Legal);
129 setOperationAction(Op: ISD::TRAP, VT: MVT::Other, Action: Legal);
130
131 setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::Other, Action: Custom);
132 setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::Other, Action: Custom);
133 setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::Other, Action: Custom);
134
135 setOperationAction(Op: ISD::PREFETCH, VT: MVT::Other, Action: Custom);
136
137 // BITREV/REVB requires the 32S feature.
138 if (STI.has32S()) {
139 // Expand bitreverse.i16 with native-width bitrev and shift for now, before
140 // we get to know which of sll and revb.2h is faster.
141 setOperationAction(Op: ISD::BITREVERSE, VT: MVT::i8, Action: Custom);
142 setOperationAction(Op: ISD::BITREVERSE, VT: GRLenVT, Action: Legal);
143
144 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
145 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
146 // and i32 could still be byte-swapped relatively cheaply.
147 setOperationAction(Op: ISD::BSWAP, VT: MVT::i16, Action: Custom);
148 } else {
149 setOperationAction(Op: ISD::BSWAP, VT: GRLenVT, Action: Expand);
150 setOperationAction(Op: ISD::CTTZ, VT: GRLenVT, Action: Expand);
151 setOperationAction(Op: ISD::CTLZ, VT: GRLenVT, Action: Expand);
152 setOperationAction(Op: ISD::ROTR, VT: GRLenVT, Action: Expand);
153 setOperationAction(Op: ISD::SELECT, VT: GRLenVT, Action: Custom);
154 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::i8, Action: Expand);
155 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::i16, Action: Expand);
156 }
157
158 setOperationAction(Op: ISD::BR_JT, VT: MVT::Other, Action: Expand);
159 setOperationAction(Op: ISD::BR_CC, VT: GRLenVT, Action: Expand);
160 setOperationAction(Op: ISD::BRCOND, VT: MVT::Other, Action: Custom);
161 setOperationAction(Op: ISD::SELECT_CC, VT: GRLenVT, Action: Expand);
162 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::i1, Action: Expand);
163 setOperationAction(Ops: {ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT: GRLenVT, Action: Expand);
164
165 setOperationAction(Op: ISD::FP_TO_UINT, VT: GRLenVT, Action: Custom);
166 setOperationAction(Op: ISD::UINT_TO_FP, VT: GRLenVT, Action: Expand);
167
168 // Set operations for LA64 only.
169
170 if (Subtarget.is64Bit()) {
171 setOperationAction(Op: ISD::ADD, VT: MVT::i32, Action: Custom);
172 setOperationAction(Op: ISD::SUB, VT: MVT::i32, Action: Custom);
173 setOperationAction(Op: ISD::SHL, VT: MVT::i32, Action: Custom);
174 setOperationAction(Op: ISD::SRA, VT: MVT::i32, Action: Custom);
175 setOperationAction(Op: ISD::SRL, VT: MVT::i32, Action: Custom);
176 setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::i32, Action: Custom);
177 setOperationAction(Op: ISD::BITCAST, VT: MVT::i32, Action: Custom);
178 setOperationAction(Op: ISD::ROTR, VT: MVT::i32, Action: Custom);
179 setOperationAction(Op: ISD::ROTL, VT: MVT::i32, Action: Custom);
180 setOperationAction(Op: ISD::CTTZ, VT: MVT::i32, Action: Custom);
181 setOperationAction(Op: ISD::CTLZ, VT: MVT::i32, Action: Custom);
182 setOperationAction(Op: ISD::EH_DWARF_CFA, VT: MVT::i32, Action: Custom);
183 setOperationAction(Op: ISD::READ_REGISTER, VT: MVT::i32, Action: Custom);
184 setOperationAction(Op: ISD::WRITE_REGISTER, VT: MVT::i32, Action: Custom);
185 setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::i32, Action: Custom);
186 setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::i32, Action: Custom);
187 setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::i32, Action: Custom);
188
189 setOperationAction(Op: ISD::BITREVERSE, VT: MVT::i32, Action: Custom);
190 setOperationAction(Op: ISD::BSWAP, VT: MVT::i32, Action: Custom);
191 setOperationAction(Ops: {ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, VT: MVT::i32,
192 Action: Custom);
193 setOperationAction(Op: ISD::LROUND, VT: MVT::i32, Action: Custom);
194 }
195
196 // Set operations for LA32 only.
197
198 if (!Subtarget.is64Bit()) {
199 setOperationAction(Op: ISD::READ_REGISTER, VT: MVT::i64, Action: Custom);
200 setOperationAction(Op: ISD::WRITE_REGISTER, VT: MVT::i64, Action: Custom);
201 setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::i64, Action: Custom);
202 setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::i64, Action: Custom);
203 setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::i64, Action: Custom);
204 if (Subtarget.hasBasicD())
205 setOperationAction(Op: ISD::BITCAST, VT: MVT::i64, Action: Custom);
206 }
207
208 setOperationAction(Op: ISD::ATOMIC_FENCE, VT: MVT::Other, Action: Custom);
209
210 static const ISD::CondCode FPCCToExpand[] = {
211 ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
212 ISD::SETGE, ISD::SETNE, ISD::SETGT};
213
214 // Set operations for 'F' feature.
215
216 if (Subtarget.hasBasicF()) {
217 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f32, MemVT: MVT::f16, Action: Expand);
218 setTruncStoreAction(ValVT: MVT::f32, MemVT: MVT::f16, Action: Expand);
219 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f32, MemVT: MVT::bf16, Action: Expand);
220 setTruncStoreAction(ValVT: MVT::f32, MemVT: MVT::bf16, Action: Expand);
221 setCondCodeAction(CCs: FPCCToExpand, VT: MVT::f32, Action: Expand);
222
223 setOperationAction(Op: ISD::ConstantFP, VT: MVT::f32, Action: Custom);
224 setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f32, Action: Expand);
225 setOperationAction(Op: ISD::BR_CC, VT: MVT::f32, Action: Expand);
226 setOperationAction(Op: ISD::FMA, VT: MVT::f32, Action: Legal);
227 setOperationAction(Op: ISD::FMINNUM_IEEE, VT: MVT::f32, Action: Legal);
228 setOperationAction(Op: ISD::FMINNUM, VT: MVT::f32, Action: Legal);
229 setOperationAction(Op: ISD::FMAXNUM_IEEE, VT: MVT::f32, Action: Legal);
230 setOperationAction(Op: ISD::FMAXNUM, VT: MVT::f32, Action: Legal);
231 setOperationAction(Op: ISD::FCANONICALIZE, VT: MVT::f32, Action: Legal);
232 setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::f32, Action: Legal);
233 setOperationAction(Op: ISD::STRICT_FSETCC, VT: MVT::f32, Action: Legal);
234 setOperationAction(Op: ISD::IS_FPCLASS, VT: MVT::f32, Action: Legal);
235 setOperationAction(Op: ISD::FSIN, VT: MVT::f32, Action: Expand);
236 setOperationAction(Op: ISD::FCOS, VT: MVT::f32, Action: Expand);
237 setOperationAction(Op: ISD::FSINCOS, VT: MVT::f32, Action: Expand);
238 setOperationAction(Op: ISD::FPOW, VT: MVT::f32, Action: Expand);
239 setOperationAction(Op: ISD::FREM, VT: MVT::f32, Action: LibCall);
240 setOperationAction(Op: ISD::FP16_TO_FP, VT: MVT::f32,
241 Action: Subtarget.isSoftFPABI() ? LibCall : Custom);
242 setOperationAction(Op: ISD::FP_TO_FP16, VT: MVT::f32,
243 Action: Subtarget.isSoftFPABI() ? LibCall : Custom);
244 setOperationAction(Op: ISD::BF16_TO_FP, VT: MVT::f32, Action: Custom);
245 setOperationAction(Op: ISD::FP_TO_BF16, VT: MVT::f32,
246 Action: Subtarget.isSoftFPABI() ? LibCall : Custom);
247
248 if (Subtarget.is64Bit())
249 setOperationAction(Op: ISD::FRINT, VT: MVT::f32, Action: Legal);
250
251 if (!Subtarget.hasBasicD()) {
252 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i32, Action: Custom);
253 if (Subtarget.is64Bit()) {
254 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i64, Action: Custom);
255 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i64, Action: Custom);
256 }
257 }
258 }
259
260 // Set operations for 'D' feature.
261
262 if (Subtarget.hasBasicD()) {
263 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f64, MemVT: MVT::f16, Action: Expand);
264 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f64, MemVT: MVT::f32, Action: Expand);
265 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f64, MemVT: MVT::bf16, Action: Expand);
266 setTruncStoreAction(ValVT: MVT::f64, MemVT: MVT::bf16, Action: Expand);
267 setTruncStoreAction(ValVT: MVT::f64, MemVT: MVT::f16, Action: Expand);
268 setTruncStoreAction(ValVT: MVT::f64, MemVT: MVT::f32, Action: Expand);
269 setCondCodeAction(CCs: FPCCToExpand, VT: MVT::f64, Action: Expand);
270
271 setOperationAction(Op: ISD::ConstantFP, VT: MVT::f64, Action: Custom);
272 setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f64, Action: Expand);
273 setOperationAction(Op: ISD::BR_CC, VT: MVT::f64, Action: Expand);
274 setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::f64, Action: Legal);
275 setOperationAction(Op: ISD::STRICT_FSETCC, VT: MVT::f64, Action: Legal);
276 setOperationAction(Op: ISD::FMA, VT: MVT::f64, Action: Legal);
277 setOperationAction(Op: ISD::FMINNUM_IEEE, VT: MVT::f64, Action: Legal);
278 setOperationAction(Op: ISD::FMINNUM, VT: MVT::f64, Action: Legal);
279 setOperationAction(Op: ISD::FMAXNUM_IEEE, VT: MVT::f64, Action: Legal);
280 setOperationAction(Op: ISD::FCANONICALIZE, VT: MVT::f64, Action: Legal);
281 setOperationAction(Op: ISD::FMAXNUM, VT: MVT::f64, Action: Legal);
282 setOperationAction(Op: ISD::IS_FPCLASS, VT: MVT::f64, Action: Legal);
283 setOperationAction(Op: ISD::FSIN, VT: MVT::f64, Action: Expand);
284 setOperationAction(Op: ISD::FCOS, VT: MVT::f64, Action: Expand);
285 setOperationAction(Op: ISD::FSINCOS, VT: MVT::f64, Action: Expand);
286 setOperationAction(Op: ISD::FPOW, VT: MVT::f64, Action: Expand);
287 setOperationAction(Op: ISD::FREM, VT: MVT::f64, Action: LibCall);
288 setOperationAction(Op: ISD::FP16_TO_FP, VT: MVT::f64, Action: Expand);
289 setOperationAction(Op: ISD::FP_TO_FP16, VT: MVT::f64,
290 Action: Subtarget.isSoftFPABI() ? LibCall : Custom);
291 setOperationAction(Op: ISD::BF16_TO_FP, VT: MVT::f64, Action: Custom);
292 setOperationAction(Op: ISD::FP_TO_BF16, VT: MVT::f64,
293 Action: Subtarget.isSoftFPABI() ? LibCall : Custom);
294
295 if (Subtarget.is64Bit())
296 setOperationAction(Op: ISD::FRINT, VT: MVT::f64, Action: Legal);
297 }
298
299 // Set operations for 'LSX' feature.
300
301 if (Subtarget.hasExtLSX()) {
302 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
303 // Expand all truncating stores and extending loads.
304 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
305 setTruncStoreAction(ValVT: VT, MemVT: InnerVT, Action: Expand);
306 setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand);
307 setLoadExtAction(ExtType: ISD::ZEXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand);
308 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand);
309 }
310 // By default everything must be expanded. Then we will selectively turn
311 // on ones that can be effectively codegen'd.
312 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
313 setOperationAction(Op, VT, Action: Expand);
314 }
315
316 for (MVT VT : LSXVTs) {
317 setOperationAction(Ops: {ISD::LOAD, ISD::STORE}, VT, Action: Legal);
318 setOperationAction(Op: ISD::BITCAST, VT, Action: Legal);
319 setOperationAction(Op: ISD::UNDEF, VT, Action: Legal);
320
321 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT, Action: Custom);
322 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT, Action: Legal);
323 setOperationAction(Op: ISD::BUILD_VECTOR, VT, Action: Custom);
324
325 setOperationAction(Op: ISD::SETCC, VT, Action: Legal);
326 setOperationAction(Op: ISD::VSELECT, VT, Action: Legal);
327 setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT, Action: Custom);
328 setOperationAction(Op: ISD::EXTRACT_SUBVECTOR, VT, Action: Legal);
329 }
330 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
331 setOperationAction(Ops: {ISD::ADD, ISD::SUB}, VT, Action: Legal);
332 setOperationAction(Ops: {ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
333 Action: Legal);
334 setOperationAction(Ops: {ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
335 VT, Action: Legal);
336 setOperationAction(Ops: {ISD::AND, ISD::OR, ISD::XOR}, VT, Action: Legal);
337 setOperationAction(Ops: {ISD::SHL, ISD::SRA, ISD::SRL}, VT, Action: Legal);
338 setOperationAction(Ops: {ISD::CTPOP, ISD::CTLZ}, VT, Action: Legal);
339 setOperationAction(Ops: {ISD::MULHS, ISD::MULHU}, VT, Action: Legal);
340 setCondCodeAction(
341 CCs: {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
342 Action: Expand);
343 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT, Action: Custom);
344 setOperationAction(Op: ISD::ABS, VT, Action: Legal);
345 setOperationAction(Op: ISD::ABDS, VT, Action: Legal);
346 setOperationAction(Op: ISD::ABDU, VT, Action: Legal);
347 setOperationAction(Op: ISD::SADDSAT, VT, Action: Legal);
348 setOperationAction(Op: ISD::SSUBSAT, VT, Action: Legal);
349 setOperationAction(Op: ISD::UADDSAT, VT, Action: Legal);
350 setOperationAction(Op: ISD::USUBSAT, VT, Action: Legal);
351 setOperationAction(Op: ISD::ROTL, VT, Action: Custom);
352 setOperationAction(Op: ISD::ROTR, VT, Action: Custom);
353 setOperationAction(Op: ISD::AVGFLOORS, VT, Action: Legal);
354 setOperationAction(Op: ISD::AVGFLOORU, VT, Action: Legal);
355 setOperationAction(Op: ISD::AVGCEILS, VT, Action: Legal);
356 setOperationAction(Op: ISD::AVGCEILU, VT, Action: Legal);
357 }
358 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
359 setOperationAction(Op: ISD::BITREVERSE, VT, Action: Custom);
360 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
361 setOperationAction(Op: ISD::BSWAP, VT, Action: Legal);
362 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
363 setOperationAction(Ops: {ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Action: Legal);
364 setOperationAction(Ops: {ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Action: Legal);
365 }
366 setOperationAction(Op: ISD::UINT_TO_FP, VT: GRLenVT, Action: Custom);
367 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
368 setOperationAction(Ops: {ISD::FADD, ISD::FSUB}, VT, Action: Legal);
369 setOperationAction(Ops: {ISD::FMUL, ISD::FDIV}, VT, Action: Legal);
370 setOperationAction(Op: ISD::FMA, VT, Action: Legal);
371 setOperationAction(Op: ISD::FSQRT, VT, Action: Legal);
372 setOperationAction(Op: ISD::FNEG, VT, Action: Legal);
373 setCondCodeAction(CCs: {ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
374 ISD::SETUGE, ISD::SETUGT},
375 VT, Action: Expand);
376 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT, Action: Legal);
377 setOperationAction(Op: ISD::FCEIL, VT, Action: Legal);
378 setOperationAction(Op: ISD::FFLOOR, VT, Action: Legal);
379 setOperationAction(Op: ISD::FTRUNC, VT, Action: Legal);
380 setOperationAction(Op: ISD::FROUNDEVEN, VT, Action: Legal);
381 setOperationAction(Op: ISD::FMINNUM, VT, Action: Legal);
382 setOperationAction(Op: ISD::FMAXNUM, VT, Action: Legal);
383 }
384 setOperationAction(Op: ISD::CTPOP, VT: GRLenVT, Action: Legal);
385 setOperationAction(Ops: ISD::FCEIL, VTs: {MVT::f32, MVT::f64}, Action: Legal);
386 setOperationAction(Ops: ISD::FFLOOR, VTs: {MVT::f32, MVT::f64}, Action: Legal);
387 setOperationAction(Ops: ISD::FTRUNC, VTs: {MVT::f32, MVT::f64}, Action: Legal);
388 setOperationAction(Ops: ISD::FROUNDEVEN, VTs: {MVT::f32, MVT::f64}, Action: Legal);
389
390 for (MVT VT :
391 {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
392 MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
393 setOperationAction(Op: ISD::TRUNCATE, VT, Action: Custom);
394 setOperationAction(Op: ISD::VECREDUCE_ADD, VT, Action: Custom);
395 setOperationAction(Op: ISD::VECREDUCE_AND, VT, Action: Custom);
396 setOperationAction(Op: ISD::VECREDUCE_OR, VT, Action: Custom);
397 setOperationAction(Op: ISD::VECREDUCE_XOR, VT, Action: Custom);
398 setOperationAction(Op: ISD::VECREDUCE_SMAX, VT, Action: Custom);
399 setOperationAction(Op: ISD::VECREDUCE_SMIN, VT, Action: Custom);
400 setOperationAction(Op: ISD::VECREDUCE_UMAX, VT, Action: Custom);
401 setOperationAction(Op: ISD::VECREDUCE_UMIN, VT, Action: Custom);
402 }
403 setOperationAction(Op: ISD::FP_ROUND, VT: MVT::v2f32, Action: Custom);
404 setOperationAction(Op: ISD::FP_EXTEND, VT: MVT::v2f32, Action: Custom);
405 // We want to legalize this to an f64 load rather than an i64 load.
406 setOperationAction(Op: ISD::LOAD, VT: MVT::v2f32, Action: Custom);
407 for (MVT VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16})
408 setOperationAction(Op: ISD::SIGN_EXTEND_VECTOR_INREG, VT, Action: Custom);
409 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v16i32, MVT::v8i64,
410 MVT::v16i64})
411 setOperationAction(Op: ISD::SIGN_EXTEND, VT, Action: Custom);
412 }
413
414 // Set operations for 'LASX' feature.
415
416 if (Subtarget.hasExtLASX()) {
417 for (MVT VT : LASXVTs) {
418 setOperationAction(Ops: {ISD::LOAD, ISD::STORE}, VT, Action: Legal);
419 setOperationAction(Op: ISD::BITCAST, VT, Action: Legal);
420 setOperationAction(Op: ISD::UNDEF, VT, Action: Legal);
421
422 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT, Action: Custom);
423 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT, Action: Custom);
424 setOperationAction(Op: ISD::BUILD_VECTOR, VT, Action: Custom);
425 setOperationAction(Op: ISD::CONCAT_VECTORS, VT, Action: Custom);
426 setOperationAction(Op: ISD::INSERT_SUBVECTOR, VT, Action: Legal);
427
428 setOperationAction(Op: ISD::SETCC, VT, Action: Custom);
429 setOperationAction(Op: ISD::VSELECT, VT, Action: Legal);
430 setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT, Action: Custom);
431 }
432 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
433 setOperationAction(Ops: {ISD::ADD, ISD::SUB}, VT, Action: Legal);
434 setOperationAction(Ops: {ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
435 Action: Legal);
436 setOperationAction(Ops: {ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
437 VT, Action: Legal);
438 setOperationAction(Ops: {ISD::AND, ISD::OR, ISD::XOR}, VT, Action: Legal);
439 setOperationAction(Ops: {ISD::SHL, ISD::SRA, ISD::SRL}, VT, Action: Legal);
440 setOperationAction(Ops: {ISD::CTPOP, ISD::CTLZ}, VT, Action: Legal);
441 setOperationAction(Ops: {ISD::MULHS, ISD::MULHU}, VT, Action: Legal);
442 setCondCodeAction(
443 CCs: {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
444 Action: Expand);
445 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT, Action: Custom);
446 setOperationAction(Op: ISD::ABS, VT, Action: Legal);
447 setOperationAction(Op: ISD::ABDS, VT, Action: Legal);
448 setOperationAction(Op: ISD::ABDU, VT, Action: Legal);
449 setOperationAction(Op: ISD::SADDSAT, VT, Action: Legal);
450 setOperationAction(Op: ISD::SSUBSAT, VT, Action: Legal);
451 setOperationAction(Op: ISD::UADDSAT, VT, Action: Legal);
452 setOperationAction(Op: ISD::USUBSAT, VT, Action: Legal);
453 setOperationAction(Op: ISD::VECREDUCE_ADD, VT, Action: Custom);
454 setOperationAction(Op: ISD::ROTL, VT, Action: Custom);
455 setOperationAction(Op: ISD::ROTR, VT, Action: Custom);
456 setOperationAction(Op: ISD::AVGFLOORS, VT, Action: Legal);
457 setOperationAction(Op: ISD::AVGFLOORU, VT, Action: Legal);
458 setOperationAction(Op: ISD::AVGCEILS, VT, Action: Legal);
459 setOperationAction(Op: ISD::AVGCEILU, VT, Action: Legal);
460 }
461 for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
462 setOperationAction(Op: ISD::BITREVERSE, VT, Action: Custom);
463 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
464 setOperationAction(Op: ISD::BSWAP, VT, Action: Legal);
465 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
466 setOperationAction(Ops: {ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Action: Legal);
467 setOperationAction(Ops: {ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Action: Legal);
468 }
469 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
470 setOperationAction(Ops: {ISD::FADD, ISD::FSUB}, VT, Action: Legal);
471 setOperationAction(Ops: {ISD::FMUL, ISD::FDIV}, VT, Action: Legal);
472 setOperationAction(Op: ISD::FMA, VT, Action: Legal);
473 setOperationAction(Op: ISD::FSQRT, VT, Action: Legal);
474 setOperationAction(Op: ISD::FNEG, VT, Action: Legal);
475 setCondCodeAction(CCs: {ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
476 ISD::SETUGE, ISD::SETUGT},
477 VT, Action: Expand);
478 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT, Action: Legal);
479 setOperationAction(Op: ISD::FCEIL, VT, Action: Legal);
480 setOperationAction(Op: ISD::FFLOOR, VT, Action: Legal);
481 setOperationAction(Op: ISD::FTRUNC, VT, Action: Legal);
482 setOperationAction(Op: ISD::FROUNDEVEN, VT, Action: Legal);
483 setOperationAction(Op: ISD::FMINNUM, VT, Action: Legal);
484 setOperationAction(Op: ISD::FMAXNUM, VT, Action: Legal);
485 }
486 setOperationAction(Op: ISD::FP_ROUND, VT: MVT::v4f32, Action: Custom);
487 setOperationAction(Op: ISD::FP_EXTEND, VT: MVT::v4f64, Action: Custom);
488 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16}) {
489 setOperationAction(Op: ISD::SIGN_EXTEND, VT, Action: Legal);
490 setOperationAction(Op: ISD::ZERO_EXTEND, VT, Action: Legal);
491 }
492 for (MVT VT :
493 {MVT::v2i64, MVT::v4i32, MVT::v4i64, MVT::v8i16, MVT::v8i32}) {
494 setOperationAction(Op: ISD::SIGN_EXTEND_VECTOR_INREG, VT, Action: Legal);
495 setOperationAction(Op: ISD::ZERO_EXTEND_VECTOR_INREG, VT, Action: Legal);
496 }
497 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
498 setOperationAction(Op: ISD::TRUNCATE, VT, Action: Legal);
499 }
500
501 // Set DAG combine for LA32 and LA64.
502 if (Subtarget.hasBasicF()) {
503 setTargetDAGCombine(ISD::SINT_TO_FP);
504 }
505
506 setTargetDAGCombine(ISD::AND);
507 setTargetDAGCombine(ISD::OR);
508 setTargetDAGCombine(ISD::SRL);
509 setTargetDAGCombine(ISD::SETCC);
510
511 // Set DAG combine for 'LSX' feature.
512
513 if (Subtarget.hasExtLSX()) {
514 setTargetDAGCombine(ISD::ADD);
515 setTargetDAGCombine(ISD::SUB);
516 setTargetDAGCombine(ISD::SHL);
517 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
518 setTargetDAGCombine(ISD::BITCAST);
519 setTargetDAGCombine(ISD::VSELECT);
520 setTargetDAGCombine(ISD::FP_TO_SINT);
521 setTargetDAGCombine(ISD::FP_TO_UINT);
522 setTargetDAGCombine(ISD::UINT_TO_FP);
523 }
524
525 // Set DAG combine for 'LASX' feature.
526 if (Subtarget.hasExtLASX()) {
527 setTargetDAGCombine(ISD::ANY_EXTEND);
528 setTargetDAGCombine(ISD::ZERO_EXTEND);
529 setTargetDAGCombine(ISD::SIGN_EXTEND);
530 setTargetDAGCombine(ISD::CONCAT_VECTORS);
531 }
532
533 // Compute derived properties from the register classes.
534 computeRegisterProperties(TRI: Subtarget.getRegisterInfo());
535
536 setStackPointerRegisterToSaveRestore(LoongArch::R3);
537
538 setBooleanContents(ZeroOrOneBooleanContent);
539 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
540
541 setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
542
543 setMinCmpXchgSizeInBits(32);
544
545 // Function alignments.
546 setMinFunctionAlignment(Align(4));
547 // Set preferred alignments.
548 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
549 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
550 setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
551
552 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
553 if (Subtarget.hasLAMCAS())
554 setMinCmpXchgSizeInBits(8);
555
556 if (Subtarget.hasSCQ()) {
557 setMaxAtomicSizeInBitsSupported(128);
558 setOperationAction(Op: ISD::ATOMIC_CMP_SWAP, VT: MVT::i128, Action: Custom);
559 }
560
561 // Disable strict node mutation.
562 IsStrictFPEnabled = true;
563}
564
565bool LoongArchTargetLowering::isOffsetFoldingLegal(
566 const GlobalAddressSDNode *GA) const {
567 // In order to maximise the opportunity for common subexpression elimination,
568 // keep a separate ADD node for the global address offset instead of folding
569 // it in the global address node. Later peephole optimisations may choose to
570 // fold it back in when profitable.
571 return false;
572}
573
574SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
575 SelectionDAG &DAG) const {
576 switch (Op.getOpcode()) {
577 case ISD::ATOMIC_FENCE:
578 return lowerATOMIC_FENCE(Op, DAG);
579 case ISD::EH_DWARF_CFA:
580 return lowerEH_DWARF_CFA(Op, DAG);
581 case ISD::GlobalAddress:
582 return lowerGlobalAddress(Op, DAG);
583 case ISD::GlobalTLSAddress:
584 return lowerGlobalTLSAddress(Op, DAG);
585 case ISD::INTRINSIC_WO_CHAIN:
586 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
587 case ISD::INTRINSIC_W_CHAIN:
588 return lowerINTRINSIC_W_CHAIN(Op, DAG);
589 case ISD::INTRINSIC_VOID:
590 return lowerINTRINSIC_VOID(Op, DAG);
591 case ISD::BlockAddress:
592 return lowerBlockAddress(Op, DAG);
593 case ISD::JumpTable:
594 return lowerJumpTable(Op, DAG);
595 case ISD::SHL_PARTS:
596 return lowerShiftLeftParts(Op, DAG);
597 case ISD::SRA_PARTS:
598 return lowerShiftRightParts(Op, DAG, IsSRA: true);
599 case ISD::SRL_PARTS:
600 return lowerShiftRightParts(Op, DAG, IsSRA: false);
601 case ISD::ConstantPool:
602 return lowerConstantPool(Op, DAG);
603 case ISD::FP_TO_SINT:
604 return lowerFP_TO_SINT(Op, DAG);
605 case ISD::FP_TO_UINT:
606 return lowerFP_TO_UINT(Op, DAG);
607 case ISD::BITCAST:
608 return lowerBITCAST(Op, DAG);
609 case ISD::UINT_TO_FP:
610 return lowerUINT_TO_FP(Op, DAG);
611 case ISD::SINT_TO_FP:
612 return lowerSINT_TO_FP(Op, DAG);
613 case ISD::VASTART:
614 return lowerVASTART(Op, DAG);
615 case ISD::FRAMEADDR:
616 return lowerFRAMEADDR(Op, DAG);
617 case ISD::RETURNADDR:
618 return lowerRETURNADDR(Op, DAG);
619 case ISD::WRITE_REGISTER:
620 return lowerWRITE_REGISTER(Op, DAG);
621 case ISD::INSERT_VECTOR_ELT:
622 return lowerINSERT_VECTOR_ELT(Op, DAG);
623 case ISD::EXTRACT_VECTOR_ELT:
624 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
625 case ISD::BUILD_VECTOR:
626 return lowerBUILD_VECTOR(Op, DAG);
627 case ISD::CONCAT_VECTORS:
628 return lowerCONCAT_VECTORS(Op, DAG);
629 case ISD::VECTOR_SHUFFLE:
630 return lowerVECTOR_SHUFFLE(Op, DAG);
631 case ISD::BITREVERSE:
632 return lowerBITREVERSE(Op, DAG);
633 case ISD::SCALAR_TO_VECTOR:
634 return lowerSCALAR_TO_VECTOR(Op, DAG);
635 case ISD::PREFETCH:
636 return lowerPREFETCH(Op, DAG);
637 case ISD::SELECT:
638 return lowerSELECT(Op, DAG);
639 case ISD::BRCOND:
640 return lowerBRCOND(Op, DAG);
641 case ISD::FP_TO_FP16:
642 return lowerFP_TO_FP16(Op, DAG);
643 case ISD::FP16_TO_FP:
644 return lowerFP16_TO_FP(Op, DAG);
645 case ISD::FP_TO_BF16:
646 return lowerFP_TO_BF16(Op, DAG);
647 case ISD::BF16_TO_FP:
648 return lowerBF16_TO_FP(Op, DAG);
649 case ISD::VECREDUCE_ADD:
650 return lowerVECREDUCE_ADD(Op, DAG);
651 case ISD::ROTL:
652 case ISD::ROTR:
653 return lowerRotate(Op, DAG);
654 case ISD::VECREDUCE_AND:
655 case ISD::VECREDUCE_OR:
656 case ISD::VECREDUCE_XOR:
657 case ISD::VECREDUCE_SMAX:
658 case ISD::VECREDUCE_SMIN:
659 case ISD::VECREDUCE_UMAX:
660 case ISD::VECREDUCE_UMIN:
661 return lowerVECREDUCE(Op, DAG);
662 case ISD::ConstantFP:
663 return lowerConstantFP(Op, DAG);
664 case ISD::SETCC:
665 return lowerSETCC(Op, DAG);
666 case ISD::FP_ROUND:
667 return lowerFP_ROUND(Op, DAG);
668 case ISD::FP_EXTEND:
669 return lowerFP_EXTEND(Op, DAG);
670 case ISD::SIGN_EXTEND_VECTOR_INREG:
671 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
672 case ISD::DYNAMIC_STACKALLOC:
673 return lowerDYNAMIC_STACKALLOC(Op, DAG);
674 }
675 return SDValue();
676}
677
678// Helper to attempt to return a cheaper, bit-inverted version of \p V.
679static SDValue isNOT(SDValue V, SelectionDAG &DAG) {
680 // TODO: don't always ignore oneuse constraints.
681 V = peekThroughBitcasts(V);
682 EVT VT = V.getValueType();
683
684 // Match not(xor X, -1) -> X.
685 if (V.getOpcode() == ISD::XOR &&
686 (ISD::isBuildVectorAllOnes(N: V.getOperand(i: 1).getNode()) ||
687 isAllOnesConstant(V: V.getOperand(i: 1))))
688 return V.getOperand(i: 0);
689
690 // Match not(extract_subvector(not(X)) -> extract_subvector(X).
691 if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
692 (isNullConstant(V: V.getOperand(i: 1)) || V.getOperand(i: 0).hasOneUse())) {
693 if (SDValue Not = isNOT(V: V.getOperand(i: 0), DAG)) {
694 Not = DAG.getBitcast(VT: V.getOperand(i: 0).getValueType(), V: Not);
695 return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL: SDLoc(Not), VT, N1: Not,
696 N2: V.getOperand(i: 1));
697 }
698 }
699
700 // Match not(SplatVector(not(X)) -> SplatVector(X).
701 if (V.getOpcode() == ISD::BUILD_VECTOR) {
702 if (SDValue SplatValue =
703 cast<BuildVectorSDNode>(Val: V.getNode())->getSplatValue()) {
704 if (!V->isOnlyUserOf(N: SplatValue.getNode()))
705 return SDValue();
706
707 if (SDValue Not = isNOT(V: SplatValue, DAG)) {
708 Not = DAG.getBitcast(VT: V.getOperand(i: 0).getValueType(), V: Not);
709 return DAG.getSplat(VT, DL: SDLoc(Not), Op: Not);
710 }
711 }
712 }
713
714 // Match not(or(not(X),not(Y))) -> and(X, Y).
715 if (V.getOpcode() == ISD::OR && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
716 V.getOperand(i: 0).hasOneUse() && V.getOperand(i: 1).hasOneUse()) {
717 // TODO: Handle cases with single NOT operand -> VANDN
718 if (SDValue Op1 = isNOT(V: V.getOperand(i: 1), DAG))
719 if (SDValue Op0 = isNOT(V: V.getOperand(i: 0), DAG))
720 return DAG.getNode(Opcode: ISD::AND, DL: SDLoc(V), VT, N1: DAG.getBitcast(VT, V: Op0),
721 N2: DAG.getBitcast(VT, V: Op1));
722 }
723
724 // TODO: Add more matching patterns. Such as,
725 // not(concat_vectors(not(X), not(Y))) -> concat_vectors(X, Y).
726 // not(slt(C, X)) -> slt(X - 1, C)
727 return SDValue();
728}
729
730// Combine two ISD::FP_ROUND / LoongArchISD::VFCVT nodes with same type to
731// LoongArchISD::VFCVT. For example:
732// x1 = fp_round x, 0
733// y1 = fp_round y, 0
734// z = concat_vectors x1, y1
735// Or
736// x1 = LoongArch::VFCVT undef, x
737// y1 = LoongArch::VFCVT undef, y
738// z = LoongArchISD::VPACKEV y1, x1; or LoongArchISD::VPERMI y1, x1, 68
739// can be combined to:
740// z = LoongArch::VFCVT y, x
741static SDValue combineFP_ROUND(SDValue N, const SDLoc &DL, SelectionDAG &DAG,
742 const LoongArchSubtarget &Subtarget) {
743 assert(((N->getOpcode() == ISD::CONCAT_VECTORS && N->getNumOperands() == 2) ||
744 (N->getOpcode() == LoongArchISD::VPACKEV) ||
745 (N->getOpcode() == LoongArchISD::VPERMI)) &&
746 "Invalid Node");
747
748 SDValue Op0 = peekThroughBitcasts(V: N->getOperand(Num: 0));
749 SDValue Op1 = peekThroughBitcasts(V: N->getOperand(Num: 1));
750 unsigned Opcode0 = Op0.getOpcode();
751 unsigned Opcode1 = Op1.getOpcode();
752 if (Opcode0 != Opcode1)
753 return SDValue();
754
755 if (Opcode0 != ISD::FP_ROUND && Opcode0 != LoongArchISD::VFCVT)
756 return SDValue();
757
758 // Check if two nodes have only one use.
759 if (!Op0.hasOneUse() || !Op1.hasOneUse())
760 return SDValue();
761
762 EVT VT = N.getValueType();
763 EVT SVT0 = Op0.getValueType();
764 EVT SVT1 = Op1.getValueType();
765 // Check if two nodes have the same result type.
766 if (SVT0 != SVT1)
767 return SDValue();
768
769 // Check if two nodes have the same operand type.
770 EVT SSVT0 = Op0.getOperand(i: 0).getValueType();
771 EVT SSVT1 = Op1.getOperand(i: 0).getValueType();
772 if (SSVT0 != SSVT1)
773 return SDValue();
774
775 if (N->getOpcode() == ISD::CONCAT_VECTORS && Opcode0 == ISD::FP_ROUND) {
776 if (Subtarget.hasExtLASX() && VT.is256BitVector() && SVT0 == MVT::v4f32 &&
777 SSVT0 == MVT::v4f64) {
778 // A vector_shuffle is required in the final step, as xvfcvt instruction
779 // operates on each 128-bit segament as a lane.
780 SDValue Res = DAG.getNode(Opcode: LoongArchISD::VFCVT, DL, VT: MVT::v8f32,
781 N1: Op1.getOperand(i: 0), N2: Op0.getOperand(i: 0));
782 SDValue Undef = DAG.getUNDEF(VT: Res.getValueType());
783 // After VFCVT, the high part of Res comes from the high parts of Op0 and
784 // Op1, and the low part comes from the low parts of Op0 and Op1. However,
785 // the desired order requires Op0 to fully occupy the lower half and Op1
786 // the upper half of Res. The Mask reorders the elements of Res to achieve
787 // this:
788 // - The first four elements (0, 1, 4, 5) come from Op0.
789 // - The next four elements (2, 3, 6, 7) come from Op1.
790 SmallVector<int, 8> Mask = {0, 1, 4, 5, 2, 3, 6, 7};
791 Res = DAG.getVectorShuffle(VT: Res.getValueType(), dl: DL, N1: Res, N2: Undef, Mask);
792 return DAG.getBitcast(VT, V: Res);
793 }
794 }
795
796 if ((N->getOpcode() == LoongArchISD::VPACKEV ||
797 N->getOpcode() == LoongArchISD::VPERMI) &&
798 Opcode0 == LoongArchISD::VFCVT) {
799 // For VPACKEV or VPERMI, check if the first operation of VFCVT is undef.
800 if (!Op0.getOperand(i: 0).isUndef() || !Op1.getOperand(i: 0).isUndef())
801 return SDValue();
802
803 if (!Subtarget.hasExtLSX() || SVT0 != MVT::v4f32 || SSVT0 != MVT::v2f64)
804 return SDValue();
805
806 if (N->getOpcode() == LoongArchISD::VPACKEV &&
807 (VT == MVT::v2i64 || VT == MVT::v2f64)) {
808 SDValue Res = DAG.getNode(Opcode: LoongArchISD::VFCVT, DL, VT: MVT::v4f32,
809 N1: Op0.getOperand(i: 1), N2: Op1.getOperand(i: 1));
810 return DAG.getBitcast(VT, V: Res);
811 }
812
813 if (N->getOpcode() == LoongArchISD::VPERMI && VT == MVT::v4f32) {
814 int64_t Imm = cast<ConstantSDNode>(Val: N->getOperand(Num: 2))->getSExtValue();
815 if (Imm != 68)
816 return SDValue();
817 return DAG.getNode(Opcode: LoongArchISD::VFCVT, DL, VT: MVT::v4f32, N1: Op0.getOperand(i: 1),
818 N2: Op1.getOperand(i: 1));
819 }
820 }
821
822 return SDValue();
823}
824
825SDValue LoongArchTargetLowering::lowerFP_ROUND(SDValue Op,
826 SelectionDAG &DAG) const {
827 SDLoc DL(Op);
828 SDValue In = Op.getOperand(i: 0);
829 MVT VT = Op.getSimpleValueType();
830 MVT SVT = In.getSimpleValueType();
831
832 if (VT == MVT::v4f32 && SVT == MVT::v4f64) {
833 SDValue Lo, Hi;
834 std::tie(args&: Lo, args&: Hi) = DAG.SplitVector(N: In, DL);
835 return DAG.getNode(Opcode: LoongArchISD::VFCVT, DL, VT, N1: Hi, N2: Lo);
836 }
837
838 return SDValue();
839}
840
841SDValue LoongArchTargetLowering::lowerFP_EXTEND(SDValue Op,
842 SelectionDAG &DAG) const {
843
844 SDLoc DL(Op);
845 EVT VT = Op.getValueType();
846 SDValue Src = Op->getOperand(Num: 0);
847 EVT SVT = Src.getValueType();
848
849 bool V2F32ToV2F64 =
850 VT == MVT::v2f64 && SVT == MVT::v2f32 && Subtarget.hasExtLSX();
851 bool V4F32ToV4F64 =
852 VT == MVT::v4f64 && SVT == MVT::v4f32 && Subtarget.hasExtLASX();
853 if (!V2F32ToV2F64 && !V4F32ToV4F64)
854 return SDValue();
855
856 // Check if Op is the high part of vector.
857 auto CheckVecHighPart = [](SDValue Op) {
858 Op = peekThroughBitcasts(V: Op);
859 if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
860 SDValue SOp = Op.getOperand(i: 0);
861 EVT SVT = SOp.getValueType();
862 if (!SVT.isVector() || (SVT.getVectorNumElements() % 2 != 0))
863 return SDValue();
864
865 const uint64_t Imm = Op.getConstantOperandVal(i: 1);
866 if (Imm == SVT.getVectorNumElements() / 2)
867 return SOp;
868 return SDValue();
869 }
870 return SDValue();
871 };
872
873 unsigned Opcode;
874 SDValue VFCVTOp;
875 EVT WideOpVT = SVT.getSimpleVT().getDoubleNumVectorElementsVT();
876 SDValue ZeroIdx = DAG.getVectorIdxConstant(Val: 0, DL);
877
878 // If the operand of ISD::FP_EXTEND comes from the high part of vector,
879 // generate LoongArchISD::VFCVTH, otherwise LoongArchISD::VFCVTL.
880 if (SDValue V = CheckVecHighPart(Src)) {
881 assert(V.getValueSizeInBits() == WideOpVT.getSizeInBits() &&
882 "Unexpected wide vector");
883 Opcode = LoongArchISD::VFCVTH;
884 VFCVTOp = DAG.getBitcast(VT: WideOpVT, V);
885 } else {
886 Opcode = LoongArchISD::VFCVTL;
887 VFCVTOp = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: WideOpVT,
888 N1: DAG.getUNDEF(VT: WideOpVT), N2: Src, N3: ZeroIdx);
889 }
890
891 // v2f64 = fp_extend v2f32
892 if (V2F32ToV2F64)
893 return DAG.getNode(Opcode, DL, VT, Operand: VFCVTOp);
894
895 // v4f64 = fp_extend v4f32
896 if (V4F32ToV4F64) {
897 // XVFCVT instruction operates on each 128-bit segment as a lane, so a
898 // vector_shuffle is required firstly.
899 SmallVector<int, 8> Mask = {0, 1, 4, 5, 2, 3, 6, 7};
900 SDValue Res = DAG.getVectorShuffle(VT: WideOpVT, dl: DL, N1: VFCVTOp,
901 N2: DAG.getUNDEF(VT: WideOpVT), Mask);
902 Res = DAG.getNode(Opcode, DL, VT, Operand: Res);
903 return Res;
904 }
905
906 return SDValue();
907}
908
909SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op,
910 SelectionDAG &DAG) const {
911 EVT VT = Op.getValueType();
912 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Val&: Op);
913 const APFloat &FPVal = CFP->getValueAPF();
914 SDLoc DL(CFP);
915
916 assert((VT == MVT::f32 && Subtarget.hasBasicF()) ||
917 (VT == MVT::f64 && Subtarget.hasBasicD()));
918
919 // If value is 0.0 or -0.0, just ignore it.
920 if (FPVal.isZero())
921 return SDValue();
922
923 // If lsx enabled, use cheaper 'vldi' instruction if possible.
924 if (isFPImmVLDILegal(Imm: FPVal, VT))
925 return SDValue();
926
927 // Construct as integer, and move to float register.
928 APInt INTVal = FPVal.bitcastToAPInt();
929
930 // If more than MaterializeFPImmInsNum instructions will be used to
931 // generate the INTVal and move it to float register, fallback to
932 // use floating point load from the constant pool.
933 auto Seq = LoongArchMatInt::generateInstSeq(Val: INTVal.getSExtValue());
934 int InsNum = Seq.size() + ((VT == MVT::f64 && !Subtarget.is64Bit()) ? 2 : 1);
935 if (InsNum > MaterializeFPImmInsNum && !FPVal.isOne())
936 return SDValue();
937
938 switch (VT.getSimpleVT().SimpleTy) {
939 default:
940 llvm_unreachable("Unexpected floating point type!");
941 break;
942 case MVT::f32: {
943 SDValue NewVal = DAG.getConstant(Val: INTVal, DL, VT: MVT::i32);
944 if (Subtarget.is64Bit())
945 NewVal = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: NewVal);
946 return DAG.getNode(Opcode: Subtarget.is64Bit() ? LoongArchISD::MOVGR2FR_W_LA64
947 : LoongArchISD::MOVGR2FR_W,
948 DL, VT, Operand: NewVal);
949 }
950 case MVT::f64: {
951 if (Subtarget.is64Bit()) {
952 SDValue NewVal = DAG.getConstant(Val: INTVal, DL, VT: MVT::i64);
953 return DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_D, DL, VT, Operand: NewVal);
954 }
955 SDValue Lo = DAG.getConstant(Val: INTVal.trunc(width: 32), DL, VT: MVT::i32);
956 SDValue Hi = DAG.getConstant(Val: INTVal.lshr(shiftAmt: 32).trunc(width: 32), DL, VT: MVT::i32);
957 return DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_D_LO_HI, DL, VT, N1: Lo, N2: Hi);
958 }
959 }
960
961 return SDValue();
962}
963
964// Ensure SETCC result and operand have the same bit width; isel does not
965// support mismatched widths.
966SDValue LoongArchTargetLowering::lowerSETCC(SDValue Op,
967 SelectionDAG &DAG) const {
968 SDLoc DL(Op);
969 EVT ResultVT = Op.getValueType();
970 EVT OperandVT = Op.getOperand(i: 0).getValueType();
971
972 EVT SetCCResultVT =
973 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: OperandVT);
974
975 if (ResultVT == SetCCResultVT)
976 return Op;
977
978 assert(Op.getOperand(0).getValueType() == Op.getOperand(1).getValueType() &&
979 "SETCC operands must have the same type!");
980
981 SDValue SetCCNode =
982 DAG.getNode(Opcode: ISD::SETCC, DL, VT: SetCCResultVT, N1: Op.getOperand(i: 0),
983 N2: Op.getOperand(i: 1), N3: Op.getOperand(i: 2));
984
985 if (ResultVT.bitsGT(VT: SetCCResultVT))
986 SetCCNode = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: ResultVT, Operand: SetCCNode);
987 else if (ResultVT.bitsLT(VT: SetCCResultVT))
988 SetCCNode = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: ResultVT, Operand: SetCCNode);
989
990 return SetCCNode;
991}
992
993// Lower sext_invec using vslti instructions.
994// For example:
995// %b = sext <4 x i16> %a to <4 x i32>
996// can be lowered to:
997// VSLTI_H vr2, vr1, 0
998// VILVL.H vr1, vr2, vr1
999SDValue LoongArchTargetLowering::lowerSIGN_EXTEND_VECTOR_INREG(
1000 SDValue Op, SelectionDAG &DAG) const {
1001 SDLoc DL(Op);
1002 SDValue Src = Op.getOperand(i: 0);
1003 MVT SrcVT = Src.getSimpleValueType();
1004 MVT DstVT = Op.getSimpleValueType();
1005
1006 if (!SrcVT.is128BitVector())
1007 return SDValue();
1008
1009 // lower to VSLTI + VILVL if extend could be done in single step.
1010 if (DstVT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits() == 2) {
1011 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: SrcVT);
1012 SDValue Mask = DAG.getNode(Opcode: ISD::SETCC, DL, VT: SrcVT, N1: Src, N2: Zero,
1013 N3: DAG.getCondCode(Cond: ISD::SETLT));
1014 SDValue LoInterleaved =
1015 DAG.getNode(Opcode: LoongArchISD::VILVL, DL, VT: SrcVT, N1: Mask, N2: Src);
1016
1017 return DAG.getBitcast(VT: DstVT, V: LoInterleaved);
1018 }
1019
1020 return SDValue();
1021}
1022
1023// Lower vecreduce_add using vhaddw instructions.
1024// For Example:
1025// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
1026// can be lowered to:
1027// VHADDW_D_W vr0, vr0, vr0
1028// VHADDW_Q_D vr0, vr0, vr0
1029// VPICKVE2GR_D a0, vr0, 0
1030// ADDI_W a0, a0, 0
1031SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
1032 SelectionDAG &DAG) const {
1033
1034 SDLoc DL(Op);
1035 MVT OpVT = Op.getSimpleValueType();
1036 SDValue Val = Op.getOperand(i: 0);
1037
1038 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
1039 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
1040 unsigned ResBits = OpVT.getScalarSizeInBits();
1041
1042 unsigned LegalVecSize = 128;
1043 bool isLASX256Vector =
1044 Subtarget.hasExtLASX() && Val.getValueSizeInBits() == 256;
1045
1046 // Ensure operand type legal or enable it legal.
1047 while (!isTypeLegal(VT: Val.getSimpleValueType())) {
1048 Val = DAG.WidenVector(N: Val, DL);
1049 }
1050
1051 // NumEles is designed for iterations count, v4i32 for LSX
1052 // and v8i32 for LASX should have the same count.
1053 if (isLASX256Vector) {
1054 NumEles /= 2;
1055 LegalVecSize = 256;
1056 }
1057
1058 EleBits *= 2;
1059 for (unsigned i = 1; i < NumEles; i *= 2, EleBits *= 2) {
1060 EleBits = std::min(a: EleBits, b: 64u);
1061 MVT IntTy = MVT::getIntegerVT(BitWidth: EleBits);
1062 MVT VecTy = MVT::getVectorVT(VT: IntTy, NumElements: LegalVecSize / EleBits);
1063 Val = DAG.getNode(Opcode: LoongArchISD::VHADDW, DL, VT: VecTy, N1: Val, N2: Val);
1064 }
1065
1066 if (isLASX256Vector) {
1067 SDValue Tmp = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: Val,
1068 N2: DAG.getConstant(Val: 2, DL, VT: Subtarget.getGRLenVT()));
1069 Val = DAG.getNode(Opcode: ISD::ADD, DL, VT: MVT::v4i64, N1: Tmp, N2: Val);
1070 }
1071
1072 Val = DAG.getBitcast(VT: MVT::getVectorVT(VT: OpVT, NumElements: LegalVecSize / ResBits), V: Val);
1073 return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: OpVT, N1: Val,
1074 N2: DAG.getConstant(Val: 0, DL, VT: Subtarget.getGRLenVT()));
1075}
1076
1077// Lower vecreduce_and/or/xor/[s/u]max/[s/u]min.
1078// For Example:
1079// call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
1080// can be lowered to:
1081// VBSRL_V vr1, vr0, 8
1082// VMAX_W vr0, vr1, vr0
1083// VBSRL_V vr1, vr0, 4
1084// VMAX_W vr0, vr1, vr0
1085// VPICKVE2GR_W a0, vr0, 0
1086// For 256 bit vector, it is illegal and will be spilt into
1087// two 128 bit vector by default then processed by this.
1088SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
1089 SelectionDAG &DAG) const {
1090 SDLoc DL(Op);
1091
1092 MVT OpVT = Op.getSimpleValueType();
1093 SDValue Val = Op.getOperand(i: 0);
1094
1095 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
1096 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
1097
1098 // Ensure operand type legal or enable it legal.
1099 while (!isTypeLegal(VT: Val.getSimpleValueType())) {
1100 Val = DAG.WidenVector(N: Val, DL);
1101 }
1102
1103 unsigned Opcode = ISD::getVecReduceBaseOpcode(VecReduceOpcode: Op.getOpcode());
1104 MVT VecTy = Val.getSimpleValueType();
1105 MVT GRLenVT = Subtarget.getGRLenVT();
1106
1107 for (int i = NumEles; i > 1; i /= 2) {
1108 SDValue ShiftAmt = DAG.getConstant(Val: i * EleBits / 16, DL, VT: GRLenVT);
1109 SDValue Tmp = DAG.getNode(Opcode: LoongArchISD::VBSRL, DL, VT: VecTy, N1: Val, N2: ShiftAmt);
1110 Val = DAG.getNode(Opcode, DL, VT: VecTy, N1: Tmp, N2: Val);
1111 }
1112
1113 return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: OpVT, N1: Val,
1114 N2: DAG.getConstant(Val: 0, DL, VT: GRLenVT));
1115}
1116
1117SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
1118 SelectionDAG &DAG) const {
1119 unsigned IsData = Op.getConstantOperandVal(i: 4);
1120
1121 // We don't support non-data prefetch.
1122 // Just preserve the chain.
1123 if (!IsData)
1124 return Op.getOperand(i: 0);
1125
1126 return Op;
1127}
1128
1129SDValue LoongArchTargetLowering::lowerRotate(SDValue Op,
1130 SelectionDAG &DAG) const {
1131 MVT VT = Op.getSimpleValueType();
1132 assert(VT.isVector() && "Unexpected type");
1133
1134 SDLoc DL(Op);
1135 SDValue R = Op.getOperand(i: 0);
1136 SDValue Amt = Op.getOperand(i: 1);
1137 unsigned Opcode = Op.getOpcode();
1138 unsigned EltSizeInBits = VT.getScalarSizeInBits();
1139
1140 auto checkCstSplat = [](SDValue V, APInt &CstSplatValue) {
1141 if (V.getOpcode() != ISD::BUILD_VECTOR)
1142 return false;
1143 if (SDValue SplatValue =
1144 cast<BuildVectorSDNode>(Val: V.getNode())->getSplatValue()) {
1145 if (auto *C = dyn_cast<ConstantSDNode>(Val&: SplatValue)) {
1146 CstSplatValue = C->getAPIntValue();
1147 return true;
1148 }
1149 }
1150 return false;
1151 };
1152
1153 // Check for constant splat rotation amount.
1154 APInt CstSplatValue;
1155 bool IsCstSplat = checkCstSplat(Amt, CstSplatValue);
1156 bool isROTL = Opcode == ISD::ROTL;
1157
1158 // Check for splat rotate by zero.
1159 if (IsCstSplat && CstSplatValue.urem(RHS: EltSizeInBits) == 0)
1160 return R;
1161
1162 // LoongArch targets always prefer ISD::ROTR.
1163 if (isROTL) {
1164 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
1165 return DAG.getNode(Opcode: ISD::ROTR, DL, VT, N1: R,
1166 N2: DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Zero, N2: Amt));
1167 }
1168
1169 // Rotate by a immediate.
1170 if (IsCstSplat) {
1171 // ISD::ROTR: Attemp to rotate by a positive immediate.
1172 SDValue Bits = DAG.getConstant(Val: EltSizeInBits, DL, VT);
1173 if (SDValue Urem =
1174 DAG.FoldConstantArithmetic(Opcode: ISD::UREM, DL, VT, Ops: {Amt, Bits}))
1175 return DAG.getNode(Opcode, DL, VT, N1: R, N2: Urem);
1176 }
1177
1178 return Op;
1179}
1180
1181// Return true if Val is equal to (setcc LHS, RHS, CC).
1182// Return false if Val is the inverse of (setcc LHS, RHS, CC).
1183// Otherwise, return std::nullopt.
1184static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
1185 ISD::CondCode CC, SDValue Val) {
1186 assert(Val->getOpcode() == ISD::SETCC);
1187 SDValue LHS2 = Val.getOperand(i: 0);
1188 SDValue RHS2 = Val.getOperand(i: 1);
1189 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val: Val.getOperand(i: 2))->get();
1190
1191 if (LHS == LHS2 && RHS == RHS2) {
1192 if (CC == CC2)
1193 return true;
1194 if (CC == ISD::getSetCCInverse(Operation: CC2, Type: LHS2.getValueType()))
1195 return false;
1196 } else if (LHS == RHS2 && RHS == LHS2) {
1197 CC2 = ISD::getSetCCSwappedOperands(Operation: CC2);
1198 if (CC == CC2)
1199 return true;
1200 if (CC == ISD::getSetCCInverse(Operation: CC2, Type: LHS2.getValueType()))
1201 return false;
1202 }
1203
1204 return std::nullopt;
1205}
1206
1207static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
1208 const LoongArchSubtarget &Subtarget) {
1209 SDValue CondV = N->getOperand(Num: 0);
1210 SDValue TrueV = N->getOperand(Num: 1);
1211 SDValue FalseV = N->getOperand(Num: 2);
1212 MVT VT = N->getSimpleValueType(ResNo: 0);
1213 SDLoc DL(N);
1214
1215 // (select c, -1, y) -> -c | y
1216 if (isAllOnesConstant(V: TrueV)) {
1217 SDValue Neg = DAG.getNegative(Val: CondV, DL, VT);
1218 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Neg, N2: DAG.getFreeze(V: FalseV));
1219 }
1220 // (select c, y, -1) -> (c-1) | y
1221 if (isAllOnesConstant(V: FalseV)) {
1222 SDValue Neg =
1223 DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: CondV, N2: DAG.getAllOnesConstant(DL, VT));
1224 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Neg, N2: DAG.getFreeze(V: TrueV));
1225 }
1226
1227 // (select c, 0, y) -> (c-1) & y
1228 if (isNullConstant(V: TrueV)) {
1229 SDValue Neg =
1230 DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: CondV, N2: DAG.getAllOnesConstant(DL, VT));
1231 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Neg, N2: DAG.getFreeze(V: FalseV));
1232 }
1233 // (select c, y, 0) -> -c & y
1234 if (isNullConstant(V: FalseV)) {
1235 SDValue Neg = DAG.getNegative(Val: CondV, DL, VT);
1236 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Neg, N2: DAG.getFreeze(V: TrueV));
1237 }
1238
1239 // select c, ~x, x --> xor -c, x
1240 if (isa<ConstantSDNode>(Val: TrueV) && isa<ConstantSDNode>(Val: FalseV)) {
1241 const APInt &TrueVal = TrueV->getAsAPIntVal();
1242 const APInt &FalseVal = FalseV->getAsAPIntVal();
1243 if (~TrueVal == FalseVal) {
1244 SDValue Neg = DAG.getNegative(Val: CondV, DL, VT);
1245 return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Neg, N2: FalseV);
1246 }
1247 }
1248
1249 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
1250 // when both truev and falsev are also setcc.
1251 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
1252 FalseV.getOpcode() == ISD::SETCC) {
1253 SDValue LHS = CondV.getOperand(i: 0);
1254 SDValue RHS = CondV.getOperand(i: 1);
1255 ISD::CondCode CC = cast<CondCodeSDNode>(Val: CondV.getOperand(i: 2))->get();
1256
1257 // (select x, x, y) -> x | y
1258 // (select !x, x, y) -> x & y
1259 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, Val: TrueV)) {
1260 return DAG.getNode(Opcode: *MatchResult ? ISD::OR : ISD::AND, DL, VT, N1: TrueV,
1261 N2: DAG.getFreeze(V: FalseV));
1262 }
1263 // (select x, y, x) -> x & y
1264 // (select !x, y, x) -> x | y
1265 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, Val: FalseV)) {
1266 return DAG.getNode(Opcode: *MatchResult ? ISD::AND : ISD::OR, DL, VT,
1267 N1: DAG.getFreeze(V: TrueV), N2: FalseV);
1268 }
1269 }
1270
1271 return SDValue();
1272}
1273
1274// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
1275// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
1276// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
1277// being `0` or `-1`. In such cases we can replace `select` with `and`.
1278// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
1279// than `c0`?
1280static SDValue
1281foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
1282 const LoongArchSubtarget &Subtarget) {
1283 unsigned SelOpNo = 0;
1284 SDValue Sel = BO->getOperand(Num: 0);
1285 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
1286 SelOpNo = 1;
1287 Sel = BO->getOperand(Num: 1);
1288 }
1289
1290 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1291 return SDValue();
1292
1293 unsigned ConstSelOpNo = 1;
1294 unsigned OtherSelOpNo = 2;
1295 if (!isa<ConstantSDNode>(Val: Sel->getOperand(Num: ConstSelOpNo))) {
1296 ConstSelOpNo = 2;
1297 OtherSelOpNo = 1;
1298 }
1299 SDValue ConstSelOp = Sel->getOperand(Num: ConstSelOpNo);
1300 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(Val&: ConstSelOp);
1301 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
1302 return SDValue();
1303
1304 SDValue ConstBinOp = BO->getOperand(Num: SelOpNo ^ 1);
1305 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(Val&: ConstBinOp);
1306 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
1307 return SDValue();
1308
1309 SDLoc DL(Sel);
1310 EVT VT = BO->getValueType(ResNo: 0);
1311
1312 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
1313 if (SelOpNo == 1)
1314 std::swap(a&: NewConstOps[0], b&: NewConstOps[1]);
1315
1316 SDValue NewConstOp =
1317 DAG.FoldConstantArithmetic(Opcode: BO->getOpcode(), DL, VT, Ops: NewConstOps);
1318 if (!NewConstOp)
1319 return SDValue();
1320
1321 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
1322 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
1323 return SDValue();
1324
1325 SDValue OtherSelOp = Sel->getOperand(Num: OtherSelOpNo);
1326 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
1327 if (SelOpNo == 1)
1328 std::swap(a&: NewNonConstOps[0], b&: NewNonConstOps[1]);
1329 SDValue NewNonConstOp = DAG.getNode(Opcode: BO->getOpcode(), DL, VT, Ops: NewNonConstOps);
1330
1331 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
1332 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
1333 return DAG.getSelect(DL, VT, Cond: Sel.getOperand(i: 0), LHS: NewT, RHS: NewF);
1334}
1335
1336// Changes the condition code and swaps operands if necessary, so the SetCC
1337// operation matches one of the comparisons supported directly by branches
1338// in the LoongArch ISA. May adjust compares to favor compare with 0 over
1339// compare with 1/-1.
1340static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
1341 ISD::CondCode &CC, SelectionDAG &DAG) {
1342 // If this is a single bit test that can't be handled by ANDI, shift the
1343 // bit to be tested to the MSB and perform a signed compare with 0.
1344 if (isIntEqualitySetCC(Code: CC) && isNullConstant(V: RHS) &&
1345 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
1346 isa<ConstantSDNode>(Val: LHS.getOperand(i: 1))) {
1347 uint64_t Mask = LHS.getConstantOperandVal(i: 1);
1348 if ((isPowerOf2_64(Value: Mask) || isMask_64(Value: Mask)) && !isInt<12>(x: Mask)) {
1349 unsigned ShAmt = 0;
1350 if (isPowerOf2_64(Value: Mask)) {
1351 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
1352 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Value: Mask);
1353 } else {
1354 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Value: Mask);
1355 }
1356
1357 LHS = LHS.getOperand(i: 0);
1358 if (ShAmt != 0)
1359 LHS = DAG.getNode(Opcode: ISD::SHL, DL, VT: LHS.getValueType(), N1: LHS,
1360 N2: DAG.getConstant(Val: ShAmt, DL, VT: LHS.getValueType()));
1361 return;
1362 }
1363 }
1364
1365 if (auto *RHSC = dyn_cast<ConstantSDNode>(Val&: RHS)) {
1366 int64_t C = RHSC->getSExtValue();
1367 switch (CC) {
1368 default:
1369 break;
1370 case ISD::SETGT:
1371 // Convert X > -1 to X >= 0.
1372 if (C == -1) {
1373 RHS = DAG.getConstant(Val: 0, DL, VT: RHS.getValueType());
1374 CC = ISD::SETGE;
1375 return;
1376 }
1377 break;
1378 case ISD::SETLT:
1379 // Convert X < 1 to 0 >= X.
1380 if (C == 1) {
1381 RHS = LHS;
1382 LHS = DAG.getConstant(Val: 0, DL, VT: RHS.getValueType());
1383 CC = ISD::SETGE;
1384 return;
1385 }
1386 break;
1387 }
1388 }
1389
1390 switch (CC) {
1391 default:
1392 break;
1393 case ISD::SETGT:
1394 case ISD::SETLE:
1395 case ISD::SETUGT:
1396 case ISD::SETULE:
1397 CC = ISD::getSetCCSwappedOperands(Operation: CC);
1398 std::swap(a&: LHS, b&: RHS);
1399 break;
1400 }
1401}
1402
1403SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
1404 SelectionDAG &DAG) const {
1405 SDValue CondV = Op.getOperand(i: 0);
1406 SDValue TrueV = Op.getOperand(i: 1);
1407 SDValue FalseV = Op.getOperand(i: 2);
1408 SDLoc DL(Op);
1409 MVT VT = Op.getSimpleValueType();
1410 MVT GRLenVT = Subtarget.getGRLenVT();
1411
1412 if (SDValue V = combineSelectToBinOp(N: Op.getNode(), DAG, Subtarget))
1413 return V;
1414
1415 if (Op.hasOneUse()) {
1416 unsigned UseOpc = Op->user_begin()->getOpcode();
1417 if (isBinOp(Opcode: UseOpc) && DAG.isSafeToSpeculativelyExecute(Opcode: UseOpc)) {
1418 SDNode *BinOp = *Op->user_begin();
1419 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(BO: *Op->user_begin(),
1420 DAG, Subtarget)) {
1421 DAG.ReplaceAllUsesWith(From: BinOp, To: &NewSel);
1422 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
1423 // may return a constant node and cause crash in lowerSELECT.
1424 if (NewSel.getOpcode() == ISD::SELECT)
1425 return lowerSELECT(Op: NewSel, DAG);
1426 return NewSel;
1427 }
1428 }
1429 }
1430
1431 // If the condition is not an integer SETCC which operates on GRLenVT, we need
1432 // to emit a LoongArchISD::SELECT_CC comparing the condition to zero. i.e.:
1433 // (select condv, truev, falsev)
1434 // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
1435 if (CondV.getOpcode() != ISD::SETCC ||
1436 CondV.getOperand(i: 0).getSimpleValueType() != GRLenVT) {
1437 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: GRLenVT);
1438 SDValue SetNE = DAG.getCondCode(Cond: ISD::SETNE);
1439
1440 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
1441
1442 return DAG.getNode(Opcode: LoongArchISD::SELECT_CC, DL, VT, Ops);
1443 }
1444
1445 // If the CondV is the output of a SETCC node which operates on GRLenVT
1446 // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
1447 // to take advantage of the integer compare+branch instructions. i.e.: (select
1448 // (setcc lhs, rhs, cc), truev, falsev)
1449 // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
1450 SDValue LHS = CondV.getOperand(i: 0);
1451 SDValue RHS = CondV.getOperand(i: 1);
1452 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: CondV.getOperand(i: 2))->get();
1453
1454 // Special case for a select of 2 constants that have a difference of 1.
1455 // Normally this is done by DAGCombine, but if the select is introduced by
1456 // type legalization or op legalization, we miss it. Restricting to SETLT
1457 // case for now because that is what signed saturating add/sub need.
1458 // FIXME: We don't need the condition to be SETLT or even a SETCC,
1459 // but we would probably want to swap the true/false values if the condition
1460 // is SETGE/SETLE to avoid an XORI.
1461 if (isa<ConstantSDNode>(Val: TrueV) && isa<ConstantSDNode>(Val: FalseV) &&
1462 CCVal == ISD::SETLT) {
1463 const APInt &TrueVal = TrueV->getAsAPIntVal();
1464 const APInt &FalseVal = FalseV->getAsAPIntVal();
1465 if (TrueVal - 1 == FalseVal)
1466 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: CondV, N2: FalseV);
1467 if (TrueVal + 1 == FalseVal)
1468 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: FalseV, N2: CondV);
1469 }
1470
1471 translateSetCCForBranch(DL, LHS, RHS, CC&: CCVal, DAG);
1472 // 1 < x ? x : 1 -> 0 < x ? x : 1
1473 if (isOneConstant(V: LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
1474 RHS == TrueV && LHS == FalseV) {
1475 LHS = DAG.getConstant(Val: 0, DL, VT);
1476 // 0 <u x is the same as x != 0.
1477 if (CCVal == ISD::SETULT) {
1478 std::swap(a&: LHS, b&: RHS);
1479 CCVal = ISD::SETNE;
1480 }
1481 }
1482
1483 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
1484 if (isAllOnesConstant(V: RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
1485 RHS == FalseV) {
1486 RHS = DAG.getConstant(Val: 0, DL, VT);
1487 }
1488
1489 SDValue TargetCC = DAG.getCondCode(Cond: CCVal);
1490
1491 if (isa<ConstantSDNode>(Val: TrueV) && !isa<ConstantSDNode>(Val: FalseV)) {
1492 // (select (setcc lhs, rhs, CC), constant, falsev)
1493 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
1494 std::swap(a&: TrueV, b&: FalseV);
1495 TargetCC = DAG.getCondCode(Cond: ISD::getSetCCInverse(Operation: CCVal, Type: LHS.getValueType()));
1496 }
1497
1498 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
1499 return DAG.getNode(Opcode: LoongArchISD::SELECT_CC, DL, VT, Ops);
1500}
1501
1502SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op,
1503 SelectionDAG &DAG) const {
1504 SDValue CondV = Op.getOperand(i: 1);
1505 SDLoc DL(Op);
1506 MVT GRLenVT = Subtarget.getGRLenVT();
1507
1508 if (CondV.getOpcode() == ISD::SETCC) {
1509 if (CondV.getOperand(i: 0).getValueType() == GRLenVT) {
1510 SDValue LHS = CondV.getOperand(i: 0);
1511 SDValue RHS = CondV.getOperand(i: 1);
1512 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: CondV.getOperand(i: 2))->get();
1513
1514 translateSetCCForBranch(DL, LHS, RHS, CC&: CCVal, DAG);
1515
1516 SDValue TargetCC = DAG.getCondCode(Cond: CCVal);
1517 return DAG.getNode(Opcode: LoongArchISD::BR_CC, DL, VT: Op.getValueType(),
1518 N1: Op.getOperand(i: 0), N2: LHS, N3: RHS, N4: TargetCC,
1519 N5: Op.getOperand(i: 2));
1520 } else if (CondV.getOperand(i: 0).getValueType().isFloatingPoint()) {
1521 return DAG.getNode(Opcode: LoongArchISD::BRCOND, DL, VT: Op.getValueType(),
1522 N1: Op.getOperand(i: 0), N2: CondV, N3: Op.getOperand(i: 2));
1523 }
1524 }
1525
1526 return DAG.getNode(Opcode: LoongArchISD::BR_CC, DL, VT: Op.getValueType(),
1527 N1: Op.getOperand(i: 0), N2: CondV, N3: DAG.getConstant(Val: 0, DL, VT: GRLenVT),
1528 N4: DAG.getCondCode(Cond: ISD::SETNE), N5: Op.getOperand(i: 2));
1529}
1530
1531SDValue
1532LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
1533 SelectionDAG &DAG) const {
1534 SDLoc DL(Op);
1535 MVT OpVT = Op.getSimpleValueType();
1536
1537 SDValue Vector = DAG.getUNDEF(VT: OpVT);
1538 SDValue Val = Op.getOperand(i: 0);
1539 SDValue Idx = DAG.getConstant(Val: 0, DL, VT: Subtarget.getGRLenVT());
1540
1541 return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: OpVT, N1: Vector, N2: Val, N3: Idx);
1542}
1543
1544SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
1545 SelectionDAG &DAG) const {
1546 EVT ResTy = Op->getValueType(ResNo: 0);
1547 SDValue Src = Op->getOperand(Num: 0);
1548 SDLoc DL(Op);
1549
1550 // LoongArchISD::BITREV_8B is not supported on LA32.
1551 if (!Subtarget.is64Bit() && (ResTy == MVT::v16i8 || ResTy == MVT::v32i8))
1552 return SDValue();
1553
1554 EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
1555 unsigned int OrigEltNum = ResTy.getVectorNumElements();
1556 unsigned int NewEltNum = NewVT.getVectorNumElements();
1557
1558 SDValue NewSrc = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: NewVT, Operand: Src);
1559
1560 SmallVector<SDValue, 8> Ops;
1561 for (unsigned int i = 0; i < NewEltNum; i++) {
1562 SDValue Op = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: MVT::i64, N1: NewSrc,
1563 N2: DAG.getConstant(Val: i, DL, VT: Subtarget.getGRLenVT()));
1564 unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
1565 ? (unsigned)LoongArchISD::BITREV_8B
1566 : (unsigned)ISD::BITREVERSE;
1567 Ops.push_back(Elt: DAG.getNode(Opcode: RevOp, DL, VT: MVT::i64, Operand: Op));
1568 }
1569 SDValue Res =
1570 DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ResTy, Operand: DAG.getBuildVector(VT: NewVT, DL, Ops));
1571
1572 switch (ResTy.getSimpleVT().SimpleTy) {
1573 default:
1574 return SDValue();
1575 case MVT::v16i8:
1576 case MVT::v32i8:
1577 return Res;
1578 case MVT::v8i16:
1579 case MVT::v16i16:
1580 case MVT::v4i32:
1581 case MVT::v8i32: {
1582 SmallVector<int, 32> Mask;
1583 for (unsigned int i = 0; i < NewEltNum; i++)
1584 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
1585 Mask.push_back(Elt: j + (OrigEltNum / NewEltNum) * i);
1586 return DAG.getVectorShuffle(VT: ResTy, dl: DL, N1: Res, N2: DAG.getUNDEF(VT: ResTy), Mask);
1587 }
1588 }
1589}
1590
1591// Widen element type to get a new mask value (if possible).
1592// For example:
1593// shufflevector <4 x i32> %a, <4 x i32> %b,
1594// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
1595// is equivalent to:
1596// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
1597// can be lowered to:
1598// VPACKOD_D vr0, vr0, vr1
1599static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1600 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1601 unsigned EltBits = VT.getScalarSizeInBits();
1602
1603 if (EltBits > 32 || EltBits == 1)
1604 return SDValue();
1605
1606 SmallVector<int, 8> NewMask;
1607 if (widenShuffleMaskElts(M: Mask, NewMask)) {
1608 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(BitWidth: EltBits * 2)
1609 : MVT::getIntegerVT(BitWidth: EltBits * 2);
1610 MVT NewVT = MVT::getVectorVT(VT: NewEltVT, NumElements: VT.getVectorNumElements() / 2);
1611 if (DAG.getTargetLoweringInfo().isTypeLegal(VT: NewVT)) {
1612 SDValue NewV1 = DAG.getBitcast(VT: NewVT, V: V1);
1613 SDValue NewV2 = DAG.getBitcast(VT: NewVT, V: V2);
1614 return DAG.getBitcast(
1615 VT, V: DAG.getVectorShuffle(VT: NewVT, dl: DL, N1: NewV1, N2: NewV2, Mask: NewMask));
1616 }
1617 }
1618
1619 return SDValue();
1620}
1621
1622/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
1623/// instruction.
1624// The funciton matches elements from one of the input vector shuffled to the
1625// left or right with zeroable elements 'shifted in'. It handles both the
1626// strictly bit-wise element shifts and the byte shfit across an entire 128-bit
1627// lane.
1628// Mostly copied from X86.
1629static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
1630 unsigned ScalarSizeInBits, ArrayRef<int> Mask,
1631 int MaskOffset, const APInt &Zeroable) {
1632 int Size = Mask.size();
1633 unsigned SizeInBits = Size * ScalarSizeInBits;
1634
1635 auto CheckZeros = [&](int Shift, int Scale, bool Left) {
1636 for (int i = 0; i < Size; i += Scale)
1637 for (int j = 0; j < Shift; ++j)
1638 if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
1639 return false;
1640
1641 return true;
1642 };
1643
1644 auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
1645 int Step = 1) {
1646 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
1647 if (!(Mask[i] == -1 || Mask[i] == Low))
1648 return false;
1649 return true;
1650 };
1651
1652 auto MatchShift = [&](int Shift, int Scale, bool Left) {
1653 for (int i = 0; i != Size; i += Scale) {
1654 unsigned Pos = Left ? i + Shift : i;
1655 unsigned Low = Left ? i : i + Shift;
1656 unsigned Len = Scale - Shift;
1657 if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
1658 return -1;
1659 }
1660
1661 int ShiftEltBits = ScalarSizeInBits * Scale;
1662 bool ByteShift = ShiftEltBits > 64;
1663 Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
1664 : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
1665 int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);
1666
1667 // Normalize the scale for byte shifts to still produce an i64 element
1668 // type.
1669 Scale = ByteShift ? Scale / 2 : Scale;
1670
1671 // We need to round trip through the appropriate type for the shift.
1672 MVT ShiftSVT = MVT::getIntegerVT(BitWidth: ScalarSizeInBits * Scale);
1673 ShiftVT = ByteShift ? MVT::getVectorVT(VT: MVT::i8, NumElements: SizeInBits / 8)
1674 : MVT::getVectorVT(VT: ShiftSVT, NumElements: Size / Scale);
1675 return (int)ShiftAmt;
1676 };
1677
1678 unsigned MaxWidth = 128;
1679 for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
1680 for (int Shift = 1; Shift != Scale; ++Shift)
1681 for (bool Left : {true, false})
1682 if (CheckZeros(Shift, Scale, Left)) {
1683 int ShiftAmt = MatchShift(Shift, Scale, Left);
1684 if (0 < ShiftAmt)
1685 return ShiftAmt;
1686 }
1687
1688 // no match
1689 return -1;
1690}
1691
1692/// Lower VECTOR_SHUFFLE as shift (if possible).
1693///
1694/// For example:
1695/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1696/// <4 x i32> <i32 4, i32 0, i32 1, i32 2>
1697/// is lowered to:
1698/// (VBSLL_V $v0, $v0, 4)
1699///
1700/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1701/// <4 x i32> <i32 4, i32 0, i32 4, i32 2>
1702/// is lowered to:
1703/// (VSLLI_D $v0, $v0, 32)
1704static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef<int> Mask,
1705 MVT VT, SDValue V1, SDValue V2,
1706 SelectionDAG &DAG,
1707 const LoongArchSubtarget &Subtarget,
1708 const APInt &Zeroable) {
1709 int Size = Mask.size();
1710 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
1711
1712 MVT ShiftVT;
1713 SDValue V = V1;
1714 unsigned Opcode;
1715
1716 // Try to match shuffle against V1 shift.
1717 int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, ScalarSizeInBits: VT.getScalarSizeInBits(),
1718 Mask, MaskOffset: 0, Zeroable);
1719
1720 // If V1 failed, try to match shuffle against V2 shift.
1721 if (ShiftAmt < 0) {
1722 ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, ScalarSizeInBits: VT.getScalarSizeInBits(),
1723 Mask, MaskOffset: Size, Zeroable);
1724 V = V2;
1725 }
1726
1727 if (ShiftAmt < 0)
1728 return SDValue();
1729
1730 assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
1731 "Illegal integer vector type");
1732 V = DAG.getBitcast(VT: ShiftVT, V);
1733 V = DAG.getNode(Opcode, DL, VT: ShiftVT, N1: V,
1734 N2: DAG.getConstant(Val: ShiftAmt, DL, VT: Subtarget.getGRLenVT()));
1735 return DAG.getBitcast(VT, V);
1736}
1737
1738/// Determine whether a range fits a regular pattern of values.
1739/// This function accounts for the possibility of jumping over the End iterator.
1740template <typename ValType>
1741static bool
1742fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
1743 unsigned CheckStride,
1744 typename SmallVectorImpl<ValType>::const_iterator End,
1745 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
1746 auto &I = Begin;
1747
1748 while (I != End) {
1749 if (*I != -1 && *I != ExpectedIndex)
1750 return false;
1751 ExpectedIndex += ExpectedIndexStride;
1752
1753 // Incrementing past End is undefined behaviour so we must increment one
1754 // step at a time and check for End at each step.
1755 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
1756 ; // Empty loop body.
1757 }
1758 return true;
1759}
1760
1761/// Compute whether each element of a shuffle is zeroable.
1762///
1763/// A "zeroable" vector shuffle element is one which can be lowered to zero.
1764static void computeZeroableShuffleElements(ArrayRef<int> Mask, SDValue V1,
1765 SDValue V2, APInt &KnownUndef,
1766 APInt &KnownZero) {
1767 int Size = Mask.size();
1768 KnownUndef = KnownZero = APInt::getZero(numBits: Size);
1769
1770 V1 = peekThroughBitcasts(V: V1);
1771 V2 = peekThroughBitcasts(V: V2);
1772
1773 bool V1IsZero = ISD::isBuildVectorAllZeros(N: V1.getNode());
1774 bool V2IsZero = ISD::isBuildVectorAllZeros(N: V2.getNode());
1775
1776 int VectorSizeInBits = V1.getValueSizeInBits();
1777 int ScalarSizeInBits = VectorSizeInBits / Size;
1778 assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
1779 (void)ScalarSizeInBits;
1780
1781 for (int i = 0; i < Size; ++i) {
1782 int M = Mask[i];
1783 if (M < 0) {
1784 KnownUndef.setBit(i);
1785 continue;
1786 }
1787 if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
1788 KnownZero.setBit(i);
1789 continue;
1790 }
1791 }
1792}
1793
1794/// Test whether a shuffle mask is equivalent within each sub-lane.
1795///
1796/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
1797/// non-trivial to compute in the face of undef lanes. The representation is
1798/// suitable for use with existing 128-bit shuffles as entries from the second
1799/// vector have been remapped to [LaneSize, 2*LaneSize).
1800static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
1801 ArrayRef<int> Mask,
1802 SmallVectorImpl<int> &RepeatedMask) {
1803 auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
1804 RepeatedMask.assign(NumElts: LaneSize, Elt: -1);
1805 int Size = Mask.size();
1806 for (int i = 0; i < Size; ++i) {
1807 assert(Mask[i] == -1 || Mask[i] >= 0);
1808 if (Mask[i] < 0)
1809 continue;
1810 if ((Mask[i] % Size) / LaneSize != i / LaneSize)
1811 // This entry crosses lanes, so there is no way to model this shuffle.
1812 return false;
1813
1814 // Ok, handle the in-lane shuffles by detecting if and when they repeat.
1815 // Adjust second vector indices to start at LaneSize instead of Size.
1816 int LocalM =
1817 Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
1818 if (RepeatedMask[i % LaneSize] < 0)
1819 // This is the first non-undef entry in this slot of a 128-bit lane.
1820 RepeatedMask[i % LaneSize] = LocalM;
1821 else if (RepeatedMask[i % LaneSize] != LocalM)
1822 // Found a mismatch with the repeated mask.
1823 return false;
1824 }
1825 return true;
1826}
1827
1828/// Attempts to match vector shuffle as byte rotation.
1829static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
1830 ArrayRef<int> Mask) {
1831
1832 SDValue Lo, Hi;
1833 SmallVector<int, 16> RepeatedMask;
1834
1835 if (!isRepeatedShuffleMask(LaneSizeInBits: 128, VT, Mask, RepeatedMask))
1836 return -1;
1837
1838 int NumElts = RepeatedMask.size();
1839 int Rotation = 0;
1840 int Scale = 16 / NumElts;
1841
1842 for (int i = 0; i < NumElts; ++i) {
1843 int M = RepeatedMask[i];
1844 assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
1845 "Unexpected mask index.");
1846 if (M < 0)
1847 continue;
1848
1849 // Determine where a rotated vector would have started.
1850 int StartIdx = i - (M % NumElts);
1851 if (StartIdx == 0)
1852 return -1;
1853
1854 // If we found the tail of a vector the rotation must be the missing
1855 // front. If we found the head of a vector, it must be how much of the
1856 // head.
1857 int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
1858
1859 if (Rotation == 0)
1860 Rotation = CandidateRotation;
1861 else if (Rotation != CandidateRotation)
1862 return -1;
1863
1864 // Compute which value this mask is pointing at.
1865 SDValue MaskV = M < NumElts ? V1 : V2;
1866
1867 // Compute which of the two target values this index should be assigned
1868 // to. This reflects whether the high elements are remaining or the low
1869 // elements are remaining.
1870 SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
1871
1872 // Either set up this value if we've not encountered it before, or check
1873 // that it remains consistent.
1874 if (!TargetV)
1875 TargetV = MaskV;
1876 else if (TargetV != MaskV)
1877 return -1;
1878 }
1879
1880 // Check that we successfully analyzed the mask, and normalize the results.
1881 assert(Rotation != 0 && "Failed to locate a viable rotation!");
1882 assert((Lo || Hi) && "Failed to find a rotated input vector!");
1883 if (!Lo)
1884 Lo = Hi;
1885 else if (!Hi)
1886 Hi = Lo;
1887
1888 V1 = Lo;
1889 V2 = Hi;
1890
1891 return Rotation * Scale;
1892}
1893
1894/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
1895///
1896/// For example:
1897/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
1898/// <2 x i32> <i32 3, i32 0>
1899/// is lowered to:
1900/// (VBSRL_V $v1, $v1, 8)
1901/// (VBSLL_V $v0, $v0, 8)
1902/// (VOR_V $v0, $V0, $v1)
1903static SDValue
1904lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1905 SDValue V1, SDValue V2, SelectionDAG &DAG,
1906 const LoongArchSubtarget &Subtarget) {
1907
1908 SDValue Lo = V1, Hi = V2;
1909 int ByteRotation = matchShuffleAsByteRotate(VT, V1&: Lo, V2&: Hi, Mask);
1910 if (ByteRotation <= 0)
1911 return SDValue();
1912
1913 MVT ByteVT = MVT::getVectorVT(VT: MVT::i8, NumElements: VT.getSizeInBits() / 8);
1914 Lo = DAG.getBitcast(VT: ByteVT, V: Lo);
1915 Hi = DAG.getBitcast(VT: ByteVT, V: Hi);
1916
1917 int LoByteShift = 16 - ByteRotation;
1918 int HiByteShift = ByteRotation;
1919 MVT GRLenVT = Subtarget.getGRLenVT();
1920
1921 SDValue LoShift = DAG.getNode(Opcode: LoongArchISD::VBSLL, DL, VT: ByteVT, N1: Lo,
1922 N2: DAG.getConstant(Val: LoByteShift, DL, VT: GRLenVT));
1923 SDValue HiShift = DAG.getNode(Opcode: LoongArchISD::VBSRL, DL, VT: ByteVT, N1: Hi,
1924 N2: DAG.getConstant(Val: HiByteShift, DL, VT: GRLenVT));
1925 return DAG.getBitcast(VT, V: DAG.getNode(Opcode: ISD::OR, DL, VT: ByteVT, N1: LoShift, N2: HiShift));
1926}
1927
1928/// Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
1929///
1930/// For example:
1931/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1932/// <4 x i32> <i32 0, i32 4, i32 1, i32 4>
1933/// %3 = bitcast <4 x i32> %2 to <2 x i64>
1934/// is lowered to:
1935/// (VREPLI $v1, 0)
1936/// (VILVL $v0, $v1, $v0)
1937static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL,
1938 ArrayRef<int> Mask, MVT VT,
1939 SDValue V1, SDValue V2,
1940 SelectionDAG &DAG,
1941 const APInt &Zeroable) {
1942 int Bits = VT.getSizeInBits();
1943 int EltBits = VT.getScalarSizeInBits();
1944 int NumElements = VT.getVectorNumElements();
1945
1946 if (Zeroable.isAllOnes())
1947 return DAG.getConstant(Val: 0, DL, VT);
1948
1949 // Define a helper function to check a particular ext-scale and lower to it if
1950 // valid.
1951 auto Lower = [&](int Scale) -> SDValue {
1952 SDValue InputV;
1953 bool AnyExt = true;
1954 int Offset = 0;
1955 for (int i = 0; i < NumElements; i++) {
1956 int M = Mask[i];
1957 if (M < 0)
1958 continue;
1959 if (i % Scale != 0) {
1960 // Each of the extended elements need to be zeroable.
1961 if (!Zeroable[i])
1962 return SDValue();
1963
1964 AnyExt = false;
1965 continue;
1966 }
1967
1968 // Each of the base elements needs to be consecutive indices into the
1969 // same input vector.
1970 SDValue V = M < NumElements ? V1 : V2;
1971 M = M % NumElements;
1972 if (!InputV) {
1973 InputV = V;
1974 Offset = M - (i / Scale);
1975
1976 // These offset can't be handled
1977 if (Offset % (NumElements / Scale))
1978 return SDValue();
1979 } else if (InputV != V)
1980 return SDValue();
1981
1982 if (M != (Offset + (i / Scale)))
1983 return SDValue(); // Non-consecutive strided elements.
1984 }
1985
1986 // If we fail to find an input, we have a zero-shuffle which should always
1987 // have already been handled.
1988 if (!InputV)
1989 return SDValue();
1990
1991 do {
1992 unsigned VilVLoHi = LoongArchISD::VILVL;
1993 if (Offset >= (NumElements / 2)) {
1994 VilVLoHi = LoongArchISD::VILVH;
1995 Offset -= (NumElements / 2);
1996 }
1997
1998 MVT InputVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: EltBits), NumElements);
1999 SDValue Ext =
2000 AnyExt ? DAG.getFreeze(V: InputV) : DAG.getConstant(Val: 0, DL, VT: InputVT);
2001 InputV = DAG.getBitcast(VT: InputVT, V: InputV);
2002 InputV = DAG.getNode(Opcode: VilVLoHi, DL, VT: InputVT, N1: Ext, N2: InputV);
2003 Scale /= 2;
2004 EltBits *= 2;
2005 NumElements /= 2;
2006 } while (Scale > 1);
2007 return DAG.getBitcast(VT, V: InputV);
2008 };
2009
2010 // Each iteration, try extending the elements half as much, but into twice as
2011 // many elements.
2012 for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
2013 NumExtElements *= 2) {
2014 if (SDValue V = Lower(NumElements / NumExtElements))
2015 return V;
2016 }
2017 return SDValue();
2018}
2019
2020/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
2021///
2022/// VREPLVEI performs vector broadcast based on an element specified by an
2023/// integer immediate, with its mask being similar to:
2024/// <x, x, x, ...>
2025/// where x is any valid index.
2026///
2027/// When undef's appear in the mask they are treated as if they were whatever
2028/// value is necessary in order to fit the above form.
2029static SDValue
2030lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2031 SDValue V1, SelectionDAG &DAG,
2032 const LoongArchSubtarget &Subtarget) {
2033 int SplatIndex = -1;
2034 for (const auto &M : Mask) {
2035 if (M != -1) {
2036 SplatIndex = M;
2037 break;
2038 }
2039 }
2040
2041 if (SplatIndex == -1)
2042 return DAG.getUNDEF(VT);
2043
2044 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
2045 if (fitsRegularPattern<int>(Begin: Mask.begin(), CheckStride: 1, End: Mask.end(), ExpectedIndex: SplatIndex, ExpectedIndexStride: 0)) {
2046 return DAG.getNode(Opcode: LoongArchISD::VREPLVEI, DL, VT, N1: V1,
2047 N2: DAG.getConstant(Val: SplatIndex, DL, VT: Subtarget.getGRLenVT()));
2048 }
2049
2050 return SDValue();
2051}
2052
2053/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
2054///
2055/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
2056/// elements according to a <4 x i2> constant (encoded as an integer immediate).
2057///
2058/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
2059/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
2060/// When undef's appear they are treated as if they were whatever value is
2061/// necessary in order to fit the above forms.
2062///
2063/// For example:
2064/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
2065/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
2066/// i32 7, i32 6, i32 5, i32 4>
2067/// is lowered to:
2068/// (VSHUF4I_H $v0, $v1, 27)
2069/// where the 27 comes from:
2070/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
2071static SDValue
2072lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2073 SDValue V1, SDValue V2, SelectionDAG &DAG,
2074 const LoongArchSubtarget &Subtarget) {
2075
2076 unsigned SubVecSize = 4;
2077 if (VT == MVT::v2f64 || VT == MVT::v2i64)
2078 SubVecSize = 2;
2079
2080 int SubMask[4] = {-1, -1, -1, -1};
2081 for (unsigned i = 0; i < SubVecSize; ++i) {
2082 for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
2083 int M = Mask[j];
2084
2085 // Convert from vector index to 4-element subvector index
2086 // If an index refers to an element outside of the subvector then give up
2087 if (M != -1) {
2088 M -= 4 * (j / SubVecSize);
2089 if (M < 0 || M >= 4)
2090 return SDValue();
2091 }
2092
2093 // If the mask has an undef, replace it with the current index.
2094 // Note that it might still be undef if the current index is also undef
2095 if (SubMask[i] == -1)
2096 SubMask[i] = M;
2097 // Check that non-undef values are the same as in the mask. If they
2098 // aren't then give up
2099 else if (M != -1 && M != SubMask[i])
2100 return SDValue();
2101 }
2102 }
2103
2104 // Calculate the immediate. Replace any remaining undefs with zero
2105 int Imm = 0;
2106 for (int i = SubVecSize - 1; i >= 0; --i) {
2107 int M = SubMask[i];
2108
2109 if (M == -1)
2110 M = 0;
2111
2112 Imm <<= 2;
2113 Imm |= M & 0x3;
2114 }
2115
2116 MVT GRLenVT = Subtarget.getGRLenVT();
2117
2118 // Return vshuf4i.d
2119 if (VT == MVT::v2f64 || VT == MVT::v2i64)
2120 return DAG.getNode(Opcode: LoongArchISD::VSHUF4I_D, DL, VT, N1: V1, N2: V2,
2121 N3: DAG.getConstant(Val: Imm, DL, VT: GRLenVT));
2122
2123 return DAG.getNode(Opcode: LoongArchISD::VSHUF4I, DL, VT, N1: V1,
2124 N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT));
2125}
2126
2127/// Lower VECTOR_SHUFFLE whose result is the reversed source vector.
2128///
2129/// It is possible to do optimization for VECTOR_SHUFFLE performing vector
2130/// reverse whose mask likes:
2131/// <7, 6, 5, 4, 3, 2, 1, 0>
2132///
2133/// When undef's appear in the mask they are treated as if they were whatever
2134/// value is necessary in order to fit the above forms.
2135static SDValue
2136lowerVECTOR_SHUFFLE_IsReverse(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2137 SDValue V1, SelectionDAG &DAG,
2138 const LoongArchSubtarget &Subtarget) {
2139 // Only vectors with i8/i16 elements which cannot match other patterns
2140 // directly needs to do this.
2141 if (VT != MVT::v16i8 && VT != MVT::v8i16 && VT != MVT::v32i8 &&
2142 VT != MVT::v16i16)
2143 return SDValue();
2144
2145 if (!ShuffleVectorInst::isReverseMask(Mask, NumSrcElts: Mask.size()))
2146 return SDValue();
2147
2148 int WidenNumElts = VT.getVectorNumElements() / 4;
2149 SmallVector<int, 16> WidenMask(WidenNumElts, -1);
2150 for (int i = 0; i < WidenNumElts; ++i)
2151 WidenMask[i] = WidenNumElts - 1 - i;
2152
2153 MVT WidenVT = MVT::getVectorVT(
2154 VT: VT.getVectorElementType() == MVT::i8 ? MVT::i32 : MVT::i64, NumElements: WidenNumElts);
2155 SDValue NewV1 = DAG.getBitcast(VT: WidenVT, V: V1);
2156 SDValue WidenRev = DAG.getVectorShuffle(VT: WidenVT, dl: DL, N1: NewV1,
2157 N2: DAG.getUNDEF(VT: WidenVT), Mask: WidenMask);
2158
2159 return DAG.getNode(Opcode: LoongArchISD::VSHUF4I, DL, VT,
2160 N1: DAG.getBitcast(VT, V: WidenRev),
2161 N2: DAG.getConstant(Val: 27, DL, VT: Subtarget.getGRLenVT()));
2162}
2163
2164/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
2165///
2166/// VPACKEV interleaves the even elements from each vector.
2167///
2168/// It is possible to lower into VPACKEV when the mask consists of two of the
2169/// following forms interleaved:
2170/// <0, 2, 4, ...>
2171/// <n, n+2, n+4, ...>
2172/// where n is the number of elements in the vector.
2173/// For example:
2174/// <0, 0, 2, 2, 4, 4, ...>
2175/// <0, n, 2, n+2, 4, n+4, ...>
2176///
2177/// When undef's appear in the mask they are treated as if they were whatever
2178/// value is necessary in order to fit the above forms.
2179static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
2180 MVT VT, SDValue V1, SDValue V2,
2181 SelectionDAG &DAG) {
2182
2183 const auto &Begin = Mask.begin();
2184 const auto &End = Mask.end();
2185 SDValue OriV1 = V1, OriV2 = V2;
2186
2187 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 2))
2188 V1 = OriV1;
2189 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2))
2190 V1 = OriV2;
2191 else
2192 return SDValue();
2193
2194 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 2))
2195 V2 = OriV1;
2196 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2))
2197 V2 = OriV2;
2198 else
2199 return SDValue();
2200
2201 return DAG.getNode(Opcode: LoongArchISD::VPACKEV, DL, VT, N1: V2, N2: V1);
2202}
2203
2204/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
2205///
2206/// VPACKOD interleaves the odd elements from each vector.
2207///
2208/// It is possible to lower into VPACKOD when the mask consists of two of the
2209/// following forms interleaved:
2210/// <1, 3, 5, ...>
2211/// <n+1, n+3, n+5, ...>
2212/// where n is the number of elements in the vector.
2213/// For example:
2214/// <1, 1, 3, 3, 5, 5, ...>
2215/// <1, n+1, 3, n+3, 5, n+5, ...>
2216///
2217/// When undef's appear in the mask they are treated as if they were whatever
2218/// value is necessary in order to fit the above forms.
2219static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
2220 MVT VT, SDValue V1, SDValue V2,
2221 SelectionDAG &DAG) {
2222
2223 const auto &Begin = Mask.begin();
2224 const auto &End = Mask.end();
2225 SDValue OriV1 = V1, OriV2 = V2;
2226
2227 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 1, ExpectedIndexStride: 2))
2228 V1 = OriV1;
2229 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2))
2230 V1 = OriV2;
2231 else
2232 return SDValue();
2233
2234 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 1, ExpectedIndexStride: 2))
2235 V2 = OriV1;
2236 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2))
2237 V2 = OriV2;
2238 else
2239 return SDValue();
2240
2241 return DAG.getNode(Opcode: LoongArchISD::VPACKOD, DL, VT, N1: V2, N2: V1);
2242}
2243
2244/// Lower VECTOR_SHUFFLE into VILVH (if possible).
2245///
2246/// VILVH interleaves consecutive elements from the left (highest-indexed) half
2247/// of each vector.
2248///
2249/// It is possible to lower into VILVH when the mask consists of two of the
2250/// following forms interleaved:
2251/// <x, x+1, x+2, ...>
2252/// <n+x, n+x+1, n+x+2, ...>
2253/// where n is the number of elements in the vector and x is half n.
2254/// For example:
2255/// <x, x, x+1, x+1, x+2, x+2, ...>
2256/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
2257///
2258/// When undef's appear in the mask they are treated as if they were whatever
2259/// value is necessary in order to fit the above forms.
2260static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef<int> Mask,
2261 MVT VT, SDValue V1, SDValue V2,
2262 SelectionDAG &DAG) {
2263
2264 const auto &Begin = Mask.begin();
2265 const auto &End = Mask.end();
2266 unsigned HalfSize = Mask.size() / 2;
2267 SDValue OriV1 = V1, OriV2 = V2;
2268
2269 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1))
2270 V1 = OriV1;
2271 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 1))
2272 V1 = OriV2;
2273 else
2274 return SDValue();
2275
2276 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1))
2277 V2 = OriV1;
2278 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size() + HalfSize,
2279 ExpectedIndexStride: 1))
2280 V2 = OriV2;
2281 else
2282 return SDValue();
2283
2284 return DAG.getNode(Opcode: LoongArchISD::VILVH, DL, VT, N1: V2, N2: V1);
2285}
2286
2287/// Lower VECTOR_SHUFFLE into VILVL (if possible).
2288///
2289/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
2290/// of each vector.
2291///
2292/// It is possible to lower into VILVL when the mask consists of two of the
2293/// following forms interleaved:
2294/// <0, 1, 2, ...>
2295/// <n, n+1, n+2, ...>
2296/// where n is the number of elements in the vector.
2297/// For example:
2298/// <0, 0, 1, 1, 2, 2, ...>
2299/// <0, n, 1, n+1, 2, n+2, ...>
2300///
2301/// When undef's appear in the mask they are treated as if they were whatever
2302/// value is necessary in order to fit the above forms.
2303static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask,
2304 MVT VT, SDValue V1, SDValue V2,
2305 SelectionDAG &DAG) {
2306
2307 const auto &Begin = Mask.begin();
2308 const auto &End = Mask.end();
2309 SDValue OriV1 = V1, OriV2 = V2;
2310
2311 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 1))
2312 V1 = OriV1;
2313 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 1))
2314 V1 = OriV2;
2315 else
2316 return SDValue();
2317
2318 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 1))
2319 V2 = OriV1;
2320 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 1))
2321 V2 = OriV2;
2322 else
2323 return SDValue();
2324
2325 return DAG.getNode(Opcode: LoongArchISD::VILVL, DL, VT, N1: V2, N2: V1);
2326}
2327
2328/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
2329///
2330/// VPICKEV copies the even elements of each vector into the result vector.
2331///
2332/// It is possible to lower into VPICKEV when the mask consists of two of the
2333/// following forms concatenated:
2334/// <0, 2, 4, ...>
2335/// <n, n+2, n+4, ...>
2336/// where n is the number of elements in the vector.
2337/// For example:
2338/// <0, 2, 4, ..., 0, 2, 4, ...>
2339/// <0, 2, 4, ..., n, n+2, n+4, ...>
2340///
2341/// When undef's appear in the mask they are treated as if they were whatever
2342/// value is necessary in order to fit the above forms.
2343static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
2344 MVT VT, SDValue V1, SDValue V2,
2345 SelectionDAG &DAG) {
2346
2347 const auto &Begin = Mask.begin();
2348 const auto &Mid = Mask.begin() + Mask.size() / 2;
2349 const auto &End = Mask.end();
2350 SDValue OriV1 = V1, OriV2 = V2;
2351
2352 if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: 0, ExpectedIndexStride: 2))
2353 V1 = OriV1;
2354 else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2))
2355 V1 = OriV2;
2356 else
2357 return SDValue();
2358
2359 if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: 0, ExpectedIndexStride: 2))
2360 V2 = OriV1;
2361 else if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2))
2362 V2 = OriV2;
2363
2364 else
2365 return SDValue();
2366
2367 return DAG.getNode(Opcode: LoongArchISD::VPICKEV, DL, VT, N1: V2, N2: V1);
2368}
2369
2370/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
2371///
2372/// VPICKOD copies the odd elements of each vector into the result vector.
2373///
2374/// It is possible to lower into VPICKOD when the mask consists of two of the
2375/// following forms concatenated:
2376/// <1, 3, 5, ...>
2377/// <n+1, n+3, n+5, ...>
2378/// where n is the number of elements in the vector.
2379/// For example:
2380/// <1, 3, 5, ..., 1, 3, 5, ...>
2381/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
2382///
2383/// When undef's appear in the mask they are treated as if they were whatever
2384/// value is necessary in order to fit the above forms.
2385static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
2386 MVT VT, SDValue V1, SDValue V2,
2387 SelectionDAG &DAG) {
2388
2389 const auto &Begin = Mask.begin();
2390 const auto &Mid = Mask.begin() + Mask.size() / 2;
2391 const auto &End = Mask.end();
2392 SDValue OriV1 = V1, OriV2 = V2;
2393
2394 if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: 1, ExpectedIndexStride: 2))
2395 V1 = OriV1;
2396 else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2))
2397 V1 = OriV2;
2398 else
2399 return SDValue();
2400
2401 if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: 1, ExpectedIndexStride: 2))
2402 V2 = OriV1;
2403 else if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2))
2404 V2 = OriV2;
2405 else
2406 return SDValue();
2407
2408 return DAG.getNode(Opcode: LoongArchISD::VPICKOD, DL, VT, N1: V2, N2: V1);
2409}
2410
2411/// Lower VECTOR_SHUFFLE into VEXTRINS (if possible).
2412///
2413/// VEXTRINS copies one element of a vector into any place of the result
2414/// vector and makes no change to the rest elements of the result vector.
2415///
2416/// It is possible to lower into VEXTRINS when the mask takes the form:
2417/// <0, 1, 2, ..., n+i, ..., n-1> or <n, n+1, n+2, ..., i, ..., 2n-1> or
2418/// <0, 1, 2, ..., i, ..., n-1> or <n, n+1, n+2, ..., n+i, ..., 2n-1>
2419/// where n is the number of elements in the vector and i is in [0, n).
2420/// For example:
2421/// <0, 1, 2, 3, 4, 5, 6, 8> , <2, 9, 10, 11, 12, 13, 14, 15> ,
2422/// <0, 1, 2, 6, 4, 5, 6, 7> , <8, 9, 10, 11, 12, 9, 14, 15>
2423///
2424/// When undef's appear in the mask they are treated as if they were whatever
2425/// value is necessary in order to fit the above forms.
2426static SDValue
2427lowerVECTOR_SHUFFLE_VEXTRINS(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2428 SDValue V1, SDValue V2, SelectionDAG &DAG,
2429 const LoongArchSubtarget &Subtarget) {
2430 unsigned NumElts = VT.getVectorNumElements();
2431 MVT EltVT = VT.getVectorElementType();
2432 MVT GRLenVT = Subtarget.getGRLenVT();
2433
2434 if (Mask.size() != NumElts)
2435 return SDValue();
2436
2437 auto tryLowerToExtrAndIns = [&](unsigned Base) -> SDValue {
2438 int DiffCount = 0;
2439 int DiffPos = -1;
2440 for (unsigned i = 0; i < NumElts; ++i) {
2441 if (Mask[i] == -1)
2442 continue;
2443 if (Mask[i] != int(Base + i)) {
2444 ++DiffCount;
2445 DiffPos = int(i);
2446 if (DiffCount > 1)
2447 return SDValue();
2448 }
2449 }
2450
2451 // Need exactly one differing element to lower into VEXTRINS.
2452 if (DiffCount != 1)
2453 return SDValue();
2454
2455 // DiffMask must be in [0, 2N).
2456 int DiffMask = Mask[DiffPos];
2457 if (DiffMask < 0 || DiffMask >= int(2 * NumElts))
2458 return SDValue();
2459
2460 // Determine source vector and source index.
2461 SDValue SrcVec;
2462 unsigned SrcIdx;
2463 if (unsigned(DiffMask) < NumElts) {
2464 SrcVec = V1;
2465 SrcIdx = unsigned(DiffMask);
2466 } else {
2467 SrcVec = V2;
2468 SrcIdx = unsigned(DiffMask) - NumElts;
2469 }
2470
2471 // Replace with EXTRACT_VECTOR_ELT + INSERT_VECTOR_ELT, it will match the
2472 // patterns of VEXTRINS in tablegen.
2473 SDValue Extracted = DAG.getNode(
2474 Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: EltVT.isFloatingPoint() ? EltVT : GRLenVT,
2475 N1: SrcVec, N2: DAG.getConstant(Val: SrcIdx, DL, VT: GRLenVT));
2476 SDValue Result =
2477 DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT, N1: (Base == 0) ? V1 : V2,
2478 N2: Extracted, N3: DAG.getConstant(Val: DiffPos, DL, VT: GRLenVT));
2479
2480 return Result;
2481 };
2482
2483 // Try [0, n-1) insertion then [n, 2n-1) insertion.
2484 if (SDValue Result = tryLowerToExtrAndIns(0))
2485 return Result;
2486 return tryLowerToExtrAndIns(NumElts);
2487}
2488
2489// Check the Mask and then build SrcVec and MaskImm infos which will
2490// be used to build LoongArchISD nodes for VPERMI_W or XVPERMI_W.
2491// On success, return true. Otherwise, return false.
2492static bool buildVPERMIInfo(ArrayRef<int> Mask, SDValue V1, SDValue V2,
2493 SmallVectorImpl<SDValue> &SrcVec,
2494 unsigned &MaskImm) {
2495 unsigned MaskSize = Mask.size();
2496
2497 auto isValid = [&](int M, int Off) {
2498 return (M == -1) || (M >= Off && M < Off + 4);
2499 };
2500
2501 auto buildImm = [&](int MLo, int MHi, unsigned Off, unsigned I) {
2502 auto immPart = [&](int M, unsigned Off) {
2503 return (M == -1 ? 0 : (M - Off)) & 0x3;
2504 };
2505 MaskImm |= immPart(MLo, Off) << (I * 2);
2506 MaskImm |= immPart(MHi, Off) << ((I + 1) * 2);
2507 };
2508
2509 for (unsigned i = 0; i < 4; i += 2) {
2510 int MLo = Mask[i];
2511 int MHi = Mask[i + 1];
2512
2513 if (MaskSize == 8) { // Only v8i32/v8f32 need this check.
2514 int M2Lo = Mask[i + 4];
2515 int M2Hi = Mask[i + 5];
2516 if (M2Lo != MLo + 4 || M2Hi != MHi + 4)
2517 return false;
2518 }
2519
2520 if (isValid(MLo, 0) && isValid(MHi, 0)) {
2521 SrcVec.push_back(Elt: V1);
2522 buildImm(MLo, MHi, 0, i);
2523 } else if (isValid(MLo, MaskSize) && isValid(MHi, MaskSize)) {
2524 SrcVec.push_back(Elt: V2);
2525 buildImm(MLo, MHi, MaskSize, i);
2526 } else {
2527 return false;
2528 }
2529 }
2530
2531 return true;
2532}
2533
2534/// Lower VECTOR_SHUFFLE into VPERMI (if possible).
2535///
2536/// VPERMI selects two elements from each of the two vectors based on the
2537/// mask and places them in the corresponding positions of the result vector
2538/// in order. Only v4i32 and v4f32 types are allowed.
2539///
2540/// It is possible to lower into VPERMI when the mask consists of two of the
2541/// following forms concatenated:
2542/// <i, j, u, v>
2543/// <u, v, i, j>
2544/// where i,j are in [0,4) and u,v are in [4, 8).
2545/// For example:
2546/// <2, 3, 4, 5>
2547/// <5, 7, 0, 2>
2548///
2549/// When undef's appear in the mask they are treated as if they were whatever
2550/// value is necessary in order to fit the above forms.
2551static SDValue lowerVECTOR_SHUFFLE_VPERMI(const SDLoc &DL, ArrayRef<int> Mask,
2552 MVT VT, SDValue V1, SDValue V2,
2553 SelectionDAG &DAG,
2554 const LoongArchSubtarget &Subtarget) {
2555 if ((VT != MVT::v4i32 && VT != MVT::v4f32) ||
2556 Mask.size() != VT.getVectorNumElements())
2557 return SDValue();
2558
2559 SmallVector<SDValue, 2> SrcVec;
2560 unsigned MaskImm = 0;
2561 if (!buildVPERMIInfo(Mask, V1, V2, SrcVec, MaskImm))
2562 return SDValue();
2563
2564 return DAG.getNode(Opcode: LoongArchISD::VPERMI, DL, VT, N1: SrcVec[1], N2: SrcVec[0],
2565 N3: DAG.getConstant(Val: MaskImm, DL, VT: Subtarget.getGRLenVT()));
2566}
2567
2568/// Lower VECTOR_SHUFFLE into VSHUF.
2569///
2570/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
2571/// adding it as an operand to the resulting VSHUF.
2572static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
2573 MVT VT, SDValue V1, SDValue V2,
2574 SelectionDAG &DAG,
2575 const LoongArchSubtarget &Subtarget) {
2576
2577 SmallVector<SDValue, 16> Ops;
2578 for (auto M : Mask)
2579 Ops.push_back(Elt: DAG.getSignedConstant(Val: M, DL, VT: Subtarget.getGRLenVT()));
2580
2581 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2582 SDValue MaskVec = DAG.getBuildVector(VT: MaskVecTy, DL, Ops);
2583
2584 // VECTOR_SHUFFLE concatenates the vectors in an vectorwise fashion.
2585 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
2586 // VSHF concatenates the vectors in a bitwise fashion:
2587 // <0b00, 0b01> + <0b10, 0b11> ->
2588 // 0b0100 + 0b1110 -> 0b01001110
2589 // <0b10, 0b11, 0b00, 0b01>
2590 // We must therefore swap the operands to get the correct result.
2591 return DAG.getNode(Opcode: LoongArchISD::VSHUF, DL, VT, N1: MaskVec, N2: V2, N3: V1);
2592}
2593
2594/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
2595///
2596/// This routine breaks down the specific type of 128-bit shuffle and
2597/// dispatches to the lowering routines accordingly.
2598static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2599 SDValue V1, SDValue V2, SelectionDAG &DAG,
2600 const LoongArchSubtarget &Subtarget) {
2601 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
2602 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
2603 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
2604 "Vector type is unsupported for lsx!");
2605 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
2606 "Two operands have different types!");
2607 assert(VT.getVectorNumElements() == Mask.size() &&
2608 "Unexpected mask size for shuffle!");
2609 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2610
2611 APInt KnownUndef, KnownZero;
2612 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2613 APInt Zeroable = KnownUndef | KnownZero;
2614
2615 SDValue Result;
2616 // TODO: Add more comparison patterns.
2617 if (V2.isUndef()) {
2618 if ((Result =
2619 lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2620 return Result;
2621 if ((Result =
2622 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2623 return Result;
2624 if ((Result =
2625 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
2626 return Result;
2627
2628 // TODO: This comment may be enabled in the future to better match the
2629 // pattern for instruction selection.
2630 /* V2 = V1; */
2631 }
2632
2633 // It is recommended not to change the pattern comparison order for better
2634 // performance.
2635 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
2636 return Result;
2637 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
2638 return Result;
2639 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
2640 return Result;
2641 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
2642 return Result;
2643 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
2644 return Result;
2645 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
2646 return Result;
2647 if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
2648 (Result =
2649 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2650 return Result;
2651 if ((Result =
2652 lowerVECTOR_SHUFFLE_VEXTRINS(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2653 return Result;
2654 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2655 Zeroable)))
2656 return Result;
2657 if ((Result =
2658 lowerVECTOR_SHUFFLE_VPERMI(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2659 return Result;
2660 if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
2661 Zeroable)))
2662 return Result;
2663 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2664 Subtarget)))
2665 return Result;
2666 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2667 return NewShuffle;
2668 if ((Result =
2669 lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2670 return Result;
2671 return SDValue();
2672}
2673
2674/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
2675///
2676/// It is a XVREPLVEI when the mask is:
2677/// <x, x, x, ..., x+n, x+n, x+n, ...>
2678/// where the number of x is equal to n and n is half the length of vector.
2679///
2680/// When undef's appear in the mask they are treated as if they were whatever
2681/// value is necessary in order to fit the above form.
2682static SDValue
2683lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2684 SDValue V1, SelectionDAG &DAG,
2685 const LoongArchSubtarget &Subtarget) {
2686 int SplatIndex = -1;
2687 for (const auto &M : Mask) {
2688 if (M != -1) {
2689 SplatIndex = M;
2690 break;
2691 }
2692 }
2693
2694 if (SplatIndex == -1)
2695 return DAG.getUNDEF(VT);
2696
2697 const auto &Begin = Mask.begin();
2698 const auto &End = Mask.end();
2699 int HalfSize = Mask.size() / 2;
2700
2701 if (SplatIndex >= HalfSize)
2702 return SDValue();
2703
2704 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
2705 if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: End - HalfSize, ExpectedIndex: SplatIndex, ExpectedIndexStride: 0) &&
2706 fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 1, End, ExpectedIndex: SplatIndex + HalfSize,
2707 ExpectedIndexStride: 0)) {
2708 return DAG.getNode(Opcode: LoongArchISD::VREPLVEI, DL, VT, N1: V1,
2709 N2: DAG.getConstant(Val: SplatIndex, DL, VT: Subtarget.getGRLenVT()));
2710 }
2711
2712 return SDValue();
2713}
2714
2715/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
2716static SDValue
2717lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2718 SDValue V1, SDValue V2, SelectionDAG &DAG,
2719 const LoongArchSubtarget &Subtarget) {
2720 // XVSHUF4I_D must be handled separately because it is different from other
2721 // types of [X]VSHUF4I instructions.
2722 if (Mask.size() == 4) {
2723 unsigned MaskImm = 0;
2724 for (int i = 1; i >= 0; --i) {
2725 int MLo = Mask[i];
2726 int MHi = Mask[i + 2];
2727 if (!(MLo == -1 || (MLo >= 0 && MLo <= 1) || (MLo >= 4 && MLo <= 5)) ||
2728 !(MHi == -1 || (MHi >= 2 && MHi <= 3) || (MHi >= 6 && MHi <= 7)))
2729 return SDValue();
2730 if (MHi != -1 && MLo != -1 && MHi != MLo + 2)
2731 return SDValue();
2732
2733 MaskImm <<= 2;
2734 if (MLo != -1)
2735 MaskImm |= ((MLo <= 1) ? MLo : (MLo - 2)) & 0x3;
2736 else if (MHi != -1)
2737 MaskImm |= ((MHi <= 3) ? (MHi - 2) : (MHi - 4)) & 0x3;
2738 }
2739
2740 return DAG.getNode(Opcode: LoongArchISD::VSHUF4I_D, DL, VT, N1: V1, N2: V2,
2741 N3: DAG.getConstant(Val: MaskImm, DL, VT: Subtarget.getGRLenVT()));
2742 }
2743
2744 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
2745}
2746
2747/// Lower VECTOR_SHUFFLE into XVPERMI (if possible).
2748static SDValue
2749lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2750 SDValue V1, SDValue V2, SelectionDAG &DAG,
2751 const LoongArchSubtarget &Subtarget) {
2752 MVT GRLenVT = Subtarget.getGRLenVT();
2753 unsigned MaskSize = Mask.size();
2754 if (MaskSize != VT.getVectorNumElements())
2755 return SDValue();
2756
2757 // Consider XVPERMI_W.
2758 if (VT == MVT::v8i32 || VT == MVT::v8f32) {
2759 SmallVector<SDValue, 2> SrcVec;
2760 unsigned MaskImm = 0;
2761 if (!buildVPERMIInfo(Mask, V1, V2, SrcVec, MaskImm))
2762 return SDValue();
2763
2764 return DAG.getNode(Opcode: LoongArchISD::VPERMI, DL, VT, N1: SrcVec[1], N2: SrcVec[0],
2765 N3: DAG.getConstant(Val: MaskImm, DL, VT: GRLenVT));
2766 }
2767
2768 // Consider XVPERMI_D.
2769 if (VT == MVT::v4i64 || VT == MVT::v4f64) {
2770 unsigned MaskImm = 0;
2771 for (unsigned i = 0; i < MaskSize; ++i) {
2772 if (Mask[i] == -1)
2773 continue;
2774 if (Mask[i] >= (int)MaskSize)
2775 return SDValue();
2776 MaskImm |= Mask[i] << (i * 2);
2777 }
2778
2779 return DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT, N1: V1,
2780 N2: DAG.getConstant(Val: MaskImm, DL, VT: GRLenVT));
2781 }
2782
2783 return SDValue();
2784}
2785
2786/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
2787static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask,
2788 MVT VT, SDValue V1, SelectionDAG &DAG,
2789 const LoongArchSubtarget &Subtarget) {
2790 // LoongArch LASX only have XVPERM_W.
2791 if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
2792 return SDValue();
2793
2794 unsigned NumElts = VT.getVectorNumElements();
2795 unsigned HalfSize = NumElts / 2;
2796 bool FrontLo = true, FrontHi = true;
2797 bool BackLo = true, BackHi = true;
2798
2799 auto inRange = [](int val, int low, int high) {
2800 return (val == -1) || (val >= low && val < high);
2801 };
2802
2803 for (unsigned i = 0; i < HalfSize; ++i) {
2804 int Fronti = Mask[i];
2805 int Backi = Mask[i + HalfSize];
2806
2807 FrontLo &= inRange(Fronti, 0, HalfSize);
2808 FrontHi &= inRange(Fronti, HalfSize, NumElts);
2809 BackLo &= inRange(Backi, 0, HalfSize);
2810 BackHi &= inRange(Backi, HalfSize, NumElts);
2811 }
2812
2813 // If both the lower and upper 128-bit parts access only one half of the
2814 // vector (either lower or upper), avoid using xvperm.w. The latency of
2815 // xvperm.w(3) is higher than using xvshuf(1) and xvori(1).
2816 if ((FrontLo || FrontHi) && (BackLo || BackHi))
2817 return SDValue();
2818
2819 SmallVector<SDValue, 8> Masks;
2820 MVT GRLenVT = Subtarget.getGRLenVT();
2821 for (unsigned i = 0; i < NumElts; ++i)
2822 Masks.push_back(Elt: Mask[i] == -1 ? DAG.getUNDEF(VT: GRLenVT)
2823 : DAG.getConstant(Val: Mask[i], DL, VT: GRLenVT));
2824 SDValue MaskVec = DAG.getBuildVector(VT: MVT::v8i32, DL, Ops: Masks);
2825
2826 return DAG.getNode(Opcode: LoongArchISD::XVPERM, DL, VT, N1: V1, N2: MaskVec);
2827}
2828
2829/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
2830static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
2831 MVT VT, SDValue V1, SDValue V2,
2832 SelectionDAG &DAG) {
2833 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
2834}
2835
2836/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
2837static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
2838 MVT VT, SDValue V1, SDValue V2,
2839 SelectionDAG &DAG) {
2840 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
2841}
2842
2843/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
2844static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
2845 MVT VT, SDValue V1, SDValue V2,
2846 SelectionDAG &DAG) {
2847
2848 const auto &Begin = Mask.begin();
2849 const auto &End = Mask.end();
2850 unsigned HalfSize = Mask.size() / 2;
2851 unsigned LeftSize = HalfSize / 2;
2852 SDValue OriV1 = V1, OriV2 = V2;
2853
2854 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize, ExpectedIndex: HalfSize - LeftSize,
2855 ExpectedIndexStride: 1) &&
2856 fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize + LeftSize, ExpectedIndexStride: 1))
2857 V1 = OriV1;
2858 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize,
2859 ExpectedIndex: Mask.size() + HalfSize - LeftSize, ExpectedIndexStride: 1) &&
2860 fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End,
2861 ExpectedIndex: Mask.size() + HalfSize + LeftSize, ExpectedIndexStride: 1))
2862 V1 = OriV2;
2863 else
2864 return SDValue();
2865
2866 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize, ExpectedIndex: HalfSize - LeftSize,
2867 ExpectedIndexStride: 1) &&
2868 fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize + LeftSize,
2869 ExpectedIndexStride: 1))
2870 V2 = OriV1;
2871 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize,
2872 ExpectedIndex: Mask.size() + HalfSize - LeftSize, ExpectedIndexStride: 1) &&
2873 fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End,
2874 ExpectedIndex: Mask.size() + HalfSize + LeftSize, ExpectedIndexStride: 1))
2875 V2 = OriV2;
2876 else
2877 return SDValue();
2878
2879 return DAG.getNode(Opcode: LoongArchISD::VILVH, DL, VT, N1: V2, N2: V1);
2880}
2881
2882/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
2883static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
2884 MVT VT, SDValue V1, SDValue V2,
2885 SelectionDAG &DAG) {
2886
2887 const auto &Begin = Mask.begin();
2888 const auto &End = Mask.end();
2889 unsigned HalfSize = Mask.size() / 2;
2890 SDValue OriV1 = V1, OriV2 = V2;
2891
2892 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize, ExpectedIndex: 0, ExpectedIndexStride: 1) &&
2893 fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1))
2894 V1 = OriV1;
2895 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize, ExpectedIndex: Mask.size(), ExpectedIndexStride: 1) &&
2896 fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End,
2897 ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 1))
2898 V1 = OriV2;
2899 else
2900 return SDValue();
2901
2902 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize, ExpectedIndex: 0, ExpectedIndexStride: 1) &&
2903 fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1))
2904 V2 = OriV1;
2905 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize, ExpectedIndex: Mask.size(),
2906 ExpectedIndexStride: 1) &&
2907 fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End,
2908 ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 1))
2909 V2 = OriV2;
2910 else
2911 return SDValue();
2912
2913 return DAG.getNode(Opcode: LoongArchISD::VILVL, DL, VT, N1: V2, N2: V1);
2914}
2915
2916/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
2917static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
2918 MVT VT, SDValue V1, SDValue V2,
2919 SelectionDAG &DAG) {
2920
2921 const auto &Begin = Mask.begin();
2922 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2923 const auto &Mid = Mask.begin() + Mask.size() / 2;
2924 const auto &RightMid = Mask.end() - Mask.size() / 4;
2925 const auto &End = Mask.end();
2926 unsigned HalfSize = Mask.size() / 2;
2927 SDValue OriV1 = V1, OriV2 = V2;
2928
2929 if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: 0, ExpectedIndexStride: 2) &&
2930 fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: HalfSize, ExpectedIndexStride: 2))
2931 V1 = OriV1;
2932 else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2) &&
2933 fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 2))
2934 V1 = OriV2;
2935 else
2936 return SDValue();
2937
2938 if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: 0, ExpectedIndexStride: 2) &&
2939 fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 2))
2940 V2 = OriV1;
2941 else if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2) &&
2942 fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 2))
2943 V2 = OriV2;
2944
2945 else
2946 return SDValue();
2947
2948 return DAG.getNode(Opcode: LoongArchISD::VPICKEV, DL, VT, N1: V2, N2: V1);
2949}
2950
2951/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
2952static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
2953 MVT VT, SDValue V1, SDValue V2,
2954 SelectionDAG &DAG) {
2955
2956 const auto &Begin = Mask.begin();
2957 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2958 const auto &Mid = Mask.begin() + Mask.size() / 2;
2959 const auto &RightMid = Mask.end() - Mask.size() / 4;
2960 const auto &End = Mask.end();
2961 unsigned HalfSize = Mask.size() / 2;
2962 SDValue OriV1 = V1, OriV2 = V2;
2963
2964 if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: 1, ExpectedIndexStride: 2) &&
2965 fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: HalfSize + 1, ExpectedIndexStride: 2))
2966 V1 = OriV1;
2967 else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2) &&
2968 fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: Mask.size() + HalfSize + 1,
2969 ExpectedIndexStride: 2))
2970 V1 = OriV2;
2971 else
2972 return SDValue();
2973
2974 if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: 1, ExpectedIndexStride: 2) &&
2975 fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: HalfSize + 1, ExpectedIndexStride: 2))
2976 V2 = OriV1;
2977 else if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2) &&
2978 fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: Mask.size() + HalfSize + 1,
2979 ExpectedIndexStride: 2))
2980 V2 = OriV2;
2981 else
2982 return SDValue();
2983
2984 return DAG.getNode(Opcode: LoongArchISD::VPICKOD, DL, VT, N1: V2, N2: V1);
2985}
2986
2987/// Lower VECTOR_SHUFFLE into XVEXTRINS (if possible).
2988static SDValue
2989lowerVECTOR_SHUFFLE_XVEXTRINS(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2990 SDValue V1, SDValue V2, SelectionDAG &DAG,
2991 const LoongArchSubtarget &Subtarget) {
2992 int NumElts = VT.getVectorNumElements();
2993 int HalfSize = NumElts / 2;
2994 MVT EltVT = VT.getVectorElementType();
2995 MVT GRLenVT = Subtarget.getGRLenVT();
2996
2997 if ((int)Mask.size() != NumElts)
2998 return SDValue();
2999
3000 auto tryLowerToExtrAndIns = [&](int Base) -> SDValue {
3001 SmallVector<int> DiffPos;
3002 for (int i = 0; i < NumElts; ++i) {
3003 if (Mask[i] == -1)
3004 continue;
3005 if (Mask[i] != Base + i) {
3006 DiffPos.push_back(Elt: i);
3007 if (DiffPos.size() > 2)
3008 return SDValue();
3009 }
3010 }
3011
3012 // Need exactly two differing element to lower into XVEXTRINS.
3013 // If only one differing element, the element at a distance of
3014 // HalfSize from it must be undef.
3015 if (DiffPos.size() == 1) {
3016 if (DiffPos[0] < HalfSize && Mask[DiffPos[0] + HalfSize] == -1)
3017 DiffPos.push_back(Elt: DiffPos[0] + HalfSize);
3018 else if (DiffPos[0] >= HalfSize && Mask[DiffPos[0] - HalfSize] == -1)
3019 DiffPos.insert(I: DiffPos.begin(), Elt: DiffPos[0] - HalfSize);
3020 else
3021 return SDValue();
3022 }
3023 if (DiffPos.size() != 2 || DiffPos[1] != DiffPos[0] + HalfSize)
3024 return SDValue();
3025
3026 // DiffMask must be in its low or high part.
3027 int DiffMaskLo = Mask[DiffPos[0]];
3028 int DiffMaskHi = Mask[DiffPos[1]];
3029 DiffMaskLo = DiffMaskLo == -1 ? DiffMaskHi - HalfSize : DiffMaskLo;
3030 DiffMaskHi = DiffMaskHi == -1 ? DiffMaskLo + HalfSize : DiffMaskHi;
3031 if (!(DiffMaskLo >= 0 && DiffMaskLo < HalfSize) &&
3032 !(DiffMaskLo >= NumElts && DiffMaskLo < NumElts + HalfSize))
3033 return SDValue();
3034 if (!(DiffMaskHi >= HalfSize && DiffMaskHi < NumElts) &&
3035 !(DiffMaskHi >= NumElts + HalfSize && DiffMaskHi < 2 * NumElts))
3036 return SDValue();
3037 if (DiffMaskHi != DiffMaskLo + HalfSize)
3038 return SDValue();
3039
3040 // Determine source vector and source index.
3041 SDValue SrcVec = (DiffMaskLo < HalfSize) ? V1 : V2;
3042 int SrcIdxLo =
3043 (DiffMaskLo < HalfSize) ? DiffMaskLo : (DiffMaskLo - NumElts);
3044 bool IsEltFP = EltVT.isFloatingPoint();
3045
3046 // Replace with 2*EXTRACT_VECTOR_ELT + 2*INSERT_VECTOR_ELT, it will match
3047 // the patterns of XVEXTRINS in tablegen.
3048 SDValue BaseVec = (Base == 0) ? V1 : V2;
3049 SDValue EltLo =
3050 DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: IsEltFP ? EltVT : GRLenVT,
3051 N1: SrcVec, N2: DAG.getConstant(Val: SrcIdxLo, DL, VT: GRLenVT));
3052 SDValue InsLo = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT, N1: BaseVec, N2: EltLo,
3053 N3: DAG.getConstant(Val: DiffPos[0], DL, VT: GRLenVT));
3054 SDValue EltHi =
3055 DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: IsEltFP ? EltVT : GRLenVT,
3056 N1: SrcVec, N2: DAG.getConstant(Val: SrcIdxLo + HalfSize, DL, VT: GRLenVT));
3057 SDValue Result = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT, N1: InsLo, N2: EltHi,
3058 N3: DAG.getConstant(Val: DiffPos[1], DL, VT: GRLenVT));
3059
3060 return Result;
3061 };
3062
3063 // Try [0, n-1) insertion then [n, 2n-1) insertion.
3064 if (SDValue Result = tryLowerToExtrAndIns(0))
3065 return Result;
3066 return tryLowerToExtrAndIns(NumElts);
3067}
3068
3069/// Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
3070static SDValue
3071lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
3072 SDValue V1, SDValue V2, SelectionDAG &DAG,
3073 const LoongArchSubtarget &Subtarget) {
3074 // LoongArch LASX only supports xvinsve0.{w/d}.
3075 if (VT != MVT::v8i32 && VT != MVT::v8f32 && VT != MVT::v4i64 &&
3076 VT != MVT::v4f64)
3077 return SDValue();
3078
3079 MVT GRLenVT = Subtarget.getGRLenVT();
3080 int MaskSize = Mask.size();
3081 assert(MaskSize == (int)VT.getVectorNumElements() && "Unexpected mask size");
3082
3083 // Check if exactly one element of the Mask is replaced by 'Replaced', while
3084 // all other elements are either 'Base + i' or undef (-1). On success, return
3085 // the index of the replaced element. Otherwise, just return -1.
3086 auto checkReplaceOne = [&](int Base, int Replaced) -> int {
3087 int Idx = -1;
3088 for (int i = 0; i < MaskSize; ++i) {
3089 if (Mask[i] == Base + i || Mask[i] == -1)
3090 continue;
3091 if (Mask[i] != Replaced)
3092 return -1;
3093 if (Idx == -1)
3094 Idx = i;
3095 else
3096 return -1;
3097 }
3098 return Idx;
3099 };
3100
3101 // Case 1: the lowest element of V2 replaces one element in V1.
3102 int Idx = checkReplaceOne(0, MaskSize);
3103 if (Idx != -1)
3104 return DAG.getNode(Opcode: LoongArchISD::XVINSVE0, DL, VT, N1: V1, N2: V2,
3105 N3: DAG.getConstant(Val: Idx, DL, VT: GRLenVT));
3106
3107 // Case 2: the lowest element of V1 replaces one element in V2.
3108 Idx = checkReplaceOne(MaskSize, 0);
3109 if (Idx != -1)
3110 return DAG.getNode(Opcode: LoongArchISD::XVINSVE0, DL, VT, N1: V2, N2: V1,
3111 N3: DAG.getConstant(Val: Idx, DL, VT: GRLenVT));
3112
3113 return SDValue();
3114}
3115
3116/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
3117static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
3118 MVT VT, SDValue V1, SDValue V2,
3119 SelectionDAG &DAG) {
3120
3121 int MaskSize = Mask.size();
3122 int HalfSize = Mask.size() / 2;
3123 const auto &Begin = Mask.begin();
3124 const auto &Mid = Mask.begin() + HalfSize;
3125 const auto &End = Mask.end();
3126
3127 // VECTOR_SHUFFLE concatenates the vectors:
3128 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
3129 // shuffling ->
3130 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
3131 //
3132 // XVSHUF concatenates the vectors:
3133 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
3134 // shuffling ->
3135 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
3136 SmallVector<SDValue, 8> MaskAlloc;
3137 for (auto it = Begin; it < Mid; it++) {
3138 if (*it < 0) // UNDEF
3139 MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: 0, DL, VT: MVT::i64));
3140 else if ((*it >= 0 && *it < HalfSize) ||
3141 (*it >= MaskSize && *it < MaskSize + HalfSize)) {
3142 int M = *it < HalfSize ? *it : *it - HalfSize;
3143 MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: M, DL, VT: MVT::i64));
3144 } else
3145 return SDValue();
3146 }
3147 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
3148
3149 for (auto it = Mid; it < End; it++) {
3150 if (*it < 0) // UNDEF
3151 MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: 0, DL, VT: MVT::i64));
3152 else if ((*it >= HalfSize && *it < MaskSize) ||
3153 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
3154 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
3155 MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: M, DL, VT: MVT::i64));
3156 } else
3157 return SDValue();
3158 }
3159 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
3160
3161 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
3162 SDValue MaskVec = DAG.getBuildVector(VT: MaskVecTy, DL, Ops: MaskAlloc);
3163 return DAG.getNode(Opcode: LoongArchISD::VSHUF, DL, VT, N1: MaskVec, N2: V2, N3: V1);
3164}
3165
3166/// Shuffle vectors by lane to generate more optimized instructions.
3167/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
3168///
3169/// Therefore, except for the following four cases, other cases are regarded
3170/// as cross-lane shuffles, where optimization is relatively limited.
3171///
3172/// - Shuffle high, low lanes of two inputs vector
3173/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
3174/// - Shuffle low, high lanes of two inputs vector
3175/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
3176/// - Shuffle low, low lanes of two inputs vector
3177/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
3178/// - Shuffle high, high lanes of two inputs vector
3179/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
3180///
3181/// The first case is the closest to LoongArch instructions and the other
3182/// cases need to be converted to it for processing.
3183///
3184/// This function will return true for the last three cases above and will
3185/// modify V1, V2 and Mask. Otherwise, return false for the first case and
3186/// cross-lane shuffle cases.
3187static bool canonicalizeShuffleVectorByLane(
3188 const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
3189 SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
3190
3191 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
3192
3193 int MaskSize = Mask.size();
3194 int HalfSize = Mask.size() / 2;
3195 MVT GRLenVT = Subtarget.getGRLenVT();
3196
3197 HalfMaskType preMask = None, postMask = None;
3198
3199 if (std::all_of(first: Mask.begin(), last: Mask.begin() + HalfSize, pred: [&](int M) {
3200 return M < 0 || (M >= 0 && M < HalfSize) ||
3201 (M >= MaskSize && M < MaskSize + HalfSize);
3202 }))
3203 preMask = HighLaneTy;
3204 else if (std::all_of(first: Mask.begin(), last: Mask.begin() + HalfSize, pred: [&](int M) {
3205 return M < 0 || (M >= HalfSize && M < MaskSize) ||
3206 (M >= MaskSize + HalfSize && M < MaskSize * 2);
3207 }))
3208 preMask = LowLaneTy;
3209
3210 if (std::all_of(first: Mask.begin() + HalfSize, last: Mask.end(), pred: [&](int M) {
3211 return M < 0 || (M >= HalfSize && M < MaskSize) ||
3212 (M >= MaskSize + HalfSize && M < MaskSize * 2);
3213 }))
3214 postMask = LowLaneTy;
3215 else if (std::all_of(first: Mask.begin() + HalfSize, last: Mask.end(), pred: [&](int M) {
3216 return M < 0 || (M >= 0 && M < HalfSize) ||
3217 (M >= MaskSize && M < MaskSize + HalfSize);
3218 }))
3219 postMask = HighLaneTy;
3220
3221 // The pre-half of mask is high lane type, and the post-half of mask
3222 // is low lane type, which is closest to the LoongArch instructions.
3223 //
3224 // Note: In the LoongArch architecture, the high lane of mask corresponds
3225 // to the lower 128-bit of vector register, and the low lane of mask
3226 // corresponds the higher 128-bit of vector register.
3227 if (preMask == HighLaneTy && postMask == LowLaneTy) {
3228 return false;
3229 }
3230 if (preMask == LowLaneTy && postMask == HighLaneTy) {
3231 V1 = DAG.getBitcast(VT: MVT::v4i64, V: V1);
3232 V1 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V1,
3233 N2: DAG.getConstant(Val: 0b01001110, DL, VT: GRLenVT));
3234 V1 = DAG.getBitcast(VT, V: V1);
3235
3236 if (!V2.isUndef()) {
3237 V2 = DAG.getBitcast(VT: MVT::v4i64, V: V2);
3238 V2 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V2,
3239 N2: DAG.getConstant(Val: 0b01001110, DL, VT: GRLenVT));
3240 V2 = DAG.getBitcast(VT, V: V2);
3241 }
3242
3243 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
3244 *it = *it < 0 ? *it : *it - HalfSize;
3245 }
3246 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
3247 *it = *it < 0 ? *it : *it + HalfSize;
3248 }
3249 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
3250 V1 = DAG.getBitcast(VT: MVT::v4i64, V: V1);
3251 V1 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V1,
3252 N2: DAG.getConstant(Val: 0b11101110, DL, VT: GRLenVT));
3253 V1 = DAG.getBitcast(VT, V: V1);
3254
3255 if (!V2.isUndef()) {
3256 V2 = DAG.getBitcast(VT: MVT::v4i64, V: V2);
3257 V2 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V2,
3258 N2: DAG.getConstant(Val: 0b11101110, DL, VT: GRLenVT));
3259 V2 = DAG.getBitcast(VT, V: V2);
3260 }
3261
3262 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
3263 *it = *it < 0 ? *it : *it - HalfSize;
3264 }
3265 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
3266 V1 = DAG.getBitcast(VT: MVT::v4i64, V: V1);
3267 V1 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V1,
3268 N2: DAG.getConstant(Val: 0b01000100, DL, VT: GRLenVT));
3269 V1 = DAG.getBitcast(VT, V: V1);
3270
3271 if (!V2.isUndef()) {
3272 V2 = DAG.getBitcast(VT: MVT::v4i64, V: V2);
3273 V2 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V2,
3274 N2: DAG.getConstant(Val: 0b01000100, DL, VT: GRLenVT));
3275 V2 = DAG.getBitcast(VT, V: V2);
3276 }
3277
3278 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
3279 *it = *it < 0 ? *it : *it + HalfSize;
3280 }
3281 } else { // cross-lane
3282 return false;
3283 }
3284
3285 return true;
3286}
3287
3288/// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
3289/// Only for 256-bit vector.
3290///
3291/// For example:
3292/// %2 = shufflevector <4 x i64> %0, <4 x i64> posion,
3293/// <4 x i64> <i32 0, i32 3, i32 2, i32 0>
3294/// is lowerded to:
3295/// (XVPERMI $xr2, $xr0, 78)
3296/// (XVSHUF $xr1, $xr2, $xr0)
3297/// (XVORI $xr0, $xr1, 0)
3298static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL,
3299 ArrayRef<int> Mask,
3300 MVT VT, SDValue V1,
3301 SDValue V2,
3302 SelectionDAG &DAG) {
3303 assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
3304 int Size = Mask.size();
3305 int LaneSize = Size / 2;
3306
3307 bool LaneCrossing[2] = {false, false};
3308 for (int i = 0; i < Size; ++i)
3309 if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
3310 LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
3311
3312 // Ensure that all lanes ared involved.
3313 if (!LaneCrossing[0] && !LaneCrossing[1])
3314 return SDValue();
3315
3316 SmallVector<int> InLaneMask;
3317 InLaneMask.assign(in_start: Mask.begin(), in_end: Mask.end());
3318 for (int i = 0; i < Size; ++i) {
3319 int &M = InLaneMask[i];
3320 if (M < 0)
3321 continue;
3322 if (((M % Size) / LaneSize) != (i / LaneSize))
3323 M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
3324 }
3325
3326 SDValue Flipped = DAG.getBitcast(VT: MVT::v4i64, V: V1);
3327 Flipped = DAG.getVectorShuffle(VT: MVT::v4i64, dl: DL, N1: Flipped,
3328 N2: DAG.getUNDEF(VT: MVT::v4i64), Mask: {2, 3, 0, 1});
3329 Flipped = DAG.getBitcast(VT, V: Flipped);
3330 return DAG.getVectorShuffle(VT, dl: DL, N1: V1, N2: Flipped, Mask: InLaneMask);
3331}
3332
3333/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
3334///
3335/// This routine breaks down the specific type of 256-bit shuffle and
3336/// dispatches to the lowering routines accordingly.
3337static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
3338 SDValue V1, SDValue V2, SelectionDAG &DAG,
3339 const LoongArchSubtarget &Subtarget) {
3340 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
3341 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
3342 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
3343 "Vector type is unsupported for lasx!");
3344 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
3345 "Two operands have different types!");
3346 assert(VT.getVectorNumElements() == Mask.size() &&
3347 "Unexpected mask size for shuffle!");
3348 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
3349 assert(Mask.size() >= 4 && "Mask size is less than 4.");
3350
3351 APInt KnownUndef, KnownZero;
3352 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
3353 APInt Zeroable = KnownUndef | KnownZero;
3354
3355 SDValue Result;
3356 // TODO: Add more comparison patterns.
3357 if (V2.isUndef()) {
3358 if ((Result =
3359 lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
3360 return Result;
3361 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
3362 Subtarget)))
3363 return Result;
3364 // Try to widen vectors to gain more optimization opportunities.
3365 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
3366 return NewShuffle;
3367 if ((Result =
3368 lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3369 return Result;
3370 if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
3371 return Result;
3372 if ((Result =
3373 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
3374 return Result;
3375
3376 // TODO: This comment may be enabled in the future to better match the
3377 // pattern for instruction selection.
3378 /* V2 = V1; */
3379 }
3380
3381 // It is recommended not to change the pattern comparison order for better
3382 // performance.
3383 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask, VT, V1, V2, DAG)))
3384 return Result;
3385 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask, VT, V1, V2, DAG)))
3386 return Result;
3387 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask, VT, V1, V2, DAG)))
3388 return Result;
3389 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask, VT, V1, V2, DAG)))
3390 return Result;
3391 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask, VT, V1, V2, DAG)))
3392 return Result;
3393 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask, VT, V1, V2, DAG)))
3394 return Result;
3395 if ((VT.SimpleTy == MVT::v4i64 || VT.SimpleTy == MVT::v4f64) &&
3396 (Result =
3397 lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3398 return Result;
3399 if ((Result =
3400 lowerVECTOR_SHUFFLE_XVEXTRINS(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3401 return Result;
3402 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
3403 Zeroable)))
3404 return Result;
3405 if ((Result =
3406 lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3407 return Result;
3408 if ((Result =
3409 lowerVECTOR_SHUFFLE_XVINSVE0(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3410 return Result;
3411 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
3412 Subtarget)))
3413 return Result;
3414
3415 // canonicalize non cross-lane shuffle vector
3416 SmallVector<int> NewMask(Mask);
3417 if (canonicalizeShuffleVectorByLane(DL, Mask: NewMask, VT, V1, V2, DAG, Subtarget))
3418 return lower256BitShuffle(DL, Mask: NewMask, VT, V1, V2, DAG, Subtarget);
3419
3420 // FIXME: Handling the remaining cases earlier can degrade performance
3421 // in some situations. Further analysis is required to enable more
3422 // effective optimizations.
3423 if (V2.isUndef()) {
3424 if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, Mask: NewMask, VT,
3425 V1, V2, DAG)))
3426 return Result;
3427 }
3428
3429 if (SDValue NewShuffle = widenShuffleMask(DL, Mask: NewMask, VT, V1, V2, DAG))
3430 return NewShuffle;
3431 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, Mask: NewMask, VT, V1, V2, DAG)))
3432 return Result;
3433
3434 return SDValue();
3435}
3436
3437SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
3438 SelectionDAG &DAG) const {
3439 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Val&: Op);
3440 ArrayRef<int> OrigMask = SVOp->getMask();
3441 SDValue V1 = Op.getOperand(i: 0);
3442 SDValue V2 = Op.getOperand(i: 1);
3443 MVT VT = Op.getSimpleValueType();
3444 int NumElements = VT.getVectorNumElements();
3445 SDLoc DL(Op);
3446
3447 bool V1IsUndef = V1.isUndef();
3448 bool V2IsUndef = V2.isUndef();
3449 if (V1IsUndef && V2IsUndef)
3450 return DAG.getUNDEF(VT);
3451
3452 // When we create a shuffle node we put the UNDEF node to second operand,
3453 // but in some cases the first operand may be transformed to UNDEF.
3454 // In this case we should just commute the node.
3455 if (V1IsUndef)
3456 return DAG.getCommutedVectorShuffle(SV: *SVOp);
3457
3458 // Check for non-undef masks pointing at an undef vector and make the masks
3459 // undef as well. This makes it easier to match the shuffle based solely on
3460 // the mask.
3461 if (V2IsUndef &&
3462 any_of(Range&: OrigMask, P: [NumElements](int M) { return M >= NumElements; })) {
3463 SmallVector<int, 8> NewMask(OrigMask);
3464 for (int &M : NewMask)
3465 if (M >= NumElements)
3466 M = -1;
3467 return DAG.getVectorShuffle(VT, dl: DL, N1: V1, N2: V2, Mask: NewMask);
3468 }
3469
3470 // Check for illegal shuffle mask element index values.
3471 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
3472 (void)MaskUpperLimit;
3473 assert(llvm::all_of(OrigMask,
3474 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
3475 "Out of bounds shuffle index");
3476
3477 // For each vector width, delegate to a specialized lowering routine.
3478 if (VT.is128BitVector())
3479 return lower128BitShuffle(DL, Mask: OrigMask, VT, V1, V2, DAG, Subtarget);
3480
3481 if (VT.is256BitVector())
3482 return lower256BitShuffle(DL, Mask: OrigMask, VT, V1, V2, DAG, Subtarget);
3483
3484 return SDValue();
3485}
3486
3487SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
3488 SelectionDAG &DAG) const {
3489 // Custom lower to ensure the libcall return is passed in an FPR on hard
3490 // float ABIs.
3491 SDLoc DL(Op);
3492 MakeLibCallOptions CallOptions;
3493 SDValue Op0 = Op.getOperand(i: 0);
3494 SDValue Chain = SDValue();
3495 RTLIB::Libcall LC = RTLIB::getFPROUND(OpVT: Op0.getValueType(), RetVT: MVT::f16);
3496 SDValue Res;
3497 std::tie(args&: Res, args&: Chain) =
3498 makeLibCall(DAG, LC, RetVT: MVT::f32, Ops: Op0, CallOptions, dl: DL, Chain);
3499 if (Subtarget.is64Bit())
3500 return DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Res);
3501 return DAG.getBitcast(VT: MVT::i32, V: Res);
3502}
3503
3504SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
3505 SelectionDAG &DAG) const {
3506 // Custom lower to ensure the libcall argument is passed in an FPR on hard
3507 // float ABIs.
3508 SDLoc DL(Op);
3509 MakeLibCallOptions CallOptions;
3510 SDValue Op0 = Op.getOperand(i: 0);
3511 SDValue Chain = SDValue();
3512 SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64,
3513 DL, VT: MVT::f32, Operand: Op0)
3514 : DAG.getBitcast(VT: MVT::f32, V: Op0);
3515 SDValue Res;
3516 std::tie(args&: Res, args&: Chain) = makeLibCall(DAG, LC: RTLIB::FPEXT_F16_F32, RetVT: MVT::f32, Ops: Arg,
3517 CallOptions, dl: DL, Chain);
3518 return Res;
3519}
3520
3521SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
3522 SelectionDAG &DAG) const {
3523 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
3524 SDLoc DL(Op);
3525 MakeLibCallOptions CallOptions;
3526 RTLIB::Libcall LC =
3527 RTLIB::getFPROUND(OpVT: Op.getOperand(i: 0).getValueType(), RetVT: MVT::bf16);
3528 SDValue Res =
3529 makeLibCall(DAG, LC, RetVT: MVT::f32, Ops: Op.getOperand(i: 0), CallOptions, dl: DL).first;
3530 if (Subtarget.is64Bit())
3531 return DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Res);
3532 return DAG.getBitcast(VT: MVT::i32, V: Res);
3533}
3534
3535SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
3536 SelectionDAG &DAG) const {
3537 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
3538 MVT VT = Op.getSimpleValueType();
3539 SDLoc DL(Op);
3540 Op = DAG.getNode(
3541 Opcode: ISD::SHL, DL, VT: Op.getOperand(i: 0).getValueType(), N1: Op.getOperand(i: 0),
3542 N2: DAG.getShiftAmountConstant(Val: 16, VT: Op.getOperand(i: 0).getValueType(), DL));
3543 SDValue Res = Subtarget.is64Bit() ? DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64,
3544 DL, VT: MVT::f32, Operand: Op)
3545 : DAG.getBitcast(VT: MVT::f32, V: Op);
3546 if (VT != MVT::f32)
3547 return DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT, Operand: Res);
3548 return Res;
3549}
3550
3551// Lower BUILD_VECTOR as broadcast load (if possible).
3552// For example:
3553// %a = load i8, ptr %ptr
3554// %b = build_vector %a, %a, %a, %a
3555// is lowered to :
3556// (VLDREPL_B $a0, 0)
3557static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
3558 const SDLoc &DL,
3559 SelectionDAG &DAG) {
3560 MVT VT = BVOp->getSimpleValueType(ResNo: 0);
3561 int NumOps = BVOp->getNumOperands();
3562
3563 assert((VT.is128BitVector() || VT.is256BitVector()) &&
3564 "Unsupported vector type for broadcast.");
3565
3566 SDValue IdentitySrc;
3567 bool IsIdeneity = true;
3568
3569 for (int i = 0; i != NumOps; i++) {
3570 SDValue Op = BVOp->getOperand(Num: i);
3571 if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
3572 IsIdeneity = false;
3573 break;
3574 }
3575 IdentitySrc = BVOp->getOperand(Num: 0);
3576 }
3577
3578 // make sure that this load is valid and only has one user.
3579 if (!IsIdeneity || !IdentitySrc || !BVOp->isOnlyUserOf(N: IdentitySrc.getNode()))
3580 return SDValue();
3581
3582 auto *LN = cast<LoadSDNode>(Val&: IdentitySrc);
3583 auto ExtType = LN->getExtensionType();
3584
3585 if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
3586 VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
3587 // Indexed loads and stores are not supported on LoongArch.
3588 assert(LN->isUnindexed() && "Unexpected indexed load.");
3589
3590 SDVTList Tys = DAG.getVTList(VT1: VT, VT2: MVT::Other);
3591 // The offset operand of unindexed load is always undefined, so there is
3592 // no need to pass it to VLDREPL.
3593 SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
3594 SDValue BCast = DAG.getNode(Opcode: LoongArchISD::VLDREPL, DL, VTList: Tys, Ops);
3595 DAG.ReplaceAllUsesOfValueWith(From: SDValue(LN, 1), To: BCast.getValue(R: 1));
3596 return BCast;
3597 }
3598 return SDValue();
3599}
3600
3601// Sequentially insert elements from Ops into Vector, from low to high indices.
3602// Note: Ops can have fewer elements than Vector.
3603static void fillVector(ArrayRef<SDValue> Ops, SelectionDAG &DAG, SDLoc DL,
3604 const LoongArchSubtarget &Subtarget, SDValue &Vector,
3605 EVT ResTy) {
3606 assert(Ops.size() <= ResTy.getVectorNumElements());
3607
3608 SDValue Op0 = Ops[0];
3609 if (!Op0.isUndef())
3610 Vector = DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL, VT: ResTy, Operand: Op0);
3611 for (unsigned i = 1; i < Ops.size(); ++i) {
3612 SDValue Opi = Ops[i];
3613 if (Opi.isUndef())
3614 continue;
3615 Vector = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: ResTy, N1: Vector, N2: Opi,
3616 N3: DAG.getConstant(Val: i, DL, VT: Subtarget.getGRLenVT()));
3617 }
3618}
3619
3620// Build a ResTy subvector from Node, taking NumElts elements starting at index
3621// 'first'.
3622static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node,
3623 SelectionDAG &DAG, SDLoc DL,
3624 const LoongArchSubtarget &Subtarget,
3625 EVT ResTy, unsigned first) {
3626 unsigned NumElts = ResTy.getVectorNumElements();
3627
3628 assert(first + NumElts <= Node->getSimpleValueType(0).getVectorNumElements());
3629
3630 SmallVector<SDValue, 16> Ops(Node->op_begin() + first,
3631 Node->op_begin() + first + NumElts);
3632 SDValue Vector = DAG.getUNDEF(VT: ResTy);
3633 fillVector(Ops, DAG, DL, Subtarget, Vector, ResTy);
3634 return Vector;
3635}
3636
3637SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
3638 SelectionDAG &DAG) const {
3639 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Val&: Op);
3640 MVT VT = Node->getSimpleValueType(ResNo: 0);
3641 EVT ResTy = Op->getValueType(ResNo: 0);
3642 unsigned NumElts = ResTy.getVectorNumElements();
3643 SDLoc DL(Op);
3644 APInt SplatValue, SplatUndef;
3645 unsigned SplatBitSize;
3646 bool HasAnyUndefs;
3647 bool IsConstant = false;
3648 bool UseSameConstant = true;
3649 SDValue ConstantValue;
3650 bool Is128Vec = ResTy.is128BitVector();
3651 bool Is256Vec = ResTy.is256BitVector();
3652
3653 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
3654 (!Subtarget.hasExtLASX() || !Is256Vec))
3655 return SDValue();
3656
3657 if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(BVOp: Node, DL, DAG))
3658 return Result;
3659
3660 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
3661 /*MinSplatBits=*/8) &&
3662 SplatBitSize <= 64) {
3663 // We can only cope with 8, 16, 32, or 64-bit elements.
3664 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
3665 SplatBitSize != 64)
3666 return SDValue();
3667
3668 if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
3669 // We can only handle 64-bit elements that are within
3670 // the signed 10-bit range or match vldi patterns on 32-bit targets.
3671 // See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
3672 if (!SplatValue.isSignedIntN(N: 10) &&
3673 !isImmVLDILegalForMode1(SplatValue, SplatBitSize).first)
3674 return SDValue();
3675 if ((Is128Vec && ResTy == MVT::v4i32) ||
3676 (Is256Vec && ResTy == MVT::v8i32))
3677 return Op;
3678 }
3679
3680 EVT ViaVecTy;
3681
3682 switch (SplatBitSize) {
3683 default:
3684 return SDValue();
3685 case 8:
3686 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
3687 break;
3688 case 16:
3689 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
3690 break;
3691 case 32:
3692 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
3693 break;
3694 case 64:
3695 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
3696 break;
3697 }
3698
3699 // SelectionDAG::getConstant will promote SplatValue appropriately.
3700 SDValue Result = DAG.getConstant(Val: SplatValue, DL, VT: ViaVecTy);
3701
3702 // Bitcast to the type we originally wanted.
3703 if (ViaVecTy != ResTy)
3704 Result = DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(Node), VT: ResTy, Operand: Result);
3705
3706 return Result;
3707 }
3708
3709 if (DAG.isSplatValue(V: Op, /*AllowUndefs=*/false))
3710 return Op;
3711
3712 for (unsigned i = 0; i < NumElts; ++i) {
3713 SDValue Opi = Node->getOperand(Num: i);
3714 if (isIntOrFPConstant(V: Opi)) {
3715 IsConstant = true;
3716 if (!ConstantValue.getNode())
3717 ConstantValue = Opi;
3718 else if (ConstantValue != Opi)
3719 UseSameConstant = false;
3720 }
3721 }
3722
3723 // If the type of BUILD_VECTOR is v2f64, custom legalizing it has no benefits.
3724 if (IsConstant && UseSameConstant && ResTy != MVT::v2f64) {
3725 SDValue Result = DAG.getSplatBuildVector(VT: ResTy, DL, Op: ConstantValue);
3726 for (unsigned i = 0; i < NumElts; ++i) {
3727 SDValue Opi = Node->getOperand(Num: i);
3728 if (!isIntOrFPConstant(V: Opi))
3729 Result = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: ResTy, N1: Result, N2: Opi,
3730 N3: DAG.getConstant(Val: i, DL, VT: Subtarget.getGRLenVT()));
3731 }
3732 return Result;
3733 }
3734
3735 if (!IsConstant) {
3736 // If the BUILD_VECTOR has a repeated pattern, use INSERT_VECTOR_ELT to fill
3737 // the sub-sequence of the vector and then broadcast the sub-sequence.
3738 //
3739 // TODO: If the BUILD_VECTOR contains undef elements, consider falling
3740 // back to use INSERT_VECTOR_ELT to materialize the vector, because it
3741 // generates worse code in some cases. This could be further optimized
3742 // with more consideration.
3743 SmallVector<SDValue> Sequence;
3744 BitVector UndefElements;
3745 if (Node->getRepeatedSequence(Sequence, UndefElements: &UndefElements) &&
3746 UndefElements.count() == 0) {
3747 // Using LSX instructions to fill the sub-sequence of 256-bits vector,
3748 // because the high part can be simply treated as undef.
3749 SDValue Vector = DAG.getUNDEF(VT: ResTy);
3750 EVT FillTy = Is256Vec
3751 ? ResTy.getHalfNumVectorElementsVT(Context&: *DAG.getContext())
3752 : ResTy;
3753 SDValue FillVec =
3754 Is256Vec ? DAG.getExtractSubvector(DL, VT: FillTy, Vec: Vector, Idx: 0) : Vector;
3755
3756 fillVector(Ops: Sequence, DAG, DL, Subtarget, Vector&: FillVec, ResTy: FillTy);
3757
3758 unsigned SeqLen = Sequence.size();
3759 unsigned SplatLen = NumElts / SeqLen;
3760 MVT SplatEltTy = MVT::getIntegerVT(BitWidth: VT.getScalarSizeInBits() * SeqLen);
3761 MVT SplatTy = MVT::getVectorVT(VT: SplatEltTy, NumElements: SplatLen);
3762
3763 // If size of the sub-sequence is half of a 256-bits vector, bitcast the
3764 // vector to v4i64 type in order to match the pattern of XVREPLVE0Q.
3765 if (SplatEltTy == MVT::i128)
3766 SplatTy = MVT::v4i64;
3767
3768 SDValue SplatVec;
3769 SDValue SrcVec = DAG.getBitcast(
3770 VT: SplatTy,
3771 V: Is256Vec ? DAG.getInsertSubvector(DL, Vec: Vector, SubVec: FillVec, Idx: 0) : FillVec);
3772 if (Is256Vec) {
3773 SplatVec =
3774 DAG.getNode(Opcode: (SplatEltTy == MVT::i128) ? LoongArchISD::XVREPLVE0Q
3775 : LoongArchISD::XVREPLVE0,
3776 DL, VT: SplatTy, Operand: SrcVec);
3777 } else {
3778 SplatVec = DAG.getNode(Opcode: LoongArchISD::VREPLVEI, DL, VT: SplatTy, N1: SrcVec,
3779 N2: DAG.getConstant(Val: 0, DL, VT: Subtarget.getGRLenVT()));
3780 }
3781
3782 return DAG.getBitcast(VT: ResTy, V: SplatVec);
3783 }
3784
3785 // Use INSERT_VECTOR_ELT operations rather than expand to stores, because
3786 // using memory operations is much lower.
3787 //
3788 // For 256-bit vectors, normally split into two halves and concatenate.
3789 // Special case: for v8i32/v8f32/v4i64/v4f64, if the upper half has only
3790 // one non-undef element, skip spliting to avoid a worse result.
3791 if (ResTy == MVT::v8i32 || ResTy == MVT::v8f32 || ResTy == MVT::v4i64 ||
3792 ResTy == MVT::v4f64) {
3793 unsigned NonUndefCount = 0;
3794 for (unsigned i = NumElts / 2; i < NumElts; ++i) {
3795 if (!Node->getOperand(Num: i).isUndef()) {
3796 ++NonUndefCount;
3797 if (NonUndefCount > 1)
3798 break;
3799 }
3800 }
3801 if (NonUndefCount == 1)
3802 return fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, ResTy, first: 0);
3803 }
3804
3805 EVT VecTy =
3806 Is256Vec ? ResTy.getHalfNumVectorElementsVT(Context&: *DAG.getContext()) : ResTy;
3807 SDValue Vector =
3808 fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, ResTy: VecTy, first: 0);
3809
3810 if (Is128Vec)
3811 return Vector;
3812
3813 SDValue VectorHi = fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget,
3814 ResTy: VecTy, first: NumElts / 2);
3815
3816 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: ResTy, N1: Vector, N2: VectorHi);
3817 }
3818
3819 return SDValue();
3820}
3821
3822SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
3823 SelectionDAG &DAG) const {
3824 SDLoc DL(Op);
3825 MVT ResVT = Op.getSimpleValueType();
3826 assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);
3827
3828 if (Op.getOperand(i: 0).getOpcode() == ISD::TRUNCATE &&
3829 Op.getOperand(i: 1).getOpcode() == ISD::TRUNCATE)
3830 return Op;
3831
3832 unsigned NumOperands = Op.getNumOperands();
3833 unsigned NumFreezeUndef = 0;
3834 unsigned NumZero = 0;
3835 unsigned NumNonZero = 0;
3836 unsigned NonZeros = 0;
3837 SmallSet<SDValue, 4> Undefs;
3838 for (unsigned i = 0; i != NumOperands; ++i) {
3839 SDValue SubVec = Op.getOperand(i);
3840 if (SubVec.isUndef())
3841 continue;
3842 if (ISD::isFreezeUndef(N: SubVec.getNode())) {
3843 // If the freeze(undef) has multiple uses then we must fold to zero.
3844 if (SubVec.hasOneUse()) {
3845 ++NumFreezeUndef;
3846 } else {
3847 ++NumZero;
3848 Undefs.insert(V: SubVec);
3849 }
3850 } else if (ISD::isBuildVectorAllZeros(N: SubVec.getNode()))
3851 ++NumZero;
3852 else {
3853 assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
3854 NonZeros |= 1 << i;
3855 ++NumNonZero;
3856 }
3857 }
3858
3859 // If we have more than 2 non-zeros, build each half separately.
3860 if (NumNonZero > 2) {
3861 MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
3862 ArrayRef<SDUse> Ops = Op->ops();
3863 SDValue Lo = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: HalfVT,
3864 Ops: Ops.slice(N: 0, M: NumOperands / 2));
3865 SDValue Hi = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: HalfVT,
3866 Ops: Ops.slice(N: NumOperands / 2));
3867 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: ResVT, N1: Lo, N2: Hi);
3868 }
3869
3870 // Otherwise, build it up through insert_subvectors.
3871 SDValue Vec = NumZero ? DAG.getConstant(Val: 0, DL, VT: ResVT)
3872 : (NumFreezeUndef ? DAG.getFreeze(V: DAG.getUNDEF(VT: ResVT))
3873 : DAG.getUNDEF(VT: ResVT));
3874
3875 // Replace Undef operands with ZeroVector.
3876 for (SDValue U : Undefs)
3877 DAG.ReplaceAllUsesWith(From: U, To: DAG.getConstant(Val: 0, DL, VT: U.getSimpleValueType()));
3878
3879 MVT SubVT = Op.getOperand(i: 0).getSimpleValueType();
3880 unsigned NumSubElems = SubVT.getVectorNumElements();
3881 for (unsigned i = 0; i != NumOperands; ++i) {
3882 if ((NonZeros & (1 << i)) == 0)
3883 continue;
3884
3885 Vec = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: ResVT, N1: Vec, N2: Op.getOperand(i),
3886 N3: DAG.getVectorIdxConstant(Val: i * NumSubElems, DL));
3887 }
3888
3889 return Vec;
3890}
3891
3892SDValue
3893LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3894 SelectionDAG &DAG) const {
3895 MVT EltVT = Op.getSimpleValueType();
3896 SDValue Vec = Op->getOperand(Num: 0);
3897 EVT VecTy = Vec->getValueType(ResNo: 0);
3898 SDValue Idx = Op->getOperand(Num: 1);
3899 SDLoc DL(Op);
3900 MVT GRLenVT = Subtarget.getGRLenVT();
3901
3902 assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");
3903
3904 if (isa<ConstantSDNode>(Val: Idx))
3905 return Op;
3906
3907 switch (VecTy.getSimpleVT().SimpleTy) {
3908 default:
3909 llvm_unreachable("Unexpected type");
3910 case MVT::v32i8:
3911 case MVT::v16i16:
3912 case MVT::v4i64:
3913 case MVT::v4f64: {
3914 // Extract the high half subvector and place it to the low half of a new
3915 // vector. It doesn't matter what the high half of the new vector is.
3916 EVT HalfTy = VecTy.getHalfNumVectorElementsVT(Context&: *DAG.getContext());
3917 SDValue VecHi =
3918 DAG.getExtractSubvector(DL, VT: HalfTy, Vec, Idx: HalfTy.getVectorNumElements());
3919 SDValue TmpVec =
3920 DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: VecTy, N1: DAG.getUNDEF(VT: VecTy),
3921 N2: VecHi, N3: DAG.getConstant(Val: 0, DL, VT: GRLenVT));
3922
3923 // Shuffle the origin Vec and the TmpVec using MaskVec, the lowest element
3924 // of MaskVec is Idx, the rest do not matter. ResVec[0] will hold the
3925 // desired element.
3926 SDValue IdxCp =
3927 Subtarget.is64Bit()
3928 ? DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64, DL, VT: MVT::f32, Operand: Idx)
3929 : DAG.getBitcast(VT: MVT::f32, V: Idx);
3930 SDValue IdxVec = DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL, VT: MVT::v8f32, Operand: IdxCp);
3931 SDValue MaskVec =
3932 DAG.getBitcast(VT: (VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, V: IdxVec);
3933 SDValue ResVec =
3934 DAG.getNode(Opcode: LoongArchISD::VSHUF, DL, VT: VecTy, N1: MaskVec, N2: TmpVec, N3: Vec);
3935
3936 return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: EltVT, N1: ResVec,
3937 N2: DAG.getConstant(Val: 0, DL, VT: GRLenVT));
3938 }
3939 case MVT::v8i32:
3940 case MVT::v8f32: {
3941 SDValue SplatIdx = DAG.getSplatBuildVector(VT: MVT::v8i32, DL, Op: Idx);
3942 SDValue SplatValue =
3943 DAG.getNode(Opcode: LoongArchISD::XVPERM, DL, VT: VecTy, N1: Vec, N2: SplatIdx);
3944
3945 return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: EltVT, N1: SplatValue,
3946 N2: DAG.getConstant(Val: 0, DL, VT: GRLenVT));
3947 }
3948 }
3949}
3950
3951SDValue
3952LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3953 SelectionDAG &DAG) const {
3954 MVT VT = Op.getSimpleValueType();
3955 MVT EltVT = VT.getVectorElementType();
3956 unsigned NumElts = VT.getVectorNumElements();
3957 unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
3958 SDLoc DL(Op);
3959 SDValue Op0 = Op.getOperand(i: 0);
3960 SDValue Op1 = Op.getOperand(i: 1);
3961 SDValue Op2 = Op.getOperand(i: 2);
3962
3963 if (isa<ConstantSDNode>(Val: Op2))
3964 return Op;
3965
3966 MVT IdxTy = MVT::getIntegerVT(BitWidth: EltSizeInBits);
3967 MVT IdxVTy = MVT::getVectorVT(VT: IdxTy, NumElements: NumElts);
3968
3969 if (!isTypeLegal(VT) || !isTypeLegal(VT: IdxVTy))
3970 return SDValue();
3971
3972 SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op: Op1);
3973 SmallVector<SDValue, 32> RawIndices;
3974 SDValue SplatIdx;
3975 SDValue Indices;
3976
3977 if (!Subtarget.is64Bit() && IdxTy == MVT::i64) {
3978 MVT PairVTy = MVT::getVectorVT(VT: MVT::i32, NumElements: NumElts * 2);
3979 for (unsigned i = 0; i < NumElts; ++i) {
3980 RawIndices.push_back(Elt: Op2);
3981 RawIndices.push_back(Elt: DAG.getConstant(Val: 0, DL, VT: MVT::i32));
3982 }
3983 SplatIdx = DAG.getBuildVector(VT: PairVTy, DL, Ops: RawIndices);
3984 SplatIdx = DAG.getBitcast(VT: IdxVTy, V: SplatIdx);
3985
3986 RawIndices.clear();
3987 for (unsigned i = 0; i < NumElts; ++i) {
3988 RawIndices.push_back(Elt: DAG.getConstant(Val: i, DL, VT: MVT::i32));
3989 RawIndices.push_back(Elt: DAG.getConstant(Val: 0, DL, VT: MVT::i32));
3990 }
3991 Indices = DAG.getBuildVector(VT: PairVTy, DL, Ops: RawIndices);
3992 Indices = DAG.getBitcast(VT: IdxVTy, V: Indices);
3993 } else {
3994 SplatIdx = DAG.getSplatBuildVector(VT: IdxVTy, DL, Op: Op2);
3995
3996 for (unsigned i = 0; i < NumElts; ++i)
3997 RawIndices.push_back(Elt: DAG.getConstant(Val: i, DL, VT: Subtarget.getGRLenVT()));
3998 Indices = DAG.getBuildVector(VT: IdxVTy, DL, Ops: RawIndices);
3999 }
4000
4001 // insert vec, elt, idx
4002 // =>
4003 // select (splatidx == {0,1,2...}) ? splatelt : vec
4004 SDValue SelectCC =
4005 DAG.getSetCC(DL, VT: IdxVTy, LHS: SplatIdx, RHS: Indices, Cond: ISD::CondCode::SETEQ);
4006 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT, N1: SelectCC, N2: SplatElt, N3: Op0);
4007}
4008
4009SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
4010 SelectionDAG &DAG) const {
4011 SDLoc DL(Op);
4012 SyncScope::ID FenceSSID =
4013 static_cast<SyncScope::ID>(Op.getConstantOperandVal(i: 2));
4014
4015 // singlethread fences only synchronize with signal handlers on the same
4016 // thread and thus only need to preserve instruction order, not actually
4017 // enforce memory ordering.
4018 if (FenceSSID == SyncScope::SingleThread)
4019 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
4020 return DAG.getNode(Opcode: ISD::MEMBARRIER, DL, VT: MVT::Other, Operand: Op.getOperand(i: 0));
4021
4022 return Op;
4023}
4024
4025SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
4026 SelectionDAG &DAG) const {
4027
4028 if (Subtarget.is64Bit() && Op.getOperand(i: 2).getValueType() == MVT::i32) {
4029 DAG.getContext()->emitError(
4030 ErrorStr: "On LA64, only 64-bit registers can be written.");
4031 return Op.getOperand(i: 0);
4032 }
4033
4034 if (!Subtarget.is64Bit() && Op.getOperand(i: 2).getValueType() == MVT::i64) {
4035 DAG.getContext()->emitError(
4036 ErrorStr: "On LA32, only 32-bit registers can be written.");
4037 return Op.getOperand(i: 0);
4038 }
4039
4040 return Op;
4041}
4042
4043SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
4044 SelectionDAG &DAG) const {
4045 if (!isa<ConstantSDNode>(Val: Op.getOperand(i: 0))) {
4046 DAG.getContext()->emitError(ErrorStr: "argument to '__builtin_frame_address' must "
4047 "be a constant integer");
4048 return SDValue();
4049 }
4050
4051 MachineFunction &MF = DAG.getMachineFunction();
4052 MF.getFrameInfo().setFrameAddressIsTaken(true);
4053 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
4054 EVT VT = Op.getValueType();
4055 SDLoc DL(Op);
4056 SDValue FrameAddr = DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: DL, Reg: FrameReg, VT);
4057 unsigned Depth = Op.getConstantOperandVal(i: 0);
4058 int GRLenInBytes = Subtarget.getGRLen() / 8;
4059
4060 while (Depth--) {
4061 int Offset = -(GRLenInBytes * 2);
4062 SDValue Ptr = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: FrameAddr,
4063 N2: DAG.getSignedConstant(Val: Offset, DL, VT));
4064 FrameAddr =
4065 DAG.getLoad(VT, dl: DL, Chain: DAG.getEntryNode(), Ptr, PtrInfo: MachinePointerInfo());
4066 }
4067 return FrameAddr;
4068}
4069
4070SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
4071 SelectionDAG &DAG) const {
4072 // Currently only support lowering return address for current frame.
4073 if (Op.getConstantOperandVal(i: 0) != 0) {
4074 DAG.getContext()->emitError(
4075 ErrorStr: "return address can only be determined for the current frame");
4076 return SDValue();
4077 }
4078
4079 MachineFunction &MF = DAG.getMachineFunction();
4080 MF.getFrameInfo().setReturnAddressIsTaken(true);
4081 MVT GRLenVT = Subtarget.getGRLenVT();
4082
4083 // Return the value of the return address register, marking it an implicit
4084 // live-in.
4085 Register Reg = MF.addLiveIn(PReg: Subtarget.getRegisterInfo()->getRARegister(),
4086 RC: getRegClassFor(VT: GRLenVT));
4087 return DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: SDLoc(Op), Reg, VT: GRLenVT);
4088}
4089
4090SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
4091 SelectionDAG &DAG) const {
4092 MachineFunction &MF = DAG.getMachineFunction();
4093 auto Size = Subtarget.getGRLen() / 8;
4094 auto FI = MF.getFrameInfo().CreateFixedObject(Size, SPOffset: 0, IsImmutable: false);
4095 return DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout()));
4096}
4097
4098SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
4099 SelectionDAG &DAG) const {
4100 MachineFunction &MF = DAG.getMachineFunction();
4101 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
4102
4103 SDLoc DL(Op);
4104 SDValue FI = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(),
4105 VT: getPointerTy(DL: MF.getDataLayout()));
4106
4107 // vastart just stores the address of the VarArgsFrameIndex slot into the
4108 // memory location argument.
4109 const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue();
4110 return DAG.getStore(Chain: Op.getOperand(i: 0), dl: DL, Val: FI, Ptr: Op.getOperand(i: 1),
4111 PtrInfo: MachinePointerInfo(SV));
4112}
4113
4114SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
4115 SelectionDAG &DAG) const {
4116 SDLoc DL(Op);
4117 SDValue Op0 = Op.getOperand(i: 0);
4118 EVT VT = Op.getValueType();
4119 EVT Op0VT = Op0.getValueType();
4120
4121 if ((DAG.SignBitIsZero(Op: Op0) || Op->getFlags().hasNonNeg()) &&
4122 !isOperationLegal(Op: ISD::UINT_TO_FP, VT: Op0VT) &&
4123 isOperationLegal(Op: ISD::SINT_TO_FP, VT: Op0VT))
4124 return DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT, Operand: Op0);
4125
4126 if (Subtarget.hasExtLSX() && Op0VT == MVT::i64 &&
4127 (VT == MVT::f32 || VT == MVT::f64)) {
4128 Op0 = DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL, VT: MVT::v2i64, Operand: Op0);
4129 SDValue Conv = DAG.getNode(Opcode: ISD::UINT_TO_FP, DL, VT: MVT::v2f64, Operand: Op0);
4130 Conv = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: MVT::f64, N1: Conv,
4131 N2: DAG.getIntPtrConstant(Val: 0, DL));
4132 if (VT == MVT::f32)
4133 Conv = DAG.getFPExtendOrRound(Op: Conv, DL, VT);
4134 return Conv;
4135 }
4136
4137 if (!Subtarget.is64Bit() || !Subtarget.hasBasicF() || Subtarget.hasBasicD())
4138 return SDValue();
4139
4140 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
4141 !Subtarget.hasBasicD() && "unexpected target features");
4142
4143 if (Op0->getOpcode() == ISD::AND) {
4144 auto *C = dyn_cast<ConstantSDNode>(Val: Op0.getOperand(i: 1));
4145 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
4146 return Op;
4147 }
4148
4149 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
4150 Op0.getConstantOperandVal(i: 1) < UINT64_C(0X1F) &&
4151 Op0.getConstantOperandVal(i: 2) == UINT64_C(0))
4152 return Op;
4153
4154 if (Op0.getOpcode() == ISD::AssertZext &&
4155 dyn_cast<VTSDNode>(Val: Op0.getOperand(i: 1))->getVT().bitsLT(VT: MVT::i32))
4156 return Op;
4157
4158 EVT OpVT = Op0.getValueType();
4159 EVT RetVT = Op.getValueType();
4160 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
4161 MakeLibCallOptions CallOptions;
4162 CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT);
4163 SDValue Chain = SDValue();
4164 SDValue Result;
4165 std::tie(args&: Result, args&: Chain) =
4166 makeLibCall(DAG, LC, RetVT: Op.getValueType(), Ops: Op0, CallOptions, dl: DL, Chain);
4167 return Result;
4168}
4169
4170SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
4171 SelectionDAG &DAG) const {
4172 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
4173 !Subtarget.hasBasicD() && "unexpected target features");
4174
4175 SDLoc DL(Op);
4176 SDValue Op0 = Op.getOperand(i: 0);
4177
4178 if ((Op0.getOpcode() == ISD::AssertSext ||
4179 Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) &&
4180 dyn_cast<VTSDNode>(Val: Op0.getOperand(i: 1))->getVT().bitsLE(VT: MVT::i32))
4181 return Op;
4182
4183 EVT OpVT = Op0.getValueType();
4184 EVT RetVT = Op.getValueType();
4185 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
4186 MakeLibCallOptions CallOptions;
4187 CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT);
4188 SDValue Chain = SDValue();
4189 SDValue Result;
4190 std::tie(args&: Result, args&: Chain) =
4191 makeLibCall(DAG, LC, RetVT: Op.getValueType(), Ops: Op0, CallOptions, dl: DL, Chain);
4192 return Result;
4193}
4194
4195SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
4196 SelectionDAG &DAG) const {
4197
4198 SDLoc DL(Op);
4199 EVT VT = Op.getValueType();
4200 SDValue Op0 = Op.getOperand(i: 0);
4201 EVT Op0VT = Op0.getValueType();
4202
4203 if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
4204 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
4205 SDValue NewOp0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op0);
4206 return DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64, DL, VT: MVT::f32, Operand: NewOp0);
4207 }
4208 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
4209 SDValue Lo, Hi;
4210 std::tie(args&: Lo, args&: Hi) = DAG.SplitScalar(N: Op0, DL, LoVT: MVT::i32, HiVT: MVT::i32);
4211 return DAG.getNode(Opcode: LoongArchISD::BUILD_PAIR_F64, DL, VT: MVT::f64, N1: Lo, N2: Hi);
4212 }
4213 return Op;
4214}
4215
4216SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
4217 SelectionDAG &DAG) const {
4218
4219 SDLoc DL(Op);
4220 SDValue Op0 = Op.getOperand(i: 0);
4221
4222 if (Op0.getValueType() == MVT::f16)
4223 Op0 = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: MVT::f32, Operand: Op0);
4224
4225 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
4226 !Subtarget.hasBasicD()) {
4227 SDValue Dst = DAG.getNode(Opcode: LoongArchISD::FTINT, DL, VT: MVT::f32, Operand: Op0);
4228 return DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Dst);
4229 }
4230
4231 EVT FPTy = EVT::getFloatingPointVT(BitWidth: Op.getValueSizeInBits());
4232 SDValue Trunc = DAG.getNode(Opcode: LoongArchISD::FTINT, DL, VT: FPTy, Operand: Op0);
4233 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: Op.getValueType(), Operand: Trunc);
4234}
4235
4236SDValue LoongArchTargetLowering::lowerFP_TO_UINT(SDValue Op,
4237 SelectionDAG &DAG) const {
4238 if (!Subtarget.hasExtLSX())
4239 return SDValue();
4240
4241 SDLoc DL(Op);
4242 SDValue Src = Op.getOperand(i: 0);
4243 EVT VT = Op.getValueType();
4244 EVT SrcVT = Src.getValueType();
4245
4246 if (VT != MVT::i64)
4247 return SDValue();
4248
4249 if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
4250 return SDValue();
4251
4252 if (SrcVT == MVT::f32)
4253 Src = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: MVT::f64, Operand: Src);
4254 Src = DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL, VT: MVT::v2f64, Operand: Src);
4255 SDValue Conv = DAG.getNode(Opcode: ISD::FP_TO_UINT, DL, VT: MVT::v2i64, Operand: Src);
4256 return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT, N1: Conv,
4257 N2: DAG.getIntPtrConstant(Val: 0, DL));
4258}
4259
4260static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
4261 SelectionDAG &DAG, unsigned Flags) {
4262 return DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: Flags);
4263}
4264
4265static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
4266 SelectionDAG &DAG, unsigned Flags) {
4267 return DAG.getTargetBlockAddress(BA: N->getBlockAddress(), VT: Ty, Offset: N->getOffset(),
4268 TargetFlags: Flags);
4269}
4270
4271static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
4272 SelectionDAG &DAG, unsigned Flags) {
4273 return DAG.getTargetConstantPool(C: N->getConstVal(), VT: Ty, Align: N->getAlign(),
4274 Offset: N->getOffset(), TargetFlags: Flags);
4275}
4276
4277static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
4278 SelectionDAG &DAG, unsigned Flags) {
4279 return DAG.getTargetJumpTable(JTI: N->getIndex(), VT: Ty, TargetFlags: Flags);
4280}
4281
4282template <class NodeTy>
4283SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
4284 CodeModel::Model M,
4285 bool IsLocal) const {
4286 SDLoc DL(N);
4287 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
4288 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
4289 SDValue Load;
4290
4291 switch (M) {
4292 default:
4293 report_fatal_error(reason: "Unsupported code model");
4294
4295 case CodeModel::Large: {
4296 assert(Subtarget.is64Bit() && "Large code model requires LA64");
4297
4298 // This is not actually used, but is necessary for successfully matching
4299 // the PseudoLA_*_LARGE nodes.
4300 SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty);
4301 if (IsLocal) {
4302 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
4303 // eventually becomes the desired 5-insn code sequence.
4304 Load = SDValue(DAG.getMachineNode(Opcode: LoongArch::PseudoLA_PCREL_LARGE, dl: DL, VT: Ty,
4305 Op1: Tmp, Op2: Addr),
4306 0);
4307 } else {
4308 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
4309 // eventually becomes the desired 5-insn code sequence.
4310 Load = SDValue(
4311 DAG.getMachineNode(Opcode: LoongArch::PseudoLA_GOT_LARGE, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr),
4312 0);
4313 }
4314 break;
4315 }
4316
4317 case CodeModel::Small:
4318 case CodeModel::Medium:
4319 if (IsLocal) {
4320 // This generates the pattern (PseudoLA_PCREL sym), which
4321 //
4322 // for la32r expands to:
4323 // (addi.w (pcaddu12i %pcadd_hi20(sym)) %pcadd_lo12(.Lpcadd_hi)).
4324 //
4325 // for la32s and la64 expands to:
4326 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
4327 Load = SDValue(
4328 DAG.getMachineNode(Opcode: LoongArch::PseudoLA_PCREL, dl: DL, VT: Ty, Op1: Addr), 0);
4329 } else {
4330 // This generates the pattern (PseudoLA_GOT sym), which
4331 //
4332 // for la32r expands to:
4333 // (ld.w (pcaddu12i %got_pcadd_hi20(sym)) %pcadd_lo12(.Lpcadd_hi)).
4334 //
4335 // for la32s and la64 expands to:
4336 // (ld.w/d (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
4337 Load =
4338 SDValue(DAG.getMachineNode(Opcode: LoongArch::PseudoLA_GOT, dl: DL, VT: Ty, Op1: Addr), 0);
4339 }
4340 }
4341
4342 if (!IsLocal) {
4343 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
4344 MachineFunction &MF = DAG.getMachineFunction();
4345 MachineMemOperand *MemOp = MF.getMachineMemOperand(
4346 PtrInfo: MachinePointerInfo::getGOT(MF),
4347 f: MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
4348 MachineMemOperand::MOInvariant,
4349 MemTy: LLT(Ty.getSimpleVT()), base_alignment: Align(Ty.getFixedSizeInBits() / 8));
4350 DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Load.getNode()), NewMemRefs: {MemOp});
4351 }
4352
4353 return Load;
4354}
4355
4356SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
4357 SelectionDAG &DAG) const {
4358 return getAddr(N: cast<BlockAddressSDNode>(Val&: Op), DAG,
4359 M: DAG.getTarget().getCodeModel());
4360}
4361
4362SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
4363 SelectionDAG &DAG) const {
4364 return getAddr(N: cast<JumpTableSDNode>(Val&: Op), DAG,
4365 M: DAG.getTarget().getCodeModel());
4366}
4367
4368SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
4369 SelectionDAG &DAG) const {
4370 return getAddr(N: cast<ConstantPoolSDNode>(Val&: Op), DAG,
4371 M: DAG.getTarget().getCodeModel());
4372}
4373
4374SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
4375 SelectionDAG &DAG) const {
4376 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Val&: Op);
4377 assert(N->getOffset() == 0 && "unexpected offset in global node");
4378 auto CM = DAG.getTarget().getCodeModel();
4379 const GlobalValue *GV = N->getGlobal();
4380
4381 if (GV->isDSOLocal() && isa<GlobalVariable>(Val: GV)) {
4382 if (auto GCM = dyn_cast<GlobalVariable>(Val: GV)->getCodeModel())
4383 CM = *GCM;
4384 }
4385
4386 return getAddr(N, DAG, M: CM, IsLocal: GV->isDSOLocal());
4387}
4388
4389SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
4390 SelectionDAG &DAG,
4391 unsigned Opc, bool UseGOT,
4392 bool Large) const {
4393 SDLoc DL(N);
4394 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
4395 MVT GRLenVT = Subtarget.getGRLenVT();
4396
4397 // This is not actually used, but is necessary for successfully matching the
4398 // PseudoLA_*_LARGE nodes.
4399 SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty);
4400 SDValue Addr = DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: 0);
4401
4402 // Only IE needs an extra argument for large code model.
4403 SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
4404 ? SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), 0)
4405 : SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Addr), 0);
4406
4407 // If it is LE for normal/medium code model, the add tp operation will occur
4408 // during the pseudo-instruction expansion.
4409 if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
4410 return Offset;
4411
4412 if (UseGOT) {
4413 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
4414 MachineFunction &MF = DAG.getMachineFunction();
4415 MachineMemOperand *MemOp = MF.getMachineMemOperand(
4416 PtrInfo: MachinePointerInfo::getGOT(MF),
4417 f: MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
4418 MachineMemOperand::MOInvariant,
4419 MemTy: LLT(Ty.getSimpleVT()), base_alignment: Align(Ty.getFixedSizeInBits() / 8));
4420 DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Offset.getNode()), NewMemRefs: {MemOp});
4421 }
4422
4423 // Add the thread pointer.
4424 return DAG.getNode(Opcode: ISD::ADD, DL, VT: Ty, N1: Offset,
4425 N2: DAG.getRegister(Reg: LoongArch::R2, VT: GRLenVT));
4426}
4427
4428SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
4429 SelectionDAG &DAG,
4430 unsigned Opc,
4431 bool Large) const {
4432 SDLoc DL(N);
4433 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
4434 IntegerType *CallTy = Type::getIntNTy(C&: *DAG.getContext(), N: Ty.getSizeInBits());
4435
4436 // This is not actually used, but is necessary for successfully matching the
4437 // PseudoLA_*_LARGE nodes.
4438 SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty);
4439
4440 // Use a PC-relative addressing mode to access the dynamic GOT address.
4441 SDValue Addr = DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: 0);
4442 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), 0)
4443 : SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Addr), 0);
4444
4445 // Prepare argument list to generate call.
4446 ArgListTy Args;
4447 Args.emplace_back(args&: Load, args&: CallTy);
4448
4449 // Setup call to __tls_get_addr.
4450 TargetLowering::CallLoweringInfo CLI(DAG);
4451 CLI.setDebugLoc(DL)
4452 .setChain(DAG.getEntryNode())
4453 .setLibCallee(CC: CallingConv::C, ResultType: CallTy,
4454 Target: DAG.getExternalSymbol(Sym: "__tls_get_addr", VT: Ty),
4455 ArgsList: std::move(Args));
4456
4457 return LowerCallTo(CLI).first;
4458}
4459
4460SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
4461 SelectionDAG &DAG, unsigned Opc,
4462 bool Large) const {
4463 SDLoc DL(N);
4464 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
4465 const GlobalValue *GV = N->getGlobal();
4466
4467 // This is not actually used, but is necessary for successfully matching the
4468 // PseudoLA_*_LARGE nodes.
4469 SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty);
4470
4471 // Use a PC-relative addressing mode to access the global dynamic GOT address.
4472 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
4473 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: 0);
4474 return Large ? SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), 0)
4475 : SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Addr), 0);
4476}
4477
4478SDValue
4479LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
4480 SelectionDAG &DAG) const {
4481 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
4482 CallingConv::GHC)
4483 report_fatal_error(reason: "In GHC calling convention TLS is not supported");
4484
4485 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
4486 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
4487
4488 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Val&: Op);
4489 assert(N->getOffset() == 0 && "unexpected offset in global node");
4490
4491 if (DAG.getTarget().useEmulatedTLS())
4492 reportFatalUsageError(reason: "the emulated TLS is prohibited");
4493
4494 bool IsDesc = DAG.getTarget().useTLSDESC();
4495
4496 switch (getTargetMachine().getTLSModel(GV: N->getGlobal())) {
4497 case TLSModel::GeneralDynamic:
4498 // In this model, application code calls the dynamic linker function
4499 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
4500 // runtime.
4501 if (!IsDesc)
4502 return getDynamicTLSAddr(N, DAG,
4503 Opc: Large ? LoongArch::PseudoLA_TLS_GD_LARGE
4504 : LoongArch::PseudoLA_TLS_GD,
4505 Large);
4506 break;
4507 case TLSModel::LocalDynamic:
4508 // Same as GeneralDynamic, except for assembly modifiers and relocation
4509 // records.
4510 if (!IsDesc)
4511 return getDynamicTLSAddr(N, DAG,
4512 Opc: Large ? LoongArch::PseudoLA_TLS_LD_LARGE
4513 : LoongArch::PseudoLA_TLS_LD,
4514 Large);
4515 break;
4516 case TLSModel::InitialExec:
4517 // This model uses the GOT to resolve TLS offsets.
4518 return getStaticTLSAddr(N, DAG,
4519 Opc: Large ? LoongArch::PseudoLA_TLS_IE_LARGE
4520 : LoongArch::PseudoLA_TLS_IE,
4521 /*UseGOT=*/true, Large);
4522 case TLSModel::LocalExec:
4523 // This model is used when static linking as the TLS offsets are resolved
4524 // during program linking.
4525 //
4526 // This node doesn't need an extra argument for the large code model.
4527 return getStaticTLSAddr(N, DAG, Opc: LoongArch::PseudoLA_TLS_LE,
4528 /*UseGOT=*/false, Large);
4529 }
4530
4531 return getTLSDescAddr(N, DAG,
4532 Opc: Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
4533 : LoongArch::PseudoLA_TLS_DESC,
4534 Large);
4535}
4536
4537template <unsigned N>
4538static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
4539 SelectionDAG &DAG, bool IsSigned = false) {
4540 auto *CImm = cast<ConstantSDNode>(Val: Op->getOperand(Num: ImmOp));
4541 // Check the ImmArg.
4542 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
4543 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
4544 DAG.getContext()->emitError(ErrorStr: Op->getOperationName(G: 0) +
4545 ": argument out of range.");
4546 return DAG.getNode(Opcode: ISD::UNDEF, DL: SDLoc(Op), VT: Op.getValueType());
4547 }
4548 return SDValue();
4549}
4550
4551SDValue
4552LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
4553 SelectionDAG &DAG) const {
4554 switch (Op.getConstantOperandVal(i: 0)) {
4555 default:
4556 return SDValue(); // Don't custom lower most intrinsics.
4557 case Intrinsic::thread_pointer: {
4558 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
4559 return DAG.getRegister(Reg: LoongArch::R2, VT: PtrVT);
4560 }
4561 case Intrinsic::loongarch_lsx_vpickve2gr_d:
4562 case Intrinsic::loongarch_lsx_vpickve2gr_du:
4563 case Intrinsic::loongarch_lsx_vreplvei_d:
4564 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
4565 return checkIntrinsicImmArg<1>(Op, ImmOp: 2, DAG);
4566 case Intrinsic::loongarch_lsx_vreplvei_w:
4567 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
4568 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
4569 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
4570 case Intrinsic::loongarch_lasx_xvpickve_d:
4571 case Intrinsic::loongarch_lasx_xvpickve_d_f:
4572 return checkIntrinsicImmArg<2>(Op, ImmOp: 2, DAG);
4573 case Intrinsic::loongarch_lasx_xvinsve0_d:
4574 return checkIntrinsicImmArg<2>(Op, ImmOp: 3, DAG);
4575 case Intrinsic::loongarch_lsx_vsat_b:
4576 case Intrinsic::loongarch_lsx_vsat_bu:
4577 case Intrinsic::loongarch_lsx_vrotri_b:
4578 case Intrinsic::loongarch_lsx_vsllwil_h_b:
4579 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
4580 case Intrinsic::loongarch_lsx_vsrlri_b:
4581 case Intrinsic::loongarch_lsx_vsrari_b:
4582 case Intrinsic::loongarch_lsx_vreplvei_h:
4583 case Intrinsic::loongarch_lasx_xvsat_b:
4584 case Intrinsic::loongarch_lasx_xvsat_bu:
4585 case Intrinsic::loongarch_lasx_xvrotri_b:
4586 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
4587 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
4588 case Intrinsic::loongarch_lasx_xvsrlri_b:
4589 case Intrinsic::loongarch_lasx_xvsrari_b:
4590 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
4591 case Intrinsic::loongarch_lasx_xvpickve_w:
4592 case Intrinsic::loongarch_lasx_xvpickve_w_f:
4593 return checkIntrinsicImmArg<3>(Op, ImmOp: 2, DAG);
4594 case Intrinsic::loongarch_lasx_xvinsve0_w:
4595 return checkIntrinsicImmArg<3>(Op, ImmOp: 3, DAG);
4596 case Intrinsic::loongarch_lsx_vsat_h:
4597 case Intrinsic::loongarch_lsx_vsat_hu:
4598 case Intrinsic::loongarch_lsx_vrotri_h:
4599 case Intrinsic::loongarch_lsx_vsllwil_w_h:
4600 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
4601 case Intrinsic::loongarch_lsx_vsrlri_h:
4602 case Intrinsic::loongarch_lsx_vsrari_h:
4603 case Intrinsic::loongarch_lsx_vreplvei_b:
4604 case Intrinsic::loongarch_lasx_xvsat_h:
4605 case Intrinsic::loongarch_lasx_xvsat_hu:
4606 case Intrinsic::loongarch_lasx_xvrotri_h:
4607 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
4608 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
4609 case Intrinsic::loongarch_lasx_xvsrlri_h:
4610 case Intrinsic::loongarch_lasx_xvsrari_h:
4611 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
4612 return checkIntrinsicImmArg<4>(Op, ImmOp: 2, DAG);
4613 case Intrinsic::loongarch_lsx_vsrlni_b_h:
4614 case Intrinsic::loongarch_lsx_vsrani_b_h:
4615 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
4616 case Intrinsic::loongarch_lsx_vsrarni_b_h:
4617 case Intrinsic::loongarch_lsx_vssrlni_b_h:
4618 case Intrinsic::loongarch_lsx_vssrani_b_h:
4619 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
4620 case Intrinsic::loongarch_lsx_vssrani_bu_h:
4621 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
4622 case Intrinsic::loongarch_lsx_vssrarni_b_h:
4623 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
4624 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
4625 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
4626 case Intrinsic::loongarch_lasx_xvsrani_b_h:
4627 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
4628 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
4629 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
4630 case Intrinsic::loongarch_lasx_xvssrani_b_h:
4631 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
4632 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
4633 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
4634 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
4635 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
4636 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
4637 return checkIntrinsicImmArg<4>(Op, ImmOp: 3, DAG);
4638 case Intrinsic::loongarch_lsx_vsat_w:
4639 case Intrinsic::loongarch_lsx_vsat_wu:
4640 case Intrinsic::loongarch_lsx_vrotri_w:
4641 case Intrinsic::loongarch_lsx_vsllwil_d_w:
4642 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
4643 case Intrinsic::loongarch_lsx_vsrlri_w:
4644 case Intrinsic::loongarch_lsx_vsrari_w:
4645 case Intrinsic::loongarch_lsx_vslei_bu:
4646 case Intrinsic::loongarch_lsx_vslei_hu:
4647 case Intrinsic::loongarch_lsx_vslei_wu:
4648 case Intrinsic::loongarch_lsx_vslei_du:
4649 case Intrinsic::loongarch_lsx_vslti_bu:
4650 case Intrinsic::loongarch_lsx_vslti_hu:
4651 case Intrinsic::loongarch_lsx_vslti_wu:
4652 case Intrinsic::loongarch_lsx_vslti_du:
4653 case Intrinsic::loongarch_lsx_vbsll_v:
4654 case Intrinsic::loongarch_lsx_vbsrl_v:
4655 case Intrinsic::loongarch_lasx_xvsat_w:
4656 case Intrinsic::loongarch_lasx_xvsat_wu:
4657 case Intrinsic::loongarch_lasx_xvrotri_w:
4658 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
4659 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
4660 case Intrinsic::loongarch_lasx_xvsrlri_w:
4661 case Intrinsic::loongarch_lasx_xvsrari_w:
4662 case Intrinsic::loongarch_lasx_xvslei_bu:
4663 case Intrinsic::loongarch_lasx_xvslei_hu:
4664 case Intrinsic::loongarch_lasx_xvslei_wu:
4665 case Intrinsic::loongarch_lasx_xvslei_du:
4666 case Intrinsic::loongarch_lasx_xvslti_bu:
4667 case Intrinsic::loongarch_lasx_xvslti_hu:
4668 case Intrinsic::loongarch_lasx_xvslti_wu:
4669 case Intrinsic::loongarch_lasx_xvslti_du:
4670 case Intrinsic::loongarch_lasx_xvbsll_v:
4671 case Intrinsic::loongarch_lasx_xvbsrl_v:
4672 return checkIntrinsicImmArg<5>(Op, ImmOp: 2, DAG);
4673 case Intrinsic::loongarch_lsx_vseqi_b:
4674 case Intrinsic::loongarch_lsx_vseqi_h:
4675 case Intrinsic::loongarch_lsx_vseqi_w:
4676 case Intrinsic::loongarch_lsx_vseqi_d:
4677 case Intrinsic::loongarch_lsx_vslei_b:
4678 case Intrinsic::loongarch_lsx_vslei_h:
4679 case Intrinsic::loongarch_lsx_vslei_w:
4680 case Intrinsic::loongarch_lsx_vslei_d:
4681 case Intrinsic::loongarch_lsx_vslti_b:
4682 case Intrinsic::loongarch_lsx_vslti_h:
4683 case Intrinsic::loongarch_lsx_vslti_w:
4684 case Intrinsic::loongarch_lsx_vslti_d:
4685 case Intrinsic::loongarch_lasx_xvseqi_b:
4686 case Intrinsic::loongarch_lasx_xvseqi_h:
4687 case Intrinsic::loongarch_lasx_xvseqi_w:
4688 case Intrinsic::loongarch_lasx_xvseqi_d:
4689 case Intrinsic::loongarch_lasx_xvslei_b:
4690 case Intrinsic::loongarch_lasx_xvslei_h:
4691 case Intrinsic::loongarch_lasx_xvslei_w:
4692 case Intrinsic::loongarch_lasx_xvslei_d:
4693 case Intrinsic::loongarch_lasx_xvslti_b:
4694 case Intrinsic::loongarch_lasx_xvslti_h:
4695 case Intrinsic::loongarch_lasx_xvslti_w:
4696 case Intrinsic::loongarch_lasx_xvslti_d:
4697 return checkIntrinsicImmArg<5>(Op, ImmOp: 2, DAG, /*IsSigned=*/true);
4698 case Intrinsic::loongarch_lsx_vsrlni_h_w:
4699 case Intrinsic::loongarch_lsx_vsrani_h_w:
4700 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
4701 case Intrinsic::loongarch_lsx_vsrarni_h_w:
4702 case Intrinsic::loongarch_lsx_vssrlni_h_w:
4703 case Intrinsic::loongarch_lsx_vssrani_h_w:
4704 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
4705 case Intrinsic::loongarch_lsx_vssrani_hu_w:
4706 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
4707 case Intrinsic::loongarch_lsx_vssrarni_h_w:
4708 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
4709 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
4710 case Intrinsic::loongarch_lsx_vfrstpi_b:
4711 case Intrinsic::loongarch_lsx_vfrstpi_h:
4712 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
4713 case Intrinsic::loongarch_lasx_xvsrani_h_w:
4714 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
4715 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
4716 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
4717 case Intrinsic::loongarch_lasx_xvssrani_h_w:
4718 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
4719 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
4720 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
4721 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
4722 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
4723 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
4724 case Intrinsic::loongarch_lasx_xvfrstpi_b:
4725 case Intrinsic::loongarch_lasx_xvfrstpi_h:
4726 return checkIntrinsicImmArg<5>(Op, ImmOp: 3, DAG);
4727 case Intrinsic::loongarch_lsx_vsat_d:
4728 case Intrinsic::loongarch_lsx_vsat_du:
4729 case Intrinsic::loongarch_lsx_vrotri_d:
4730 case Intrinsic::loongarch_lsx_vsrlri_d:
4731 case Intrinsic::loongarch_lsx_vsrari_d:
4732 case Intrinsic::loongarch_lasx_xvsat_d:
4733 case Intrinsic::loongarch_lasx_xvsat_du:
4734 case Intrinsic::loongarch_lasx_xvrotri_d:
4735 case Intrinsic::loongarch_lasx_xvsrlri_d:
4736 case Intrinsic::loongarch_lasx_xvsrari_d:
4737 return checkIntrinsicImmArg<6>(Op, ImmOp: 2, DAG);
4738 case Intrinsic::loongarch_lsx_vsrlni_w_d:
4739 case Intrinsic::loongarch_lsx_vsrani_w_d:
4740 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
4741 case Intrinsic::loongarch_lsx_vsrarni_w_d:
4742 case Intrinsic::loongarch_lsx_vssrlni_w_d:
4743 case Intrinsic::loongarch_lsx_vssrani_w_d:
4744 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
4745 case Intrinsic::loongarch_lsx_vssrani_wu_d:
4746 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
4747 case Intrinsic::loongarch_lsx_vssrarni_w_d:
4748 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
4749 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
4750 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
4751 case Intrinsic::loongarch_lasx_xvsrani_w_d:
4752 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
4753 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
4754 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
4755 case Intrinsic::loongarch_lasx_xvssrani_w_d:
4756 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
4757 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
4758 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
4759 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
4760 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
4761 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
4762 return checkIntrinsicImmArg<6>(Op, ImmOp: 3, DAG);
4763 case Intrinsic::loongarch_lsx_vsrlni_d_q:
4764 case Intrinsic::loongarch_lsx_vsrani_d_q:
4765 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
4766 case Intrinsic::loongarch_lsx_vsrarni_d_q:
4767 case Intrinsic::loongarch_lsx_vssrlni_d_q:
4768 case Intrinsic::loongarch_lsx_vssrani_d_q:
4769 case Intrinsic::loongarch_lsx_vssrlni_du_q:
4770 case Intrinsic::loongarch_lsx_vssrani_du_q:
4771 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
4772 case Intrinsic::loongarch_lsx_vssrarni_d_q:
4773 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
4774 case Intrinsic::loongarch_lsx_vssrarni_du_q:
4775 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
4776 case Intrinsic::loongarch_lasx_xvsrani_d_q:
4777 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
4778 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
4779 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
4780 case Intrinsic::loongarch_lasx_xvssrani_d_q:
4781 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
4782 case Intrinsic::loongarch_lasx_xvssrani_du_q:
4783 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
4784 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
4785 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
4786 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
4787 return checkIntrinsicImmArg<7>(Op, ImmOp: 3, DAG);
4788 case Intrinsic::loongarch_lsx_vnori_b:
4789 case Intrinsic::loongarch_lsx_vshuf4i_b:
4790 case Intrinsic::loongarch_lsx_vshuf4i_h:
4791 case Intrinsic::loongarch_lsx_vshuf4i_w:
4792 case Intrinsic::loongarch_lasx_xvnori_b:
4793 case Intrinsic::loongarch_lasx_xvshuf4i_b:
4794 case Intrinsic::loongarch_lasx_xvshuf4i_h:
4795 case Intrinsic::loongarch_lasx_xvshuf4i_w:
4796 case Intrinsic::loongarch_lasx_xvpermi_d:
4797 return checkIntrinsicImmArg<8>(Op, ImmOp: 2, DAG);
4798 case Intrinsic::loongarch_lsx_vshuf4i_d:
4799 case Intrinsic::loongarch_lsx_vpermi_w:
4800 case Intrinsic::loongarch_lsx_vbitseli_b:
4801 case Intrinsic::loongarch_lsx_vextrins_b:
4802 case Intrinsic::loongarch_lsx_vextrins_h:
4803 case Intrinsic::loongarch_lsx_vextrins_w:
4804 case Intrinsic::loongarch_lsx_vextrins_d:
4805 case Intrinsic::loongarch_lasx_xvshuf4i_d:
4806 case Intrinsic::loongarch_lasx_xvpermi_w:
4807 case Intrinsic::loongarch_lasx_xvpermi_q:
4808 case Intrinsic::loongarch_lasx_xvbitseli_b:
4809 case Intrinsic::loongarch_lasx_xvextrins_b:
4810 case Intrinsic::loongarch_lasx_xvextrins_h:
4811 case Intrinsic::loongarch_lasx_xvextrins_w:
4812 case Intrinsic::loongarch_lasx_xvextrins_d:
4813 return checkIntrinsicImmArg<8>(Op, ImmOp: 3, DAG);
4814 case Intrinsic::loongarch_lsx_vrepli_b:
4815 case Intrinsic::loongarch_lsx_vrepli_h:
4816 case Intrinsic::loongarch_lsx_vrepli_w:
4817 case Intrinsic::loongarch_lsx_vrepli_d:
4818 case Intrinsic::loongarch_lasx_xvrepli_b:
4819 case Intrinsic::loongarch_lasx_xvrepli_h:
4820 case Intrinsic::loongarch_lasx_xvrepli_w:
4821 case Intrinsic::loongarch_lasx_xvrepli_d:
4822 return checkIntrinsicImmArg<10>(Op, ImmOp: 1, DAG, /*IsSigned=*/true);
4823 case Intrinsic::loongarch_lsx_vldi:
4824 case Intrinsic::loongarch_lasx_xvldi:
4825 return checkIntrinsicImmArg<13>(Op, ImmOp: 1, DAG, /*IsSigned=*/true);
4826 }
4827}
4828
4829// Helper function that emits error message for intrinsics with chain and return
4830// merge values of a UNDEF and the chain.
4831static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
4832 StringRef ErrorMsg,
4833 SelectionDAG &DAG) {
4834 DAG.getContext()->emitError(ErrorStr: Op->getOperationName(G: 0) + ": " + ErrorMsg + ".");
4835 return DAG.getMergeValues(Ops: {DAG.getUNDEF(VT: Op.getValueType()), Op.getOperand(i: 0)},
4836 dl: SDLoc(Op));
4837}
4838
4839SDValue
4840LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4841 SelectionDAG &DAG) const {
4842 SDLoc DL(Op);
4843 MVT GRLenVT = Subtarget.getGRLenVT();
4844 EVT VT = Op.getValueType();
4845 SDValue Chain = Op.getOperand(i: 0);
4846 const StringRef ErrorMsgOOR = "argument out of range";
4847 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4848 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4849
4850 switch (Op.getConstantOperandVal(i: 1)) {
4851 default:
4852 return Op;
4853 case Intrinsic::loongarch_crc_w_b_w:
4854 case Intrinsic::loongarch_crc_w_h_w:
4855 case Intrinsic::loongarch_crc_w_w_w:
4856 case Intrinsic::loongarch_crc_w_d_w:
4857 case Intrinsic::loongarch_crcc_w_b_w:
4858 case Intrinsic::loongarch_crcc_w_h_w:
4859 case Intrinsic::loongarch_crcc_w_w_w:
4860 case Intrinsic::loongarch_crcc_w_d_w:
4861 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG);
4862 case Intrinsic::loongarch_csrrd_w:
4863 case Intrinsic::loongarch_csrrd_d: {
4864 unsigned Imm = Op.getConstantOperandVal(i: 2);
4865 return !isUInt<14>(x: Imm)
4866 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
4867 : DAG.getNode(Opcode: LoongArchISD::CSRRD, DL, ResultTys: {GRLenVT, MVT::Other},
4868 Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
4869 }
4870 case Intrinsic::loongarch_csrwr_w:
4871 case Intrinsic::loongarch_csrwr_d: {
4872 unsigned Imm = Op.getConstantOperandVal(i: 3);
4873 return !isUInt<14>(x: Imm)
4874 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
4875 : DAG.getNode(Opcode: LoongArchISD::CSRWR, DL, ResultTys: {GRLenVT, MVT::Other},
4876 Ops: {Chain, Op.getOperand(i: 2),
4877 DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
4878 }
4879 case Intrinsic::loongarch_csrxchg_w:
4880 case Intrinsic::loongarch_csrxchg_d: {
4881 unsigned Imm = Op.getConstantOperandVal(i: 4);
4882 return !isUInt<14>(x: Imm)
4883 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
4884 : DAG.getNode(Opcode: LoongArchISD::CSRXCHG, DL, ResultTys: {GRLenVT, MVT::Other},
4885 Ops: {Chain, Op.getOperand(i: 2), Op.getOperand(i: 3),
4886 DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
4887 }
4888 case Intrinsic::loongarch_iocsrrd_d: {
4889 return DAG.getNode(
4890 Opcode: LoongArchISD::IOCSRRD_D, DL, ResultTys: {GRLenVT, MVT::Other},
4891 Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op.getOperand(i: 2))});
4892 }
4893#define IOCSRRD_CASE(NAME, NODE) \
4894 case Intrinsic::loongarch_##NAME: { \
4895 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
4896 {Chain, Op.getOperand(2)}); \
4897 }
4898 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4899 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4900 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4901#undef IOCSRRD_CASE
4902 case Intrinsic::loongarch_cpucfg: {
4903 return DAG.getNode(Opcode: LoongArchISD::CPUCFG, DL, ResultTys: {GRLenVT, MVT::Other},
4904 Ops: {Chain, Op.getOperand(i: 2)});
4905 }
4906 case Intrinsic::loongarch_lddir_d: {
4907 unsigned Imm = Op.getConstantOperandVal(i: 3);
4908 return !isUInt<8>(x: Imm)
4909 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
4910 : Op;
4911 }
4912 case Intrinsic::loongarch_movfcsr2gr: {
4913 if (!Subtarget.hasBasicF())
4914 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgReqF, DAG);
4915 unsigned Imm = Op.getConstantOperandVal(i: 2);
4916 return !isUInt<2>(x: Imm)
4917 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
4918 : DAG.getNode(Opcode: LoongArchISD::MOVFCSR2GR, DL, ResultTys: {VT, MVT::Other},
4919 Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
4920 }
4921 case Intrinsic::loongarch_lsx_vld:
4922 case Intrinsic::loongarch_lsx_vldrepl_b:
4923 case Intrinsic::loongarch_lasx_xvld:
4924 case Intrinsic::loongarch_lasx_xvldrepl_b:
4925 return !isInt<12>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue())
4926 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
4927 : SDValue();
4928 case Intrinsic::loongarch_lsx_vldrepl_h:
4929 case Intrinsic::loongarch_lasx_xvldrepl_h:
4930 return !isShiftedInt<11, 1>(
4931 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue())
4932 ? emitIntrinsicWithChainErrorMessage(
4933 Op, ErrorMsg: "argument out of range or not a multiple of 2", DAG)
4934 : SDValue();
4935 case Intrinsic::loongarch_lsx_vldrepl_w:
4936 case Intrinsic::loongarch_lasx_xvldrepl_w:
4937 return !isShiftedInt<10, 2>(
4938 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue())
4939 ? emitIntrinsicWithChainErrorMessage(
4940 Op, ErrorMsg: "argument out of range or not a multiple of 4", DAG)
4941 : SDValue();
4942 case Intrinsic::loongarch_lsx_vldrepl_d:
4943 case Intrinsic::loongarch_lasx_xvldrepl_d:
4944 return !isShiftedInt<9, 3>(
4945 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue())
4946 ? emitIntrinsicWithChainErrorMessage(
4947 Op, ErrorMsg: "argument out of range or not a multiple of 8", DAG)
4948 : SDValue();
4949 }
4950}
4951
4952// Helper function that emits error message for intrinsics with void return
4953// value and return the chain.
4954static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
4955 SelectionDAG &DAG) {
4956
4957 DAG.getContext()->emitError(ErrorStr: Op->getOperationName(G: 0) + ": " + ErrorMsg + ".");
4958 return Op.getOperand(i: 0);
4959}
4960
4961SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
4962 SelectionDAG &DAG) const {
4963 SDLoc DL(Op);
4964 MVT GRLenVT = Subtarget.getGRLenVT();
4965 SDValue Chain = Op.getOperand(i: 0);
4966 uint64_t IntrinsicEnum = Op.getConstantOperandVal(i: 1);
4967 SDValue Op2 = Op.getOperand(i: 2);
4968 const StringRef ErrorMsgOOR = "argument out of range";
4969 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4970 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
4971 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4972
4973 switch (IntrinsicEnum) {
4974 default:
4975 // TODO: Add more Intrinsics.
4976 return SDValue();
4977 case Intrinsic::loongarch_cacop_d:
4978 case Intrinsic::loongarch_cacop_w: {
4979 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
4980 return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG);
4981 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
4982 return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA32, DAG);
4983 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
4984 unsigned Imm1 = Op2->getAsZExtVal();
4985 int Imm2 = cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue();
4986 if (!isUInt<5>(x: Imm1) || !isInt<12>(x: Imm2))
4987 return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG);
4988 return Op;
4989 }
4990 case Intrinsic::loongarch_dbar: {
4991 unsigned Imm = Op2->getAsZExtVal();
4992 return !isUInt<15>(x: Imm)
4993 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
4994 : DAG.getNode(Opcode: LoongArchISD::DBAR, DL, VT: MVT::Other, N1: Chain,
4995 N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT));
4996 }
4997 case Intrinsic::loongarch_ibar: {
4998 unsigned Imm = Op2->getAsZExtVal();
4999 return !isUInt<15>(x: Imm)
5000 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
5001 : DAG.getNode(Opcode: LoongArchISD::IBAR, DL, VT: MVT::Other, N1: Chain,
5002 N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT));
5003 }
5004 case Intrinsic::loongarch_break: {
5005 unsigned Imm = Op2->getAsZExtVal();
5006 return !isUInt<15>(x: Imm)
5007 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
5008 : DAG.getNode(Opcode: LoongArchISD::BREAK, DL, VT: MVT::Other, N1: Chain,
5009 N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT));
5010 }
5011 case Intrinsic::loongarch_movgr2fcsr: {
5012 if (!Subtarget.hasBasicF())
5013 return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqF, DAG);
5014 unsigned Imm = Op2->getAsZExtVal();
5015 return !isUInt<2>(x: Imm)
5016 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
5017 : DAG.getNode(Opcode: LoongArchISD::MOVGR2FCSR, DL, VT: MVT::Other, N1: Chain,
5018 N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT),
5019 N3: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: GRLenVT,
5020 Operand: Op.getOperand(i: 3)));
5021 }
5022 case Intrinsic::loongarch_syscall: {
5023 unsigned Imm = Op2->getAsZExtVal();
5024 return !isUInt<15>(x: Imm)
5025 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
5026 : DAG.getNode(Opcode: LoongArchISD::SYSCALL, DL, VT: MVT::Other, N1: Chain,
5027 N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT));
5028 }
5029#define IOCSRWR_CASE(NAME, NODE) \
5030 case Intrinsic::loongarch_##NAME: { \
5031 SDValue Op3 = Op.getOperand(3); \
5032 return Subtarget.is64Bit() \
5033 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
5034 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
5035 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
5036 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
5037 Op3); \
5038 }
5039 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
5040 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
5041 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
5042#undef IOCSRWR_CASE
5043 case Intrinsic::loongarch_iocsrwr_d: {
5044 return !Subtarget.is64Bit()
5045 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG)
5046 : DAG.getNode(Opcode: LoongArchISD::IOCSRWR_D, DL, VT: MVT::Other, N1: Chain,
5047 N2: Op2,
5048 N3: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64,
5049 Operand: Op.getOperand(i: 3)));
5050 }
5051#define ASRT_LE_GT_CASE(NAME) \
5052 case Intrinsic::loongarch_##NAME: { \
5053 return !Subtarget.is64Bit() \
5054 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
5055 : Op; \
5056 }
5057 ASRT_LE_GT_CASE(asrtle_d)
5058 ASRT_LE_GT_CASE(asrtgt_d)
5059#undef ASRT_LE_GT_CASE
5060 case Intrinsic::loongarch_ldpte_d: {
5061 unsigned Imm = Op.getConstantOperandVal(i: 3);
5062 return !Subtarget.is64Bit()
5063 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG)
5064 : !isUInt<8>(x: Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
5065 : Op;
5066 }
5067 case Intrinsic::loongarch_lsx_vst:
5068 case Intrinsic::loongarch_lasx_xvst:
5069 return !isInt<12>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue())
5070 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
5071 : SDValue();
5072 case Intrinsic::loongarch_lasx_xvstelm_b:
5073 return (!isInt<8>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
5074 !isUInt<5>(x: Op.getConstantOperandVal(i: 5)))
5075 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
5076 : SDValue();
5077 case Intrinsic::loongarch_lsx_vstelm_b:
5078 return (!isInt<8>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
5079 !isUInt<4>(x: Op.getConstantOperandVal(i: 5)))
5080 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
5081 : SDValue();
5082 case Intrinsic::loongarch_lasx_xvstelm_h:
5083 return (!isShiftedInt<8, 1>(
5084 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
5085 !isUInt<4>(x: Op.getConstantOperandVal(i: 5)))
5086 ? emitIntrinsicErrorMessage(
5087 Op, ErrorMsg: "argument out of range or not a multiple of 2", DAG)
5088 : SDValue();
5089 case Intrinsic::loongarch_lsx_vstelm_h:
5090 return (!isShiftedInt<8, 1>(
5091 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
5092 !isUInt<3>(x: Op.getConstantOperandVal(i: 5)))
5093 ? emitIntrinsicErrorMessage(
5094 Op, ErrorMsg: "argument out of range or not a multiple of 2", DAG)
5095 : SDValue();
5096 case Intrinsic::loongarch_lasx_xvstelm_w:
5097 return (!isShiftedInt<8, 2>(
5098 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
5099 !isUInt<3>(x: Op.getConstantOperandVal(i: 5)))
5100 ? emitIntrinsicErrorMessage(
5101 Op, ErrorMsg: "argument out of range or not a multiple of 4", DAG)
5102 : SDValue();
5103 case Intrinsic::loongarch_lsx_vstelm_w:
5104 return (!isShiftedInt<8, 2>(
5105 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
5106 !isUInt<2>(x: Op.getConstantOperandVal(i: 5)))
5107 ? emitIntrinsicErrorMessage(
5108 Op, ErrorMsg: "argument out of range or not a multiple of 4", DAG)
5109 : SDValue();
5110 case Intrinsic::loongarch_lasx_xvstelm_d:
5111 return (!isShiftedInt<8, 3>(
5112 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
5113 !isUInt<2>(x: Op.getConstantOperandVal(i: 5)))
5114 ? emitIntrinsicErrorMessage(
5115 Op, ErrorMsg: "argument out of range or not a multiple of 8", DAG)
5116 : SDValue();
5117 case Intrinsic::loongarch_lsx_vstelm_d:
5118 return (!isShiftedInt<8, 3>(
5119 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
5120 !isUInt<1>(x: Op.getConstantOperandVal(i: 5)))
5121 ? emitIntrinsicErrorMessage(
5122 Op, ErrorMsg: "argument out of range or not a multiple of 8", DAG)
5123 : SDValue();
5124 }
5125}
5126
5127SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
5128 SelectionDAG &DAG) const {
5129 SDLoc DL(Op);
5130 SDValue Lo = Op.getOperand(i: 0);
5131 SDValue Hi = Op.getOperand(i: 1);
5132 SDValue Shamt = Op.getOperand(i: 2);
5133 EVT VT = Lo.getValueType();
5134
5135 // if Shamt-GRLen < 0: // Shamt < GRLen
5136 // Lo = Lo << Shamt
5137 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
5138 // else:
5139 // Lo = 0
5140 // Hi = Lo << (Shamt-GRLen)
5141
5142 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
5143 SDValue One = DAG.getConstant(Val: 1, DL, VT);
5144 SDValue MinusGRLen =
5145 DAG.getSignedConstant(Val: -(int)Subtarget.getGRLen(), DL, VT);
5146 SDValue GRLenMinus1 = DAG.getConstant(Val: Subtarget.getGRLen() - 1, DL, VT);
5147 SDValue ShamtMinusGRLen = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shamt, N2: MinusGRLen);
5148 SDValue GRLenMinus1Shamt = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Shamt, N2: GRLenMinus1);
5149
5150 SDValue LoTrue = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Lo, N2: Shamt);
5151 SDValue ShiftRight1Lo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo, N2: One);
5152 SDValue ShiftRightLo =
5153 DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: ShiftRight1Lo, N2: GRLenMinus1Shamt);
5154 SDValue ShiftLeftHi = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi, N2: Shamt);
5155 SDValue HiTrue = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShiftLeftHi, N2: ShiftRightLo);
5156 SDValue HiFalse = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Lo, N2: ShamtMinusGRLen);
5157
5158 SDValue CC = DAG.getSetCC(DL, VT, LHS: ShamtMinusGRLen, RHS: Zero, Cond: ISD::SETLT);
5159
5160 Lo = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: LoTrue, N3: Zero);
5161 Hi = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: HiTrue, N3: HiFalse);
5162
5163 SDValue Parts[2] = {Lo, Hi};
5164 return DAG.getMergeValues(Ops: Parts, dl: DL);
5165}
5166
5167SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
5168 SelectionDAG &DAG,
5169 bool IsSRA) const {
5170 SDLoc DL(Op);
5171 SDValue Lo = Op.getOperand(i: 0);
5172 SDValue Hi = Op.getOperand(i: 1);
5173 SDValue Shamt = Op.getOperand(i: 2);
5174 EVT VT = Lo.getValueType();
5175
5176 // SRA expansion:
5177 // if Shamt-GRLen < 0: // Shamt < GRLen
5178 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
5179 // Hi = Hi >>s Shamt
5180 // else:
5181 // Lo = Hi >>s (Shamt-GRLen);
5182 // Hi = Hi >>s (GRLen-1)
5183 //
5184 // SRL expansion:
5185 // if Shamt-GRLen < 0: // Shamt < GRLen
5186 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
5187 // Hi = Hi >>u Shamt
5188 // else:
5189 // Lo = Hi >>u (Shamt-GRLen);
5190 // Hi = 0;
5191
5192 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
5193
5194 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
5195 SDValue One = DAG.getConstant(Val: 1, DL, VT);
5196 SDValue MinusGRLen =
5197 DAG.getSignedConstant(Val: -(int)Subtarget.getGRLen(), DL, VT);
5198 SDValue GRLenMinus1 = DAG.getConstant(Val: Subtarget.getGRLen() - 1, DL, VT);
5199 SDValue ShamtMinusGRLen = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shamt, N2: MinusGRLen);
5200 SDValue GRLenMinus1Shamt = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Shamt, N2: GRLenMinus1);
5201
5202 SDValue ShiftRightLo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo, N2: Shamt);
5203 SDValue ShiftLeftHi1 = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi, N2: One);
5204 SDValue ShiftLeftHi =
5205 DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: ShiftLeftHi1, N2: GRLenMinus1Shamt);
5206 SDValue LoTrue = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShiftRightLo, N2: ShiftLeftHi);
5207 SDValue HiTrue = DAG.getNode(Opcode: ShiftRightOp, DL, VT, N1: Hi, N2: Shamt);
5208 SDValue LoFalse = DAG.getNode(Opcode: ShiftRightOp, DL, VT, N1: Hi, N2: ShamtMinusGRLen);
5209 SDValue HiFalse =
5210 IsSRA ? DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: Hi, N2: GRLenMinus1) : Zero;
5211
5212 SDValue CC = DAG.getSetCC(DL, VT, LHS: ShamtMinusGRLen, RHS: Zero, Cond: ISD::SETLT);
5213
5214 Lo = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: LoTrue, N3: LoFalse);
5215 Hi = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: HiTrue, N3: HiFalse);
5216
5217 SDValue Parts[2] = {Lo, Hi};
5218 return DAG.getMergeValues(Ops: Parts, dl: DL);
5219}
5220
5221// Returns the opcode of the target-specific SDNode that implements the 32-bit
5222// form of the given Opcode.
5223static unsigned getLoongArchWOpcode(unsigned Opcode) {
5224 switch (Opcode) {
5225 default:
5226 llvm_unreachable("Unexpected opcode");
5227 case ISD::SDIV:
5228 return LoongArchISD::DIV_W;
5229 case ISD::UDIV:
5230 return LoongArchISD::DIV_WU;
5231 case ISD::SREM:
5232 return LoongArchISD::MOD_W;
5233 case ISD::UREM:
5234 return LoongArchISD::MOD_WU;
5235 case ISD::SHL:
5236 return LoongArchISD::SLL_W;
5237 case ISD::SRA:
5238 return LoongArchISD::SRA_W;
5239 case ISD::SRL:
5240 return LoongArchISD::SRL_W;
5241 case ISD::ROTL:
5242 case ISD::ROTR:
5243 return LoongArchISD::ROTR_W;
5244 case ISD::CTTZ:
5245 return LoongArchISD::CTZ_W;
5246 case ISD::CTLZ:
5247 return LoongArchISD::CLZ_W;
5248 }
5249}
5250
5251// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
5252// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
5253// otherwise be promoted to i64, making it difficult to select the
5254// SLL_W/.../*W later one because the fact the operation was originally of
5255// type i8/i16/i32 is lost.
5256static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
5257 unsigned ExtOpc = ISD::ANY_EXTEND) {
5258 SDLoc DL(N);
5259 unsigned WOpcode = getLoongArchWOpcode(Opcode: N->getOpcode());
5260 SDValue NewOp0, NewRes;
5261
5262 switch (NumOp) {
5263 default:
5264 llvm_unreachable("Unexpected NumOp");
5265 case 1: {
5266 NewOp0 = DAG.getNode(Opcode: ExtOpc, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0));
5267 NewRes = DAG.getNode(Opcode: WOpcode, DL, VT: MVT::i64, Operand: NewOp0);
5268 break;
5269 }
5270 case 2: {
5271 NewOp0 = DAG.getNode(Opcode: ExtOpc, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0));
5272 SDValue NewOp1 = DAG.getNode(Opcode: ExtOpc, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1));
5273 if (N->getOpcode() == ISD::ROTL) {
5274 SDValue TmpOp = DAG.getConstant(Val: 32, DL, VT: MVT::i64);
5275 NewOp1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: MVT::i64, N1: TmpOp, N2: NewOp1);
5276 }
5277 NewRes = DAG.getNode(Opcode: WOpcode, DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1);
5278 break;
5279 }
5280 // TODO:Handle more NumOp.
5281 }
5282
5283 // ReplaceNodeResults requires we maintain the same type for the return
5284 // value.
5285 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: NewRes);
5286}
5287
5288// Converts the given 32-bit operation to a i64 operation with signed extension
5289// semantic to reduce the signed extension instructions.
5290static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
5291 SDLoc DL(N);
5292 SDValue NewOp0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0));
5293 SDValue NewOp1 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1));
5294 SDValue NewWOp = DAG.getNode(Opcode: N->getOpcode(), DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1);
5295 SDValue NewRes = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: MVT::i64, N1: NewWOp,
5296 N2: DAG.getValueType(MVT::i32));
5297 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: NewRes);
5298}
5299
5300// Helper function that emits error message for intrinsics with/without chain
5301// and return a UNDEF or and the chain as the results.
5302static void emitErrorAndReplaceIntrinsicResults(
5303 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
5304 StringRef ErrorMsg, bool WithChain = true) {
5305 DAG.getContext()->emitError(ErrorStr: N->getOperationName(G: 0) + ": " + ErrorMsg + ".");
5306 Results.push_back(Elt: DAG.getUNDEF(VT: N->getValueType(ResNo: 0)));
5307 if (!WithChain)
5308 return;
5309 Results.push_back(Elt: N->getOperand(Num: 0));
5310}
5311
5312template <unsigned N>
5313static void
5314replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
5315 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
5316 unsigned ResOp) {
5317 const StringRef ErrorMsgOOR = "argument out of range";
5318 unsigned Imm = Node->getConstantOperandVal(Num: 2);
5319 if (!isUInt<N>(Imm)) {
5320 emitErrorAndReplaceIntrinsicResults(N: Node, Results, DAG, ErrorMsg: ErrorMsgOOR,
5321 /*WithChain=*/false);
5322 return;
5323 }
5324 SDLoc DL(Node);
5325 SDValue Vec = Node->getOperand(Num: 1);
5326
5327 SDValue PickElt =
5328 DAG.getNode(Opcode: ResOp, DL, VT: Subtarget.getGRLenVT(), N1: Vec,
5329 N2: DAG.getConstant(Val: Imm, DL, VT: Subtarget.getGRLenVT()),
5330 N3: DAG.getValueType(Vec.getValueType().getVectorElementType()));
5331 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: Node->getValueType(ResNo: 0),
5332 Operand: PickElt.getValue(R: 0)));
5333}
5334
5335static void replaceVecCondBranchResults(SDNode *N,
5336 SmallVectorImpl<SDValue> &Results,
5337 SelectionDAG &DAG,
5338 const LoongArchSubtarget &Subtarget,
5339 unsigned ResOp) {
5340 SDLoc DL(N);
5341 SDValue Vec = N->getOperand(Num: 1);
5342
5343 SDValue CB = DAG.getNode(Opcode: ResOp, DL, VT: Subtarget.getGRLenVT(), Operand: Vec);
5344 Results.push_back(
5345 Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: CB.getValue(R: 0)));
5346}
5347
5348static void
5349replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
5350 SelectionDAG &DAG,
5351 const LoongArchSubtarget &Subtarget) {
5352 switch (N->getConstantOperandVal(Num: 0)) {
5353 default:
5354 llvm_unreachable("Unexpected Intrinsic.");
5355 case Intrinsic::loongarch_lsx_vpickve2gr_b:
5356 replaceVPICKVE2GRResults<4>(Node: N, Results, DAG, Subtarget,
5357 ResOp: LoongArchISD::VPICK_SEXT_ELT);
5358 break;
5359 case Intrinsic::loongarch_lsx_vpickve2gr_h:
5360 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
5361 replaceVPICKVE2GRResults<3>(Node: N, Results, DAG, Subtarget,
5362 ResOp: LoongArchISD::VPICK_SEXT_ELT);
5363 break;
5364 case Intrinsic::loongarch_lsx_vpickve2gr_w:
5365 replaceVPICKVE2GRResults<2>(Node: N, Results, DAG, Subtarget,
5366 ResOp: LoongArchISD::VPICK_SEXT_ELT);
5367 break;
5368 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
5369 replaceVPICKVE2GRResults<4>(Node: N, Results, DAG, Subtarget,
5370 ResOp: LoongArchISD::VPICK_ZEXT_ELT);
5371 break;
5372 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
5373 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
5374 replaceVPICKVE2GRResults<3>(Node: N, Results, DAG, Subtarget,
5375 ResOp: LoongArchISD::VPICK_ZEXT_ELT);
5376 break;
5377 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
5378 replaceVPICKVE2GRResults<2>(Node: N, Results, DAG, Subtarget,
5379 ResOp: LoongArchISD::VPICK_ZEXT_ELT);
5380 break;
5381 case Intrinsic::loongarch_lsx_bz_b:
5382 case Intrinsic::loongarch_lsx_bz_h:
5383 case Intrinsic::loongarch_lsx_bz_w:
5384 case Intrinsic::loongarch_lsx_bz_d:
5385 case Intrinsic::loongarch_lasx_xbz_b:
5386 case Intrinsic::loongarch_lasx_xbz_h:
5387 case Intrinsic::loongarch_lasx_xbz_w:
5388 case Intrinsic::loongarch_lasx_xbz_d:
5389 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
5390 ResOp: LoongArchISD::VALL_ZERO);
5391 break;
5392 case Intrinsic::loongarch_lsx_bz_v:
5393 case Intrinsic::loongarch_lasx_xbz_v:
5394 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
5395 ResOp: LoongArchISD::VANY_ZERO);
5396 break;
5397 case Intrinsic::loongarch_lsx_bnz_b:
5398 case Intrinsic::loongarch_lsx_bnz_h:
5399 case Intrinsic::loongarch_lsx_bnz_w:
5400 case Intrinsic::loongarch_lsx_bnz_d:
5401 case Intrinsic::loongarch_lasx_xbnz_b:
5402 case Intrinsic::loongarch_lasx_xbnz_h:
5403 case Intrinsic::loongarch_lasx_xbnz_w:
5404 case Intrinsic::loongarch_lasx_xbnz_d:
5405 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
5406 ResOp: LoongArchISD::VALL_NONZERO);
5407 break;
5408 case Intrinsic::loongarch_lsx_bnz_v:
5409 case Intrinsic::loongarch_lasx_xbnz_v:
5410 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
5411 ResOp: LoongArchISD::VANY_NONZERO);
5412 break;
5413 }
5414}
5415
5416static void replaceCMP_XCHG_128Results(SDNode *N,
5417 SmallVectorImpl<SDValue> &Results,
5418 SelectionDAG &DAG) {
5419 assert(N->getValueType(0) == MVT::i128 &&
5420 "AtomicCmpSwap on types less than 128 should be legal");
5421 MachineMemOperand *MemOp = cast<MemSDNode>(Val: N)->getMemOperand();
5422
5423 unsigned Opcode;
5424 switch (MemOp->getMergedOrdering()) {
5425 case AtomicOrdering::Acquire:
5426 case AtomicOrdering::AcquireRelease:
5427 case AtomicOrdering::SequentiallyConsistent:
5428 Opcode = LoongArch::PseudoCmpXchg128Acquire;
5429 break;
5430 case AtomicOrdering::Monotonic:
5431 case AtomicOrdering::Release:
5432 Opcode = LoongArch::PseudoCmpXchg128;
5433 break;
5434 default:
5435 llvm_unreachable("Unexpected ordering!");
5436 }
5437
5438 SDLoc DL(N);
5439 auto CmpVal = DAG.SplitScalar(N: N->getOperand(Num: 2), DL, LoVT: MVT::i64, HiVT: MVT::i64);
5440 auto NewVal = DAG.SplitScalar(N: N->getOperand(Num: 3), DL, LoVT: MVT::i64, HiVT: MVT::i64);
5441 SDValue Ops[] = {N->getOperand(Num: 1), CmpVal.first, CmpVal.second,
5442 NewVal.first, NewVal.second, N->getOperand(Num: 0)};
5443
5444 SDNode *CmpSwap = DAG.getMachineNode(
5445 Opcode, dl: SDLoc(N), VTs: DAG.getVTList(VT1: MVT::i64, VT2: MVT::i64, VT3: MVT::i64, VT4: MVT::Other),
5446 Ops);
5447 DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: CmpSwap), NewMemRefs: {MemOp});
5448 Results.push_back(Elt: DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: MVT::i128,
5449 N1: SDValue(CmpSwap, 0), N2: SDValue(CmpSwap, 1)));
5450 Results.push_back(Elt: SDValue(CmpSwap, 3));
5451}
5452
5453void LoongArchTargetLowering::ReplaceNodeResults(
5454 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
5455 SDLoc DL(N);
5456 EVT VT = N->getValueType(ResNo: 0);
5457 switch (N->getOpcode()) {
5458 default:
5459 llvm_unreachable("Don't know how to legalize this operation");
5460 case ISD::ADD:
5461 case ISD::SUB:
5462 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
5463 "Unexpected custom legalisation");
5464 Results.push_back(Elt: customLegalizeToWOpWithSExt(N, DAG));
5465 break;
5466 case ISD::SDIV:
5467 case ISD::UDIV:
5468 case ISD::SREM:
5469 case ISD::UREM:
5470 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5471 "Unexpected custom legalisation");
5472 Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 2,
5473 ExtOpc: Subtarget.hasDiv32() && VT == MVT::i32
5474 ? ISD::ANY_EXTEND
5475 : ISD::SIGN_EXTEND));
5476 break;
5477 case ISD::SHL:
5478 case ISD::SRA:
5479 case ISD::SRL:
5480 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5481 "Unexpected custom legalisation");
5482 if (N->getOperand(Num: 1).getOpcode() != ISD::Constant) {
5483 Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 2));
5484 break;
5485 }
5486 break;
5487 case ISD::ROTL:
5488 case ISD::ROTR:
5489 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5490 "Unexpected custom legalisation");
5491 Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 2));
5492 break;
5493 case ISD::LOAD: {
5494 // Use an f64 load and a scalar_to_vector for v2f32 loads. This avoids
5495 // scalarizing in 32-bit mode. In 64-bit mode this avoids a int->fp
5496 // cast since type legalization will try to use an i64 load.
5497 MVT VT = N->getSimpleValueType(ResNo: 0);
5498 assert(VT == MVT::v2f32 && Subtarget.hasExtLSX() &&
5499 "Unexpected custom legalisation");
5500 assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
5501 "Unexpected type action!");
5502 if (!ISD::isNON_EXTLoad(N))
5503 return;
5504 auto *Ld = cast<LoadSDNode>(Val: N);
5505 SDValue Res = DAG.getLoad(VT: MVT::f64, dl: DL, Chain: Ld->getChain(), Ptr: Ld->getBasePtr(),
5506 PtrInfo: Ld->getPointerInfo(), Alignment: Ld->getBaseAlign(),
5507 MMOFlags: Ld->getMemOperand()->getFlags());
5508 SDValue Chain = Res.getValue(R: 1);
5509 MVT VecVT = MVT::getVectorVT(VT: MVT::f64, NumElements: 2);
5510 Res = DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL, VT: VecVT, Operand: Res);
5511 EVT WideVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT);
5512 Res = DAG.getBitcast(VT: WideVT, V: Res);
5513 Results.push_back(Elt: Res);
5514 Results.push_back(Elt: Chain);
5515 break;
5516 }
5517 case ISD::FP_TO_SINT: {
5518 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5519 "Unexpected custom legalisation");
5520 SDValue Src = N->getOperand(Num: 0);
5521 EVT FVT = EVT::getFloatingPointVT(BitWidth: N->getValueSizeInBits(ResNo: 0));
5522 if (getTypeAction(Context&: *DAG.getContext(), VT: Src.getValueType()) !=
5523 TargetLowering::TypeSoftenFloat) {
5524 if (!isTypeLegal(VT: Src.getValueType()))
5525 return;
5526 if (Src.getValueType() == MVT::f16)
5527 Src = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: MVT::f32, Operand: Src);
5528 SDValue Dst = DAG.getNode(Opcode: LoongArchISD::FTINT, DL, VT: FVT, Operand: Src);
5529 Results.push_back(Elt: DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Dst));
5530 return;
5531 }
5532 // If the FP type needs to be softened, emit a library call using the 'si'
5533 // version. If we left it to default legalization we'd end up with 'di'.
5534 RTLIB::Libcall LC;
5535 LC = RTLIB::getFPTOSINT(OpVT: Src.getValueType(), RetVT: VT);
5536 MakeLibCallOptions CallOptions;
5537 EVT OpVT = Src.getValueType();
5538 CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT: VT);
5539 SDValue Chain = SDValue();
5540 SDValue Result;
5541 std::tie(args&: Result, args&: Chain) =
5542 makeLibCall(DAG, LC, RetVT: VT, Ops: Src, CallOptions, dl: DL, Chain);
5543 Results.push_back(Elt: Result);
5544 break;
5545 }
5546 case ISD::BITCAST: {
5547 SDValue Src = N->getOperand(Num: 0);
5548 EVT SrcVT = Src.getValueType();
5549 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
5550 Subtarget.hasBasicF()) {
5551 SDValue Dst =
5552 DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Src);
5553 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Dst));
5554 } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
5555 SDValue NewReg = DAG.getNode(Opcode: LoongArchISD::SPLIT_PAIR_F64, DL,
5556 VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32), N: Src);
5557 SDValue RetReg = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: MVT::i64,
5558 N1: NewReg.getValue(R: 0), N2: NewReg.getValue(R: 1));
5559 Results.push_back(Elt: RetReg);
5560 }
5561 break;
5562 }
5563 case ISD::FP_TO_UINT: {
5564 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5565 "Unexpected custom legalisation");
5566 auto &TLI = DAG.getTargetLoweringInfo();
5567 SDValue Tmp1, Tmp2;
5568 TLI.expandFP_TO_UINT(N, Result&: Tmp1, Chain&: Tmp2, DAG);
5569 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Tmp1));
5570 break;
5571 }
5572 case ISD::FP_ROUND: {
5573 assert(VT == MVT::v2f32 && Subtarget.hasExtLSX() &&
5574 "Unexpected custom legalisation");
5575 // On LSX platforms, rounding from v2f64 to v4f32 (after legalization from
5576 // v2f32) is scalarized. Add a customized v2f32 widening to convert it into
5577 // a target-specific LoongArchISD::VFCVT to optimize it.
5578 SDValue Op0 = N->getOperand(Num: 0);
5579 EVT OpVT = Op0.getValueType();
5580 if (OpVT == MVT::v2f64) {
5581 SDValue Undef = DAG.getUNDEF(VT: OpVT);
5582 SDValue Dst =
5583 DAG.getNode(Opcode: LoongArchISD::VFCVT, DL, VT: MVT::v4f32, N1: Undef, N2: Op0);
5584 Results.push_back(Elt: Dst);
5585 }
5586 break;
5587 }
5588 case ISD::BSWAP: {
5589 SDValue Src = N->getOperand(Num: 0);
5590 assert((VT == MVT::i16 || VT == MVT::i32) &&
5591 "Unexpected custom legalization");
5592 MVT GRLenVT = Subtarget.getGRLenVT();
5593 SDValue NewSrc = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: GRLenVT, Operand: Src);
5594 SDValue Tmp;
5595 switch (VT.getSizeInBits()) {
5596 default:
5597 llvm_unreachable("Unexpected operand width");
5598 case 16:
5599 Tmp = DAG.getNode(Opcode: LoongArchISD::REVB_2H, DL, VT: GRLenVT, Operand: NewSrc);
5600 break;
5601 case 32:
5602 // Only LA64 will get to here due to the size mismatch between VT and
5603 // GRLenVT, LA32 lowering is directly defined in LoongArchInstrInfo.
5604 Tmp = DAG.getNode(Opcode: LoongArchISD::REVB_2W, DL, VT: GRLenVT, Operand: NewSrc);
5605 break;
5606 }
5607 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Tmp));
5608 break;
5609 }
5610 case ISD::BITREVERSE: {
5611 SDValue Src = N->getOperand(Num: 0);
5612 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
5613 "Unexpected custom legalization");
5614 MVT GRLenVT = Subtarget.getGRLenVT();
5615 SDValue NewSrc = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: GRLenVT, Operand: Src);
5616 SDValue Tmp;
5617 switch (VT.getSizeInBits()) {
5618 default:
5619 llvm_unreachable("Unexpected operand width");
5620 case 8:
5621 Tmp = DAG.getNode(Opcode: LoongArchISD::BITREV_4B, DL, VT: GRLenVT, Operand: NewSrc);
5622 break;
5623 case 32:
5624 Tmp = DAG.getNode(Opcode: LoongArchISD::BITREV_W, DL, VT: GRLenVT, Operand: NewSrc);
5625 break;
5626 }
5627 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Tmp));
5628 break;
5629 }
5630 case ISD::CTLZ:
5631 case ISD::CTTZ: {
5632 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5633 "Unexpected custom legalisation");
5634 Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 1));
5635 break;
5636 }
5637 case ISD::INTRINSIC_W_CHAIN: {
5638 SDValue Chain = N->getOperand(Num: 0);
5639 SDValue Op2 = N->getOperand(Num: 2);
5640 MVT GRLenVT = Subtarget.getGRLenVT();
5641 const StringRef ErrorMsgOOR = "argument out of range";
5642 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
5643 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
5644
5645 switch (N->getConstantOperandVal(Num: 1)) {
5646 default:
5647 llvm_unreachable("Unexpected Intrinsic.");
5648 case Intrinsic::loongarch_movfcsr2gr: {
5649 if (!Subtarget.hasBasicF()) {
5650 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgReqF);
5651 return;
5652 }
5653 unsigned Imm = Op2->getAsZExtVal();
5654 if (!isUInt<2>(x: Imm)) {
5655 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR);
5656 return;
5657 }
5658 SDValue MOVFCSR2GRResults = DAG.getNode(
5659 Opcode: LoongArchISD::MOVFCSR2GR, DL: SDLoc(N), ResultTys: {MVT::i64, MVT::Other},
5660 Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
5661 Results.push_back(
5662 Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: MOVFCSR2GRResults.getValue(R: 0)));
5663 Results.push_back(Elt: MOVFCSR2GRResults.getValue(R: 1));
5664 break;
5665 }
5666#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
5667 case Intrinsic::loongarch_##NAME: { \
5668 SDValue NODE = DAG.getNode( \
5669 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5670 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
5671 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
5672 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
5673 Results.push_back(NODE.getValue(1)); \
5674 break; \
5675 }
5676 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
5677 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
5678 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
5679 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
5680 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
5681 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
5682#undef CRC_CASE_EXT_BINARYOP
5683
5684#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
5685 case Intrinsic::loongarch_##NAME: { \
5686 SDValue NODE = DAG.getNode( \
5687 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5688 {Chain, Op2, \
5689 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
5690 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
5691 Results.push_back(NODE.getValue(1)); \
5692 break; \
5693 }
5694 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
5695 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
5696#undef CRC_CASE_EXT_UNARYOP
5697#define CSR_CASE(ID) \
5698 case Intrinsic::loongarch_##ID: { \
5699 if (!Subtarget.is64Bit()) \
5700 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
5701 break; \
5702 }
5703 CSR_CASE(csrrd_d);
5704 CSR_CASE(csrwr_d);
5705 CSR_CASE(csrxchg_d);
5706 CSR_CASE(iocsrrd_d);
5707#undef CSR_CASE
5708 case Intrinsic::loongarch_csrrd_w: {
5709 unsigned Imm = Op2->getAsZExtVal();
5710 if (!isUInt<14>(x: Imm)) {
5711 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR);
5712 return;
5713 }
5714 SDValue CSRRDResults =
5715 DAG.getNode(Opcode: LoongArchISD::CSRRD, DL, ResultTys: {GRLenVT, MVT::Other},
5716 Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
5717 Results.push_back(
5718 Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRRDResults.getValue(R: 0)));
5719 Results.push_back(Elt: CSRRDResults.getValue(R: 1));
5720 break;
5721 }
5722 case Intrinsic::loongarch_csrwr_w: {
5723 unsigned Imm = N->getConstantOperandVal(Num: 3);
5724 if (!isUInt<14>(x: Imm)) {
5725 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR);
5726 return;
5727 }
5728 SDValue CSRWRResults =
5729 DAG.getNode(Opcode: LoongArchISD::CSRWR, DL, ResultTys: {GRLenVT, MVT::Other},
5730 Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op2),
5731 DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
5732 Results.push_back(
5733 Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRWRResults.getValue(R: 0)));
5734 Results.push_back(Elt: CSRWRResults.getValue(R: 1));
5735 break;
5736 }
5737 case Intrinsic::loongarch_csrxchg_w: {
5738 unsigned Imm = N->getConstantOperandVal(Num: 4);
5739 if (!isUInt<14>(x: Imm)) {
5740 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR);
5741 return;
5742 }
5743 SDValue CSRXCHGResults = DAG.getNode(
5744 Opcode: LoongArchISD::CSRXCHG, DL, ResultTys: {GRLenVT, MVT::Other},
5745 Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op2),
5746 DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 3)),
5747 DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
5748 Results.push_back(
5749 Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRXCHGResults.getValue(R: 0)));
5750 Results.push_back(Elt: CSRXCHGResults.getValue(R: 1));
5751 break;
5752 }
5753#define IOCSRRD_CASE(NAME, NODE) \
5754 case Intrinsic::loongarch_##NAME: { \
5755 SDValue IOCSRRDResults = \
5756 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5757 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
5758 Results.push_back( \
5759 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
5760 Results.push_back(IOCSRRDResults.getValue(1)); \
5761 break; \
5762 }
5763 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
5764 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
5765 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
5766#undef IOCSRRD_CASE
5767 case Intrinsic::loongarch_cpucfg: {
5768 SDValue CPUCFGResults =
5769 DAG.getNode(Opcode: LoongArchISD::CPUCFG, DL, ResultTys: {GRLenVT, MVT::Other},
5770 Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op2)});
5771 Results.push_back(
5772 Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CPUCFGResults.getValue(R: 0)));
5773 Results.push_back(Elt: CPUCFGResults.getValue(R: 1));
5774 break;
5775 }
5776 case Intrinsic::loongarch_lddir_d: {
5777 if (!Subtarget.is64Bit()) {
5778 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgReqLA64);
5779 return;
5780 }
5781 break;
5782 }
5783 }
5784 break;
5785 }
5786 case ISD::READ_REGISTER: {
5787 if (Subtarget.is64Bit())
5788 DAG.getContext()->emitError(
5789 ErrorStr: "On LA64, only 64-bit registers can be read.");
5790 else
5791 DAG.getContext()->emitError(
5792 ErrorStr: "On LA32, only 32-bit registers can be read.");
5793 Results.push_back(Elt: DAG.getUNDEF(VT));
5794 Results.push_back(Elt: N->getOperand(Num: 0));
5795 break;
5796 }
5797 case ISD::INTRINSIC_WO_CHAIN: {
5798 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
5799 break;
5800 }
5801 case ISD::LROUND: {
5802 SDValue Op0 = N->getOperand(Num: 0);
5803 EVT OpVT = Op0.getValueType();
5804 RTLIB::Libcall LC =
5805 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
5806 MakeLibCallOptions CallOptions;
5807 CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT: MVT::i64);
5808 SDValue Result = makeLibCall(DAG, LC, RetVT: MVT::i64, Ops: Op0, CallOptions, dl: DL).first;
5809 Result = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Result);
5810 Results.push_back(Elt: Result);
5811 break;
5812 }
5813 case ISD::ATOMIC_CMP_SWAP: {
5814 replaceCMP_XCHG_128Results(N, Results, DAG);
5815 break;
5816 }
5817 case ISD::TRUNCATE: {
5818 MVT VT = N->getSimpleValueType(ResNo: 0);
5819 if (getTypeAction(Context&: *DAG.getContext(), VT) != TypeWidenVector)
5820 return;
5821
5822 MVT WidenVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT).getSimpleVT();
5823 SDValue In = N->getOperand(Num: 0);
5824 EVT InVT = In.getValueType();
5825 EVT InEltVT = InVT.getVectorElementType();
5826 EVT EltVT = VT.getVectorElementType();
5827 unsigned MinElts = VT.getVectorNumElements();
5828 unsigned WidenNumElts = WidenVT.getVectorNumElements();
5829 unsigned InBits = InVT.getSizeInBits();
5830
5831 // v8i64 -> (v8i32) -> v8i8
5832 if (InVT == MVT::v8i64 && WidenVT.is128BitVector()) {
5833 InVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: 256 / MinElts), NumElements: MinElts);
5834 In = DAG.getNode(Opcode: N->getOpcode(), DL, VT: InVT, Operand: In);
5835 InBits = 256;
5836 }
5837
5838 // v8i32 -> v8i8 / v4i64 -> v4i16 / v4i64 -> v4i8
5839 if ((InVT == MVT::v8i32 || InVT == MVT::v4i64) &&
5840 WidenVT.is128BitVector()) {
5841 InVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: 128 / MinElts), NumElements: MinElts);
5842 In = DAG.getNode(Opcode: N->getOpcode(), DL, VT: InVT, Operand: In);
5843 InBits = 128;
5844 InEltVT = InVT.getVectorElementType();
5845 }
5846
5847 if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
5848 if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
5849 int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
5850 SmallVector<int, 16> TruncMask(WidenNumElts, -1);
5851 for (unsigned I = 0; I < MinElts; ++I)
5852 TruncMask[I] = Scale * I;
5853
5854 unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
5855 MVT SVT = In.getSimpleValueType().getScalarType();
5856 MVT VT = MVT::getVectorVT(VT: SVT, NumElements: WidenNumElts);
5857 SDValue WidenIn =
5858 DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT, N1: DAG.getUNDEF(VT), N2: In,
5859 N3: DAG.getVectorIdxConstant(Val: 0, DL));
5860 assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
5861 "Illegal vector type in truncation");
5862 WidenIn = DAG.getBitcast(VT: WidenVT, V: WidenIn);
5863 Results.push_back(
5864 Elt: DAG.getVectorShuffle(VT: WidenVT, dl: DL, N1: WidenIn, N2: WidenIn, Mask: TruncMask));
5865 return;
5866 }
5867 }
5868
5869 break;
5870 }
5871 case ISD::SIGN_EXTEND: {
5872 // LASX has native VEXT2XV_* for sign extension.
5873 if (!Subtarget.hasExtLSX() || Subtarget.hasExtLASX())
5874 return;
5875
5876 EVT DstVT = N->getValueType(ResNo: 0);
5877 SDValue Src = N->getOperand(Num: 0);
5878 MVT SrcVT = Src.getSimpleValueType();
5879
5880 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
5881 unsigned DstEltBits = DstVT.getScalarSizeInBits();
5882 unsigned NumElts = DstVT.getVectorNumElements();
5883
5884 if (SrcVT.getSizeInBits() > 128)
5885 return;
5886
5887 if (!DstVT.isVector() || DstVT.getSizeInBits() <= 128)
5888 return;
5889
5890 // Legalize and extend the src to 128-bit first.
5891 if (SrcVT.getSizeInBits() < 128) {
5892 unsigned WidenSrcElts = 128 / SrcEltBits;
5893 MVT WidenSrcVT = MVT::getVectorVT(VT: SrcVT.getScalarType(), NumElements: WidenSrcElts);
5894 Src = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: WidenSrcVT,
5895 N1: DAG.getUNDEF(VT: WidenSrcVT), N2: Src,
5896 N3: DAG.getVectorIdxConstant(Val: 0, DL));
5897 SrcVT = WidenSrcVT;
5898
5899 unsigned FirstStageEltBits = 128 / NumElts;
5900 MVT FirstStageEltVT = MVT::getIntegerVT(BitWidth: FirstStageEltBits);
5901 MVT FirstStageVT = MVT::getVectorVT(VT: FirstStageEltVT, NumElements: NumElts);
5902 Src = DAG.getNode(Opcode: ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT: FirstStageVT, Operand: Src);
5903 SrcVT = FirstStageVT;
5904 SrcEltBits = FirstStageEltBits;
5905 }
5906
5907 SmallVector<SDValue, 8> Blocks;
5908 Blocks.push_back(Elt: Src);
5909
5910 // Sign-extend the src by using SLTI + VILVL + VILVH recursively.
5911 while (SrcEltBits < DstEltBits) {
5912 unsigned NextEltBits = SrcEltBits * 2;
5913 MVT NextEltVT = MVT::getIntegerVT(BitWidth: NextEltBits);
5914 unsigned CurEltsPerBlock = SrcVT.getVectorNumElements();
5915 unsigned NextEltsPerBlock = CurEltsPerBlock / 2;
5916 MVT NextBlockVT = MVT::getVectorVT(VT: NextEltVT, NumElements: NextEltsPerBlock);
5917
5918 SmallVector<SDValue, 8> NextBlocks;
5919 NextBlocks.reserve(N: Blocks.size() * 2);
5920 for (SDValue Block : Blocks) {
5921 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: SrcVT);
5922 SDValue Mask = DAG.getNode(Opcode: ISD::SETCC, DL, VT: SrcVT, N1: Block, N2: Zero,
5923 N3: DAG.getCondCode(Cond: ISD::SETLT));
5924 SDValue LoInterleaved =
5925 DAG.getNode(Opcode: LoongArchISD::VILVL, DL, VT: SrcVT, N1: Mask, N2: Block);
5926 SDValue HiInterleaved =
5927 DAG.getNode(Opcode: LoongArchISD::VILVH, DL, VT: SrcVT, N1: Mask, N2: Block);
5928
5929 NextBlocks.push_back(Elt: DAG.getBitcast(VT: NextBlockVT, V: LoInterleaved));
5930 NextBlocks.push_back(Elt: DAG.getBitcast(VT: NextBlockVT, V: HiInterleaved));
5931 }
5932
5933 Blocks = std::move(NextBlocks);
5934 SrcVT = NextBlockVT;
5935 SrcEltBits = NextEltBits;
5936 }
5937
5938 Results.push_back(Elt: DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: DstVT, Ops: Blocks));
5939 break;
5940 }
5941 case ISD::FP_EXTEND:
5942 // FP_EXTEND may reach here due to the Custom action for v2f32 results, but
5943 // no target-specific lowering is required. Leave it unchanged and rely on
5944 // the default type legalization.
5945 break;
5946 }
5947}
5948
5949/// Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
5950static SDValue combineAndNotIntoVANDN(SDNode *N, const SDLoc &DL,
5951 SelectionDAG &DAG) {
5952 assert(N->getOpcode() == ISD::AND && "Unexpected opcode combine into ANDN");
5953
5954 MVT VT = N->getSimpleValueType(ResNo: 0);
5955 if (!VT.is128BitVector() && !VT.is256BitVector())
5956 return SDValue();
5957
5958 SDValue X, Y;
5959 SDValue N0 = N->getOperand(Num: 0);
5960 SDValue N1 = N->getOperand(Num: 1);
5961
5962 if (SDValue Not = isNOT(V: N0, DAG)) {
5963 X = Not;
5964 Y = N1;
5965 } else if (SDValue Not = isNOT(V: N1, DAG)) {
5966 X = Not;
5967 Y = N0;
5968 } else
5969 return SDValue();
5970
5971 X = DAG.getBitcast(VT, V: X);
5972 Y = DAG.getBitcast(VT, V: Y);
5973 return DAG.getNode(Opcode: LoongArchISD::VANDN, DL, VT, N1: X, N2: Y);
5974}
5975
5976static bool isConstantSplatVector(SDValue N, APInt &SplatValue,
5977 unsigned MinSizeInBits) {
5978 N = peekThroughBitcasts(V: N);
5979 BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(Val&: N);
5980
5981 if (!Node)
5982 return false;
5983
5984 APInt SplatUndef;
5985 unsigned SplatBitSize;
5986 bool HasAnyUndefs;
5987
5988 return Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
5989 HasAnyUndefs, MinSplatBits: MinSizeInBits,
5990 /*IsBigEndian=*/isBigEndian: false);
5991}
5992
5993static SDValue matchDeinterleaveBuildVector(SDValue N, unsigned &StartIndex) {
5994 auto *BV = dyn_cast<BuildVectorSDNode>(Val&: N);
5995 if (!BV)
5996 return SDValue();
5997
5998 SDValue Src;
5999 int Start = -1;
6000
6001 for (unsigned i = 0, NumElts = BV->getNumOperands(); i < NumElts; ++i) {
6002 SDValue Op = BV->getOperand(Num: i);
6003 if (Op.isUndef())
6004 continue;
6005 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
6006 return SDValue();
6007
6008 auto *IdxC = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 1));
6009 if (!IdxC)
6010 return SDValue();
6011
6012 unsigned EltIdx = IdxC->getZExtValue();
6013 if (Start < 0)
6014 Start = (int)EltIdx - (int)(i * 2);
6015 if (Start < 0 || Start > 1 || EltIdx != (unsigned)(Start + (int)(i * 2)))
6016 return SDValue();
6017
6018 SDValue CurSrc = Op.getOperand(i: 0);
6019 if (!Src)
6020 Src = CurSrc;
6021 else if (Src != CurSrc)
6022 return SDValue();
6023 }
6024
6025 if (!Src || Start < 0)
6026 return SDValue();
6027
6028 StartIndex = (unsigned)Start;
6029 return Src;
6030}
6031
6032static SDValue
6033performHorizWideningCombine(SDNode *N, SelectionDAG &DAG,
6034 const LoongArchSubtarget &Subtarget) {
6035 if (!Subtarget.hasExtLSX())
6036 return SDValue();
6037
6038 unsigned Opc = N->getOpcode();
6039 assert((Opc == ISD::ADD || Opc == ISD::SUB) && "Unexpected opcode");
6040
6041 EVT VT = N->getValueType(ResNo: 0);
6042 SDLoc DL(N);
6043
6044 SDValue LHS = N->getOperand(Num: 0);
6045 SDValue RHS = N->getOperand(Num: 1);
6046
6047 bool isSigned;
6048 unsigned ExtOpc = LHS.getOpcode();
6049 if (ExtOpc == ISD::SIGN_EXTEND)
6050 isSigned = true;
6051 else if (ExtOpc == ISD::ZERO_EXTEND)
6052 isSigned = false;
6053 else
6054 return SDValue();
6055
6056 if (ExtOpc != RHS.getOpcode())
6057 return SDValue();
6058
6059 if (!LHS.hasOneUse() || !RHS.hasOneUse())
6060 return SDValue();
6061
6062 unsigned OddIdx, EvenIdx;
6063 SDValue LHSVec = matchDeinterleaveBuildVector(N: LHS.getOperand(i: 0), StartIndex&: OddIdx);
6064 SDValue RHSVec = matchDeinterleaveBuildVector(N: RHS.getOperand(i: 0), StartIndex&: EvenIdx);
6065
6066 if (!LHSVec || !RHSVec)
6067 return SDValue();
6068 if (OddIdx != 1 || EvenIdx != 0)
6069 return SDValue();
6070 if (LHSVec.getValueType() != RHSVec.getValueType())
6071 return SDValue();
6072
6073 EVT SrcVT = LHSVec.getValueType();
6074 EVT SrcEltVT = SrcVT.getVectorElementType();
6075 EVT DstEltVT = VT.getVectorElementType();
6076 auto &TLI = DAG.getTargetLoweringInfo();
6077
6078 if (!TLI.isTypeLegal(VT) || !TLI.isTypeLegal(VT: SrcVT))
6079 return SDValue();
6080 if (!SrcVT.isVector() || !VT.isVector())
6081 return SDValue();
6082 if (SrcVT.getSizeInBits() != VT.getSizeInBits())
6083 return SDValue();
6084 if (DstEltVT.getSizeInBits() != SrcEltVT.getSizeInBits() * 2)
6085 return SDValue();
6086 if (!SrcEltVT.isInteger() || SrcEltVT.getSizeInBits() > 32)
6087 return SDValue();
6088
6089 unsigned TargetOpc;
6090 if (Opc == ISD::ADD)
6091 TargetOpc = isSigned ? LoongArchISD::VHADDW : LoongArchISD::VHADDW_U;
6092 else
6093 TargetOpc = isSigned ? LoongArchISD::VHSUBW : LoongArchISD::VHSUBW_U;
6094
6095 return DAG.getNode(Opcode: TargetOpc, DL, VT, N1: LHSVec, N2: RHSVec);
6096}
6097
6098static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
6099 TargetLowering::DAGCombinerInfo &DCI,
6100 const LoongArchSubtarget &Subtarget) {
6101 if (SDValue V = performHorizWideningCombine(N, DAG, Subtarget))
6102 return V;
6103
6104 if (DCI.isBeforeLegalizeOps())
6105 return SDValue();
6106
6107 EVT VT = N->getValueType(ResNo: 0);
6108 if (!VT.isVector())
6109 return SDValue();
6110
6111 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
6112 return SDValue();
6113
6114 EVT EltVT = VT.getVectorElementType();
6115 if (!EltVT.isInteger())
6116 return SDValue();
6117
6118 // match:
6119 //
6120 // add
6121 // (and
6122 // (srl X, shift-1) / X
6123 // 1)
6124 // (srl/sra X, shift)
6125
6126 SDValue Add0 = N->getOperand(Num: 0);
6127 SDValue Add1 = N->getOperand(Num: 1);
6128 SDValue And;
6129 SDValue Shr;
6130
6131 if (Add0.getOpcode() == ISD::AND) {
6132 And = Add0;
6133 Shr = Add1;
6134 } else if (Add1.getOpcode() == ISD::AND) {
6135 And = Add1;
6136 Shr = Add0;
6137 } else {
6138 return SDValue();
6139 }
6140
6141 // match:
6142 //
6143 // srl/sra X, shift
6144
6145 if (Shr.getOpcode() != ISD::SRL && Shr.getOpcode() != ISD::SRA)
6146 return SDValue();
6147
6148 SDValue X = Shr.getOperand(i: 0);
6149 SDValue Shift = Shr.getOperand(i: 1);
6150 APInt ShiftVal;
6151
6152 if (!isConstantSplatVector(N: Shift, SplatValue&: ShiftVal, MinSizeInBits: EltVT.getSizeInBits()))
6153 return SDValue();
6154
6155 if (ShiftVal == 0)
6156 return SDValue();
6157
6158 // match:
6159 //
6160 // and
6161 // (srl X, shift-1) / X
6162 // 1
6163
6164 SDValue One = And.getOperand(i: 1);
6165 APInt SplatVal;
6166
6167 if (!isConstantSplatVector(N: One, SplatValue&: SplatVal, MinSizeInBits: EltVT.getSizeInBits()))
6168 return SDValue();
6169
6170 if (SplatVal != 1)
6171 return SDValue();
6172
6173 if (And.getOperand(i: 0) == X) {
6174 // match:
6175 //
6176 // shift == 1
6177
6178 if (ShiftVal != 1)
6179 return SDValue();
6180 } else {
6181 // match:
6182 //
6183 // srl X, shift-1
6184
6185 SDValue Srl = And.getOperand(i: 0);
6186
6187 if (Srl.getOpcode() != ISD::SRL)
6188 return SDValue();
6189
6190 if (Srl.getOperand(i: 0) != X)
6191 return SDValue();
6192
6193 // match:
6194 //
6195 // shift-1
6196
6197 SDValue ShiftMinus1 = Srl.getOperand(i: 1);
6198
6199 if (!isConstantSplatVector(N: ShiftMinus1, SplatValue&: SplatVal, MinSizeInBits: EltVT.getSizeInBits()))
6200 return SDValue();
6201
6202 if (ShiftVal != (SplatVal + 1))
6203 return SDValue();
6204 }
6205
6206 // We matched a rounded right shift pattern and can lower it
6207 // to a single vector rounded shift instruction.
6208
6209 SDLoc DL(N);
6210 return DAG.getNode(Opcode: Shr.getOpcode() == ISD::SRL ? LoongArchISD::VSRLR
6211 : LoongArchISD::VSRAR,
6212 DL, VT, N1: X, N2: Shift);
6213}
6214
6215static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
6216 TargetLowering::DAGCombinerInfo &DCI,
6217 const LoongArchSubtarget &Subtarget) {
6218 if (DCI.isBeforeLegalizeOps())
6219 return SDValue();
6220
6221 SDValue FirstOperand = N->getOperand(Num: 0);
6222 SDValue SecondOperand = N->getOperand(Num: 1);
6223 unsigned FirstOperandOpc = FirstOperand.getOpcode();
6224 EVT ValTy = N->getValueType(ResNo: 0);
6225 SDLoc DL(N);
6226 uint64_t lsb, msb;
6227 unsigned SMIdx, SMLen;
6228 ConstantSDNode *CN;
6229 SDValue NewOperand;
6230 MVT GRLenVT = Subtarget.getGRLenVT();
6231
6232 if (SDValue R = combineAndNotIntoVANDN(N, DL, DAG))
6233 return R;
6234
6235 // BSTRPICK requires the 32S feature.
6236 if (!Subtarget.has32S())
6237 return SDValue();
6238
6239 // Op's second operand must be a shifted mask.
6240 if (!(CN = dyn_cast<ConstantSDNode>(Val&: SecondOperand)) ||
6241 !isShiftedMask_64(Value: CN->getZExtValue(), MaskIdx&: SMIdx, MaskLen&: SMLen))
6242 return SDValue();
6243
6244 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
6245 // Pattern match BSTRPICK.
6246 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
6247 // => BSTRPICK $dst, $src, msb, lsb
6248 // where msb = lsb + len - 1
6249
6250 // The second operand of the shift must be an immediate.
6251 if (!(CN = dyn_cast<ConstantSDNode>(Val: FirstOperand.getOperand(i: 1))))
6252 return SDValue();
6253
6254 lsb = CN->getZExtValue();
6255
6256 // Return if the shifted mask does not start at bit 0 or the sum of its
6257 // length and lsb exceeds the word's size.
6258 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
6259 return SDValue();
6260
6261 NewOperand = FirstOperand.getOperand(i: 0);
6262 } else {
6263 // Pattern match BSTRPICK.
6264 // $dst = and $src, (2**len- 1) , if len > 12
6265 // => BSTRPICK $dst, $src, msb, lsb
6266 // where lsb = 0 and msb = len - 1
6267
6268 // If the mask is <= 0xfff, andi can be used instead.
6269 if (CN->getZExtValue() <= 0xfff)
6270 return SDValue();
6271
6272 // Return if the MSB exceeds.
6273 if (SMIdx + SMLen > ValTy.getSizeInBits())
6274 return SDValue();
6275
6276 if (SMIdx > 0) {
6277 // Omit if the constant has more than 2 uses. This a conservative
6278 // decision. Whether it is a win depends on the HW microarchitecture.
6279 // However it should always be better for 1 and 2 uses.
6280 if (CN->use_size() > 2)
6281 return SDValue();
6282 // Return if the constant can be composed by a single LU12I.W.
6283 if ((CN->getZExtValue() & 0xfff) == 0)
6284 return SDValue();
6285 // Return if the constand can be composed by a single ADDI with
6286 // the zero register.
6287 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
6288 return SDValue();
6289 }
6290
6291 lsb = SMIdx;
6292 NewOperand = FirstOperand;
6293 }
6294
6295 msb = lsb + SMLen - 1;
6296 SDValue NR0 = DAG.getNode(Opcode: LoongArchISD::BSTRPICK, DL, VT: ValTy, N1: NewOperand,
6297 N2: DAG.getConstant(Val: msb, DL, VT: GRLenVT),
6298 N3: DAG.getConstant(Val: lsb, DL, VT: GRLenVT));
6299 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
6300 return NR0;
6301 // Try to optimize to
6302 // bstrpick $Rd, $Rs, msb, lsb
6303 // slli $Rd, $Rd, lsb
6304 return DAG.getNode(Opcode: ISD::SHL, DL, VT: ValTy, N1: NR0,
6305 N2: DAG.getConstant(Val: lsb, DL, VT: GRLenVT));
6306}
6307
6308// Return the original source vector if N consists of the low half
6309// of each 128-bit lane.
6310static SDValue matchLowHalfOf128BitLanes(SDValue N) {
6311 N = peekThroughBitcasts(V: N);
6312
6313 EVT DstVT = N.getValueType();
6314 if (!DstVT.isVector())
6315 return SDValue();
6316
6317 // LSX canonical form:
6318 if (N.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
6319 SDValue Src = N.getOperand(i: 0);
6320 EVT SrcVT = Src.getValueType();
6321
6322 if (!SrcVT.isVector() || !SrcVT.is128BitVector())
6323 return SDValue();
6324 if (N.getConstantOperandVal(i: 1) != 0)
6325 return SDValue();
6326 if (SrcVT.getSizeInBits() != DstVT.getSizeInBits() * 2)
6327 return SDValue();
6328 if (SrcVT.getVectorNumElements() != DstVT.getVectorNumElements() * 2)
6329 return SDValue();
6330
6331 return Src;
6332 }
6333
6334 // LASX canonical form:
6335 auto *BV = dyn_cast<BuildVectorSDNode>(Val&: N);
6336 if (!BV)
6337 return SDValue();
6338
6339 unsigned NumElts = DstVT.getVectorNumElements();
6340 if (NumElts % 2 != 0)
6341 return SDValue();
6342
6343 SDValue Src;
6344 EVT SrcVT;
6345
6346 for (unsigned I = 0; I != NumElts; ++I) {
6347 SDValue Elt = BV->getOperand(Num: I);
6348 if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
6349 return SDValue();
6350
6351 SDValue ThisSrc = Elt.getOperand(i: 0);
6352 SDValue Idx = Elt.getOperand(i: 1);
6353 auto *CI = dyn_cast<ConstantSDNode>(Val&: Idx);
6354 if (!CI)
6355 return SDValue();
6356
6357 if (!Src) {
6358 Src = ThisSrc;
6359 SrcVT = Src.getValueType();
6360 if (!SrcVT.isVector())
6361 return SDValue();
6362
6363 if (SrcVT.getSizeInBits() != DstVT.getSizeInBits() * 2)
6364 return SDValue();
6365 if (SrcVT.getVectorNumElements() != NumElts * 2)
6366 return SDValue();
6367 if (!SrcVT.is256BitVector())
6368 return SDValue();
6369 } else if (ThisSrc != Src) {
6370 return SDValue();
6371 }
6372
6373 unsigned Half = NumElts / 2;
6374 unsigned ExpectedIdx = (I < Half) ? I : (I + Half);
6375 if (CI->getZExtValue() != ExpectedIdx)
6376 return SDValue();
6377 }
6378
6379 return Src;
6380}
6381
6382static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG,
6383 TargetLowering::DAGCombinerInfo &DCI,
6384 const LoongArchSubtarget &Subtarget) {
6385 if (!Subtarget.hasExtLSX())
6386 return SDValue();
6387
6388 assert(N->getOpcode() == ISD::SHL && "Unexpected opcode");
6389
6390 EVT VT = N->getValueType(ResNo: 0);
6391 SDLoc DL(N);
6392
6393 SDValue LHS = N->getOperand(Num: 0);
6394 SDValue RHS = N->getOperand(Num: 1);
6395
6396 bool isSigned;
6397 unsigned ExtOpc = LHS.getOpcode();
6398 if (ExtOpc == ISD::SIGN_EXTEND)
6399 isSigned = true;
6400 else if (ExtOpc == ISD::ZERO_EXTEND)
6401 isSigned = false;
6402 else
6403 return SDValue();
6404
6405 if (!LHS.hasOneUse())
6406 return SDValue();
6407
6408 SDValue Vec = matchLowHalfOf128BitLanes(N: LHS.getOperand(i: 0));
6409 if (!Vec)
6410 return SDValue();
6411
6412 EVT SrcVT = Vec.getValueType();
6413 EVT SrcEltVT = SrcVT.getVectorElementType();
6414 EVT DstEltVT = VT.getVectorElementType();
6415
6416 if (!SrcVT.isVector() || !VT.isVector())
6417 return SDValue();
6418 if (SrcVT.getSizeInBits() != VT.getSizeInBits())
6419 return SDValue();
6420 if (DstEltVT.getSizeInBits() != SrcEltVT.getSizeInBits() * 2)
6421 return SDValue();
6422 if (!SrcEltVT.isInteger() || SrcEltVT.getSizeInBits() > 32)
6423 return SDValue();
6424
6425 APInt Imm;
6426 if (!isConstantSplatVector(N: RHS, SplatValue&: Imm, MinSizeInBits: DstEltVT.getSizeInBits()))
6427 return SDValue();
6428 if (!Imm.ult(RHS: SrcEltVT.getSizeInBits()))
6429 return SDValue();
6430
6431 unsigned Opc = isSigned ? LoongArchISD::VSLLWIL : LoongArchISD::VSLLWIL_U;
6432 SDValue Sht = DAG.getConstant(Val: Imm.getZExtValue(), DL, VT: Subtarget.getGRLenVT());
6433 return DAG.getNode(Opcode: Opc, DL, VT, N1: Vec, N2: Sht);
6434}
6435
6436static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
6437 TargetLowering::DAGCombinerInfo &DCI,
6438 const LoongArchSubtarget &Subtarget) {
6439 // BSTRPICK requires the 32S feature.
6440 if (!Subtarget.has32S())
6441 return SDValue();
6442
6443 if (DCI.isBeforeLegalizeOps())
6444 return SDValue();
6445
6446 // $dst = srl (and $src, Mask), Shamt
6447 // =>
6448 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
6449 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
6450 //
6451
6452 SDValue FirstOperand = N->getOperand(Num: 0);
6453 ConstantSDNode *CN;
6454 EVT ValTy = N->getValueType(ResNo: 0);
6455 SDLoc DL(N);
6456 MVT GRLenVT = Subtarget.getGRLenVT();
6457 unsigned MaskIdx, MaskLen;
6458 uint64_t Shamt;
6459
6460 // The first operand must be an AND and the second operand of the AND must be
6461 // a shifted mask.
6462 if (FirstOperand.getOpcode() != ISD::AND ||
6463 !(CN = dyn_cast<ConstantSDNode>(Val: FirstOperand.getOperand(i: 1))) ||
6464 !isShiftedMask_64(Value: CN->getZExtValue(), MaskIdx, MaskLen))
6465 return SDValue();
6466
6467 // The second operand (shift amount) must be an immediate.
6468 if (!(CN = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1))))
6469 return SDValue();
6470
6471 Shamt = CN->getZExtValue();
6472 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
6473 return DAG.getNode(Opcode: LoongArchISD::BSTRPICK, DL, VT: ValTy,
6474 N1: FirstOperand->getOperand(Num: 0),
6475 N2: DAG.getConstant(Val: MaskIdx + MaskLen - 1, DL, VT: GRLenVT),
6476 N3: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT));
6477
6478 return SDValue();
6479}
6480
6481static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
6482 TargetLowering::DAGCombinerInfo &DCI,
6483 const LoongArchSubtarget &Subtarget) {
6484 if (SDValue V = performHorizWideningCombine(N, DAG, Subtarget))
6485 return V;
6486
6487 return SDValue();
6488}
6489
6490// Helper to peek through bitops/trunc/setcc to determine size of source vector.
6491// Allows BITCASTCombine to determine what size vector generated a <X x i1>.
6492static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
6493 unsigned Depth) {
6494 // Limit recursion.
6495 if (Depth >= SelectionDAG::MaxRecursionDepth)
6496 return false;
6497 switch (Src.getOpcode()) {
6498 case ISD::SETCC:
6499 case ISD::TRUNCATE:
6500 return Src.getOperand(i: 0).getValueSizeInBits() == Size;
6501 case ISD::FREEZE:
6502 return checkBitcastSrcVectorSize(Src: Src.getOperand(i: 0), Size, Depth: Depth + 1);
6503 case ISD::AND:
6504 case ISD::XOR:
6505 case ISD::OR:
6506 return checkBitcastSrcVectorSize(Src: Src.getOperand(i: 0), Size, Depth: Depth + 1) &&
6507 checkBitcastSrcVectorSize(Src: Src.getOperand(i: 1), Size, Depth: Depth + 1);
6508 case ISD::SELECT:
6509 case ISD::VSELECT:
6510 return Src.getOperand(i: 0).getScalarValueSizeInBits() == 1 &&
6511 checkBitcastSrcVectorSize(Src: Src.getOperand(i: 1), Size, Depth: Depth + 1) &&
6512 checkBitcastSrcVectorSize(Src: Src.getOperand(i: 2), Size, Depth: Depth + 1);
6513 case ISD::BUILD_VECTOR:
6514 return ISD::isBuildVectorAllZeros(N: Src.getNode()) ||
6515 ISD::isBuildVectorAllOnes(N: Src.getNode());
6516 }
6517 return false;
6518}
6519
6520// Helper to push sign extension of vXi1 SETCC result through bitops.
6521static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT,
6522 SDValue Src, const SDLoc &DL) {
6523 switch (Src.getOpcode()) {
6524 case ISD::SETCC:
6525 case ISD::FREEZE:
6526 case ISD::TRUNCATE:
6527 case ISD::BUILD_VECTOR:
6528 return DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: SExtVT, Operand: Src);
6529 case ISD::AND:
6530 case ISD::XOR:
6531 case ISD::OR:
6532 return DAG.getNode(
6533 Opcode: Src.getOpcode(), DL, VT: SExtVT,
6534 N1: signExtendBitcastSrcVector(DAG, SExtVT, Src: Src.getOperand(i: 0), DL),
6535 N2: signExtendBitcastSrcVector(DAG, SExtVT, Src: Src.getOperand(i: 1), DL));
6536 case ISD::SELECT:
6537 case ISD::VSELECT:
6538 return DAG.getSelect(
6539 DL, VT: SExtVT, Cond: Src.getOperand(i: 0),
6540 LHS: signExtendBitcastSrcVector(DAG, SExtVT, Src: Src.getOperand(i: 1), DL),
6541 RHS: signExtendBitcastSrcVector(DAG, SExtVT, Src: Src.getOperand(i: 2), DL));
6542 }
6543 llvm_unreachable("Unexpected node type for vXi1 sign extension");
6544}
6545
6546static SDValue
6547performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG,
6548 TargetLowering::DAGCombinerInfo &DCI,
6549 const LoongArchSubtarget &Subtarget) {
6550 SDLoc DL(N);
6551 EVT VT = N->getValueType(ResNo: 0);
6552 SDValue Src = N->getOperand(Num: 0);
6553 EVT SrcVT = Src.getValueType();
6554
6555 if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
6556 return SDValue();
6557
6558 bool UseLASX;
6559 unsigned Opc = ISD::DELETED_NODE;
6560 EVT CmpVT = Src.getOperand(i: 0).getValueType();
6561 EVT EltVT = CmpVT.getVectorElementType();
6562
6563 if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
6564 UseLASX = false;
6565 else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
6566 CmpVT.getSizeInBits() == 256)
6567 UseLASX = true;
6568 else
6569 return SDValue();
6570
6571 SDValue SrcN1 = Src.getOperand(i: 1);
6572 switch (cast<CondCodeSDNode>(Val: Src.getOperand(i: 2))->get()) {
6573 default:
6574 break;
6575 case ISD::SETEQ:
6576 // x == 0 => not (vmsknez.b x)
6577 if (ISD::isBuildVectorAllZeros(N: SrcN1.getNode()) && EltVT == MVT::i8)
6578 Opc = UseLASX ? LoongArchISD::XVMSKEQZ : LoongArchISD::VMSKEQZ;
6579 break;
6580 case ISD::SETGT:
6581 // x > -1 => vmskgez.b x
6582 if (ISD::isBuildVectorAllOnes(N: SrcN1.getNode()) && EltVT == MVT::i8)
6583 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
6584 break;
6585 case ISD::SETGE:
6586 // x >= 0 => vmskgez.b x
6587 if (ISD::isBuildVectorAllZeros(N: SrcN1.getNode()) && EltVT == MVT::i8)
6588 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
6589 break;
6590 case ISD::SETLT:
6591 // x < 0 => vmskltz.{b,h,w,d} x
6592 if (ISD::isBuildVectorAllZeros(N: SrcN1.getNode()) &&
6593 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
6594 EltVT == MVT::i64))
6595 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
6596 break;
6597 case ISD::SETLE:
6598 // x <= -1 => vmskltz.{b,h,w,d} x
6599 if (ISD::isBuildVectorAllOnes(N: SrcN1.getNode()) &&
6600 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
6601 EltVT == MVT::i64))
6602 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
6603 break;
6604 case ISD::SETNE:
6605 // x != 0 => vmsknez.b x
6606 if (ISD::isBuildVectorAllZeros(N: SrcN1.getNode()) && EltVT == MVT::i8)
6607 Opc = UseLASX ? LoongArchISD::XVMSKNEZ : LoongArchISD::VMSKNEZ;
6608 break;
6609 }
6610
6611 if (Opc == ISD::DELETED_NODE)
6612 return SDValue();
6613
6614 SDValue V = DAG.getNode(Opcode: Opc, DL, VT: Subtarget.getGRLenVT(), Operand: Src.getOperand(i: 0));
6615 EVT T = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: SrcVT.getVectorNumElements());
6616 V = DAG.getZExtOrTrunc(Op: V, DL, VT: T);
6617 return DAG.getBitcast(VT, V);
6618}
6619
6620static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
6621 TargetLowering::DAGCombinerInfo &DCI,
6622 const LoongArchSubtarget &Subtarget) {
6623 SDLoc DL(N);
6624 EVT VT = N->getValueType(ResNo: 0);
6625 SDValue Src = N->getOperand(Num: 0);
6626 EVT SrcVT = Src.getValueType();
6627 MVT GRLenVT = Subtarget.getGRLenVT();
6628
6629 if (!DCI.isBeforeLegalizeOps())
6630 return SDValue();
6631
6632 if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
6633 return SDValue();
6634
6635 // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
6636 SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
6637 if (Res)
6638 return Res;
6639
6640 // Generate vXi1 using [X]VMSKLTZ
6641 MVT SExtVT;
6642 unsigned Opc;
6643 bool UseLASX = false;
6644 bool PropagateSExt = false;
6645
6646 if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
6647 EVT CmpVT = Src.getOperand(i: 0).getValueType();
6648 if (CmpVT.getSizeInBits() > 256)
6649 return SDValue();
6650 }
6651
6652 switch (SrcVT.getSimpleVT().SimpleTy) {
6653 default:
6654 return SDValue();
6655 case MVT::v2i1:
6656 SExtVT = MVT::v2i64;
6657 break;
6658 case MVT::v4i1:
6659 SExtVT = MVT::v4i32;
6660 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, Size: 256, Depth: 0)) {
6661 SExtVT = MVT::v4i64;
6662 UseLASX = true;
6663 PropagateSExt = true;
6664 }
6665 break;
6666 case MVT::v8i1:
6667 SExtVT = MVT::v8i16;
6668 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, Size: 256, Depth: 0)) {
6669 SExtVT = MVT::v8i32;
6670 UseLASX = true;
6671 PropagateSExt = true;
6672 }
6673 break;
6674 case MVT::v16i1:
6675 SExtVT = MVT::v16i8;
6676 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, Size: 256, Depth: 0)) {
6677 SExtVT = MVT::v16i16;
6678 UseLASX = true;
6679 PropagateSExt = true;
6680 }
6681 break;
6682 case MVT::v32i1:
6683 SExtVT = MVT::v32i8;
6684 UseLASX = true;
6685 break;
6686 };
6687 Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
6688 : DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: SExtVT, Operand: Src);
6689
6690 SDValue V;
6691 if (!Subtarget.has32S() || !Subtarget.hasExtLASX()) {
6692 if (Src.getSimpleValueType() == MVT::v32i8) {
6693 SDValue Lo, Hi;
6694 std::tie(args&: Lo, args&: Hi) = DAG.SplitVector(N: Src, DL);
6695 Lo = DAG.getNode(Opcode: LoongArchISD::VMSKLTZ, DL, VT: GRLenVT, Operand: Lo);
6696 Hi = DAG.getNode(Opcode: LoongArchISD::VMSKLTZ, DL, VT: GRLenVT, Operand: Hi);
6697 Hi = DAG.getNode(Opcode: ISD::SHL, DL, VT: GRLenVT, N1: Hi,
6698 N2: DAG.getShiftAmountConstant(Val: 16, VT: GRLenVT, DL));
6699 V = DAG.getNode(Opcode: ISD::OR, DL, VT: GRLenVT, N1: Lo, N2: Hi);
6700 } else if (UseLASX) {
6701 return SDValue();
6702 }
6703 }
6704
6705 if (!V) {
6706 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
6707 V = DAG.getNode(Opcode: Opc, DL, VT: GRLenVT, Operand: Src);
6708 }
6709
6710 EVT T = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: SrcVT.getVectorNumElements());
6711 V = DAG.getZExtOrTrunc(Op: V, DL, VT: T);
6712 return DAG.getBitcast(VT, V);
6713}
6714
6715static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
6716 TargetLowering::DAGCombinerInfo &DCI,
6717 const LoongArchSubtarget &Subtarget) {
6718 MVT GRLenVT = Subtarget.getGRLenVT();
6719 EVT ValTy = N->getValueType(ResNo: 0);
6720 SDValue N0 = N->getOperand(Num: 0), N1 = N->getOperand(Num: 1);
6721 ConstantSDNode *CN0, *CN1;
6722 SDLoc DL(N);
6723 unsigned ValBits = ValTy.getSizeInBits();
6724 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
6725 unsigned Shamt;
6726 bool SwapAndRetried = false;
6727
6728 // BSTRPICK requires the 32S feature.
6729 if (!Subtarget.has32S())
6730 return SDValue();
6731
6732 if (DCI.isBeforeLegalizeOps())
6733 return SDValue();
6734
6735 if (ValBits != 32 && ValBits != 64)
6736 return SDValue();
6737
6738Retry:
6739 // 1st pattern to match BSTRINS:
6740 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
6741 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
6742 // =>
6743 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
6744 if (N0.getOpcode() == ISD::AND &&
6745 (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) &&
6746 isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
6747 N1.getOpcode() == ISD::AND && N1.getOperand(i: 0).getOpcode() == ISD::SHL &&
6748 (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
6749 isShiftedMask_64(Value: CN1->getZExtValue(), MaskIdx&: MaskIdx1, MaskLen&: MaskLen1) &&
6750 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
6751 (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) &&
6752 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
6753 (MaskIdx0 + MaskLen0 <= ValBits)) {
6754 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
6755 return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0),
6756 N2: N1.getOperand(i: 0).getOperand(i: 0),
6757 N3: DAG.getConstant(Val: (MaskIdx0 + MaskLen0 - 1), DL, VT: GRLenVT),
6758 N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT));
6759 }
6760
6761 // 2nd pattern to match BSTRINS:
6762 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
6763 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
6764 // =>
6765 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
6766 if (N0.getOpcode() == ISD::AND &&
6767 (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) &&
6768 isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
6769 N1.getOpcode() == ISD::SHL && N1.getOperand(i: 0).getOpcode() == ISD::AND &&
6770 (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
6771 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
6772 (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) &&
6773 isShiftedMask_64(Value: CN1->getZExtValue(), MaskIdx&: MaskIdx1, MaskLen&: MaskLen1) &&
6774 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
6775 (MaskIdx0 + MaskLen0 <= ValBits)) {
6776 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
6777 return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0),
6778 N2: N1.getOperand(i: 0).getOperand(i: 0),
6779 N3: DAG.getConstant(Val: (MaskIdx0 + MaskLen0 - 1), DL, VT: GRLenVT),
6780 N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT));
6781 }
6782
6783 // 3rd pattern to match BSTRINS:
6784 // R = or (and X, mask0), (and Y, mask1)
6785 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
6786 // =>
6787 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
6788 // where msb = lsb + size - 1
6789 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
6790 (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) &&
6791 isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
6792 (MaskIdx0 + MaskLen0 <= 64) &&
6793 (CN1 = dyn_cast<ConstantSDNode>(Val: N1->getOperand(Num: 1))) &&
6794 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
6795 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
6796 return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0),
6797 N2: DAG.getNode(Opcode: ISD::SRL, DL, VT: N1->getValueType(ResNo: 0), N1,
6798 N2: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT)),
6799 N3: DAG.getConstant(Val: ValBits == 32
6800 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
6801 : (MaskIdx0 + MaskLen0 - 1),
6802 DL, VT: GRLenVT),
6803 N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT));
6804 }
6805
6806 // 4th pattern to match BSTRINS:
6807 // R = or (and X, mask), (shl Y, shamt)
6808 // where mask = (2**shamt - 1)
6809 // =>
6810 // R = BSTRINS X, Y, ValBits - 1, shamt
6811 // where ValBits = 32 or 64
6812 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
6813 (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) &&
6814 isShiftedMask_64(Value: CN0->getZExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
6815 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
6816 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
6817 (MaskIdx0 + MaskLen0 <= ValBits)) {
6818 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
6819 return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0),
6820 N2: N1.getOperand(i: 0),
6821 N3: DAG.getConstant(Val: (ValBits - 1), DL, VT: GRLenVT),
6822 N4: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT));
6823 }
6824
6825 // 5th pattern to match BSTRINS:
6826 // R = or (and X, mask), const
6827 // where ~mask = (2**size - 1) << lsb, mask & const = 0
6828 // =>
6829 // R = BSTRINS X, (const >> lsb), msb, lsb
6830 // where msb = lsb + size - 1
6831 if (N0.getOpcode() == ISD::AND &&
6832 (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) &&
6833 isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
6834 (CN1 = dyn_cast<ConstantSDNode>(Val&: N1)) &&
6835 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
6836 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
6837 return DAG.getNode(
6838 Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0),
6839 N2: DAG.getSignedConstant(Val: CN1->getSExtValue() >> MaskIdx0, DL, VT: ValTy),
6840 N3: DAG.getConstant(Val: ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
6841 : (MaskIdx0 + MaskLen0 - 1),
6842 DL, VT: GRLenVT),
6843 N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT));
6844 }
6845
6846 // 6th pattern.
6847 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
6848 // by the incoming bits are known to be zero.
6849 // =>
6850 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
6851 //
6852 // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th
6853 // pattern is more common than the 1st. So we put the 1st before the 6th in
6854 // order to match as many nodes as possible.
6855 ConstantSDNode *CNMask, *CNShamt;
6856 unsigned MaskIdx, MaskLen;
6857 if (N1.getOpcode() == ISD::SHL && N1.getOperand(i: 0).getOpcode() == ISD::AND &&
6858 (CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) &&
6859 isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen) &&
6860 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
6861 CNShamt->getZExtValue() + MaskLen <= ValBits) {
6862 Shamt = CNShamt->getZExtValue();
6863 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
6864 if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) {
6865 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
6866 return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0,
6867 N2: N1.getOperand(i: 0).getOperand(i: 0),
6868 N3: DAG.getConstant(Val: Shamt + MaskLen - 1, DL, VT: GRLenVT),
6869 N4: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT));
6870 }
6871 }
6872
6873 // 7th pattern.
6874 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
6875 // overwritten by the incoming bits are known to be zero.
6876 // =>
6877 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
6878 //
6879 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
6880 // before the 7th in order to match as many nodes as possible.
6881 if (N1.getOpcode() == ISD::AND &&
6882 (CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
6883 isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen) &&
6884 N1.getOperand(i: 0).getOpcode() == ISD::SHL &&
6885 (CNShamt = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) &&
6886 CNShamt->getZExtValue() == MaskIdx) {
6887 APInt ShMask(ValBits, CNMask->getZExtValue());
6888 if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) {
6889 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
6890 return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0,
6891 N2: N1.getOperand(i: 0).getOperand(i: 0),
6892 N3: DAG.getConstant(Val: MaskIdx + MaskLen - 1, DL, VT: GRLenVT),
6893 N4: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT));
6894 }
6895 }
6896
6897 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
6898 if (!SwapAndRetried) {
6899 std::swap(a&: N0, b&: N1);
6900 SwapAndRetried = true;
6901 goto Retry;
6902 }
6903
6904 SwapAndRetried = false;
6905Retry2:
6906 // 8th pattern.
6907 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
6908 // the incoming bits are known to be zero.
6909 // =>
6910 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
6911 //
6912 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
6913 // we put it here in order to match as many nodes as possible or generate less
6914 // instructions.
6915 if (N1.getOpcode() == ISD::AND &&
6916 (CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
6917 isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen)) {
6918 APInt ShMask(ValBits, CNMask->getZExtValue());
6919 if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) {
6920 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
6921 return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0,
6922 N2: DAG.getNode(Opcode: ISD::SRL, DL, VT: N1->getValueType(ResNo: 0),
6923 N1: N1->getOperand(Num: 0),
6924 N2: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT)),
6925 N3: DAG.getConstant(Val: MaskIdx + MaskLen - 1, DL, VT: GRLenVT),
6926 N4: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT));
6927 }
6928 }
6929 // Swap N0/N1 and retry.
6930 if (!SwapAndRetried) {
6931 std::swap(a&: N0, b&: N1);
6932 SwapAndRetried = true;
6933 goto Retry2;
6934 }
6935
6936 return SDValue();
6937}
6938
6939static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
6940 ExtType = ISD::NON_EXTLOAD;
6941
6942 switch (V.getNode()->getOpcode()) {
6943 case ISD::LOAD: {
6944 LoadSDNode *LoadNode = cast<LoadSDNode>(Val: V.getNode());
6945 if ((LoadNode->getMemoryVT() == MVT::i8) ||
6946 (LoadNode->getMemoryVT() == MVT::i16)) {
6947 ExtType = LoadNode->getExtensionType();
6948 return true;
6949 }
6950 return false;
6951 }
6952 case ISD::AssertSext: {
6953 VTSDNode *TypeNode = cast<VTSDNode>(Val: V.getNode()->getOperand(Num: 1));
6954 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
6955 ExtType = ISD::SEXTLOAD;
6956 return true;
6957 }
6958 return false;
6959 }
6960 case ISD::AssertZext: {
6961 VTSDNode *TypeNode = cast<VTSDNode>(Val: V.getNode()->getOperand(Num: 1));
6962 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
6963 ExtType = ISD::ZEXTLOAD;
6964 return true;
6965 }
6966 return false;
6967 }
6968 default:
6969 return false;
6970 }
6971
6972 return false;
6973}
6974
6975// Eliminate redundant truncation and zero-extension nodes.
6976// * Case 1:
6977// +------------+ +------------+ +------------+
6978// | Input1 | | Input2 | | CC |
6979// +------------+ +------------+ +------------+
6980// | | |
6981// V V +----+
6982// +------------+ +------------+ |
6983// | TRUNCATE | | TRUNCATE | |
6984// +------------+ +------------+ |
6985// | | |
6986// V V |
6987// +------------+ +------------+ |
6988// | ZERO_EXT | | ZERO_EXT | |
6989// +------------+ +------------+ |
6990// | | |
6991// | +-------------+ |
6992// V V | |
6993// +----------------+ | |
6994// | AND | | |
6995// +----------------+ | |
6996// | | |
6997// +---------------+ | |
6998// | | |
6999// V V V
7000// +-------------+
7001// | CMP |
7002// +-------------+
7003// * Case 2:
7004// +------------+ +------------+ +-------------+ +------------+ +------------+
7005// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
7006// +------------+ +------------+ +-------------+ +------------+ +------------+
7007// | | | | |
7008// V | | | |
7009// +------------+ | | | |
7010// | XOR |<---------------------+ | |
7011// +------------+ | | |
7012// | | | |
7013// V V +---------------+ |
7014// +------------+ +------------+ | |
7015// | TRUNCATE | | TRUNCATE | | +-------------------------+
7016// +------------+ +------------+ | |
7017// | | | |
7018// V V | |
7019// +------------+ +------------+ | |
7020// | ZERO_EXT | | ZERO_EXT | | |
7021// +------------+ +------------+ | |
7022// | | | |
7023// V V | |
7024// +----------------+ | |
7025// | AND | | |
7026// +----------------+ | |
7027// | | |
7028// +---------------+ | |
7029// | | |
7030// V V V
7031// +-------------+
7032// | CMP |
7033// +-------------+
7034static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
7035 TargetLowering::DAGCombinerInfo &DCI,
7036 const LoongArchSubtarget &Subtarget) {
7037 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get();
7038
7039 SDNode *AndNode = N->getOperand(Num: 0).getNode();
7040 if (AndNode->getOpcode() != ISD::AND)
7041 return SDValue();
7042
7043 SDValue AndInputValue2 = AndNode->getOperand(Num: 1);
7044 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
7045 return SDValue();
7046
7047 SDValue CmpInputValue = N->getOperand(Num: 1);
7048 SDValue AndInputValue1 = AndNode->getOperand(Num: 0);
7049 if (AndInputValue1.getOpcode() == ISD::XOR) {
7050 if (CC != ISD::SETEQ && CC != ISD::SETNE)
7051 return SDValue();
7052 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val: AndInputValue1.getOperand(i: 1));
7053 if (!CN || !CN->isAllOnes())
7054 return SDValue();
7055 CN = dyn_cast<ConstantSDNode>(Val&: CmpInputValue);
7056 if (!CN || !CN->isZero())
7057 return SDValue();
7058 AndInputValue1 = AndInputValue1.getOperand(i: 0);
7059 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
7060 return SDValue();
7061 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
7062 if (AndInputValue2 != CmpInputValue)
7063 return SDValue();
7064 } else {
7065 return SDValue();
7066 }
7067
7068 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(Num: 0);
7069 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
7070 return SDValue();
7071
7072 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(Num: 0);
7073 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
7074 return SDValue();
7075
7076 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(Num: 0);
7077 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(Num: 0);
7078 ISD::LoadExtType ExtType1;
7079 ISD::LoadExtType ExtType2;
7080
7081 if (!checkValueWidth(V: TruncInputValue1, ExtType&: ExtType1) ||
7082 !checkValueWidth(V: TruncInputValue2, ExtType&: ExtType2))
7083 return SDValue();
7084
7085 if (TruncInputValue1->getValueType(ResNo: 0) != TruncInputValue2->getValueType(ResNo: 0) ||
7086 AndNode->getValueType(ResNo: 0) != TruncInputValue1->getValueType(ResNo: 0))
7087 return SDValue();
7088
7089 if ((ExtType2 != ISD::ZEXTLOAD) &&
7090 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
7091 return SDValue();
7092
7093 // These truncation and zero-extension nodes are not necessary, remove them.
7094 SDValue NewAnd = DAG.getNode(Opcode: ISD::AND, DL: SDLoc(N), VT: AndNode->getValueType(ResNo: 0),
7095 N1: TruncInputValue1, N2: TruncInputValue2);
7096 SDValue NewSetCC =
7097 DAG.getSetCC(DL: SDLoc(N), VT: N->getValueType(ResNo: 0), LHS: NewAnd, RHS: TruncInputValue2, Cond: CC);
7098 DAG.ReplaceAllUsesWith(From: N, To: NewSetCC.getNode());
7099 return SDValue(N, 0);
7100}
7101
7102// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
7103static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
7104 TargetLowering::DAGCombinerInfo &DCI,
7105 const LoongArchSubtarget &Subtarget) {
7106 if (DCI.isBeforeLegalizeOps())
7107 return SDValue();
7108
7109 SDValue Src = N->getOperand(Num: 0);
7110 if (Src.getOpcode() != LoongArchISD::REVB_2W)
7111 return SDValue();
7112
7113 return DAG.getNode(Opcode: LoongArchISD::BITREV_4B, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
7114 Operand: Src.getOperand(i: 0));
7115}
7116
7117// Perform common combines for BR_CC and SELECT_CC conditions.
7118static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
7119 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
7120 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val&: CC)->get();
7121
7122 // As far as arithmetic right shift always saves the sign,
7123 // shift can be omitted.
7124 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
7125 // setge (sra X, N), 0 -> setge X, 0
7126 if (isNullConstant(V: RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
7127 LHS.getOpcode() == ISD::SRA) {
7128 LHS = LHS.getOperand(i: 0);
7129 return true;
7130 }
7131
7132 if (!ISD::isIntEqualitySetCC(Code: CCVal))
7133 return false;
7134
7135 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
7136 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
7137 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(V: RHS) &&
7138 LHS.getOperand(i: 0).getValueType() == Subtarget.getGRLenVT()) {
7139 // If we're looking for eq 0 instead of ne 0, we need to invert the
7140 // condition.
7141 bool Invert = CCVal == ISD::SETEQ;
7142 CCVal = cast<CondCodeSDNode>(Val: LHS.getOperand(i: 2))->get();
7143 if (Invert)
7144 CCVal = ISD::getSetCCInverse(Operation: CCVal, Type: LHS.getValueType());
7145
7146 RHS = LHS.getOperand(i: 1);
7147 LHS = LHS.getOperand(i: 0);
7148 translateSetCCForBranch(DL, LHS, RHS, CC&: CCVal, DAG);
7149
7150 CC = DAG.getCondCode(Cond: CCVal);
7151 return true;
7152 }
7153
7154 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, GRLen-1-C), 0, ge/lt)
7155 if (isNullConstant(V: RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
7156 LHS.getOperand(i: 1).getOpcode() == ISD::Constant) {
7157 SDValue LHS0 = LHS.getOperand(i: 0);
7158 if (LHS0.getOpcode() == ISD::AND &&
7159 LHS0.getOperand(i: 1).getOpcode() == ISD::Constant) {
7160 uint64_t Mask = LHS0.getConstantOperandVal(i: 1);
7161 uint64_t ShAmt = LHS.getConstantOperandVal(i: 1);
7162 if (isPowerOf2_64(Value: Mask) && Log2_64(Value: Mask) == ShAmt) {
7163 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
7164 CC = DAG.getCondCode(Cond: CCVal);
7165
7166 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
7167 LHS = LHS0.getOperand(i: 0);
7168 if (ShAmt != 0)
7169 LHS =
7170 DAG.getNode(Opcode: ISD::SHL, DL, VT: LHS.getValueType(), N1: LHS0.getOperand(i: 0),
7171 N2: DAG.getConstant(Val: ShAmt, DL, VT: LHS.getValueType()));
7172 return true;
7173 }
7174 }
7175 }
7176
7177 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
7178 // This can occur when legalizing some floating point comparisons.
7179 APInt Mask = APInt::getBitsSetFrom(numBits: LHS.getValueSizeInBits(), loBit: 1);
7180 if (isOneConstant(V: RHS) && DAG.MaskedValueIsZero(Op: LHS, Mask)) {
7181 CCVal = ISD::getSetCCInverse(Operation: CCVal, Type: LHS.getValueType());
7182 CC = DAG.getCondCode(Cond: CCVal);
7183 RHS = DAG.getConstant(Val: 0, DL, VT: LHS.getValueType());
7184 return true;
7185 }
7186
7187 return false;
7188}
7189
7190static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG,
7191 TargetLowering::DAGCombinerInfo &DCI,
7192 const LoongArchSubtarget &Subtarget) {
7193 SDValue LHS = N->getOperand(Num: 1);
7194 SDValue RHS = N->getOperand(Num: 2);
7195 SDValue CC = N->getOperand(Num: 3);
7196 SDLoc DL(N);
7197
7198 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
7199 return DAG.getNode(Opcode: LoongArchISD::BR_CC, DL, VT: N->getValueType(ResNo: 0),
7200 N1: N->getOperand(Num: 0), N2: LHS, N3: RHS, N4: CC, N5: N->getOperand(Num: 4));
7201
7202 return SDValue();
7203}
7204
7205static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
7206 TargetLowering::DAGCombinerInfo &DCI,
7207 const LoongArchSubtarget &Subtarget) {
7208 // Transform
7209 SDValue LHS = N->getOperand(Num: 0);
7210 SDValue RHS = N->getOperand(Num: 1);
7211 SDValue CC = N->getOperand(Num: 2);
7212 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val&: CC)->get();
7213 SDValue TrueV = N->getOperand(Num: 3);
7214 SDValue FalseV = N->getOperand(Num: 4);
7215 SDLoc DL(N);
7216 EVT VT = N->getValueType(ResNo: 0);
7217
7218 // If the True and False values are the same, we don't need a select_cc.
7219 if (TrueV == FalseV)
7220 return TrueV;
7221
7222 // (select (x < 0), y, z) -> x >> (GRLEN - 1) & (y - z) + z
7223 // (select (x >= 0), y, z) -> x >> (GRLEN - 1) & (z - y) + y
7224 if (isa<ConstantSDNode>(Val: TrueV) && isa<ConstantSDNode>(Val: FalseV) &&
7225 isNullConstant(V: RHS) &&
7226 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
7227 if (CCVal == ISD::CondCode::SETGE)
7228 std::swap(a&: TrueV, b&: FalseV);
7229
7230 int64_t TrueSImm = cast<ConstantSDNode>(Val&: TrueV)->getSExtValue();
7231 int64_t FalseSImm = cast<ConstantSDNode>(Val&: FalseV)->getSExtValue();
7232 // Only handle simm12, if it is not in this range, it can be considered as
7233 // register.
7234 if (isInt<12>(x: TrueSImm) && isInt<12>(x: FalseSImm) &&
7235 isInt<12>(x: TrueSImm - FalseSImm)) {
7236 SDValue SRA =
7237 DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: LHS,
7238 N2: DAG.getConstant(Val: Subtarget.getGRLen() - 1, DL, VT));
7239 SDValue AND =
7240 DAG.getNode(Opcode: ISD::AND, DL, VT, N1: SRA,
7241 N2: DAG.getSignedConstant(Val: TrueSImm - FalseSImm, DL, VT));
7242 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: AND, N2: FalseV);
7243 }
7244
7245 if (CCVal == ISD::CondCode::SETGE)
7246 std::swap(a&: TrueV, b&: FalseV);
7247 }
7248
7249 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
7250 return DAG.getNode(Opcode: LoongArchISD::SELECT_CC, DL, VT: N->getValueType(ResNo: 0),
7251 Ops: {LHS, RHS, CC, TrueV, FalseV});
7252
7253 return SDValue();
7254}
7255
7256template <unsigned N>
7257static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
7258 SelectionDAG &DAG,
7259 const LoongArchSubtarget &Subtarget,
7260 bool IsSigned = false) {
7261 SDLoc DL(Node);
7262 auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: ImmOp));
7263 // Check the ImmArg.
7264 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
7265 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
7266 DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) +
7267 ": argument out of range.");
7268 return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: Subtarget.getGRLenVT());
7269 }
7270 return DAG.getConstant(Val: CImm->getZExtValue(), DL, VT: Subtarget.getGRLenVT());
7271}
7272
7273template <unsigned N>
7274static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
7275 SelectionDAG &DAG, bool IsSigned = false) {
7276 SDLoc DL(Node);
7277 EVT ResTy = Node->getValueType(ResNo: 0);
7278 auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: ImmOp));
7279
7280 // Check the ImmArg.
7281 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
7282 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
7283 DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) +
7284 ": argument out of range.");
7285 return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy);
7286 }
7287 return DAG.getConstant(
7288 Val: APInt(ResTy.getScalarType().getSizeInBits(),
7289 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
7290 DL, VT: ResTy);
7291}
7292
7293static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
7294 SDLoc DL(Node);
7295 EVT ResTy = Node->getValueType(ResNo: 0);
7296 SDValue Vec = Node->getOperand(Num: 2);
7297 SDValue Mask = DAG.getConstant(Val: Vec.getScalarValueSizeInBits() - 1, DL, VT: ResTy);
7298 return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Vec, N2: Mask);
7299}
7300
7301static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
7302 SDLoc DL(Node);
7303 EVT ResTy = Node->getValueType(ResNo: 0);
7304 SDValue One = DAG.getConstant(Val: 1, DL, VT: ResTy);
7305 SDValue Bit =
7306 DAG.getNode(Opcode: ISD::SHL, DL, VT: ResTy, N1: One, N2: truncateVecElts(Node, DAG));
7307
7308 return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Node->getOperand(Num: 1),
7309 N2: DAG.getNOT(DL, Val: Bit, VT: ResTy));
7310}
7311
7312template <unsigned N>
7313static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
7314 SDLoc DL(Node);
7315 EVT ResTy = Node->getValueType(ResNo: 0);
7316 auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2));
7317 // Check the unsigned ImmArg.
7318 if (!isUInt<N>(CImm->getZExtValue())) {
7319 DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) +
7320 ": argument out of range.");
7321 return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy);
7322 }
7323
7324 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
7325 SDValue Mask = DAG.getConstant(Val: ~BitImm, DL, VT: ResTy);
7326
7327 return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Node->getOperand(Num: 1), N2: Mask);
7328}
7329
7330template <unsigned N>
7331static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
7332 SDLoc DL(Node);
7333 EVT ResTy = Node->getValueType(ResNo: 0);
7334 auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2));
7335 // Check the unsigned ImmArg.
7336 if (!isUInt<N>(CImm->getZExtValue())) {
7337 DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) +
7338 ": argument out of range.");
7339 return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy);
7340 }
7341
7342 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
7343 SDValue BitImm = DAG.getConstant(Val: Imm, DL, VT: ResTy);
7344 return DAG.getNode(Opcode: ISD::OR, DL, VT: ResTy, N1: Node->getOperand(Num: 1), N2: BitImm);
7345}
7346
7347template <unsigned N>
7348static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
7349 SDLoc DL(Node);
7350 EVT ResTy = Node->getValueType(ResNo: 0);
7351 auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2));
7352 // Check the unsigned ImmArg.
7353 if (!isUInt<N>(CImm->getZExtValue())) {
7354 DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) +
7355 ": argument out of range.");
7356 return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy);
7357 }
7358
7359 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
7360 SDValue BitImm = DAG.getConstant(Val: Imm, DL, VT: ResTy);
7361 return DAG.getNode(Opcode: ISD::XOR, DL, VT: ResTy, N1: Node->getOperand(Num: 1), N2: BitImm);
7362}
7363
7364template <unsigned W>
7365static SDValue lowerVectorPickVE2GR(SDNode *N, SelectionDAG &DAG,
7366 unsigned ResOp) {
7367 unsigned Imm = N->getConstantOperandVal(Num: 2);
7368 if (!isUInt<W>(Imm)) {
7369 const StringRef ErrorMsg = "argument out of range";
7370 DAG.getContext()->emitError(ErrorStr: N->getOperationName(G: 0) + ": " + ErrorMsg + ".");
7371 return DAG.getUNDEF(VT: N->getValueType(ResNo: 0));
7372 }
7373 SDLoc DL(N);
7374 SDValue Vec = N->getOperand(Num: 1);
7375 SDValue Idx = DAG.getConstant(Val: Imm, DL, VT: MVT::i32);
7376 SDValue EltVT = DAG.getValueType(Vec.getValueType().getVectorElementType());
7377 return DAG.getNode(Opcode: ResOp, DL, VT: N->getValueType(ResNo: 0), N1: Vec, N2: Idx, N3: EltVT);
7378}
7379
7380static SDValue
7381performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
7382 TargetLowering::DAGCombinerInfo &DCI,
7383 const LoongArchSubtarget &Subtarget) {
7384 SDLoc DL(N);
7385 switch (N->getConstantOperandVal(Num: 0)) {
7386 default:
7387 break;
7388 case Intrinsic::loongarch_lsx_vadd_b:
7389 case Intrinsic::loongarch_lsx_vadd_h:
7390 case Intrinsic::loongarch_lsx_vadd_w:
7391 case Intrinsic::loongarch_lsx_vadd_d:
7392 case Intrinsic::loongarch_lasx_xvadd_b:
7393 case Intrinsic::loongarch_lasx_xvadd_h:
7394 case Intrinsic::loongarch_lasx_xvadd_w:
7395 case Intrinsic::loongarch_lasx_xvadd_d:
7396 return DAG.getNode(Opcode: ISD::ADD, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7397 N2: N->getOperand(Num: 2));
7398 case Intrinsic::loongarch_lsx_vaddi_bu:
7399 case Intrinsic::loongarch_lsx_vaddi_hu:
7400 case Intrinsic::loongarch_lsx_vaddi_wu:
7401 case Intrinsic::loongarch_lsx_vaddi_du:
7402 case Intrinsic::loongarch_lasx_xvaddi_bu:
7403 case Intrinsic::loongarch_lasx_xvaddi_hu:
7404 case Intrinsic::loongarch_lasx_xvaddi_wu:
7405 case Intrinsic::loongarch_lasx_xvaddi_du:
7406 return DAG.getNode(Opcode: ISD::ADD, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7407 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
7408 case Intrinsic::loongarch_lsx_vsub_b:
7409 case Intrinsic::loongarch_lsx_vsub_h:
7410 case Intrinsic::loongarch_lsx_vsub_w:
7411 case Intrinsic::loongarch_lsx_vsub_d:
7412 case Intrinsic::loongarch_lasx_xvsub_b:
7413 case Intrinsic::loongarch_lasx_xvsub_h:
7414 case Intrinsic::loongarch_lasx_xvsub_w:
7415 case Intrinsic::loongarch_lasx_xvsub_d:
7416 return DAG.getNode(Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7417 N2: N->getOperand(Num: 2));
7418 case Intrinsic::loongarch_lsx_vsubi_bu:
7419 case Intrinsic::loongarch_lsx_vsubi_hu:
7420 case Intrinsic::loongarch_lsx_vsubi_wu:
7421 case Intrinsic::loongarch_lsx_vsubi_du:
7422 case Intrinsic::loongarch_lasx_xvsubi_bu:
7423 case Intrinsic::loongarch_lasx_xvsubi_hu:
7424 case Intrinsic::loongarch_lasx_xvsubi_wu:
7425 case Intrinsic::loongarch_lasx_xvsubi_du:
7426 return DAG.getNode(Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7427 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
7428 case Intrinsic::loongarch_lsx_vneg_b:
7429 case Intrinsic::loongarch_lsx_vneg_h:
7430 case Intrinsic::loongarch_lsx_vneg_w:
7431 case Intrinsic::loongarch_lsx_vneg_d:
7432 case Intrinsic::loongarch_lasx_xvneg_b:
7433 case Intrinsic::loongarch_lasx_xvneg_h:
7434 case Intrinsic::loongarch_lasx_xvneg_w:
7435 case Intrinsic::loongarch_lasx_xvneg_d:
7436 return DAG.getNode(
7437 Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: 0),
7438 N1: DAG.getConstant(
7439 Val: APInt(N->getValueType(ResNo: 0).getScalarType().getSizeInBits(), 0,
7440 /*isSigned=*/true),
7441 DL: SDLoc(N), VT: N->getValueType(ResNo: 0)),
7442 N2: N->getOperand(Num: 1));
7443 case Intrinsic::loongarch_lsx_vmax_b:
7444 case Intrinsic::loongarch_lsx_vmax_h:
7445 case Intrinsic::loongarch_lsx_vmax_w:
7446 case Intrinsic::loongarch_lsx_vmax_d:
7447 case Intrinsic::loongarch_lasx_xvmax_b:
7448 case Intrinsic::loongarch_lasx_xvmax_h:
7449 case Intrinsic::loongarch_lasx_xvmax_w:
7450 case Intrinsic::loongarch_lasx_xvmax_d:
7451 return DAG.getNode(Opcode: ISD::SMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7452 N2: N->getOperand(Num: 2));
7453 case Intrinsic::loongarch_lsx_vmax_bu:
7454 case Intrinsic::loongarch_lsx_vmax_hu:
7455 case Intrinsic::loongarch_lsx_vmax_wu:
7456 case Intrinsic::loongarch_lsx_vmax_du:
7457 case Intrinsic::loongarch_lasx_xvmax_bu:
7458 case Intrinsic::loongarch_lasx_xvmax_hu:
7459 case Intrinsic::loongarch_lasx_xvmax_wu:
7460 case Intrinsic::loongarch_lasx_xvmax_du:
7461 return DAG.getNode(Opcode: ISD::UMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7462 N2: N->getOperand(Num: 2));
7463 case Intrinsic::loongarch_lsx_vmaxi_b:
7464 case Intrinsic::loongarch_lsx_vmaxi_h:
7465 case Intrinsic::loongarch_lsx_vmaxi_w:
7466 case Intrinsic::loongarch_lsx_vmaxi_d:
7467 case Intrinsic::loongarch_lasx_xvmaxi_b:
7468 case Intrinsic::loongarch_lasx_xvmaxi_h:
7469 case Intrinsic::loongarch_lasx_xvmaxi_w:
7470 case Intrinsic::loongarch_lasx_xvmaxi_d:
7471 return DAG.getNode(Opcode: ISD::SMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7472 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG, /*IsSigned=*/true));
7473 case Intrinsic::loongarch_lsx_vmaxi_bu:
7474 case Intrinsic::loongarch_lsx_vmaxi_hu:
7475 case Intrinsic::loongarch_lsx_vmaxi_wu:
7476 case Intrinsic::loongarch_lsx_vmaxi_du:
7477 case Intrinsic::loongarch_lasx_xvmaxi_bu:
7478 case Intrinsic::loongarch_lasx_xvmaxi_hu:
7479 case Intrinsic::loongarch_lasx_xvmaxi_wu:
7480 case Intrinsic::loongarch_lasx_xvmaxi_du:
7481 return DAG.getNode(Opcode: ISD::UMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7482 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
7483 case Intrinsic::loongarch_lsx_vmin_b:
7484 case Intrinsic::loongarch_lsx_vmin_h:
7485 case Intrinsic::loongarch_lsx_vmin_w:
7486 case Intrinsic::loongarch_lsx_vmin_d:
7487 case Intrinsic::loongarch_lasx_xvmin_b:
7488 case Intrinsic::loongarch_lasx_xvmin_h:
7489 case Intrinsic::loongarch_lasx_xvmin_w:
7490 case Intrinsic::loongarch_lasx_xvmin_d:
7491 return DAG.getNode(Opcode: ISD::SMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7492 N2: N->getOperand(Num: 2));
7493 case Intrinsic::loongarch_lsx_vmin_bu:
7494 case Intrinsic::loongarch_lsx_vmin_hu:
7495 case Intrinsic::loongarch_lsx_vmin_wu:
7496 case Intrinsic::loongarch_lsx_vmin_du:
7497 case Intrinsic::loongarch_lasx_xvmin_bu:
7498 case Intrinsic::loongarch_lasx_xvmin_hu:
7499 case Intrinsic::loongarch_lasx_xvmin_wu:
7500 case Intrinsic::loongarch_lasx_xvmin_du:
7501 return DAG.getNode(Opcode: ISD::UMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7502 N2: N->getOperand(Num: 2));
7503 case Intrinsic::loongarch_lsx_vmini_b:
7504 case Intrinsic::loongarch_lsx_vmini_h:
7505 case Intrinsic::loongarch_lsx_vmini_w:
7506 case Intrinsic::loongarch_lsx_vmini_d:
7507 case Intrinsic::loongarch_lasx_xvmini_b:
7508 case Intrinsic::loongarch_lasx_xvmini_h:
7509 case Intrinsic::loongarch_lasx_xvmini_w:
7510 case Intrinsic::loongarch_lasx_xvmini_d:
7511 return DAG.getNode(Opcode: ISD::SMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7512 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG, /*IsSigned=*/true));
7513 case Intrinsic::loongarch_lsx_vmini_bu:
7514 case Intrinsic::loongarch_lsx_vmini_hu:
7515 case Intrinsic::loongarch_lsx_vmini_wu:
7516 case Intrinsic::loongarch_lsx_vmini_du:
7517 case Intrinsic::loongarch_lasx_xvmini_bu:
7518 case Intrinsic::loongarch_lasx_xvmini_hu:
7519 case Intrinsic::loongarch_lasx_xvmini_wu:
7520 case Intrinsic::loongarch_lasx_xvmini_du:
7521 return DAG.getNode(Opcode: ISD::UMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7522 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
7523 case Intrinsic::loongarch_lsx_vmul_b:
7524 case Intrinsic::loongarch_lsx_vmul_h:
7525 case Intrinsic::loongarch_lsx_vmul_w:
7526 case Intrinsic::loongarch_lsx_vmul_d:
7527 case Intrinsic::loongarch_lasx_xvmul_b:
7528 case Intrinsic::loongarch_lasx_xvmul_h:
7529 case Intrinsic::loongarch_lasx_xvmul_w:
7530 case Intrinsic::loongarch_lasx_xvmul_d:
7531 return DAG.getNode(Opcode: ISD::MUL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7532 N2: N->getOperand(Num: 2));
7533 case Intrinsic::loongarch_lsx_vmadd_b:
7534 case Intrinsic::loongarch_lsx_vmadd_h:
7535 case Intrinsic::loongarch_lsx_vmadd_w:
7536 case Intrinsic::loongarch_lsx_vmadd_d:
7537 case Intrinsic::loongarch_lasx_xvmadd_b:
7538 case Intrinsic::loongarch_lasx_xvmadd_h:
7539 case Intrinsic::loongarch_lasx_xvmadd_w:
7540 case Intrinsic::loongarch_lasx_xvmadd_d: {
7541 EVT ResTy = N->getValueType(ResNo: 0);
7542 return DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 1),
7543 N2: DAG.getNode(Opcode: ISD::MUL, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 2),
7544 N2: N->getOperand(Num: 3)));
7545 }
7546 case Intrinsic::loongarch_lsx_vmsub_b:
7547 case Intrinsic::loongarch_lsx_vmsub_h:
7548 case Intrinsic::loongarch_lsx_vmsub_w:
7549 case Intrinsic::loongarch_lsx_vmsub_d:
7550 case Intrinsic::loongarch_lasx_xvmsub_b:
7551 case Intrinsic::loongarch_lasx_xvmsub_h:
7552 case Intrinsic::loongarch_lasx_xvmsub_w:
7553 case Intrinsic::loongarch_lasx_xvmsub_d: {
7554 EVT ResTy = N->getValueType(ResNo: 0);
7555 return DAG.getNode(Opcode: ISD::SUB, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 1),
7556 N2: DAG.getNode(Opcode: ISD::MUL, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 2),
7557 N2: N->getOperand(Num: 3)));
7558 }
7559 case Intrinsic::loongarch_lsx_vdiv_b:
7560 case Intrinsic::loongarch_lsx_vdiv_h:
7561 case Intrinsic::loongarch_lsx_vdiv_w:
7562 case Intrinsic::loongarch_lsx_vdiv_d:
7563 case Intrinsic::loongarch_lasx_xvdiv_b:
7564 case Intrinsic::loongarch_lasx_xvdiv_h:
7565 case Intrinsic::loongarch_lasx_xvdiv_w:
7566 case Intrinsic::loongarch_lasx_xvdiv_d:
7567 return DAG.getNode(Opcode: ISD::SDIV, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7568 N2: N->getOperand(Num: 2));
7569 case Intrinsic::loongarch_lsx_vdiv_bu:
7570 case Intrinsic::loongarch_lsx_vdiv_hu:
7571 case Intrinsic::loongarch_lsx_vdiv_wu:
7572 case Intrinsic::loongarch_lsx_vdiv_du:
7573 case Intrinsic::loongarch_lasx_xvdiv_bu:
7574 case Intrinsic::loongarch_lasx_xvdiv_hu:
7575 case Intrinsic::loongarch_lasx_xvdiv_wu:
7576 case Intrinsic::loongarch_lasx_xvdiv_du:
7577 return DAG.getNode(Opcode: ISD::UDIV, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7578 N2: N->getOperand(Num: 2));
7579 case Intrinsic::loongarch_lsx_vmod_b:
7580 case Intrinsic::loongarch_lsx_vmod_h:
7581 case Intrinsic::loongarch_lsx_vmod_w:
7582 case Intrinsic::loongarch_lsx_vmod_d:
7583 case Intrinsic::loongarch_lasx_xvmod_b:
7584 case Intrinsic::loongarch_lasx_xvmod_h:
7585 case Intrinsic::loongarch_lasx_xvmod_w:
7586 case Intrinsic::loongarch_lasx_xvmod_d:
7587 return DAG.getNode(Opcode: ISD::SREM, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7588 N2: N->getOperand(Num: 2));
7589 case Intrinsic::loongarch_lsx_vmod_bu:
7590 case Intrinsic::loongarch_lsx_vmod_hu:
7591 case Intrinsic::loongarch_lsx_vmod_wu:
7592 case Intrinsic::loongarch_lsx_vmod_du:
7593 case Intrinsic::loongarch_lasx_xvmod_bu:
7594 case Intrinsic::loongarch_lasx_xvmod_hu:
7595 case Intrinsic::loongarch_lasx_xvmod_wu:
7596 case Intrinsic::loongarch_lasx_xvmod_du:
7597 return DAG.getNode(Opcode: ISD::UREM, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7598 N2: N->getOperand(Num: 2));
7599 case Intrinsic::loongarch_lsx_vand_v:
7600 case Intrinsic::loongarch_lasx_xvand_v:
7601 return DAG.getNode(Opcode: ISD::AND, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7602 N2: N->getOperand(Num: 2));
7603 case Intrinsic::loongarch_lsx_vor_v:
7604 case Intrinsic::loongarch_lasx_xvor_v:
7605 return DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7606 N2: N->getOperand(Num: 2));
7607 case Intrinsic::loongarch_lsx_vxor_v:
7608 case Intrinsic::loongarch_lasx_xvxor_v:
7609 return DAG.getNode(Opcode: ISD::XOR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7610 N2: N->getOperand(Num: 2));
7611 case Intrinsic::loongarch_lsx_vnor_v:
7612 case Intrinsic::loongarch_lasx_xvnor_v: {
7613 SDValue Res = DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7614 N2: N->getOperand(Num: 2));
7615 return DAG.getNOT(DL, Val: Res, VT: Res->getValueType(ResNo: 0));
7616 }
7617 case Intrinsic::loongarch_lsx_vandi_b:
7618 case Intrinsic::loongarch_lasx_xvandi_b:
7619 return DAG.getNode(Opcode: ISD::AND, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7620 N2: lowerVectorSplatImm<8>(Node: N, ImmOp: 2, DAG));
7621 case Intrinsic::loongarch_lsx_vori_b:
7622 case Intrinsic::loongarch_lasx_xvori_b:
7623 return DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7624 N2: lowerVectorSplatImm<8>(Node: N, ImmOp: 2, DAG));
7625 case Intrinsic::loongarch_lsx_vxori_b:
7626 case Intrinsic::loongarch_lasx_xvxori_b:
7627 return DAG.getNode(Opcode: ISD::XOR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7628 N2: lowerVectorSplatImm<8>(Node: N, ImmOp: 2, DAG));
7629 case Intrinsic::loongarch_lsx_vsll_b:
7630 case Intrinsic::loongarch_lsx_vsll_h:
7631 case Intrinsic::loongarch_lsx_vsll_w:
7632 case Intrinsic::loongarch_lsx_vsll_d:
7633 case Intrinsic::loongarch_lasx_xvsll_b:
7634 case Intrinsic::loongarch_lasx_xvsll_h:
7635 case Intrinsic::loongarch_lasx_xvsll_w:
7636 case Intrinsic::loongarch_lasx_xvsll_d:
7637 return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7638 N2: truncateVecElts(Node: N, DAG));
7639 case Intrinsic::loongarch_lsx_vslli_b:
7640 case Intrinsic::loongarch_lasx_xvslli_b:
7641 return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7642 N2: lowerVectorSplatImm<3>(Node: N, ImmOp: 2, DAG));
7643 case Intrinsic::loongarch_lsx_vslli_h:
7644 case Intrinsic::loongarch_lasx_xvslli_h:
7645 return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7646 N2: lowerVectorSplatImm<4>(Node: N, ImmOp: 2, DAG));
7647 case Intrinsic::loongarch_lsx_vslli_w:
7648 case Intrinsic::loongarch_lasx_xvslli_w:
7649 return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7650 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
7651 case Intrinsic::loongarch_lsx_vslli_d:
7652 case Intrinsic::loongarch_lasx_xvslli_d:
7653 return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7654 N2: lowerVectorSplatImm<6>(Node: N, ImmOp: 2, DAG));
7655 case Intrinsic::loongarch_lsx_vsrl_b:
7656 case Intrinsic::loongarch_lsx_vsrl_h:
7657 case Intrinsic::loongarch_lsx_vsrl_w:
7658 case Intrinsic::loongarch_lsx_vsrl_d:
7659 case Intrinsic::loongarch_lasx_xvsrl_b:
7660 case Intrinsic::loongarch_lasx_xvsrl_h:
7661 case Intrinsic::loongarch_lasx_xvsrl_w:
7662 case Intrinsic::loongarch_lasx_xvsrl_d:
7663 return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7664 N2: truncateVecElts(Node: N, DAG));
7665 case Intrinsic::loongarch_lsx_vsrli_b:
7666 case Intrinsic::loongarch_lasx_xvsrli_b:
7667 return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7668 N2: lowerVectorSplatImm<3>(Node: N, ImmOp: 2, DAG));
7669 case Intrinsic::loongarch_lsx_vsrli_h:
7670 case Intrinsic::loongarch_lasx_xvsrli_h:
7671 return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7672 N2: lowerVectorSplatImm<4>(Node: N, ImmOp: 2, DAG));
7673 case Intrinsic::loongarch_lsx_vsrli_w:
7674 case Intrinsic::loongarch_lasx_xvsrli_w:
7675 return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7676 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
7677 case Intrinsic::loongarch_lsx_vsrli_d:
7678 case Intrinsic::loongarch_lasx_xvsrli_d:
7679 return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7680 N2: lowerVectorSplatImm<6>(Node: N, ImmOp: 2, DAG));
7681 case Intrinsic::loongarch_lsx_vsra_b:
7682 case Intrinsic::loongarch_lsx_vsra_h:
7683 case Intrinsic::loongarch_lsx_vsra_w:
7684 case Intrinsic::loongarch_lsx_vsra_d:
7685 case Intrinsic::loongarch_lasx_xvsra_b:
7686 case Intrinsic::loongarch_lasx_xvsra_h:
7687 case Intrinsic::loongarch_lasx_xvsra_w:
7688 case Intrinsic::loongarch_lasx_xvsra_d:
7689 return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7690 N2: truncateVecElts(Node: N, DAG));
7691 case Intrinsic::loongarch_lsx_vsrai_b:
7692 case Intrinsic::loongarch_lasx_xvsrai_b:
7693 return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7694 N2: lowerVectorSplatImm<3>(Node: N, ImmOp: 2, DAG));
7695 case Intrinsic::loongarch_lsx_vsrai_h:
7696 case Intrinsic::loongarch_lasx_xvsrai_h:
7697 return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7698 N2: lowerVectorSplatImm<4>(Node: N, ImmOp: 2, DAG));
7699 case Intrinsic::loongarch_lsx_vsrai_w:
7700 case Intrinsic::loongarch_lasx_xvsrai_w:
7701 return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7702 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
7703 case Intrinsic::loongarch_lsx_vsrai_d:
7704 case Intrinsic::loongarch_lasx_xvsrai_d:
7705 return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7706 N2: lowerVectorSplatImm<6>(Node: N, ImmOp: 2, DAG));
7707 case Intrinsic::loongarch_lsx_vclz_b:
7708 case Intrinsic::loongarch_lsx_vclz_h:
7709 case Intrinsic::loongarch_lsx_vclz_w:
7710 case Intrinsic::loongarch_lsx_vclz_d:
7711 case Intrinsic::loongarch_lasx_xvclz_b:
7712 case Intrinsic::loongarch_lasx_xvclz_h:
7713 case Intrinsic::loongarch_lasx_xvclz_w:
7714 case Intrinsic::loongarch_lasx_xvclz_d:
7715 return DAG.getNode(Opcode: ISD::CTLZ, DL, VT: N->getValueType(ResNo: 0), Operand: N->getOperand(Num: 1));
7716 case Intrinsic::loongarch_lsx_vpcnt_b:
7717 case Intrinsic::loongarch_lsx_vpcnt_h:
7718 case Intrinsic::loongarch_lsx_vpcnt_w:
7719 case Intrinsic::loongarch_lsx_vpcnt_d:
7720 case Intrinsic::loongarch_lasx_xvpcnt_b:
7721 case Intrinsic::loongarch_lasx_xvpcnt_h:
7722 case Intrinsic::loongarch_lasx_xvpcnt_w:
7723 case Intrinsic::loongarch_lasx_xvpcnt_d:
7724 return DAG.getNode(Opcode: ISD::CTPOP, DL, VT: N->getValueType(ResNo: 0), Operand: N->getOperand(Num: 1));
7725 case Intrinsic::loongarch_lsx_vbitclr_b:
7726 case Intrinsic::loongarch_lsx_vbitclr_h:
7727 case Intrinsic::loongarch_lsx_vbitclr_w:
7728 case Intrinsic::loongarch_lsx_vbitclr_d:
7729 case Intrinsic::loongarch_lasx_xvbitclr_b:
7730 case Intrinsic::loongarch_lasx_xvbitclr_h:
7731 case Intrinsic::loongarch_lasx_xvbitclr_w:
7732 case Intrinsic::loongarch_lasx_xvbitclr_d:
7733 return lowerVectorBitClear(Node: N, DAG);
7734 case Intrinsic::loongarch_lsx_vbitclri_b:
7735 case Intrinsic::loongarch_lasx_xvbitclri_b:
7736 return lowerVectorBitClearImm<3>(Node: N, DAG);
7737 case Intrinsic::loongarch_lsx_vbitclri_h:
7738 case Intrinsic::loongarch_lasx_xvbitclri_h:
7739 return lowerVectorBitClearImm<4>(Node: N, DAG);
7740 case Intrinsic::loongarch_lsx_vbitclri_w:
7741 case Intrinsic::loongarch_lasx_xvbitclri_w:
7742 return lowerVectorBitClearImm<5>(Node: N, DAG);
7743 case Intrinsic::loongarch_lsx_vbitclri_d:
7744 case Intrinsic::loongarch_lasx_xvbitclri_d:
7745 return lowerVectorBitClearImm<6>(Node: N, DAG);
7746 case Intrinsic::loongarch_lsx_vbitset_b:
7747 case Intrinsic::loongarch_lsx_vbitset_h:
7748 case Intrinsic::loongarch_lsx_vbitset_w:
7749 case Intrinsic::loongarch_lsx_vbitset_d:
7750 case Intrinsic::loongarch_lasx_xvbitset_b:
7751 case Intrinsic::loongarch_lasx_xvbitset_h:
7752 case Intrinsic::loongarch_lasx_xvbitset_w:
7753 case Intrinsic::loongarch_lasx_xvbitset_d: {
7754 EVT VecTy = N->getValueType(ResNo: 0);
7755 SDValue One = DAG.getConstant(Val: 1, DL, VT: VecTy);
7756 return DAG.getNode(
7757 Opcode: ISD::OR, DL, VT: VecTy, N1: N->getOperand(Num: 1),
7758 N2: DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: One, N2: truncateVecElts(Node: N, DAG)));
7759 }
7760 case Intrinsic::loongarch_lsx_vbitseti_b:
7761 case Intrinsic::loongarch_lasx_xvbitseti_b:
7762 return lowerVectorBitSetImm<3>(Node: N, DAG);
7763 case Intrinsic::loongarch_lsx_vbitseti_h:
7764 case Intrinsic::loongarch_lasx_xvbitseti_h:
7765 return lowerVectorBitSetImm<4>(Node: N, DAG);
7766 case Intrinsic::loongarch_lsx_vbitseti_w:
7767 case Intrinsic::loongarch_lasx_xvbitseti_w:
7768 return lowerVectorBitSetImm<5>(Node: N, DAG);
7769 case Intrinsic::loongarch_lsx_vbitseti_d:
7770 case Intrinsic::loongarch_lasx_xvbitseti_d:
7771 return lowerVectorBitSetImm<6>(Node: N, DAG);
7772 case Intrinsic::loongarch_lsx_vbitrev_b:
7773 case Intrinsic::loongarch_lsx_vbitrev_h:
7774 case Intrinsic::loongarch_lsx_vbitrev_w:
7775 case Intrinsic::loongarch_lsx_vbitrev_d:
7776 case Intrinsic::loongarch_lasx_xvbitrev_b:
7777 case Intrinsic::loongarch_lasx_xvbitrev_h:
7778 case Intrinsic::loongarch_lasx_xvbitrev_w:
7779 case Intrinsic::loongarch_lasx_xvbitrev_d: {
7780 EVT VecTy = N->getValueType(ResNo: 0);
7781 SDValue One = DAG.getConstant(Val: 1, DL, VT: VecTy);
7782 return DAG.getNode(
7783 Opcode: ISD::XOR, DL, VT: VecTy, N1: N->getOperand(Num: 1),
7784 N2: DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: One, N2: truncateVecElts(Node: N, DAG)));
7785 }
7786 case Intrinsic::loongarch_lsx_vbitrevi_b:
7787 case Intrinsic::loongarch_lasx_xvbitrevi_b:
7788 return lowerVectorBitRevImm<3>(Node: N, DAG);
7789 case Intrinsic::loongarch_lsx_vbitrevi_h:
7790 case Intrinsic::loongarch_lasx_xvbitrevi_h:
7791 return lowerVectorBitRevImm<4>(Node: N, DAG);
7792 case Intrinsic::loongarch_lsx_vbitrevi_w:
7793 case Intrinsic::loongarch_lasx_xvbitrevi_w:
7794 return lowerVectorBitRevImm<5>(Node: N, DAG);
7795 case Intrinsic::loongarch_lsx_vbitrevi_d:
7796 case Intrinsic::loongarch_lasx_xvbitrevi_d:
7797 return lowerVectorBitRevImm<6>(Node: N, DAG);
7798 case Intrinsic::loongarch_lsx_vfadd_s:
7799 case Intrinsic::loongarch_lsx_vfadd_d:
7800 case Intrinsic::loongarch_lasx_xvfadd_s:
7801 case Intrinsic::loongarch_lasx_xvfadd_d:
7802 return DAG.getNode(Opcode: ISD::FADD, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7803 N2: N->getOperand(Num: 2));
7804 case Intrinsic::loongarch_lsx_vfsub_s:
7805 case Intrinsic::loongarch_lsx_vfsub_d:
7806 case Intrinsic::loongarch_lasx_xvfsub_s:
7807 case Intrinsic::loongarch_lasx_xvfsub_d:
7808 return DAG.getNode(Opcode: ISD::FSUB, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7809 N2: N->getOperand(Num: 2));
7810 case Intrinsic::loongarch_lsx_vfmul_s:
7811 case Intrinsic::loongarch_lsx_vfmul_d:
7812 case Intrinsic::loongarch_lasx_xvfmul_s:
7813 case Intrinsic::loongarch_lasx_xvfmul_d:
7814 return DAG.getNode(Opcode: ISD::FMUL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7815 N2: N->getOperand(Num: 2));
7816 case Intrinsic::loongarch_lsx_vfdiv_s:
7817 case Intrinsic::loongarch_lsx_vfdiv_d:
7818 case Intrinsic::loongarch_lasx_xvfdiv_s:
7819 case Intrinsic::loongarch_lasx_xvfdiv_d:
7820 return DAG.getNode(Opcode: ISD::FDIV, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7821 N2: N->getOperand(Num: 2));
7822 case Intrinsic::loongarch_lsx_vfmadd_s:
7823 case Intrinsic::loongarch_lsx_vfmadd_d:
7824 case Intrinsic::loongarch_lasx_xvfmadd_s:
7825 case Intrinsic::loongarch_lasx_xvfmadd_d:
7826 return DAG.getNode(Opcode: ISD::FMA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
7827 N2: N->getOperand(Num: 2), N3: N->getOperand(Num: 3));
7828 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
7829 return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
7830 N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2),
7831 N3: legalizeIntrinsicImmArg<4>(Node: N, ImmOp: 3, DAG, Subtarget));
7832 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
7833 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
7834 return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
7835 N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2),
7836 N3: legalizeIntrinsicImmArg<3>(Node: N, ImmOp: 3, DAG, Subtarget));
7837 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
7838 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
7839 return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
7840 N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2),
7841 N3: legalizeIntrinsicImmArg<2>(Node: N, ImmOp: 3, DAG, Subtarget));
7842 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
7843 return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
7844 N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2),
7845 N3: legalizeIntrinsicImmArg<1>(Node: N, ImmOp: 3, DAG, Subtarget));
7846 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
7847 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
7848 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
7849 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
7850 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
7851 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
7852 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
7853 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
7854 return DAG.getNode(Opcode: LoongArchISD::VREPLGR2VR, DL, VT: N->getValueType(ResNo: 0),
7855 Operand: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getGRLenVT(),
7856 Operand: N->getOperand(Num: 1)));
7857 case Intrinsic::loongarch_lsx_vreplve_b:
7858 case Intrinsic::loongarch_lsx_vreplve_h:
7859 case Intrinsic::loongarch_lsx_vreplve_w:
7860 case Intrinsic::loongarch_lsx_vreplve_d:
7861 case Intrinsic::loongarch_lasx_xvreplve_b:
7862 case Intrinsic::loongarch_lasx_xvreplve_h:
7863 case Intrinsic::loongarch_lasx_xvreplve_w:
7864 case Intrinsic::loongarch_lasx_xvreplve_d:
7865 return DAG.getNode(Opcode: LoongArchISD::VREPLVE, DL, VT: N->getValueType(ResNo: 0),
7866 N1: N->getOperand(Num: 1),
7867 N2: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getGRLenVT(),
7868 Operand: N->getOperand(Num: 2)));
7869 case Intrinsic::loongarch_lsx_vpickve2gr_b:
7870 if (!Subtarget.is64Bit())
7871 return lowerVectorPickVE2GR<4>(N, DAG, ResOp: LoongArchISD::VPICK_SEXT_ELT);
7872 break;
7873 case Intrinsic::loongarch_lsx_vpickve2gr_h:
7874 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
7875 if (!Subtarget.is64Bit())
7876 return lowerVectorPickVE2GR<3>(N, DAG, ResOp: LoongArchISD::VPICK_SEXT_ELT);
7877 break;
7878 case Intrinsic::loongarch_lsx_vpickve2gr_w:
7879 if (!Subtarget.is64Bit())
7880 return lowerVectorPickVE2GR<2>(N, DAG, ResOp: LoongArchISD::VPICK_SEXT_ELT);
7881 break;
7882 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
7883 if (!Subtarget.is64Bit())
7884 return lowerVectorPickVE2GR<4>(N, DAG, ResOp: LoongArchISD::VPICK_ZEXT_ELT);
7885 break;
7886 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
7887 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
7888 if (!Subtarget.is64Bit())
7889 return lowerVectorPickVE2GR<3>(N, DAG, ResOp: LoongArchISD::VPICK_ZEXT_ELT);
7890 break;
7891 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
7892 if (!Subtarget.is64Bit())
7893 return lowerVectorPickVE2GR<2>(N, DAG, ResOp: LoongArchISD::VPICK_ZEXT_ELT);
7894 break;
7895 case Intrinsic::loongarch_lsx_bz_b:
7896 case Intrinsic::loongarch_lsx_bz_h:
7897 case Intrinsic::loongarch_lsx_bz_w:
7898 case Intrinsic::loongarch_lsx_bz_d:
7899 case Intrinsic::loongarch_lasx_xbz_b:
7900 case Intrinsic::loongarch_lasx_xbz_h:
7901 case Intrinsic::loongarch_lasx_xbz_w:
7902 case Intrinsic::loongarch_lasx_xbz_d:
7903 if (!Subtarget.is64Bit())
7904 return DAG.getNode(Opcode: LoongArchISD::VALL_ZERO, DL, VT: N->getValueType(ResNo: 0),
7905 Operand: N->getOperand(Num: 1));
7906 break;
7907 case Intrinsic::loongarch_lsx_bz_v:
7908 case Intrinsic::loongarch_lasx_xbz_v:
7909 if (!Subtarget.is64Bit())
7910 return DAG.getNode(Opcode: LoongArchISD::VANY_ZERO, DL, VT: N->getValueType(ResNo: 0),
7911 Operand: N->getOperand(Num: 1));
7912 break;
7913 case Intrinsic::loongarch_lsx_bnz_b:
7914 case Intrinsic::loongarch_lsx_bnz_h:
7915 case Intrinsic::loongarch_lsx_bnz_w:
7916 case Intrinsic::loongarch_lsx_bnz_d:
7917 case Intrinsic::loongarch_lasx_xbnz_b:
7918 case Intrinsic::loongarch_lasx_xbnz_h:
7919 case Intrinsic::loongarch_lasx_xbnz_w:
7920 case Intrinsic::loongarch_lasx_xbnz_d:
7921 if (!Subtarget.is64Bit())
7922 return DAG.getNode(Opcode: LoongArchISD::VALL_NONZERO, DL, VT: N->getValueType(ResNo: 0),
7923 Operand: N->getOperand(Num: 1));
7924 break;
7925 case Intrinsic::loongarch_lsx_bnz_v:
7926 case Intrinsic::loongarch_lasx_xbnz_v:
7927 if (!Subtarget.is64Bit())
7928 return DAG.getNode(Opcode: LoongArchISD::VANY_NONZERO, DL, VT: N->getValueType(ResNo: 0),
7929 Operand: N->getOperand(Num: 1));
7930 break;
7931 case Intrinsic::loongarch_lasx_concat_128_s:
7932 case Intrinsic::loongarch_lasx_concat_128_d:
7933 case Intrinsic::loongarch_lasx_concat_128:
7934 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: N->getValueType(ResNo: 0),
7935 N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2));
7936 }
7937 return SDValue();
7938}
7939
7940static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG,
7941 TargetLowering::DAGCombinerInfo &DCI,
7942 const LoongArchSubtarget &Subtarget) {
7943 // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 the the
7944 // conversion is unnecessary and can be replaced with the
7945 // MOVFR2GR_S_LA64 operand.
7946 SDValue Op0 = N->getOperand(Num: 0);
7947 if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64)
7948 return Op0.getOperand(i: 0);
7949 return SDValue();
7950}
7951
7952static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG,
7953 TargetLowering::DAGCombinerInfo &DCI,
7954 const LoongArchSubtarget &Subtarget) {
7955 // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
7956 // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
7957 // operand.
7958 SDValue Op0 = N->getOperand(Num: 0);
7959 if (Op0->getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) {
7960 assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) &&
7961 "Unexpected value type!");
7962 return Op0.getOperand(i: 0);
7963 }
7964 return SDValue();
7965}
7966
7967static SDValue
7968performDemandedBitsCombine(SDNode *N, SelectionDAG &DAG,
7969 TargetLowering::DAGCombinerInfo &DCI) {
7970 MVT VT = N->getSimpleValueType(ResNo: 0);
7971 unsigned NumBits = VT.getScalarSizeInBits();
7972
7973 // Simplify the inputs.
7974 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7975 APInt DemandedMask(APInt::getAllOnes(numBits: NumBits));
7976 if (TLI.SimplifyDemandedBits(Op: SDValue(N, 0), DemandedBits: DemandedMask, DCI))
7977 return SDValue(N, 0);
7978
7979 return SDValue();
7980}
7981
7982static SDValue
7983performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG,
7984 TargetLowering::DAGCombinerInfo &DCI,
7985 const LoongArchSubtarget &Subtarget) {
7986 SDValue Op0 = N->getOperand(Num: 0);
7987 SDLoc DL(N);
7988
7989 // If the input to SplitPairF64 is just BuildPairF64 then the operation is
7990 // redundant. Instead, use BuildPairF64's operands directly.
7991 if (Op0->getOpcode() == LoongArchISD::BUILD_PAIR_F64)
7992 return DCI.CombineTo(N, Res0: Op0.getOperand(i: 0), Res1: Op0.getOperand(i: 1));
7993
7994 if (Op0->isUndef()) {
7995 SDValue Lo = DAG.getUNDEF(VT: MVT::i32);
7996 SDValue Hi = DAG.getUNDEF(VT: MVT::i32);
7997 return DCI.CombineTo(N, Res0: Lo, Res1: Hi);
7998 }
7999
8000 // It's cheaper to materialise two 32-bit integers than to load a double
8001 // from the constant pool and transfer it to integer registers through the
8002 // stack.
8003 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val&: Op0)) {
8004 APInt V = C->getValueAPF().bitcastToAPInt();
8005 SDValue Lo = DAG.getConstant(Val: V.trunc(width: 32), DL, VT: MVT::i32);
8006 SDValue Hi = DAG.getConstant(Val: V.lshr(shiftAmt: 32).trunc(width: 32), DL, VT: MVT::i32);
8007 return DCI.CombineTo(N, Res0: Lo, Res1: Hi);
8008 }
8009
8010 return SDValue();
8011}
8012
8013/// Do target-specific dag combines on LoongArchISD::VANDN nodes.
8014static SDValue performVANDNCombine(SDNode *N, SelectionDAG &DAG,
8015 TargetLowering::DAGCombinerInfo &DCI,
8016 const LoongArchSubtarget &Subtarget) {
8017 SDValue N0 = N->getOperand(Num: 0);
8018 SDValue N1 = N->getOperand(Num: 1);
8019 MVT VT = N->getSimpleValueType(ResNo: 0);
8020 SDLoc DL(N);
8021
8022 // VANDN(undef, x) -> 0
8023 // VANDN(x, undef) -> 0
8024 if (N0.isUndef() || N1.isUndef())
8025 return DAG.getConstant(Val: 0, DL, VT);
8026
8027 // VANDN(0, x) -> x
8028 if (ISD::isBuildVectorAllZeros(N: N0.getNode()))
8029 return N1;
8030
8031 // VANDN(x, 0) -> 0
8032 if (ISD::isBuildVectorAllZeros(N: N1.getNode()))
8033 return DAG.getConstant(Val: 0, DL, VT);
8034
8035 // VANDN(x, -1) -> NOT(x) -> XOR(x, -1)
8036 if (ISD::isBuildVectorAllOnes(N: N1.getNode()))
8037 return DAG.getNOT(DL, Val: N0, VT);
8038
8039 // Turn VANDN back to AND if input is inverted.
8040 if (SDValue Not = isNOT(V: N0, DAG))
8041 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: DAG.getBitcast(VT, V: Not), N2: N1);
8042
8043 // Folds for better commutativity:
8044 if (N1->hasOneUse()) {
8045 // VANDN(x,NOT(y)) -> AND(NOT(x),NOT(y)) -> NOT(OR(X,Y)).
8046 if (SDValue Not = isNOT(V: N1, DAG))
8047 return DAG.getNOT(
8048 DL, Val: DAG.getNode(Opcode: ISD::OR, DL, VT, N1: N0, N2: DAG.getBitcast(VT, V: Not)), VT);
8049
8050 // VANDN(x, SplatVector(Imm)) -> AND(NOT(x), NOT(SplatVector(~Imm)))
8051 // -> NOT(OR(x, SplatVector(-Imm))
8052 // Combination is performed only when VT is v16i8/v32i8, using `vnori.b` to
8053 // gain benefits.
8054 if (!DCI.isBeforeLegalizeOps() && (VT == MVT::v16i8 || VT == MVT::v32i8) &&
8055 N1.getOpcode() == ISD::BUILD_VECTOR) {
8056 if (SDValue SplatValue =
8057 cast<BuildVectorSDNode>(Val: N1.getNode())->getSplatValue()) {
8058 if (!N1->isOnlyUserOf(N: SplatValue.getNode()))
8059 return SDValue();
8060
8061 if (auto *C = dyn_cast<ConstantSDNode>(Val&: SplatValue)) {
8062 uint8_t NCVal = static_cast<uint8_t>(~(C->getSExtValue()));
8063 SDValue Not =
8064 DAG.getSplat(VT, DL, Op: DAG.getTargetConstant(Val: NCVal, DL, VT: MVT::i8));
8065 return DAG.getNOT(
8066 DL, Val: DAG.getNode(Opcode: ISD::OR, DL, VT, N1: N0, N2: DAG.getBitcast(VT, V: Not)),
8067 VT);
8068 }
8069 }
8070 }
8071 }
8072
8073 return SDValue();
8074}
8075
8076static SDValue ExtendSrcToDst(SDNode *N, SelectionDAG &DAG, unsigned ExtendOp) {
8077 SDLoc DL(N);
8078 EVT VT = N->getValueType(ResNo: 0);
8079 SDValue Src = N->getOperand(Num: 0);
8080 EVT SrcVT = Src.getValueType();
8081
8082 unsigned DstElts = VT.getVectorNumElements();
8083 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
8084 unsigned DstEltBits = VT.getScalarSizeInBits();
8085
8086 if (SrcEltBits >= DstEltBits)
8087 return SDValue();
8088
8089 MVT WidenEltVT = MVT::getIntegerVT(BitWidth: DstEltBits);
8090 MVT WidenSrcVT = MVT::getVectorVT(VT: WidenEltVT, NumElements: DstElts);
8091
8092 SDValue Extend = DAG.getNode(Opcode: ExtendOp, DL, VT: WidenSrcVT, Operand: Src);
8093 return DAG.getNode(Opcode: N->getOpcode(), DL, VT, Operand: Extend);
8094}
8095
8096static SDValue performSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
8097 TargetLowering::DAGCombinerInfo &DCI,
8098 const LoongArchSubtarget &Subtarget) {
8099 SDLoc DL(N);
8100 EVT VT = N->getValueType(ResNo: 0);
8101
8102 // Sign-extend src to avoid scalarization.
8103 if (VT.isVector())
8104 return ExtendSrcToDst(N, DAG, ExtendOp: ISD::SIGN_EXTEND);
8105
8106 if (VT != MVT::f32 && VT != MVT::f64)
8107 return SDValue();
8108 if (VT == MVT::f32 && !Subtarget.hasBasicF())
8109 return SDValue();
8110 if (VT == MVT::f64 && !Subtarget.hasBasicD())
8111 return SDValue();
8112
8113 // Only optimize when the source and destination types have the same width.
8114 if (VT.getSizeInBits() != N->getOperand(Num: 0).getValueSizeInBits())
8115 return SDValue();
8116
8117 SDValue Src = N->getOperand(Num: 0);
8118 // If the result of an integer load is only used by an integer-to-float
8119 // conversion, use a fp load instead. This eliminates an integer-to-float-move
8120 // (movgr2fr) instruction.
8121 if (ISD::isNormalLoad(N: Src.getNode()) && Src.hasOneUse() &&
8122 // Do not change the width of a volatile load. This condition check is
8123 // inspired by AArch64.
8124 !cast<LoadSDNode>(Val&: Src)->isVolatile()) {
8125 LoadSDNode *LN0 = cast<LoadSDNode>(Val&: Src);
8126 SDValue Load = DAG.getLoad(VT, dl: DL, Chain: LN0->getChain(), Ptr: LN0->getBasePtr(),
8127 PtrInfo: LN0->getPointerInfo(), Alignment: LN0->getAlign(),
8128 MMOFlags: LN0->getMemOperand()->getFlags());
8129
8130 // Make sure successors of the original load stay after it by updating them
8131 // to use the new Chain.
8132 DAG.ReplaceAllUsesOfValueWith(From: SDValue(LN0, 1), To: Load.getValue(R: 1));
8133 return DAG.getNode(Opcode: LoongArchISD::SITOF, DL: SDLoc(N), VT, Operand: Load);
8134 }
8135
8136 return SDValue();
8137}
8138
8139static SDValue performUINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
8140 TargetLowering::DAGCombinerInfo &DCI,
8141 const LoongArchSubtarget &Subtarget) {
8142 SDLoc DL(N);
8143 EVT VT = N->getValueType(ResNo: 0);
8144
8145 // Zero-extend src to avoid scalarization.
8146 if (VT.isVector())
8147 return ExtendSrcToDst(N, DAG, ExtendOp: ISD::ZERO_EXTEND);
8148
8149 return SDValue();
8150}
8151
8152// Using [X]VFTINTRZ_W_D for double to signed 32-bit integer conversion.
8153// For example:
8154// v4i32 = fp_to_sint (concat_vectors v2f64, v2f64)
8155// Can be combined into:
8156// v4i32 = VFTINTRZ_W_D v2f64. v2f64
8157static SDValue performFP_TO_INTCombine(SDNode *N, SelectionDAG &DAG,
8158 TargetLowering::DAGCombinerInfo &DCI,
8159 const LoongArchSubtarget &Subtarget) {
8160 if (!Subtarget.hasExtLSX())
8161 return SDValue();
8162
8163 SDLoc DL(N);
8164 EVT DstVT = N->getValueType(ResNo: 0);
8165 SDValue Src = N->getOperand(Num: 0);
8166 EVT SrcVT = Src.getValueType();
8167 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
8168
8169 if (!DstVT.isVector() || !DstVT.isSimple() || !SrcVT.isSimple())
8170 return SDValue();
8171
8172 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
8173 unsigned SrcBits = SrcVT.getSizeInBits();
8174 unsigned DstEltBits = DstVT.getScalarSizeInBits();
8175 unsigned NumElts = DstVT.getVectorNumElements();
8176 unsigned BlockBits = Subtarget.hasExtLASX() ? 256 : 128;
8177
8178 if (!isPowerOf2_32(Value: NumElts) || !isPowerOf2_32(Value: DstEltBits))
8179 return SDValue();
8180
8181 if (SrcBits % BlockBits != 0 && SrcBits != 128)
8182 return SDValue();
8183
8184 if (DstEltBits < 32) {
8185 MVT PromoteVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: 32), NumElements: NumElts);
8186 SDValue Conv = DAG.getNode(Opcode: N->getOpcode(), DL, VT: PromoteVT, Operand: Src);
8187 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: DstVT, Operand: Conv);
8188 }
8189
8190 if (SrcEltBits != 64 || DstEltBits != 32)
8191 return SDValue();
8192
8193 if (!IsSigned) {
8194 // LASX already has pattern for double convert to uint32.
8195 if (Subtarget.hasExtLASX())
8196 return SDValue();
8197 MVT TmpVT = MVT::getVectorVT(VT: MVT::i64, NumElements: NumElts);
8198 SDValue Tmp = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL, VT: TmpVT, Operand: Src);
8199 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: DstVT, Operand: Tmp);
8200 }
8201
8202 SmallVector<SDValue, 8> Blocks;
8203 unsigned BlockNumElts = BlockBits / 64;
8204 MVT BlockVT = MVT::getVectorVT(VT: MVT::f64, NumElements: BlockNumElts);
8205 if (Src.getOpcode() == ISD::CONCAT_VECTORS &&
8206 Src.getOperand(i: 0).getValueType() == BlockVT) {
8207 for (unsigned i = 0; i < Src.getNumOperands(); i++)
8208 Blocks.push_back(Elt: Src.getOperand(i));
8209 } else if (SrcBits > BlockBits) {
8210 // Wider than one register: extract each BlockBits-wide sub-vector.
8211 for (unsigned i = 0; i < SrcBits / BlockBits; i++)
8212 Blocks.push_back(
8213 Elt: DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: BlockVT, N1: Src,
8214 N2: DAG.getVectorIdxConstant(Val: i * BlockNumElts, DL)));
8215 } else {
8216 BlockBits = SrcBits;
8217 Blocks.push_back(Elt: Src);
8218 }
8219
8220 MVT NativeVT = BlockBits == 256 ? MVT::v8i32 : MVT::v4i32;
8221 SmallVector<SDValue, 4> Parts;
8222 for (unsigned i = 0; i < Blocks.size(); i += 2) {
8223 SDValue Lo = Blocks[i];
8224 SDValue Hi = Blocks.size() > 1 ? Blocks[i + 1] : Lo;
8225 SDValue Res = DAG.getNode(Opcode: LoongArchISD::VFTINTRZ, DL, VT: NativeVT, N1: Hi, N2: Lo);
8226
8227 if (BlockBits == 256) {
8228 SDValue Undef = DAG.getUNDEF(VT: Res.getValueType());
8229 SmallVector<int, 8> Mask = {0, 1, 4, 5, 2, 3, 6, 7};
8230 Res = DAG.getVectorShuffle(VT: Res.getValueType(), dl: DL, N1: Res, N2: Undef, Mask);
8231 Res = DAG.getBitcast(VT: NativeVT, V: Res);
8232 }
8233
8234 Parts.push_back(Elt: Res);
8235 }
8236
8237 if (Blocks.size() == 1)
8238 return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: DstVT, N1: Parts[0],
8239 N2: DAG.getVectorIdxConstant(Val: 0, DL));
8240 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: DstVT, Ops: Parts);
8241}
8242
8243// Try to widen AND, OR and XOR nodes to VT in order to remove casts around
8244// logical operations, like in the example below.
8245// or (and (truncate x, truncate y)),
8246// (xor (truncate z, build_vector (constants)))
8247// Given a target type \p VT, we generate
8248// or (and x, y), (xor z, zext(build_vector (constants)))
8249// given x, y and z are of type \p VT. We can do so, if operands are either
8250// truncates from VT types, the second operand is a vector of constants, can
8251// be recursively promoted or is an existing extension we can extend further.
8252static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL, EVT VT,
8253 SelectionDAG &DAG,
8254 const LoongArchSubtarget &Subtarget,
8255 unsigned Depth) {
8256 // Limit recursion to avoid excessive compile times.
8257 if (Depth >= SelectionDAG::MaxRecursionDepth)
8258 return SDValue();
8259
8260 if (!ISD::isBitwiseLogicOp(Opcode: N.getOpcode()))
8261 return SDValue();
8262
8263 SDValue N0 = N.getOperand(i: 0);
8264 SDValue N1 = N.getOperand(i: 1);
8265
8266 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8267 if (!TLI.isOperationLegalOrPromote(Op: N.getOpcode(), VT))
8268 return SDValue();
8269
8270 if (SDValue NN0 =
8271 PromoteMaskArithmetic(N: N0, DL, VT, DAG, Subtarget, Depth: Depth + 1))
8272 N0 = NN0;
8273 else {
8274 // The left side has to be a 'trunc'.
8275 bool LHSTrunc = N0.getOpcode() == ISD::TRUNCATE &&
8276 N0.getOperand(i: 0).getValueType() == VT;
8277 if (LHSTrunc)
8278 N0 = N0.getOperand(i: 0);
8279 else
8280 return SDValue();
8281 }
8282
8283 if (SDValue NN1 =
8284 PromoteMaskArithmetic(N: N1, DL, VT, DAG, Subtarget, Depth: Depth + 1))
8285 N1 = NN1;
8286 else {
8287 // The right side has to be a 'trunc', a (foldable) constant or an
8288 // existing extension we can extend further.
8289 bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE &&
8290 N1.getOperand(i: 0).getValueType() == VT;
8291 if (RHSTrunc)
8292 N1 = N1.getOperand(i: 0);
8293 else if (ISD::isExtVecInRegOpcode(Opcode: N1.getOpcode()) && VT.is256BitVector() &&
8294 Subtarget.hasExtLASX() && N1.hasOneUse())
8295 N1 = DAG.getNode(Opcode: N1.getOpcode(), DL, VT, Operand: N1.getOperand(i: 0));
8296 // On 32-bit platform, i64 is an illegal integer scalar type, and
8297 // FoldConstantArithmetic will fail for v4i64. This may be optimized in the
8298 // future.
8299 else if (SDValue Cst =
8300 DAG.FoldConstantArithmetic(Opcode: ISD::ZERO_EXTEND, DL, VT, Ops: {N1}))
8301 N1 = Cst;
8302 else
8303 return SDValue();
8304 }
8305
8306 return DAG.getNode(Opcode: N.getOpcode(), DL, VT, N1: N0, N2: N1);
8307}
8308
8309// On LASX the type v4i1/v8i1/v16i1 may be legalized to v4i32/v8i16/v16i8, which
8310// is LSX-sized register. In most cases we actually compare or select LASX-sized
8311// registers and mixing the two types creates horrible code. This method
8312// optimizes some of the transition sequences.
8313static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL,
8314 SelectionDAG &DAG,
8315 const LoongArchSubtarget &Subtarget) {
8316 EVT VT = N.getValueType();
8317 assert(VT.isVector() && "Expected vector type");
8318 assert((N.getOpcode() == ISD::ANY_EXTEND ||
8319 N.getOpcode() == ISD::ZERO_EXTEND ||
8320 N.getOpcode() == ISD::SIGN_EXTEND) &&
8321 "Invalid Node");
8322
8323 if (!Subtarget.hasExtLASX() || !VT.is256BitVector())
8324 return SDValue();
8325
8326 SDValue Narrow = N.getOperand(i: 0);
8327 EVT NarrowVT = Narrow.getValueType();
8328
8329 // Generate the wide operation.
8330 SDValue Op = PromoteMaskArithmetic(N: Narrow, DL, VT, DAG, Subtarget, Depth: 0);
8331 if (!Op)
8332 return SDValue();
8333 switch (N.getOpcode()) {
8334 default:
8335 llvm_unreachable("Unexpected opcode");
8336 case ISD::ANY_EXTEND:
8337 return Op;
8338 case ISD::ZERO_EXTEND:
8339 return DAG.getZeroExtendInReg(Op, DL, VT: NarrowVT);
8340 case ISD::SIGN_EXTEND:
8341 return DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT, N1: Op,
8342 N2: DAG.getValueType(NarrowVT));
8343 }
8344}
8345
8346static SDValue performEXTENDCombine(SDNode *N, SelectionDAG &DAG,
8347 TargetLowering::DAGCombinerInfo &DCI,
8348 const LoongArchSubtarget &Subtarget) {
8349 EVT VT = N->getValueType(ResNo: 0);
8350 SDLoc DL(N);
8351
8352 if (VT.isVector())
8353 if (SDValue R = PromoteMaskArithmetic(N: SDValue(N, 0), DL, DAG, Subtarget))
8354 return R;
8355
8356 return SDValue();
8357}
8358
8359static SDValue
8360performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
8361 TargetLowering::DAGCombinerInfo &DCI,
8362 const LoongArchSubtarget &Subtarget) {
8363 SDLoc DL(N);
8364 EVT VT = N->getValueType(ResNo: 0);
8365
8366 if (VT.isVector() && N->getNumOperands() == 2)
8367 if (SDValue R = combineFP_ROUND(N: SDValue(N, 0), DL, DAG, Subtarget))
8368 return R;
8369
8370 return SDValue();
8371}
8372
8373static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG,
8374 TargetLowering::DAGCombinerInfo &DCI,
8375 const LoongArchSubtarget &Subtarget) {
8376 if (DCI.isBeforeLegalizeOps())
8377 return SDValue();
8378
8379 EVT VT = N->getValueType(ResNo: 0);
8380 if (!VT.isVector())
8381 return SDValue();
8382
8383 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
8384 return SDValue();
8385
8386 EVT EltVT = VT.getVectorElementType();
8387 if (!EltVT.isInteger())
8388 return SDValue();
8389
8390 SDValue Cond = N->getOperand(Num: 0);
8391 SDValue TrueVal = N->getOperand(Num: 1);
8392 SDValue FalseVal = N->getOperand(Num: 2);
8393
8394 // match:
8395 //
8396 // vselect (setcc shift, 0, seteq),
8397 // x,
8398 // rounded_shift
8399
8400 if (Cond.getOpcode() != ISD::SETCC)
8401 return SDValue();
8402
8403 if (!ISD::isConstantSplatVectorAllZeros(N: Cond.getOperand(i: 1).getNode()))
8404 return SDValue();
8405
8406 auto *CC = cast<CondCodeSDNode>(Val: Cond.getOperand(i: 2));
8407 if (CC->get() != ISD::SETEQ)
8408 return SDValue();
8409
8410 SDValue Shift = Cond.getOperand(i: 0);
8411
8412 // True branch must be original value:
8413 //
8414 // vselect cond, x, ...
8415
8416 SDValue X = TrueVal;
8417
8418 // Now match rounded shift pattern:
8419 //
8420 // add
8421 // (and
8422 // (srl X, shift-1)
8423 // 1)
8424 // (srl/sra X, shift)
8425
8426 if (FalseVal.getOpcode() != ISD::ADD)
8427 return SDValue();
8428
8429 SDValue Add0 = FalseVal.getOperand(i: 0);
8430 SDValue Add1 = FalseVal.getOperand(i: 1);
8431 SDValue And;
8432 SDValue Shr;
8433
8434 if (Add0.getOpcode() == ISD::AND) {
8435 And = Add0;
8436 Shr = Add1;
8437 } else if (Add1.getOpcode() == ISD::AND) {
8438 And = Add1;
8439 Shr = Add0;
8440 } else {
8441 return SDValue();
8442 }
8443
8444 // match:
8445 //
8446 // srl/sra X, shift
8447
8448 if (Shr.getOpcode() != ISD::SRL && Shr.getOpcode() != ISD::SRA)
8449 return SDValue();
8450
8451 if (Shr.getOperand(i: 0) != X)
8452 return SDValue();
8453
8454 if (Shr.getOperand(i: 1) != Shift)
8455 return SDValue();
8456
8457 // match:
8458 //
8459 // and
8460 // (srl X, shift-1)
8461 // 1
8462
8463 SDValue Srl = And.getOperand(i: 0);
8464 SDValue One = And.getOperand(i: 1);
8465 APInt SplatVal;
8466
8467 if (Srl.getOpcode() != ISD::SRL)
8468 return SDValue();
8469
8470 One = peekThroughBitcasts(V: One);
8471 if (!isConstantSplatVector(N: One, SplatValue&: SplatVal, MinSizeInBits: EltVT.getSizeInBits()))
8472 return SDValue();
8473
8474 if (SplatVal != 1)
8475 return SDValue();
8476
8477 if (Srl.getOperand(i: 0) != X)
8478 return SDValue();
8479
8480 // match:
8481 //
8482 // shift-1
8483
8484 SDValue ShiftMinus1 = Srl.getOperand(i: 1);
8485
8486 if (ShiftMinus1.getOpcode() != ISD::ADD)
8487 return SDValue();
8488
8489 if (ShiftMinus1.getOperand(i: 0) != Shift)
8490 return SDValue();
8491
8492 if (!ISD::isConstantSplatVectorAllOnes(N: ShiftMinus1.getOperand(i: 1).getNode()))
8493 return SDValue();
8494
8495 // We matched a rounded right shift pattern and can lower it
8496 // to a single vector rounded shift instruction.
8497
8498 SDLoc DL(N);
8499 return DAG.getNode(Opcode: Shr.getOpcode() == ISD::SRL ? LoongArchISD::VSRLR
8500 : LoongArchISD::VSRAR,
8501 DL, VT, N1: X, N2: Shift);
8502}
8503
8504SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
8505 DAGCombinerInfo &DCI) const {
8506 SelectionDAG &DAG = DCI.DAG;
8507 switch (N->getOpcode()) {
8508 default:
8509 break;
8510 case ISD::ADD:
8511 return performADDCombine(N, DAG, DCI, Subtarget);
8512 case ISD::AND:
8513 return performANDCombine(N, DAG, DCI, Subtarget);
8514 case ISD::OR:
8515 return performORCombine(N, DAG, DCI, Subtarget);
8516 case ISD::SETCC:
8517 return performSETCCCombine(N, DAG, DCI, Subtarget);
8518 case ISD::SHL:
8519 return performSHLCombine(N, DAG, DCI, Subtarget);
8520 case ISD::SRL:
8521 return performSRLCombine(N, DAG, DCI, Subtarget);
8522 case ISD::SUB:
8523 return performSUBCombine(N, DAG, DCI, Subtarget);
8524 case ISD::BITCAST:
8525 return performBITCASTCombine(N, DAG, DCI, Subtarget);
8526 case ISD::ANY_EXTEND:
8527 case ISD::ZERO_EXTEND:
8528 case ISD::SIGN_EXTEND:
8529 return performEXTENDCombine(N, DAG, DCI, Subtarget);
8530 case ISD::SINT_TO_FP:
8531 return performSINT_TO_FPCombine(N, DAG, DCI, Subtarget);
8532 case ISD::UINT_TO_FP:
8533 return performUINT_TO_FPCombine(N, DAG, DCI, Subtarget);
8534 case ISD::FP_TO_SINT:
8535 case ISD::FP_TO_UINT:
8536 return performFP_TO_INTCombine(N, DAG, DCI, Subtarget);
8537 case LoongArchISD::BITREV_W:
8538 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
8539 case LoongArchISD::BR_CC:
8540 return performBR_CCCombine(N, DAG, DCI, Subtarget);
8541 case LoongArchISD::SELECT_CC:
8542 return performSELECT_CCCombine(N, DAG, DCI, Subtarget);
8543 case ISD::INTRINSIC_WO_CHAIN:
8544 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
8545 case LoongArchISD::MOVGR2FR_W_LA64:
8546 return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
8547 case LoongArchISD::MOVFR2GR_S_LA64:
8548 return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
8549 case LoongArchISD::CRC_W_B_W:
8550 case LoongArchISD::CRC_W_H_W:
8551 case LoongArchISD::CRCC_W_B_W:
8552 case LoongArchISD::CRCC_W_H_W:
8553 case LoongArchISD::VMSKLTZ:
8554 case LoongArchISD::XVMSKLTZ:
8555 return performDemandedBitsCombine(N, DAG, DCI);
8556 case LoongArchISD::SPLIT_PAIR_F64:
8557 return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
8558 case LoongArchISD::VANDN:
8559 return performVANDNCombine(N, DAG, DCI, Subtarget);
8560 case ISD::CONCAT_VECTORS:
8561 return performCONCAT_VECTORSCombine(N, DAG, DCI, Subtarget);
8562 case ISD::VSELECT:
8563 return performVSELECTCombine(N, DAG, DCI, Subtarget);
8564 case LoongArchISD::VPACKEV:
8565 case LoongArchISD::VPERMI:
8566 if (SDValue Result =
8567 combineFP_ROUND(N: SDValue(N, 0), DL: SDLoc(N), DAG, Subtarget))
8568 return Result;
8569 }
8570 return SDValue();
8571}
8572
8573static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
8574 MachineBasicBlock *MBB) {
8575 if (!ZeroDivCheck)
8576 return MBB;
8577
8578 // Build instructions:
8579 // MBB:
8580 // div(or mod) $dst, $dividend, $divisor
8581 // bne $divisor, $zero, SinkMBB
8582 // BreakMBB:
8583 // break 7 // BRK_DIVZERO
8584 // SinkMBB:
8585 // fallthrough
8586 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
8587 MachineFunction::iterator It = ++MBB->getIterator();
8588 MachineFunction *MF = MBB->getParent();
8589 auto BreakMBB = MF->CreateMachineBasicBlock(BB: LLVM_BB);
8590 auto SinkMBB = MF->CreateMachineBasicBlock(BB: LLVM_BB);
8591 MF->insert(MBBI: It, MBB: BreakMBB);
8592 MF->insert(MBBI: It, MBB: SinkMBB);
8593
8594 // Transfer the remainder of MBB and its successor edges to SinkMBB.
8595 SinkMBB->splice(Where: SinkMBB->end(), Other: MBB, From: std::next(x: MI.getIterator()), To: MBB->end());
8596 SinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB);
8597
8598 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
8599 DebugLoc DL = MI.getDebugLoc();
8600 MachineOperand &Divisor = MI.getOperand(i: 2);
8601 Register DivisorReg = Divisor.getReg();
8602
8603 // MBB:
8604 BuildMI(BB: MBB, MIMD: DL, MCID: TII.get(Opcode: LoongArch::BNE))
8605 .addReg(RegNo: DivisorReg, Flags: getKillRegState(B: Divisor.isKill()))
8606 .addReg(RegNo: LoongArch::R0)
8607 .addMBB(MBB: SinkMBB);
8608 MBB->addSuccessor(Succ: BreakMBB);
8609 MBB->addSuccessor(Succ: SinkMBB);
8610
8611 // BreakMBB:
8612 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
8613 // definition of BRK_DIVZERO.
8614 BuildMI(BB: BreakMBB, MIMD: DL, MCID: TII.get(Opcode: LoongArch::BREAK)).addImm(Val: 7 /*BRK_DIVZERO*/);
8615 BreakMBB->addSuccessor(Succ: SinkMBB);
8616
8617 // Clear Divisor's kill flag.
8618 Divisor.setIsKill(false);
8619
8620 return SinkMBB;
8621}
8622
8623static MachineBasicBlock *
8624emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
8625 const LoongArchSubtarget &Subtarget) {
8626 unsigned CondOpc;
8627 switch (MI.getOpcode()) {
8628 default:
8629 llvm_unreachable("Unexpected opcode");
8630 case LoongArch::PseudoVBZ:
8631 CondOpc = LoongArch::VSETEQZ_V;
8632 break;
8633 case LoongArch::PseudoVBZ_B:
8634 CondOpc = LoongArch::VSETANYEQZ_B;
8635 break;
8636 case LoongArch::PseudoVBZ_H:
8637 CondOpc = LoongArch::VSETANYEQZ_H;
8638 break;
8639 case LoongArch::PseudoVBZ_W:
8640 CondOpc = LoongArch::VSETANYEQZ_W;
8641 break;
8642 case LoongArch::PseudoVBZ_D:
8643 CondOpc = LoongArch::VSETANYEQZ_D;
8644 break;
8645 case LoongArch::PseudoVBNZ:
8646 CondOpc = LoongArch::VSETNEZ_V;
8647 break;
8648 case LoongArch::PseudoVBNZ_B:
8649 CondOpc = LoongArch::VSETALLNEZ_B;
8650 break;
8651 case LoongArch::PseudoVBNZ_H:
8652 CondOpc = LoongArch::VSETALLNEZ_H;
8653 break;
8654 case LoongArch::PseudoVBNZ_W:
8655 CondOpc = LoongArch::VSETALLNEZ_W;
8656 break;
8657 case LoongArch::PseudoVBNZ_D:
8658 CondOpc = LoongArch::VSETALLNEZ_D;
8659 break;
8660 case LoongArch::PseudoXVBZ:
8661 CondOpc = LoongArch::XVSETEQZ_V;
8662 break;
8663 case LoongArch::PseudoXVBZ_B:
8664 CondOpc = LoongArch::XVSETANYEQZ_B;
8665 break;
8666 case LoongArch::PseudoXVBZ_H:
8667 CondOpc = LoongArch::XVSETANYEQZ_H;
8668 break;
8669 case LoongArch::PseudoXVBZ_W:
8670 CondOpc = LoongArch::XVSETANYEQZ_W;
8671 break;
8672 case LoongArch::PseudoXVBZ_D:
8673 CondOpc = LoongArch::XVSETANYEQZ_D;
8674 break;
8675 case LoongArch::PseudoXVBNZ:
8676 CondOpc = LoongArch::XVSETNEZ_V;
8677 break;
8678 case LoongArch::PseudoXVBNZ_B:
8679 CondOpc = LoongArch::XVSETALLNEZ_B;
8680 break;
8681 case LoongArch::PseudoXVBNZ_H:
8682 CondOpc = LoongArch::XVSETALLNEZ_H;
8683 break;
8684 case LoongArch::PseudoXVBNZ_W:
8685 CondOpc = LoongArch::XVSETALLNEZ_W;
8686 break;
8687 case LoongArch::PseudoXVBNZ_D:
8688 CondOpc = LoongArch::XVSETALLNEZ_D;
8689 break;
8690 }
8691
8692 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8693 const BasicBlock *LLVM_BB = BB->getBasicBlock();
8694 DebugLoc DL = MI.getDebugLoc();
8695 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
8696 MachineFunction::iterator It = ++BB->getIterator();
8697
8698 MachineFunction *F = BB->getParent();
8699 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
8700 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
8701 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
8702
8703 F->insert(MBBI: It, MBB: FalseBB);
8704 F->insert(MBBI: It, MBB: TrueBB);
8705 F->insert(MBBI: It, MBB: SinkBB);
8706
8707 // Transfer the remainder of MBB and its successor edges to Sink.
8708 SinkBB->splice(Where: SinkBB->end(), Other: BB, From: std::next(x: MI.getIterator()), To: BB->end());
8709 SinkBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
8710
8711 // Insert the real instruction to BB.
8712 Register FCC = MRI.createVirtualRegister(RegClass: &LoongArch::CFRRegClass);
8713 BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: CondOpc), DestReg: FCC).addReg(RegNo: MI.getOperand(i: 1).getReg());
8714
8715 // Insert branch.
8716 BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::BCNEZ)).addReg(RegNo: FCC).addMBB(MBB: TrueBB);
8717 BB->addSuccessor(Succ: FalseBB);
8718 BB->addSuccessor(Succ: TrueBB);
8719
8720 // FalseBB.
8721 Register RD1 = MRI.createVirtualRegister(RegClass: &LoongArch::GPRRegClass);
8722 BuildMI(BB: FalseBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::ADDI_W), DestReg: RD1)
8723 .addReg(RegNo: LoongArch::R0)
8724 .addImm(Val: 0);
8725 BuildMI(BB: FalseBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::PseudoBR)).addMBB(MBB: SinkBB);
8726 FalseBB->addSuccessor(Succ: SinkBB);
8727
8728 // TrueBB.
8729 Register RD2 = MRI.createVirtualRegister(RegClass: &LoongArch::GPRRegClass);
8730 BuildMI(BB: TrueBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::ADDI_W), DestReg: RD2)
8731 .addReg(RegNo: LoongArch::R0)
8732 .addImm(Val: 1);
8733 TrueBB->addSuccessor(Succ: SinkBB);
8734
8735 // SinkBB: merge the results.
8736 BuildMI(BB&: *SinkBB, I: SinkBB->begin(), MIMD: DL, MCID: TII->get(Opcode: LoongArch::PHI),
8737 DestReg: MI.getOperand(i: 0).getReg())
8738 .addReg(RegNo: RD1)
8739 .addMBB(MBB: FalseBB)
8740 .addReg(RegNo: RD2)
8741 .addMBB(MBB: TrueBB);
8742
8743 // The pseudo instruction is gone now.
8744 MI.eraseFromParent();
8745 return SinkBB;
8746}
8747
8748static MachineBasicBlock *
8749emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
8750 const LoongArchSubtarget &Subtarget) {
8751 unsigned InsOp;
8752 unsigned BroadcastOp;
8753 unsigned HalfSize;
8754 switch (MI.getOpcode()) {
8755 default:
8756 llvm_unreachable("Unexpected opcode");
8757 case LoongArch::PseudoXVINSGR2VR_B:
8758 HalfSize = 16;
8759 BroadcastOp = LoongArch::XVREPLGR2VR_B;
8760 InsOp = LoongArch::XVEXTRINS_B;
8761 break;
8762 case LoongArch::PseudoXVINSGR2VR_H:
8763 HalfSize = 8;
8764 BroadcastOp = LoongArch::XVREPLGR2VR_H;
8765 InsOp = LoongArch::XVEXTRINS_H;
8766 break;
8767 }
8768 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8769 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
8770 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
8771 DebugLoc DL = MI.getDebugLoc();
8772 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
8773 // XDst = vector_insert XSrc, Elt, Idx
8774 Register XDst = MI.getOperand(i: 0).getReg();
8775 Register XSrc = MI.getOperand(i: 1).getReg();
8776 Register Elt = MI.getOperand(i: 2).getReg();
8777 unsigned Idx = MI.getOperand(i: 3).getImm();
8778
8779 if (XSrc.isVirtual() && MRI.getVRegDef(Reg: XSrc)->isImplicitDef() &&
8780 Idx < HalfSize) {
8781 Register ScratchSubReg1 = MRI.createVirtualRegister(RegClass: SubRC);
8782 Register ScratchSubReg2 = MRI.createVirtualRegister(RegClass: SubRC);
8783
8784 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::COPY), DestReg: ScratchSubReg1)
8785 .addReg(RegNo: XSrc, Flags: {}, SubReg: LoongArch::sub_128);
8786 BuildMI(BB&: *BB, I&: MI, MIMD: DL,
8787 MCID: TII->get(Opcode: HalfSize == 8 ? LoongArch::VINSGR2VR_H
8788 : LoongArch::VINSGR2VR_B),
8789 DestReg: ScratchSubReg2)
8790 .addReg(RegNo: ScratchSubReg1)
8791 .addReg(RegNo: Elt)
8792 .addImm(Val: Idx);
8793
8794 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::SUBREG_TO_REG), DestReg: XDst)
8795 .addReg(RegNo: ScratchSubReg2)
8796 .addImm(Val: LoongArch::sub_128);
8797 } else {
8798 Register ScratchReg1 = MRI.createVirtualRegister(RegClass: RC);
8799 Register ScratchReg2 = MRI.createVirtualRegister(RegClass: RC);
8800
8801 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: BroadcastOp), DestReg: ScratchReg1).addReg(RegNo: Elt);
8802
8803 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::XVPERMI_Q), DestReg: ScratchReg2)
8804 .addReg(RegNo: ScratchReg1)
8805 .addReg(RegNo: XSrc)
8806 .addImm(Val: Idx >= HalfSize ? 48 : 18);
8807
8808 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: InsOp), DestReg: XDst)
8809 .addReg(RegNo: XSrc)
8810 .addReg(RegNo: ScratchReg2)
8811 .addImm(Val: (Idx >= HalfSize ? Idx - HalfSize : Idx) * 17);
8812 }
8813
8814 MI.eraseFromParent();
8815 return BB;
8816}
8817
8818static MachineBasicBlock *emitPseudoCTPOP(MachineInstr &MI,
8819 MachineBasicBlock *BB,
8820 const LoongArchSubtarget &Subtarget) {
8821 assert(Subtarget.hasExtLSX());
8822 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8823 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
8824 DebugLoc DL = MI.getDebugLoc();
8825 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
8826 Register Dst = MI.getOperand(i: 0).getReg();
8827 Register Src = MI.getOperand(i: 1).getReg();
8828
8829 unsigned BroadcastOp, CTOp, PickOp;
8830 switch (MI.getOpcode()) {
8831 default:
8832 llvm_unreachable("Unexpected opcode");
8833 case LoongArch::PseudoCTPOP_B:
8834 BroadcastOp = LoongArch::VREPLGR2VR_B;
8835 CTOp = LoongArch::VPCNT_B;
8836 PickOp = LoongArch::VPICKVE2GR_B;
8837 break;
8838 case LoongArch::PseudoCTPOP_H:
8839 case LoongArch::PseudoCTPOP_H_LA32:
8840 BroadcastOp = LoongArch::VREPLGR2VR_H;
8841 CTOp = LoongArch::VPCNT_H;
8842 PickOp = LoongArch::VPICKVE2GR_H;
8843 break;
8844 case LoongArch::PseudoCTPOP_W:
8845 case LoongArch::PseudoCTPOP_W_LA32:
8846 BroadcastOp = LoongArch::VREPLGR2VR_W;
8847 CTOp = LoongArch::VPCNT_W;
8848 PickOp = LoongArch::VPICKVE2GR_W;
8849 break;
8850 case LoongArch::PseudoCTPOP_D:
8851 BroadcastOp = LoongArch::VREPLGR2VR_D;
8852 CTOp = LoongArch::VPCNT_D;
8853 PickOp = LoongArch::VPICKVE2GR_D;
8854 break;
8855 }
8856
8857 Register ScratchReg1 = MRI.createVirtualRegister(RegClass: RC);
8858 Register ScratchReg2 = MRI.createVirtualRegister(RegClass: RC);
8859 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: BroadcastOp), DestReg: ScratchReg1).addReg(RegNo: Src);
8860 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: CTOp), DestReg: ScratchReg2).addReg(RegNo: ScratchReg1);
8861 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PickOp), DestReg: Dst).addReg(RegNo: ScratchReg2).addImm(Val: 0);
8862
8863 MI.eraseFromParent();
8864 return BB;
8865}
8866
8867static MachineBasicBlock *
8868emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB,
8869 const LoongArchSubtarget &Subtarget) {
8870 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8871 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
8872 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
8873 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
8874 Register Dst = MI.getOperand(i: 0).getReg();
8875 Register Src = MI.getOperand(i: 1).getReg();
8876 DebugLoc DL = MI.getDebugLoc();
8877 unsigned EleBits = 8;
8878 unsigned NotOpc = 0;
8879 unsigned MskOpc;
8880
8881 switch (MI.getOpcode()) {
8882 default:
8883 llvm_unreachable("Unexpected opcode");
8884 case LoongArch::PseudoVMSKLTZ_B:
8885 MskOpc = LoongArch::VMSKLTZ_B;
8886 break;
8887 case LoongArch::PseudoVMSKLTZ_H:
8888 MskOpc = LoongArch::VMSKLTZ_H;
8889 EleBits = 16;
8890 break;
8891 case LoongArch::PseudoVMSKLTZ_W:
8892 MskOpc = LoongArch::VMSKLTZ_W;
8893 EleBits = 32;
8894 break;
8895 case LoongArch::PseudoVMSKLTZ_D:
8896 MskOpc = LoongArch::VMSKLTZ_D;
8897 EleBits = 64;
8898 break;
8899 case LoongArch::PseudoVMSKGEZ_B:
8900 MskOpc = LoongArch::VMSKGEZ_B;
8901 break;
8902 case LoongArch::PseudoVMSKEQZ_B:
8903 MskOpc = LoongArch::VMSKNZ_B;
8904 NotOpc = LoongArch::VNOR_V;
8905 break;
8906 case LoongArch::PseudoVMSKNEZ_B:
8907 MskOpc = LoongArch::VMSKNZ_B;
8908 break;
8909 case LoongArch::PseudoXVMSKLTZ_B:
8910 MskOpc = LoongArch::XVMSKLTZ_B;
8911 RC = &LoongArch::LASX256RegClass;
8912 break;
8913 case LoongArch::PseudoXVMSKLTZ_H:
8914 MskOpc = LoongArch::XVMSKLTZ_H;
8915 RC = &LoongArch::LASX256RegClass;
8916 EleBits = 16;
8917 break;
8918 case LoongArch::PseudoXVMSKLTZ_W:
8919 MskOpc = LoongArch::XVMSKLTZ_W;
8920 RC = &LoongArch::LASX256RegClass;
8921 EleBits = 32;
8922 break;
8923 case LoongArch::PseudoXVMSKLTZ_D:
8924 MskOpc = LoongArch::XVMSKLTZ_D;
8925 RC = &LoongArch::LASX256RegClass;
8926 EleBits = 64;
8927 break;
8928 case LoongArch::PseudoXVMSKGEZ_B:
8929 MskOpc = LoongArch::XVMSKGEZ_B;
8930 RC = &LoongArch::LASX256RegClass;
8931 break;
8932 case LoongArch::PseudoXVMSKEQZ_B:
8933 MskOpc = LoongArch::XVMSKNZ_B;
8934 NotOpc = LoongArch::XVNOR_V;
8935 RC = &LoongArch::LASX256RegClass;
8936 break;
8937 case LoongArch::PseudoXVMSKNEZ_B:
8938 MskOpc = LoongArch::XVMSKNZ_B;
8939 RC = &LoongArch::LASX256RegClass;
8940 break;
8941 }
8942
8943 Register Msk = MRI.createVirtualRegister(RegClass: RC);
8944 if (NotOpc) {
8945 Register Tmp = MRI.createVirtualRegister(RegClass: RC);
8946 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: MskOpc), DestReg: Tmp).addReg(RegNo: Src);
8947 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: NotOpc), DestReg: Msk)
8948 .addReg(RegNo: Tmp, Flags: RegState::Kill)
8949 .addReg(RegNo: Tmp, Flags: RegState::Kill);
8950 } else {
8951 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: MskOpc), DestReg: Msk).addReg(RegNo: Src);
8952 }
8953
8954 if (TRI->getRegSizeInBits(RC: *RC) > 128) {
8955 Register Lo = MRI.createVirtualRegister(RegClass: &LoongArch::GPRRegClass);
8956 Register Hi = MRI.createVirtualRegister(RegClass: &LoongArch::GPRRegClass);
8957 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::XVPICKVE2GR_WU), DestReg: Lo)
8958 .addReg(RegNo: Msk)
8959 .addImm(Val: 0);
8960 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::XVPICKVE2GR_WU), DestReg: Hi)
8961 .addReg(RegNo: Msk, Flags: RegState::Kill)
8962 .addImm(Val: 4);
8963 BuildMI(BB&: *BB, I&: MI, MIMD: DL,
8964 MCID: TII->get(Opcode: Subtarget.is64Bit() ? LoongArch::BSTRINS_D
8965 : LoongArch::BSTRINS_W),
8966 DestReg: Dst)
8967 .addReg(RegNo: Lo, Flags: RegState::Kill)
8968 .addReg(RegNo: Hi, Flags: RegState::Kill)
8969 .addImm(Val: 256 / EleBits - 1)
8970 .addImm(Val: 128 / EleBits);
8971 } else {
8972 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::VPICKVE2GR_HU), DestReg: Dst)
8973 .addReg(RegNo: Msk, Flags: RegState::Kill)
8974 .addImm(Val: 0);
8975 }
8976
8977 MI.eraseFromParent();
8978 return BB;
8979}
8980
8981static MachineBasicBlock *
8982emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB,
8983 const LoongArchSubtarget &Subtarget) {
8984 assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
8985 "Unexpected instruction");
8986
8987 MachineFunction &MF = *BB->getParent();
8988 DebugLoc DL = MI.getDebugLoc();
8989 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
8990 Register LoReg = MI.getOperand(i: 0).getReg();
8991 Register HiReg = MI.getOperand(i: 1).getReg();
8992 Register SrcReg = MI.getOperand(i: 2).getReg();
8993
8994 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: LoongArch::MOVFR2GR_S_64), DestReg: LoReg).addReg(RegNo: SrcReg);
8995 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: LoongArch::MOVFRH2GR_S), DestReg: HiReg)
8996 .addReg(RegNo: SrcReg, Flags: getKillRegState(B: MI.getOperand(i: 2).isKill()));
8997 MI.eraseFromParent(); // The pseudo instruction is gone now.
8998 return BB;
8999}
9000
9001static MachineBasicBlock *
9002emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB,
9003 const LoongArchSubtarget &Subtarget) {
9004 assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
9005 "Unexpected instruction");
9006
9007 MachineFunction &MF = *BB->getParent();
9008 DebugLoc DL = MI.getDebugLoc();
9009 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
9010 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
9011 Register TmpReg = MRI.createVirtualRegister(RegClass: &LoongArch::FPR64RegClass);
9012 Register DstReg = MI.getOperand(i: 0).getReg();
9013 Register LoReg = MI.getOperand(i: 1).getReg();
9014 Register HiReg = MI.getOperand(i: 2).getReg();
9015
9016 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: LoongArch::MOVGR2FR_W_64), DestReg: TmpReg)
9017 .addReg(RegNo: LoReg, Flags: getKillRegState(B: MI.getOperand(i: 1).isKill()));
9018 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: LoongArch::MOVGR2FRH_W), DestReg: DstReg)
9019 .addReg(RegNo: TmpReg, Flags: RegState::Kill)
9020 .addReg(RegNo: HiReg, Flags: getKillRegState(B: MI.getOperand(i: 2).isKill()));
9021 MI.eraseFromParent(); // The pseudo instruction is gone now.
9022 return BB;
9023}
9024
9025static bool isSelectPseudo(MachineInstr &MI) {
9026 switch (MI.getOpcode()) {
9027 default:
9028 return false;
9029 case LoongArch::Select_GPR_Using_CC_GPR:
9030 return true;
9031 }
9032}
9033
9034static MachineBasicBlock *
9035emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB,
9036 const LoongArchSubtarget &Subtarget) {
9037 // To "insert" Select_* instructions, we actually have to insert the triangle
9038 // control-flow pattern. The incoming instructions know the destination vreg
9039 // to set, the condition code register to branch on, the true/false values to
9040 // select between, and the condcode to use to select the appropriate branch.
9041 //
9042 // We produce the following control flow:
9043 // HeadMBB
9044 // | \
9045 // | IfFalseMBB
9046 // | /
9047 // TailMBB
9048 //
9049 // When we find a sequence of selects we attempt to optimize their emission
9050 // by sharing the control flow. Currently we only handle cases where we have
9051 // multiple selects with the exact same condition (same LHS, RHS and CC).
9052 // The selects may be interleaved with other instructions if the other
9053 // instructions meet some requirements we deem safe:
9054 // - They are not pseudo instructions.
9055 // - They are debug instructions. Otherwise,
9056 // - They do not have side-effects, do not access memory and their inputs do
9057 // not depend on the results of the select pseudo-instructions.
9058 // The TrueV/FalseV operands of the selects cannot depend on the result of
9059 // previous selects in the sequence.
9060 // These conditions could be further relaxed. See the X86 target for a
9061 // related approach and more information.
9062
9063 Register LHS = MI.getOperand(i: 1).getReg();
9064 Register RHS;
9065 if (MI.getOperand(i: 2).isReg())
9066 RHS = MI.getOperand(i: 2).getReg();
9067 auto CC = static_cast<unsigned>(MI.getOperand(i: 3).getImm());
9068
9069 SmallVector<MachineInstr *, 4> SelectDebugValues;
9070 SmallSet<Register, 4> SelectDests;
9071 SelectDests.insert(V: MI.getOperand(i: 0).getReg());
9072
9073 MachineInstr *LastSelectPseudo = &MI;
9074 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
9075 SequenceMBBI != E; ++SequenceMBBI) {
9076 if (SequenceMBBI->isDebugInstr())
9077 continue;
9078 if (isSelectPseudo(MI&: *SequenceMBBI)) {
9079 if (SequenceMBBI->getOperand(i: 1).getReg() != LHS ||
9080 !SequenceMBBI->getOperand(i: 2).isReg() ||
9081 SequenceMBBI->getOperand(i: 2).getReg() != RHS ||
9082 SequenceMBBI->getOperand(i: 3).getImm() != CC ||
9083 SelectDests.count(V: SequenceMBBI->getOperand(i: 4).getReg()) ||
9084 SelectDests.count(V: SequenceMBBI->getOperand(i: 5).getReg()))
9085 break;
9086 LastSelectPseudo = &*SequenceMBBI;
9087 SequenceMBBI->collectDebugValues(DbgValues&: SelectDebugValues);
9088 SelectDests.insert(V: SequenceMBBI->getOperand(i: 0).getReg());
9089 continue;
9090 }
9091 if (SequenceMBBI->hasUnmodeledSideEffects() ||
9092 SequenceMBBI->mayLoadOrStore() ||
9093 SequenceMBBI->usesCustomInsertionHook())
9094 break;
9095 if (llvm::any_of(Range: SequenceMBBI->operands(), P: [&](MachineOperand &MO) {
9096 return MO.isReg() && MO.isUse() && SelectDests.count(V: MO.getReg());
9097 }))
9098 break;
9099 }
9100
9101 const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
9102 const BasicBlock *LLVM_BB = BB->getBasicBlock();
9103 DebugLoc DL = MI.getDebugLoc();
9104 MachineFunction::iterator I = ++BB->getIterator();
9105
9106 MachineBasicBlock *HeadMBB = BB;
9107 MachineFunction *F = BB->getParent();
9108 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
9109 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
9110
9111 F->insert(MBBI: I, MBB: IfFalseMBB);
9112 F->insert(MBBI: I, MBB: TailMBB);
9113
9114 // Set the call frame size on entry to the new basic blocks.
9115 unsigned CallFrameSize = TII.getCallFrameSizeAt(MI&: *LastSelectPseudo);
9116 IfFalseMBB->setCallFrameSize(CallFrameSize);
9117 TailMBB->setCallFrameSize(CallFrameSize);
9118
9119 // Transfer debug instructions associated with the selects to TailMBB.
9120 for (MachineInstr *DebugInstr : SelectDebugValues) {
9121 TailMBB->push_back(MI: DebugInstr->removeFromParent());
9122 }
9123
9124 // Move all instructions after the sequence to TailMBB.
9125 TailMBB->splice(Where: TailMBB->end(), Other: HeadMBB,
9126 From: std::next(x: LastSelectPseudo->getIterator()), To: HeadMBB->end());
9127 // Update machine-CFG edges by transferring all successors of the current
9128 // block to the new block which will contain the Phi nodes for the selects.
9129 TailMBB->transferSuccessorsAndUpdatePHIs(FromMBB: HeadMBB);
9130 // Set the successors for HeadMBB.
9131 HeadMBB->addSuccessor(Succ: IfFalseMBB);
9132 HeadMBB->addSuccessor(Succ: TailMBB);
9133
9134 // Insert appropriate branch.
9135 if (MI.getOperand(i: 2).isImm())
9136 BuildMI(BB: HeadMBB, MIMD: DL, MCID: TII.get(Opcode: CC))
9137 .addReg(RegNo: LHS)
9138 .addImm(Val: MI.getOperand(i: 2).getImm())
9139 .addMBB(MBB: TailMBB);
9140 else
9141 BuildMI(BB: HeadMBB, MIMD: DL, MCID: TII.get(Opcode: CC)).addReg(RegNo: LHS).addReg(RegNo: RHS).addMBB(MBB: TailMBB);
9142
9143 // IfFalseMBB just falls through to TailMBB.
9144 IfFalseMBB->addSuccessor(Succ: TailMBB);
9145
9146 // Create PHIs for all of the select pseudo-instructions.
9147 auto SelectMBBI = MI.getIterator();
9148 auto SelectEnd = std::next(x: LastSelectPseudo->getIterator());
9149 auto InsertionPoint = TailMBB->begin();
9150 while (SelectMBBI != SelectEnd) {
9151 auto Next = std::next(x: SelectMBBI);
9152 if (isSelectPseudo(MI&: *SelectMBBI)) {
9153 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
9154 BuildMI(BB&: *TailMBB, I: InsertionPoint, MIMD: SelectMBBI->getDebugLoc(),
9155 MCID: TII.get(Opcode: LoongArch::PHI), DestReg: SelectMBBI->getOperand(i: 0).getReg())
9156 .addReg(RegNo: SelectMBBI->getOperand(i: 4).getReg())
9157 .addMBB(MBB: HeadMBB)
9158 .addReg(RegNo: SelectMBBI->getOperand(i: 5).getReg())
9159 .addMBB(MBB: IfFalseMBB);
9160 SelectMBBI->eraseFromParent();
9161 }
9162 SelectMBBI = Next;
9163 }
9164
9165 F->getProperties().resetNoPHIs();
9166 return TailMBB;
9167}
9168
9169MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
9170 MachineInstr &MI, MachineBasicBlock *BB) const {
9171 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
9172 DebugLoc DL = MI.getDebugLoc();
9173
9174 switch (MI.getOpcode()) {
9175 default:
9176 llvm_unreachable("Unexpected instr type to insert");
9177 case LoongArch::DIV_W:
9178 case LoongArch::DIV_WU:
9179 case LoongArch::MOD_W:
9180 case LoongArch::MOD_WU:
9181 case LoongArch::DIV_D:
9182 case LoongArch::DIV_DU:
9183 case LoongArch::MOD_D:
9184 case LoongArch::MOD_DU:
9185 return insertDivByZeroTrap(MI, MBB: BB);
9186 break;
9187 case LoongArch::WRFCSR: {
9188 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::MOVGR2FCSR),
9189 DestReg: LoongArch::FCSR0 + MI.getOperand(i: 0).getImm())
9190 .addReg(RegNo: MI.getOperand(i: 1).getReg());
9191 MI.eraseFromParent();
9192 return BB;
9193 }
9194 case LoongArch::RDFCSR: {
9195 MachineInstr *ReadFCSR =
9196 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::MOVFCSR2GR),
9197 DestReg: MI.getOperand(i: 0).getReg())
9198 .addReg(RegNo: LoongArch::FCSR0 + MI.getOperand(i: 1).getImm());
9199 ReadFCSR->getOperand(i: 1).setIsUndef();
9200 MI.eraseFromParent();
9201 return BB;
9202 }
9203 case LoongArch::Select_GPR_Using_CC_GPR:
9204 return emitSelectPseudo(MI, BB, Subtarget);
9205 case LoongArch::BuildPairF64Pseudo:
9206 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
9207 case LoongArch::SplitPairF64Pseudo:
9208 return emitSplitPairF64Pseudo(MI, BB, Subtarget);
9209 case LoongArch::PseudoVBZ:
9210 case LoongArch::PseudoVBZ_B:
9211 case LoongArch::PseudoVBZ_H:
9212 case LoongArch::PseudoVBZ_W:
9213 case LoongArch::PseudoVBZ_D:
9214 case LoongArch::PseudoVBNZ:
9215 case LoongArch::PseudoVBNZ_B:
9216 case LoongArch::PseudoVBNZ_H:
9217 case LoongArch::PseudoVBNZ_W:
9218 case LoongArch::PseudoVBNZ_D:
9219 case LoongArch::PseudoXVBZ:
9220 case LoongArch::PseudoXVBZ_B:
9221 case LoongArch::PseudoXVBZ_H:
9222 case LoongArch::PseudoXVBZ_W:
9223 case LoongArch::PseudoXVBZ_D:
9224 case LoongArch::PseudoXVBNZ:
9225 case LoongArch::PseudoXVBNZ_B:
9226 case LoongArch::PseudoXVBNZ_H:
9227 case LoongArch::PseudoXVBNZ_W:
9228 case LoongArch::PseudoXVBNZ_D:
9229 return emitVecCondBranchPseudo(MI, BB, Subtarget);
9230 case LoongArch::PseudoXVINSGR2VR_B:
9231 case LoongArch::PseudoXVINSGR2VR_H:
9232 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
9233 case LoongArch::PseudoCTPOP_B:
9234 case LoongArch::PseudoCTPOP_H:
9235 case LoongArch::PseudoCTPOP_W:
9236 case LoongArch::PseudoCTPOP_D:
9237 case LoongArch::PseudoCTPOP_H_LA32:
9238 case LoongArch::PseudoCTPOP_W_LA32:
9239 return emitPseudoCTPOP(MI, BB, Subtarget);
9240 case LoongArch::PseudoVMSKLTZ_B:
9241 case LoongArch::PseudoVMSKLTZ_H:
9242 case LoongArch::PseudoVMSKLTZ_W:
9243 case LoongArch::PseudoVMSKLTZ_D:
9244 case LoongArch::PseudoVMSKGEZ_B:
9245 case LoongArch::PseudoVMSKEQZ_B:
9246 case LoongArch::PseudoVMSKNEZ_B:
9247 case LoongArch::PseudoXVMSKLTZ_B:
9248 case LoongArch::PseudoXVMSKLTZ_H:
9249 case LoongArch::PseudoXVMSKLTZ_W:
9250 case LoongArch::PseudoXVMSKLTZ_D:
9251 case LoongArch::PseudoXVMSKGEZ_B:
9252 case LoongArch::PseudoXVMSKEQZ_B:
9253 case LoongArch::PseudoXVMSKNEZ_B:
9254 return emitPseudoVMSKCOND(MI, BB, Subtarget);
9255 case TargetOpcode::STATEPOINT:
9256 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
9257 // while bl call instruction (where statepoint will be lowered at the
9258 // end) has implicit def. This def is early-clobber as it will be set at
9259 // the moment of the call and earlier than any use is read.
9260 // Add this implicit dead def here as a workaround.
9261 MI.addOperand(MF&: *MI.getMF(),
9262 Op: MachineOperand::CreateReg(
9263 Reg: LoongArch::R1, /*isDef*/ true,
9264 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
9265 /*isUndef*/ false, /*isEarlyClobber*/ true));
9266 if (!Subtarget.is64Bit())
9267 report_fatal_error(reason: "STATEPOINT is only supported on 64-bit targets");
9268 return emitPatchPoint(MI, MBB: BB);
9269 case LoongArch::PROBED_STACKALLOC_DYN:
9270 return emitDynamicProbedAlloc(MI, MBB: BB);
9271 }
9272}
9273
9274bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
9275 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
9276 unsigned *Fast) const {
9277 if (!Subtarget.hasUAL())
9278 return false;
9279
9280 // TODO: set reasonable speed number.
9281 if (Fast)
9282 *Fast = 1;
9283 return true;
9284}
9285
9286//===----------------------------------------------------------------------===//
9287// Calling Convention Implementation
9288//===----------------------------------------------------------------------===//
9289
9290// Eight general-purpose registers a0-a7 used for passing integer arguments,
9291// with a0-a1 reused to return values. Generally, the GPRs are used to pass
9292// fixed-point arguments, and floating-point arguments when no FPR is available
9293// or with soft float ABI.
9294const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
9295 LoongArch::R7, LoongArch::R8, LoongArch::R9,
9296 LoongArch::R10, LoongArch::R11};
9297
9298// PreserveNone calling convention:
9299// Arguments may be passed in any general-purpose registers except:
9300// - R1 : return address register
9301// - R22 : frame pointer
9302// - R31 : base pointer
9303//
9304// All general-purpose registers are treated as caller-saved,
9305// except R1 (RA) and R22 (FP).
9306//
9307// Non-volatile registers are allocated first so that a function
9308// can call normal functions without having to spill and reload
9309// argument registers.
9310const MCPhysReg PreserveNoneArgGPRs[] = {
9311 LoongArch::R23, LoongArch::R24, LoongArch::R25, LoongArch::R26,
9312 LoongArch::R27, LoongArch::R28, LoongArch::R29, LoongArch::R30,
9313 LoongArch::R4, LoongArch::R5, LoongArch::R6, LoongArch::R7,
9314 LoongArch::R8, LoongArch::R9, LoongArch::R10, LoongArch::R11,
9315 LoongArch::R12, LoongArch::R13, LoongArch::R14, LoongArch::R15,
9316 LoongArch::R16, LoongArch::R17, LoongArch::R18, LoongArch::R19,
9317 LoongArch::R20};
9318
9319// Eight floating-point registers fa0-fa7 used for passing floating-point
9320// arguments, and fa0-fa1 are also used to return values.
9321const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
9322 LoongArch::F3, LoongArch::F4, LoongArch::F5,
9323 LoongArch::F6, LoongArch::F7};
9324// FPR32 and FPR64 alias each other.
9325const MCPhysReg ArgFPR64s[] = {
9326 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
9327 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
9328
9329const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
9330 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
9331 LoongArch::VR6, LoongArch::VR7};
9332
9333const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
9334 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
9335 LoongArch::XR6, LoongArch::XR7};
9336
9337static Register allocateArgGPR(CCState &State) {
9338 switch (State.getCallingConv()) {
9339 case CallingConv::PreserveNone:
9340 if (!State.isVarArg())
9341 return State.AllocateReg(Regs: PreserveNoneArgGPRs);
9342 [[fallthrough]];
9343 default:
9344 return State.AllocateReg(Regs: ArgGPRs);
9345 }
9346}
9347
9348// Pass a 2*GRLen argument that has been split into two GRLen values through
9349// registers or the stack as necessary.
9350static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
9351 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
9352 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
9353 ISD::ArgFlagsTy ArgFlags2) {
9354 unsigned GRLenInBytes = GRLen / 8;
9355 if (Register Reg = allocateArgGPR(State)) {
9356 // At least one half can be passed via register.
9357 State.addLoc(V: CCValAssign::getReg(ValNo: VA1.getValNo(), ValVT: VA1.getValVT(), Reg,
9358 LocVT: VA1.getLocVT(), HTP: CCValAssign::Full));
9359 } else {
9360 // Both halves must be passed on the stack, with proper alignment.
9361 Align StackAlign =
9362 std::max(a: Align(GRLenInBytes), b: ArgFlags1.getNonZeroOrigAlign());
9363 State.addLoc(
9364 V: CCValAssign::getMem(ValNo: VA1.getValNo(), ValVT: VA1.getValVT(),
9365 Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: StackAlign),
9366 LocVT: VA1.getLocVT(), HTP: CCValAssign::Full));
9367 State.addLoc(V: CCValAssign::getMem(
9368 ValNo: ValNo2, ValVT: ValVT2, Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: Align(GRLenInBytes)),
9369 LocVT: LocVT2, HTP: CCValAssign::Full));
9370 return false;
9371 }
9372 if (Register Reg = allocateArgGPR(State)) {
9373 // The second half can also be passed via register.
9374 State.addLoc(
9375 V: CCValAssign::getReg(ValNo: ValNo2, ValVT: ValVT2, Reg, LocVT: LocVT2, HTP: CCValAssign::Full));
9376 } else {
9377 // The second half is passed via the stack, without additional alignment.
9378 State.addLoc(V: CCValAssign::getMem(
9379 ValNo: ValNo2, ValVT: ValVT2, Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: Align(GRLenInBytes)),
9380 LocVT: LocVT2, HTP: CCValAssign::Full));
9381 }
9382 return false;
9383}
9384
9385// Implements the LoongArch calling convention. Returns true upon failure.
9386static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
9387 unsigned ValNo, MVT ValVT,
9388 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
9389 CCState &State, bool IsRet, Type *OrigTy) {
9390 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
9391 assert((GRLen == 32 || GRLen == 64) && "Unspport GRLen");
9392 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
9393 MVT LocVT = ValVT;
9394
9395 // Any return value split into more than two values can't be returned
9396 // directly.
9397 if (IsRet && ValNo > 1)
9398 return true;
9399
9400 // If passing a variadic argument, or if no FPR is available.
9401 bool UseGPRForFloat = true;
9402
9403 switch (ABI) {
9404 default:
9405 llvm_unreachable("Unexpected ABI");
9406 break;
9407 case LoongArchABI::ABI_ILP32F:
9408 case LoongArchABI::ABI_LP64F:
9409 case LoongArchABI::ABI_ILP32D:
9410 case LoongArchABI::ABI_LP64D:
9411 UseGPRForFloat = ArgFlags.isVarArg();
9412 break;
9413 case LoongArchABI::ABI_ILP32S:
9414 case LoongArchABI::ABI_LP64S:
9415 break;
9416 }
9417
9418 // If this is a variadic argument, the LoongArch calling convention requires
9419 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
9420 // byte alignment. An aligned register should be used regardless of whether
9421 // the original argument was split during legalisation or not. The argument
9422 // will not be passed by registers if the original type is larger than
9423 // 2*GRLen, so the register alignment rule does not apply.
9424 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
9425 if (ArgFlags.isVarArg() &&
9426 ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
9427 DL.getTypeAllocSize(Ty: OrigTy) == TwoGRLenInBytes) {
9428 unsigned RegIdx = State.getFirstUnallocated(Regs: ArgGPRs);
9429 // Skip 'odd' register if necessary.
9430 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
9431 State.AllocateReg(Regs: ArgGPRs);
9432 }
9433
9434 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
9435 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
9436 State.getPendingArgFlags();
9437
9438 assert(PendingLocs.size() == PendingArgFlags.size() &&
9439 "PendingLocs and PendingArgFlags out of sync");
9440
9441 // FPR32 and FPR64 alias each other.
9442 if (State.getFirstUnallocated(Regs: ArgFPR32s) == std::size(ArgFPR32s))
9443 UseGPRForFloat = true;
9444
9445 if (UseGPRForFloat && ValVT == MVT::f32) {
9446 LocVT = GRLenVT;
9447 LocInfo = CCValAssign::BCvt;
9448 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
9449 LocVT = MVT::i64;
9450 LocInfo = CCValAssign::BCvt;
9451 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
9452 // Handle passing f64 on LA32D with a soft float ABI or when floating point
9453 // registers are exhausted.
9454 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
9455 // Depending on available argument GPRS, f64 may be passed in a pair of
9456 // GPRs, split between a GPR and the stack, or passed completely on the
9457 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
9458 // cases.
9459 MCRegister Reg = allocateArgGPR(State);
9460 if (!Reg) {
9461 int64_t StackOffset = State.AllocateStack(Size: 8, Alignment: Align(8));
9462 State.addLoc(
9463 V: CCValAssign::getMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo));
9464 return false;
9465 }
9466 LocVT = MVT::i32;
9467 State.addLoc(V: CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, HTP: LocInfo));
9468 MCRegister HiReg = allocateArgGPR(State);
9469 if (HiReg) {
9470 State.addLoc(
9471 V: CCValAssign::getCustomReg(ValNo, ValVT, Reg: HiReg, LocVT, HTP: LocInfo));
9472 } else {
9473 int64_t StackOffset = State.AllocateStack(Size: 4, Alignment: Align(4));
9474 State.addLoc(
9475 V: CCValAssign::getCustomMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo));
9476 }
9477 return false;
9478 }
9479
9480 // Split arguments might be passed indirectly, so keep track of the pending
9481 // values.
9482 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
9483 LocVT = GRLenVT;
9484 LocInfo = CCValAssign::Indirect;
9485 PendingLocs.push_back(
9486 Elt: CCValAssign::getPending(ValNo, ValVT, LocVT, HTP: LocInfo));
9487 PendingArgFlags.push_back(Elt: ArgFlags);
9488 if (!ArgFlags.isSplitEnd()) {
9489 return false;
9490 }
9491 }
9492
9493 // If the split argument only had two elements, it should be passed directly
9494 // in registers or on the stack.
9495 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
9496 PendingLocs.size() <= 2) {
9497 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
9498 // Apply the normal calling convention rules to the first half of the
9499 // split argument.
9500 CCValAssign VA = PendingLocs[0];
9501 ISD::ArgFlagsTy AF = PendingArgFlags[0];
9502 PendingLocs.clear();
9503 PendingArgFlags.clear();
9504 return CC_LoongArchAssign2GRLen(GRLen, State, VA1: VA, ArgFlags1: AF, ValNo2: ValNo, ValVT2: ValVT, LocVT2: LocVT,
9505 ArgFlags2: ArgFlags);
9506 }
9507
9508 // Allocate to a register if possible, or else a stack slot.
9509 Register Reg;
9510 unsigned StoreSizeBytes = GRLen / 8;
9511 Align StackAlign = Align(GRLen / 8);
9512
9513 if (ValVT == MVT::f32 && !UseGPRForFloat) {
9514 Reg = State.AllocateReg(Regs: ArgFPR32s);
9515 } else if (ValVT == MVT::f64 && !UseGPRForFloat) {
9516 Reg = State.AllocateReg(Regs: ArgFPR64s);
9517 } else if (ValVT.is128BitVector()) {
9518 Reg = State.AllocateReg(Regs: ArgVRs);
9519 UseGPRForFloat = false;
9520 StoreSizeBytes = 16;
9521 StackAlign = Align(16);
9522 } else if (ValVT.is256BitVector()) {
9523 Reg = State.AllocateReg(Regs: ArgXRs);
9524 UseGPRForFloat = false;
9525 StoreSizeBytes = 32;
9526 StackAlign = Align(32);
9527 } else {
9528 Reg = allocateArgGPR(State);
9529 }
9530
9531 unsigned StackOffset =
9532 Reg ? 0 : State.AllocateStack(Size: StoreSizeBytes, Alignment: StackAlign);
9533
9534 // If we reach this point and PendingLocs is non-empty, we must be at the
9535 // end of a split argument that must be passed indirectly.
9536 if (!PendingLocs.empty()) {
9537 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
9538 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
9539 for (auto &It : PendingLocs) {
9540 if (Reg)
9541 It.convertToReg(Reg);
9542 else
9543 It.convertToMem(Offset: StackOffset);
9544 State.addLoc(V: It);
9545 }
9546 PendingLocs.clear();
9547 PendingArgFlags.clear();
9548 return false;
9549 }
9550 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
9551 "Expected an GRLenVT at this stage");
9552
9553 if (Reg) {
9554 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, HTP: LocInfo));
9555 return false;
9556 }
9557
9558 // When a floating-point value is passed on the stack, no bit-cast is needed.
9559 if (ValVT.isFloatingPoint()) {
9560 LocVT = ValVT;
9561 LocInfo = CCValAssign::Full;
9562 }
9563
9564 State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo));
9565 return false;
9566}
9567
9568void LoongArchTargetLowering::analyzeInputArgs(
9569 MachineFunction &MF, CCState &CCInfo,
9570 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
9571 LoongArchCCAssignFn Fn) const {
9572 FunctionType *FType = MF.getFunction().getFunctionType();
9573 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
9574 MVT ArgVT = Ins[i].VT;
9575 Type *ArgTy = nullptr;
9576 if (IsRet)
9577 ArgTy = FType->getReturnType();
9578 else if (Ins[i].isOrigArg())
9579 ArgTy = FType->getParamType(i: Ins[i].getOrigArgIndex());
9580 LoongArchABI::ABI ABI =
9581 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
9582 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
9583 CCInfo, IsRet, ArgTy)) {
9584 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
9585 << '\n');
9586 llvm_unreachable("");
9587 }
9588 }
9589}
9590
9591void LoongArchTargetLowering::analyzeOutputArgs(
9592 MachineFunction &MF, CCState &CCInfo,
9593 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
9594 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
9595 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
9596 MVT ArgVT = Outs[i].VT;
9597 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
9598 LoongArchABI::ABI ABI =
9599 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
9600 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
9601 CCInfo, IsRet, OrigTy)) {
9602 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
9603 << "\n");
9604 llvm_unreachable("");
9605 }
9606 }
9607}
9608
9609// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
9610// values.
9611static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
9612 const CCValAssign &VA, const SDLoc &DL) {
9613 switch (VA.getLocInfo()) {
9614 default:
9615 llvm_unreachable("Unexpected CCValAssign::LocInfo");
9616 case CCValAssign::Full:
9617 case CCValAssign::Indirect:
9618 break;
9619 case CCValAssign::BCvt:
9620 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
9621 Val = DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64, DL, VT: MVT::f32, Operand: Val);
9622 else
9623 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: VA.getValVT(), Operand: Val);
9624 break;
9625 }
9626 return Val;
9627}
9628
9629static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
9630 const CCValAssign &VA, const SDLoc &DL,
9631 const ISD::InputArg &In,
9632 const LoongArchTargetLowering &TLI) {
9633 MachineFunction &MF = DAG.getMachineFunction();
9634 MachineRegisterInfo &RegInfo = MF.getRegInfo();
9635 EVT LocVT = VA.getLocVT();
9636 SDValue Val;
9637 const TargetRegisterClass *RC = TLI.getRegClassFor(VT: LocVT.getSimpleVT());
9638 Register VReg = RegInfo.createVirtualRegister(RegClass: RC);
9639 RegInfo.addLiveIn(Reg: VA.getLocReg(), vreg: VReg);
9640 Val = DAG.getCopyFromReg(Chain, dl: DL, Reg: VReg, VT: LocVT);
9641
9642 // If input is sign extended from 32 bits, note it for the OptW pass.
9643 if (In.isOrigArg()) {
9644 Argument *OrigArg = MF.getFunction().getArg(i: In.getOrigArgIndex());
9645 if (OrigArg->getType()->isIntegerTy()) {
9646 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
9647 // An input zero extended from i31 can also be considered sign extended.
9648 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
9649 (BitWidth < 32 && In.Flags.isZExt())) {
9650 LoongArchMachineFunctionInfo *LAFI =
9651 MF.getInfo<LoongArchMachineFunctionInfo>();
9652 LAFI->addSExt32Register(Reg: VReg);
9653 }
9654 }
9655 }
9656
9657 return convertLocVTToValVT(DAG, Val, VA, DL);
9658}
9659
9660// The caller is responsible for loading the full value if the argument is
9661// passed with CCValAssign::Indirect.
9662static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
9663 const CCValAssign &VA, const SDLoc &DL) {
9664 MachineFunction &MF = DAG.getMachineFunction();
9665 MachineFrameInfo &MFI = MF.getFrameInfo();
9666 EVT ValVT = VA.getValVT();
9667 int FI = MFI.CreateFixedObject(Size: ValVT.getStoreSize(), SPOffset: VA.getLocMemOffset(),
9668 /*IsImmutable=*/true);
9669 SDValue FIN = DAG.getFrameIndex(
9670 FI, VT: MVT::getIntegerVT(BitWidth: DAG.getDataLayout().getPointerSizeInBits(AS: 0)));
9671
9672 ISD::LoadExtType ExtType;
9673 switch (VA.getLocInfo()) {
9674 default:
9675 llvm_unreachable("Unexpected CCValAssign::LocInfo");
9676 case CCValAssign::Full:
9677 case CCValAssign::Indirect:
9678 case CCValAssign::BCvt:
9679 ExtType = ISD::NON_EXTLOAD;
9680 break;
9681 }
9682 return DAG.getExtLoad(
9683 ExtType, dl: DL, VT: VA.getLocVT(), Chain, Ptr: FIN,
9684 PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI), MemVT: ValVT);
9685}
9686
9687static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain,
9688 const CCValAssign &VA,
9689 const CCValAssign &HiVA,
9690 const SDLoc &DL) {
9691 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
9692 "Unexpected VA");
9693 MachineFunction &MF = DAG.getMachineFunction();
9694 MachineFrameInfo &MFI = MF.getFrameInfo();
9695 MachineRegisterInfo &RegInfo = MF.getRegInfo();
9696
9697 assert(VA.isRegLoc() && "Expected register VA assignment");
9698
9699 Register LoVReg = RegInfo.createVirtualRegister(RegClass: &LoongArch::GPRRegClass);
9700 RegInfo.addLiveIn(Reg: VA.getLocReg(), vreg: LoVReg);
9701 SDValue Lo = DAG.getCopyFromReg(Chain, dl: DL, Reg: LoVReg, VT: MVT::i32);
9702 SDValue Hi;
9703 if (HiVA.isMemLoc()) {
9704 // Second half of f64 is passed on the stack.
9705 int FI = MFI.CreateFixedObject(Size: 4, SPOffset: HiVA.getLocMemOffset(),
9706 /*IsImmutable=*/true);
9707 SDValue FIN = DAG.getFrameIndex(FI, VT: MVT::i32);
9708 Hi = DAG.getLoad(VT: MVT::i32, dl: DL, Chain, Ptr: FIN,
9709 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI));
9710 } else {
9711 // Second half of f64 is passed in another GPR.
9712 Register HiVReg = RegInfo.createVirtualRegister(RegClass: &LoongArch::GPRRegClass);
9713 RegInfo.addLiveIn(Reg: HiVA.getLocReg(), vreg: HiVReg);
9714 Hi = DAG.getCopyFromReg(Chain, dl: DL, Reg: HiVReg, VT: MVT::i32);
9715 }
9716 return DAG.getNode(Opcode: LoongArchISD::BUILD_PAIR_F64, DL, VT: MVT::f64, N1: Lo, N2: Hi);
9717}
9718
9719static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
9720 const CCValAssign &VA, const SDLoc &DL) {
9721 EVT LocVT = VA.getLocVT();
9722
9723 switch (VA.getLocInfo()) {
9724 default:
9725 llvm_unreachable("Unexpected CCValAssign::LocInfo");
9726 case CCValAssign::Full:
9727 break;
9728 case CCValAssign::BCvt:
9729 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
9730 Val = DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Val);
9731 else
9732 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: LocVT, Operand: Val);
9733 break;
9734 }
9735 return Val;
9736}
9737
9738static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
9739 CCValAssign::LocInfo LocInfo,
9740 ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
9741 CCState &State) {
9742 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
9743 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
9744 // s0 s1 s2 s3 s4 s5 s6 s7 s8
9745 static const MCPhysReg GPRList[] = {
9746 LoongArch::R23, LoongArch::R24, LoongArch::R25,
9747 LoongArch::R26, LoongArch::R27, LoongArch::R28,
9748 LoongArch::R29, LoongArch::R30, LoongArch::R31};
9749 if (MCRegister Reg = State.AllocateReg(Regs: GPRList)) {
9750 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, HTP: LocInfo));
9751 return false;
9752 }
9753 }
9754
9755 if (LocVT == MVT::f32) {
9756 // Pass in STG registers: F1, F2, F3, F4
9757 // fs0,fs1,fs2,fs3
9758 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
9759 LoongArch::F26, LoongArch::F27};
9760 if (MCRegister Reg = State.AllocateReg(Regs: FPR32List)) {
9761 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, HTP: LocInfo));
9762 return false;
9763 }
9764 }
9765
9766 if (LocVT == MVT::f64) {
9767 // Pass in STG registers: D1, D2, D3, D4
9768 // fs4,fs5,fs6,fs7
9769 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
9770 LoongArch::F30_64, LoongArch::F31_64};
9771 if (MCRegister Reg = State.AllocateReg(Regs: FPR64List)) {
9772 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, HTP: LocInfo));
9773 return false;
9774 }
9775 }
9776
9777 report_fatal_error(reason: "No registers left in GHC calling convention");
9778 return true;
9779}
9780
9781// Transform physical registers into virtual registers.
9782SDValue LoongArchTargetLowering::LowerFormalArguments(
9783 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
9784 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
9785 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
9786
9787 MachineFunction &MF = DAG.getMachineFunction();
9788
9789 switch (CallConv) {
9790 default:
9791 llvm_unreachable("Unsupported calling convention");
9792 case CallingConv::C:
9793 case CallingConv::Fast:
9794 case CallingConv::PreserveNone:
9795 case CallingConv::PreserveMost:
9796 break;
9797 case CallingConv::GHC:
9798 if (!MF.getSubtarget().hasFeature(Feature: LoongArch::FeatureBasicF) ||
9799 !MF.getSubtarget().hasFeature(Feature: LoongArch::FeatureBasicD))
9800 report_fatal_error(
9801 reason: "GHC calling convention requires the F and D extensions");
9802 }
9803
9804 const Function &Func = MF.getFunction();
9805 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
9806 MVT GRLenVT = Subtarget.getGRLenVT();
9807 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
9808
9809 // Check if this function has any musttail calls. If so, incoming indirect
9810 // arg pointers must be saved in virtual registers so they survive across
9811 // basic blocks (the SelectionDAG is cleared between BBs). Only do this
9812 // when needed to avoid adding register pressure to non-musttail functions.
9813 bool HasMusttail = llvm::any_of(Range: Func, P: [](const BasicBlock &BB) {
9814 return llvm::any_of(Range: BB, P: [](const Instruction &I) {
9815 if (const auto *CI = dyn_cast<CallInst>(Val: &I))
9816 return CI->isMustTailCall();
9817 return false;
9818 });
9819 });
9820 // Used with varargs to acumulate store chains.
9821 std::vector<SDValue> OutChains;
9822
9823 // Assign locations to all of the incoming arguments.
9824 SmallVector<CCValAssign> ArgLocs;
9825 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
9826
9827 if (CallConv == CallingConv::GHC)
9828 CCInfo.AnalyzeFormalArguments(Ins, Fn: CC_LoongArch_GHC);
9829 else
9830 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, Fn: CC_LoongArch);
9831
9832 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
9833 CCValAssign &VA = ArgLocs[i];
9834 SDValue ArgValue;
9835 // Passing f64 on LA32D with a soft float ABI must be handled as a special
9836 // case.
9837 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
9838 assert(VA.needsCustom());
9839 ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, HiVA: ArgLocs[++i], DL);
9840 } else if (VA.isRegLoc())
9841 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, In: Ins[InsIdx], TLI: *this);
9842 else
9843 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
9844 if (VA.getLocInfo() == CCValAssign::Indirect) {
9845 // If the original argument was split and passed by reference, we need to
9846 // load all parts of it here (using the same address).
9847 InVals.push_back(Elt: DAG.getLoad(VT: VA.getValVT(), dl: DL, Chain, Ptr: ArgValue,
9848 PtrInfo: MachinePointerInfo()));
9849 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
9850 if (HasMusttail) {
9851 LoongArchMachineFunctionInfo *LAFI =
9852 MF.getInfo<LoongArchMachineFunctionInfo>();
9853 Register VReg =
9854 MF.getRegInfo().createVirtualRegister(RegClass: &LoongArch::GPRRegClass);
9855 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: VReg, N: ArgValue);
9856 LAFI->setIncomingIndirectArg(ArgIndex, Reg: VReg);
9857 }
9858 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
9859 assert(ArgPartOffset == 0);
9860 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
9861 CCValAssign &PartVA = ArgLocs[i + 1];
9862 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
9863 SDValue Offset = DAG.getIntPtrConstant(Val: PartOffset, DL);
9864 SDValue Address = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: ArgValue, N2: Offset);
9865 InVals.push_back(Elt: DAG.getLoad(VT: PartVA.getValVT(), dl: DL, Chain, Ptr: Address,
9866 PtrInfo: MachinePointerInfo()));
9867 ++i;
9868 ++InsIdx;
9869 }
9870 continue;
9871 }
9872 InVals.push_back(Elt: ArgValue);
9873 }
9874
9875 if (IsVarArg) {
9876 ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
9877 unsigned Idx = CCInfo.getFirstUnallocated(Regs: ArgRegs);
9878 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
9879 MachineFrameInfo &MFI = MF.getFrameInfo();
9880 MachineRegisterInfo &RegInfo = MF.getRegInfo();
9881 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
9882
9883 // Offset of the first variable argument from stack pointer, and size of
9884 // the vararg save area. For now, the varargs save area is either zero or
9885 // large enough to hold a0-a7.
9886 int VaArgOffset, VarArgsSaveSize;
9887
9888 // If all registers are allocated, then all varargs must be passed on the
9889 // stack and we don't need to save any argregs.
9890 if (ArgRegs.size() == Idx) {
9891 VaArgOffset = CCInfo.getStackSize();
9892 VarArgsSaveSize = 0;
9893 } else {
9894 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
9895 VaArgOffset = -VarArgsSaveSize;
9896 }
9897
9898 // Record the frame index of the first variable argument
9899 // which is a value necessary to VASTART.
9900 int FI = MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset, IsImmutable: true);
9901 LoongArchFI->setVarArgsFrameIndex(FI);
9902
9903 // If saving an odd number of registers then create an extra stack slot to
9904 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
9905 // offsets to even-numbered registered remain 2*GRLen-aligned.
9906 if (Idx % 2) {
9907 MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset - (int)GRLenInBytes,
9908 IsImmutable: true);
9909 VarArgsSaveSize += GRLenInBytes;
9910 }
9911
9912 // Copy the integer registers that may have been used for passing varargs
9913 // to the vararg save area.
9914 for (unsigned I = Idx; I < ArgRegs.size();
9915 ++I, VaArgOffset += GRLenInBytes) {
9916 const Register Reg = RegInfo.createVirtualRegister(RegClass: RC);
9917 RegInfo.addLiveIn(Reg: ArgRegs[I], vreg: Reg);
9918 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl: DL, Reg, VT: GRLenVT);
9919 FI = MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset, IsImmutable: true);
9920 SDValue PtrOff = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout()));
9921 SDValue Store = DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: PtrOff,
9922 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI));
9923 cast<StoreSDNode>(Val: Store.getNode())
9924 ->getMemOperand()
9925 ->setValue((Value *)nullptr);
9926 OutChains.push_back(x: Store);
9927 }
9928 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
9929 }
9930
9931 // All stores are grouped in one node to allow the matching between
9932 // the size of Ins and InVals. This only happens for vararg functions.
9933 if (!OutChains.empty()) {
9934 OutChains.push_back(x: Chain);
9935 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
9936 }
9937
9938 return Chain;
9939}
9940
9941bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
9942 return CI->isTailCall();
9943}
9944
9945// Check if the return value is used as only a return value, as otherwise
9946// we can't perform a tail-call.
9947bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
9948 SDValue &Chain) const {
9949 if (N->getNumValues() != 1)
9950 return false;
9951 if (!N->hasNUsesOfValue(NUses: 1, Value: 0))
9952 return false;
9953
9954 SDNode *Copy = *N->user_begin();
9955 if (Copy->getOpcode() != ISD::CopyToReg)
9956 return false;
9957
9958 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
9959 // isn't safe to perform a tail call.
9960 if (Copy->getGluedNode())
9961 return false;
9962
9963 // The copy must be used by a LoongArchISD::RET, and nothing else.
9964 bool HasRet = false;
9965 for (SDNode *Node : Copy->users()) {
9966 if (Node->getOpcode() != LoongArchISD::RET)
9967 return false;
9968 HasRet = true;
9969 }
9970
9971 if (!HasRet)
9972 return false;
9973
9974 Chain = Copy->getOperand(Num: 0);
9975 return true;
9976}
9977
9978// Check whether the call is eligible for tail call optimization.
9979bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
9980 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
9981 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
9982
9983 auto CalleeCC = CLI.CallConv;
9984 auto &Outs = CLI.Outs;
9985 auto &Caller = MF.getFunction();
9986 auto CallerCC = Caller.getCallingConv();
9987
9988 bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
9989
9990 // Byval parameters hand the function a pointer directly into the stack area
9991 // we want to reuse during a tail call. Working around this *is* possible
9992 // but less efficient and uglier in LowerCall. For musttail, there is no
9993 // workaround today: a byval arg requires a local copy that becomes invalid
9994 // after the tail call deallocates the caller's frame, so rejecting here
9995 // (and triggering reportFatalInternalError in LowerCall) is safer than
9996 // miscompiling.
9997 for (auto &Arg : Outs)
9998 if (Arg.Flags.isByVal())
9999 return false;
10000
10001 // musttail bypasses the remaining checks: the checks either reject cases
10002 // we handle specially (indirect args are forwarded via incoming pointers,
10003 // stack-passed args reuse the matching incoming layout, sret is forwarded
10004 // like any other pointer arg) or are optimizations not applicable to
10005 // mandatory tail calls.
10006 if (IsMustTail)
10007 return true;
10008
10009 // Do not tail call opt if the stack is used to pass parameters.
10010 if (CCInfo.getStackSize() != 0)
10011 return false;
10012
10013 // Do not tail call opt if any parameters need to be passed indirectly.
10014 for (auto &VA : ArgLocs)
10015 if (VA.getLocInfo() == CCValAssign::Indirect)
10016 return false;
10017
10018 // Do not tail call opt if either caller or callee uses struct return
10019 // semantics.
10020 auto IsCallerStructRet = Caller.hasStructRetAttr();
10021 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
10022 if (IsCallerStructRet || IsCalleeStructRet)
10023 return false;
10024
10025 // The callee has to preserve all registers the caller needs to preserve.
10026 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
10027 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
10028 if (CalleeCC != CallerCC) {
10029 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
10030 if (!TRI->regmaskSubsetEqual(mask0: CallerPreserved, mask1: CalleePreserved))
10031 return false;
10032 }
10033 return true;
10034}
10035
10036static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
10037 return DAG.getDataLayout().getPrefTypeAlign(
10038 Ty: VT.getTypeForEVT(Context&: *DAG.getContext()));
10039}
10040
10041// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
10042// and output parameter nodes.
10043SDValue
10044LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
10045 SmallVectorImpl<SDValue> &InVals) const {
10046 SelectionDAG &DAG = CLI.DAG;
10047 SDLoc &DL = CLI.DL;
10048 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
10049 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
10050 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
10051 SDValue Chain = CLI.Chain;
10052 SDValue Callee = CLI.Callee;
10053 CallingConv::ID CallConv = CLI.CallConv;
10054 bool IsVarArg = CLI.IsVarArg;
10055 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
10056 MVT GRLenVT = Subtarget.getGRLenVT();
10057 bool &IsTailCall = CLI.IsTailCall;
10058
10059 MachineFunction &MF = DAG.getMachineFunction();
10060
10061 // Analyze the operands of the call, assigning locations to each operand.
10062 SmallVector<CCValAssign> ArgLocs;
10063 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
10064
10065 if (CallConv == CallingConv::GHC)
10066 ArgCCInfo.AnalyzeCallOperands(Outs, Fn: CC_LoongArch_GHC);
10067 else
10068 analyzeOutputArgs(MF, CCInfo&: ArgCCInfo, Outs, /*IsRet=*/false, CLI: &CLI, Fn: CC_LoongArch);
10069
10070 // Check if it's really possible to do a tail call.
10071 if (IsTailCall)
10072 IsTailCall = isEligibleForTailCallOptimization(CCInfo&: ArgCCInfo, CLI, MF, ArgLocs);
10073
10074 if (IsTailCall)
10075 ++NumTailCalls;
10076 else if (CLI.CB && CLI.CB->isMustTailCall())
10077 report_fatal_error(reason: "failed to perform tail call elimination on a call "
10078 "site marked musttail");
10079
10080 // Get a count of how many bytes are to be pushed on the stack.
10081 unsigned NumBytes = ArgCCInfo.getStackSize();
10082
10083 // Create local copies for byval args.
10084 SmallVector<SDValue> ByValArgs;
10085 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
10086 ISD::ArgFlagsTy Flags = Outs[i].Flags;
10087 if (!Flags.isByVal())
10088 continue;
10089
10090 SDValue Arg = OutVals[i];
10091 unsigned Size = Flags.getByValSize();
10092 Align Alignment = Flags.getNonZeroByValAlign();
10093
10094 int FI =
10095 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/isSpillSlot: false);
10096 SDValue FIPtr = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout()));
10097 SDValue SizeNode = DAG.getConstant(Val: Size, DL, VT: GRLenVT);
10098
10099 Chain = DAG.getMemcpy(Chain, dl: DL, Dst: FIPtr, Src: Arg, Size: SizeNode, DstAlign: Alignment, SrcAlign: Alignment,
10100 /*IsVolatile=*/isVol: false,
10101 /*AlwaysInline=*/false, /*CI=*/nullptr, OverrideTailCall: std::nullopt,
10102 DstPtrInfo: MachinePointerInfo(), SrcPtrInfo: MachinePointerInfo());
10103 ByValArgs.push_back(Elt: FIPtr);
10104 }
10105
10106 if (!IsTailCall)
10107 Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: 0, DL: CLI.DL);
10108
10109 // Copy argument values to their designated locations.
10110 SmallVector<std::pair<Register, SDValue>> RegsToPass;
10111 SmallVector<SDValue> MemOpChains;
10112 SDValue StackPtr;
10113 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
10114 ++i, ++OutIdx) {
10115 CCValAssign &VA = ArgLocs[i];
10116 SDValue ArgValue = OutVals[OutIdx];
10117 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
10118
10119 // Handle passing f64 on LA32D with a soft float ABI as a special case.
10120 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
10121 assert(VA.isRegLoc() && "Expected register VA assignment");
10122 assert(VA.needsCustom());
10123 SDValue SplitF64 =
10124 DAG.getNode(Opcode: LoongArchISD::SPLIT_PAIR_F64, DL,
10125 VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32), N: ArgValue);
10126 SDValue Lo = SplitF64.getValue(R: 0);
10127 SDValue Hi = SplitF64.getValue(R: 1);
10128
10129 Register RegLo = VA.getLocReg();
10130 RegsToPass.push_back(Elt: std::make_pair(x&: RegLo, y&: Lo));
10131
10132 // Get the CCValAssign for the Hi part.
10133 CCValAssign &HiVA = ArgLocs[++i];
10134
10135 if (HiVA.isMemLoc()) {
10136 // Second half of f64 is passed on the stack.
10137 if (!StackPtr.getNode())
10138 StackPtr = DAG.getCopyFromReg(Chain, dl: DL, Reg: LoongArch::R3, VT: PtrVT);
10139 SDValue Address =
10140 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr,
10141 N2: DAG.getIntPtrConstant(Val: HiVA.getLocMemOffset(), DL));
10142 // Emit the store.
10143 MemOpChains.push_back(Elt: DAG.getStore(
10144 Chain, dl: DL, Val: Hi, Ptr: Address,
10145 PtrInfo: MachinePointerInfo::getStack(MF, Offset: HiVA.getLocMemOffset())));
10146 } else {
10147 // Second half of f64 is passed in another GPR.
10148 Register RegHigh = HiVA.getLocReg();
10149 RegsToPass.push_back(Elt: std::make_pair(x&: RegHigh, y&: Hi));
10150 }
10151 continue;
10152 }
10153
10154 // Promote the value if needed.
10155 // For now, only handle fully promoted and indirect arguments.
10156 if (VA.getLocInfo() == CCValAssign::Indirect) {
10157 // For musttail calls, reuse incoming indirect pointers instead of
10158 // creating new stack temporaries. The incoming pointers point to the
10159 // caller's caller's frame, which remains valid after a tail call.
10160 if (IsTailCall && CLI.CB && CLI.CB->isMustTailCall()) {
10161 LoongArchMachineFunctionInfo *LAFI =
10162 MF.getInfo<LoongArchMachineFunctionInfo>();
10163 unsigned CallArgIdx = Outs[OutIdx].OrigArgIndex;
10164
10165 // Resolve which formal parameter is being passed at this call
10166 // position.
10167 //
10168 // FIXME: Ins[].OrigArgIndex is Argument::getArgNo() (unfiltered),
10169 // but Outs[].OrigArgIndex is an index into a filtered arg list
10170 // (empty types removed, via CallLoweringInfo in the target-
10171 // independent layer). IncomingIndirectArgs is keyed by the
10172 // caller's unfiltered Argument::getArgNo(), so we have to walk
10173 // the caller's formals (same filter) to translate the index.
10174 // This target-independent asymmetry should be normalized so
10175 // backends do not need to re-derive the mapping.
10176 //
10177 // Steps:
10178 // 1. Find the call operand at filtered position CallArgIdx.
10179 // 2. If it is an Argument, use getArgNo() directly (same filter
10180 // for caller formals and call operands).
10181 // 3. Otherwise (computed value), walk the caller's formals and
10182 // skip empty types to map the filtered index to getArgNo().
10183 const Argument *FormalArg = nullptr;
10184 unsigned FilteredIdx = 0;
10185 for (const auto &CallArg : CLI.CB->args()) {
10186 if (CallArg->getType()->isEmptyTy())
10187 continue;
10188 if (FilteredIdx == CallArgIdx) {
10189 FormalArg = dyn_cast<Argument>(Val: CallArg);
10190 break;
10191 }
10192 ++FilteredIdx;
10193 }
10194
10195 // For forwarded args, getArgNo() gives the unfiltered index directly.
10196 // For computed args, walk the caller's formals to resolve it.
10197 unsigned FormalArgIdx = CallArgIdx;
10198 if (FormalArg) {
10199 FormalArgIdx = FormalArg->getArgNo();
10200 } else {
10201 FilteredIdx = 0;
10202 for (const auto &Arg : MF.getFunction().args()) {
10203 if (Arg.getType()->isEmptyTy())
10204 continue;
10205 if (FilteredIdx == CallArgIdx) {
10206 FormalArgIdx = Arg.getArgNo();
10207 break;
10208 }
10209 ++FilteredIdx;
10210 }
10211 }
10212
10213 Register VReg = LAFI->getIncomingIndirectArg(ArgIndex: FormalArgIdx);
10214 SDValue CopyOp = DAG.getCopyFromReg(Chain, dl: DL, Reg: VReg, VT: PtrVT);
10215 // Thread the CopyFromReg output chain through MemOpChains so the
10216 // TokenFactor below sequences the copy with any stores we emit
10217 // for this argument.
10218 MemOpChains.push_back(Elt: CopyOp.getValue(R: 1));
10219 SDValue IncomingPtr = CopyOp;
10220
10221 if (!FormalArg) {
10222 // Computed value: store into the incoming indirect pointer for the
10223 // same-position formal parameter (musttail guarantees matching
10224 // prototypes, so types match). The pointer survives the tail call
10225 // since it points to the caller's caller's frame.
10226 //
10227 // The data-flow edge through IncomingPtr already prevents the
10228 // store from being scheduled before the CopyFromReg. Threading
10229 // CopyOp.getValue(1) (the copy's output chain) into the store
10230 // makes that ordering explicit on the chain edge as well, which
10231 // is the convention for memory ops chaining off their producers.
10232 MemOpChains.push_back(
10233 Elt: DAG.getStore(Chain: CopyOp.getValue(R: 1), dl: DL, Val: ArgValue, Ptr: IncomingPtr,
10234 PtrInfo: MachinePointerInfo::getUnknownStack(MF)));
10235 // Store any split parts at their respective offsets.
10236 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
10237 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == CallArgIdx) {
10238 SDValue PartValue = OutVals[OutIdx + 1];
10239 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
10240 SDValue Offset = DAG.getIntPtrConstant(Val: PartOffset, DL);
10241 SDValue Addr =
10242 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: IncomingPtr, N2: Offset);
10243 MemOpChains.push_back(
10244 Elt: DAG.getStore(Chain: CopyOp.getValue(R: 1), dl: DL, Val: PartValue, Ptr: Addr,
10245 PtrInfo: MachinePointerInfo::getUnknownStack(MF)));
10246 ++i;
10247 ++OutIdx;
10248 }
10249 }
10250 ArgValue = IncomingPtr;
10251
10252 // Skip any remaining split parts (for forwarded args, they are
10253 // covered by the forwarded pointer).
10254 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == CallArgIdx) {
10255 ++i;
10256 ++OutIdx;
10257 }
10258 } else {
10259 // Store the argument in a stack slot and pass its address.
10260 Align StackAlign =
10261 std::max(a: getPrefTypeAlign(VT: Outs[OutIdx].ArgVT, DAG),
10262 b: getPrefTypeAlign(VT: ArgValue.getValueType(), DAG));
10263 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
10264 // If the original argument was split and passed by reference, we need
10265 // to store the required parts of it here (and pass just one address).
10266 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
10267 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
10268 assert(ArgPartOffset == 0);
10269 // Calculate the total size to store. We don't have access to what we're
10270 // actually storing other than performing the loop and collecting the
10271 // info.
10272 SmallVector<std::pair<SDValue, SDValue>> Parts;
10273 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
10274 SDValue PartValue = OutVals[OutIdx + 1];
10275 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
10276 SDValue Offset = DAG.getIntPtrConstant(Val: PartOffset, DL);
10277 EVT PartVT = PartValue.getValueType();
10278 StoredSize += PartVT.getStoreSize();
10279 StackAlign = std::max(a: StackAlign, b: getPrefTypeAlign(VT: PartVT, DAG));
10280 Parts.push_back(Elt: std::make_pair(x&: PartValue, y&: Offset));
10281 ++i;
10282 ++OutIdx;
10283 }
10284 SDValue SpillSlot = DAG.CreateStackTemporary(Bytes: StoredSize, Alignment: StackAlign);
10285 int FI = cast<FrameIndexSDNode>(Val&: SpillSlot)->getIndex();
10286 MemOpChains.push_back(
10287 Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: SpillSlot,
10288 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)));
10289 for (const auto &Part : Parts) {
10290 SDValue PartValue = Part.first;
10291 SDValue PartOffset = Part.second;
10292 SDValue Address =
10293 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: SpillSlot, N2: PartOffset);
10294 MemOpChains.push_back(
10295 Elt: DAG.getStore(Chain, dl: DL, Val: PartValue, Ptr: Address,
10296 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)));
10297 }
10298 ArgValue = SpillSlot;
10299 }
10300 } else {
10301 ArgValue = convertValVTToLocVT(DAG, Val: ArgValue, VA, DL);
10302 }
10303
10304 // Use local copy if it is a byval arg.
10305 if (Flags.isByVal())
10306 ArgValue = ByValArgs[j++];
10307
10308 if (VA.isRegLoc()) {
10309 // Queue up the argument copies and emit them at the end.
10310 RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: ArgValue));
10311 } else {
10312 assert(VA.isMemLoc() && "Argument not register or memory");
10313 assert((!IsTailCall || (CLI.CB && CLI.CB->isMustTailCall())) &&
10314 "Tail call not allowed if stack is used for passing parameters");
10315
10316 // Work out the address of the stack slot.
10317 if (!StackPtr.getNode())
10318 StackPtr = DAG.getCopyFromReg(Chain, dl: DL, Reg: LoongArch::R3, VT: PtrVT);
10319 SDValue Address =
10320 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr,
10321 N2: DAG.getIntPtrConstant(Val: VA.getLocMemOffset(), DL));
10322
10323 // Emit the store.
10324 MemOpChains.push_back(
10325 Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: Address, PtrInfo: MachinePointerInfo()));
10326 }
10327 }
10328
10329 // Join the stores, which are independent of one another.
10330 if (!MemOpChains.empty())
10331 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: MemOpChains);
10332
10333 SDValue Glue;
10334
10335 // Build a sequence of copy-to-reg nodes, chained and glued together.
10336 for (auto &Reg : RegsToPass) {
10337 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: Reg.first, N: Reg.second, Glue);
10338 Glue = Chain.getValue(R: 1);
10339 }
10340
10341 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
10342 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
10343 // split it and then direct call can be matched by PseudoCALL_SMALL.
10344 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Val&: Callee)) {
10345 const GlobalValue *GV = S->getGlobal();
10346 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
10347 ? LoongArchII::MO_CALL
10348 : LoongArchII::MO_CALL_PLT;
10349 Callee = DAG.getTargetGlobalAddress(GV: S->getGlobal(), DL, VT: PtrVT, offset: 0, TargetFlags: OpFlags);
10350 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Val&: Callee)) {
10351 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV: nullptr)
10352 ? LoongArchII::MO_CALL
10353 : LoongArchII::MO_CALL_PLT;
10354 Callee = DAG.getTargetExternalSymbol(Sym: S->getSymbol(), VT: PtrVT, TargetFlags: OpFlags);
10355 }
10356
10357 // The first call operand is the chain and the second is the target address.
10358 SmallVector<SDValue> Ops;
10359 Ops.push_back(Elt: Chain);
10360 Ops.push_back(Elt: Callee);
10361
10362 // Add argument registers to the end of the list so that they are
10363 // known live into the call.
10364 for (auto &Reg : RegsToPass)
10365 Ops.push_back(Elt: DAG.getRegister(Reg: Reg.first, VT: Reg.second.getValueType()));
10366
10367 if (!IsTailCall) {
10368 // Add a register mask operand representing the call-preserved registers.
10369 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
10370 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
10371 assert(Mask && "Missing call preserved mask for calling convention");
10372 Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask));
10373 }
10374
10375 // Glue the call to the argument copies, if any.
10376 if (Glue.getNode())
10377 Ops.push_back(Elt: Glue);
10378
10379 // Emit the call.
10380 SDVTList NodeTys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue);
10381 unsigned Op;
10382 switch (DAG.getTarget().getCodeModel()) {
10383 default:
10384 report_fatal_error(reason: "Unsupported code model");
10385 case CodeModel::Small:
10386 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
10387 break;
10388 case CodeModel::Medium:
10389 Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
10390 break;
10391 case CodeModel::Large:
10392 assert(Subtarget.is64Bit() && "Large code model requires LA64");
10393 Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
10394 break;
10395 }
10396
10397 if (IsTailCall) {
10398 MF.getFrameInfo().setHasTailCall();
10399 SDValue Ret = DAG.getNode(Opcode: Op, DL, VTList: NodeTys, Ops);
10400 DAG.addNoMergeSiteInfo(Node: Ret.getNode(), NoMerge: CLI.NoMerge);
10401 return Ret;
10402 }
10403
10404 Chain = DAG.getNode(Opcode: Op, DL, VTList: NodeTys, Ops);
10405 DAG.addNoMergeSiteInfo(Node: Chain.getNode(), NoMerge: CLI.NoMerge);
10406 Glue = Chain.getValue(R: 1);
10407
10408 // Mark the end of the call, which is glued to the call itself.
10409 Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytes, Size2: 0, Glue, DL);
10410 Glue = Chain.getValue(R: 1);
10411
10412 // Assign locations to each value returned by this call.
10413 SmallVector<CCValAssign> RVLocs;
10414 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
10415 analyzeInputArgs(MF, CCInfo&: RetCCInfo, Ins, /*IsRet=*/true, Fn: CC_LoongArch);
10416
10417 // Copy all of the result registers out of their specified physreg.
10418 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
10419 auto &VA = RVLocs[i];
10420 // Copy the value out.
10421 SDValue RetValue =
10422 DAG.getCopyFromReg(Chain, dl: DL, Reg: VA.getLocReg(), VT: VA.getLocVT(), Glue);
10423 // Glue the RetValue to the end of the call sequence.
10424 Chain = RetValue.getValue(R: 1);
10425 Glue = RetValue.getValue(R: 2);
10426
10427 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
10428 assert(VA.needsCustom());
10429 SDValue RetValue2 = DAG.getCopyFromReg(Chain, dl: DL, Reg: RVLocs[++i].getLocReg(),
10430 VT: MVT::i32, Glue);
10431 Chain = RetValue2.getValue(R: 1);
10432 Glue = RetValue2.getValue(R: 2);
10433 RetValue = DAG.getNode(Opcode: LoongArchISD::BUILD_PAIR_F64, DL, VT: MVT::f64,
10434 N1: RetValue, N2: RetValue2);
10435 } else
10436 RetValue = convertLocVTToValVT(DAG, Val: RetValue, VA, DL);
10437
10438 InVals.push_back(Elt: RetValue);
10439 }
10440
10441 return Chain;
10442}
10443
10444bool LoongArchTargetLowering::CanLowerReturn(
10445 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
10446 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
10447 const Type *RetTy) const {
10448 SmallVector<CCValAssign> RVLocs;
10449 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
10450
10451 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
10452 LoongArchABI::ABI ABI =
10453 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
10454 if (CC_LoongArch(DL: MF.getDataLayout(), ABI, ValNo: i, ValVT: Outs[i].VT, LocInfo: CCValAssign::Full,
10455 ArgFlags: Outs[i].Flags, State&: CCInfo, /*IsRet=*/true, OrigTy: nullptr))
10456 return false;
10457 }
10458 return true;
10459}
10460
10461SDValue LoongArchTargetLowering::LowerReturn(
10462 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
10463 const SmallVectorImpl<ISD::OutputArg> &Outs,
10464 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
10465 SelectionDAG &DAG) const {
10466 // Stores the assignment of the return value to a location.
10467 SmallVector<CCValAssign> RVLocs;
10468
10469 // Info about the registers and stack slot.
10470 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
10471 *DAG.getContext());
10472
10473 analyzeOutputArgs(MF&: DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
10474 CLI: nullptr, Fn: CC_LoongArch);
10475 if (CallConv == CallingConv::GHC && !RVLocs.empty())
10476 report_fatal_error(reason: "GHC functions return void only");
10477 SDValue Glue;
10478 SmallVector<SDValue, 4> RetOps(1, Chain);
10479
10480 // Copy the result values into the output registers.
10481 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
10482 SDValue Val = OutVals[OutIdx];
10483 CCValAssign &VA = RVLocs[i];
10484 assert(VA.isRegLoc() && "Can only return in registers!");
10485
10486 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
10487 // Handle returning f64 on LA32D with a soft float ABI.
10488 assert(VA.isRegLoc() && "Expected return via registers");
10489 assert(VA.needsCustom());
10490 SDValue SplitF64 = DAG.getNode(Opcode: LoongArchISD::SPLIT_PAIR_F64, DL,
10491 VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32), N: Val);
10492 SDValue Lo = SplitF64.getValue(R: 0);
10493 SDValue Hi = SplitF64.getValue(R: 1);
10494 Register RegLo = VA.getLocReg();
10495 Register RegHi = RVLocs[++i].getLocReg();
10496
10497 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: RegLo, N: Lo, Glue);
10498 Glue = Chain.getValue(R: 1);
10499 RetOps.push_back(Elt: DAG.getRegister(Reg: RegLo, VT: MVT::i32));
10500 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: RegHi, N: Hi, Glue);
10501 Glue = Chain.getValue(R: 1);
10502 RetOps.push_back(Elt: DAG.getRegister(Reg: RegHi, VT: MVT::i32));
10503 } else {
10504 // Handle a 'normal' return.
10505 Val = convertValVTToLocVT(DAG, Val, VA, DL);
10506 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: VA.getLocReg(), N: Val, Glue);
10507
10508 // Guarantee that all emitted copies are stuck together.
10509 Glue = Chain.getValue(R: 1);
10510 RetOps.push_back(Elt: DAG.getRegister(Reg: VA.getLocReg(), VT: VA.getLocVT()));
10511 }
10512 }
10513
10514 RetOps[0] = Chain; // Update chain.
10515
10516 // Add the glue node if we have it.
10517 if (Glue.getNode())
10518 RetOps.push_back(Elt: Glue);
10519
10520 return DAG.getNode(Opcode: LoongArchISD::RET, DL, VT: MVT::Other, Ops: RetOps);
10521}
10522
10523// Check if a constant splat can be generated using [x]vldi, where imm[12] == 1.
10524// Note: The following prefixes are excluded:
10525// imm[11:8] == 4'b0000, 4'b0100, 4'b1000
10526// as they can be represented using [x]vrepli.[whb]
10527std::pair<bool, uint64_t> LoongArchTargetLowering::isImmVLDILegalForMode1(
10528 const APInt &SplatValue, const unsigned SplatBitSize) const {
10529 uint64_t RequiredImm = 0;
10530 uint64_t V = SplatValue.getZExtValue();
10531 if (SplatBitSize == 16 && !(V & 0x00FF)) {
10532 // 4'b0101
10533 RequiredImm = (0b10101 << 8) | (V >> 8);
10534 return {true, RequiredImm};
10535 } else if (SplatBitSize == 32) {
10536 // 4'b0001
10537 if (!(V & 0xFFFF00FF)) {
10538 RequiredImm = (0b10001 << 8) | (V >> 8);
10539 return {true, RequiredImm};
10540 }
10541 // 4'b0010
10542 if (!(V & 0xFF00FFFF)) {
10543 RequiredImm = (0b10010 << 8) | (V >> 16);
10544 return {true, RequiredImm};
10545 }
10546 // 4'b0011
10547 if (!(V & 0x00FFFFFF)) {
10548 RequiredImm = (0b10011 << 8) | (V >> 24);
10549 return {true, RequiredImm};
10550 }
10551 // 4'b0110
10552 if ((V & 0xFFFF00FF) == 0xFF) {
10553 RequiredImm = (0b10110 << 8) | (V >> 8);
10554 return {true, RequiredImm};
10555 }
10556 // 4'b0111
10557 if ((V & 0xFF00FFFF) == 0xFFFF) {
10558 RequiredImm = (0b10111 << 8) | (V >> 16);
10559 return {true, RequiredImm};
10560 }
10561 // 4'b1010
10562 if ((V & 0x7E07FFFF) == 0x3E000000 || (V & 0x7E07FFFF) == 0x40000000) {
10563 RequiredImm =
10564 (0b11010 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
10565 return {true, RequiredImm};
10566 }
10567 } else if (SplatBitSize == 64) {
10568 // 4'b1011
10569 if ((V & 0xFFFFFFFF7E07FFFFULL) == 0x3E000000ULL ||
10570 (V & 0xFFFFFFFF7E07FFFFULL) == 0x40000000ULL) {
10571 RequiredImm =
10572 (0b11011 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
10573 return {true, RequiredImm};
10574 }
10575 // 4'b1100
10576 if ((V & 0x7FC0FFFFFFFFFFFFULL) == 0x4000000000000000ULL ||
10577 (V & 0x7FC0FFFFFFFFFFFFULL) == 0x3FC0000000000000ULL) {
10578 RequiredImm =
10579 (0b11100 << 8) | (((V >> 56) & 0xC0) ^ 0x40) | ((V >> 48) & 0x3F);
10580 return {true, RequiredImm};
10581 }
10582 // 4'b1001
10583 auto sameBitsPreByte = [](uint64_t x) -> std::pair<bool, uint8_t> {
10584 uint8_t res = 0;
10585 for (int i = 0; i < 8; ++i) {
10586 uint8_t byte = x & 0xFF;
10587 if (byte == 0 || byte == 0xFF)
10588 res |= ((byte & 1) << i);
10589 else
10590 return {false, 0};
10591 x >>= 8;
10592 }
10593 return {true, res};
10594 };
10595 auto [IsSame, Suffix] = sameBitsPreByte(V);
10596 if (IsSame) {
10597 RequiredImm = (0b11001 << 8) | Suffix;
10598 return {true, RequiredImm};
10599 }
10600 }
10601 return {false, RequiredImm};
10602}
10603
10604bool LoongArchTargetLowering::isFPImmVLDILegal(const APFloat &Imm,
10605 EVT VT) const {
10606 if (!Subtarget.hasExtLSX())
10607 return false;
10608
10609 if (VT == MVT::f32) {
10610 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
10611 return (masked == 0x3e000000 || masked == 0x40000000);
10612 }
10613
10614 if (VT == MVT::f64) {
10615 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
10616 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
10617 }
10618
10619 return false;
10620}
10621
10622bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
10623 bool ForCodeSize) const {
10624 // TODO: Maybe need more checks here after vector extension is supported.
10625 if (VT == MVT::f32 && !Subtarget.hasBasicF())
10626 return false;
10627 if (VT == MVT::f64 && !Subtarget.hasBasicD())
10628 return false;
10629 return (Imm.isZero() || Imm.isOne() || isFPImmVLDILegal(Imm, VT));
10630}
10631
10632bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const {
10633 return true;
10634}
10635
10636bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const {
10637 return true;
10638}
10639
10640bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
10641 const Instruction *I) const {
10642 if (!Subtarget.is64Bit())
10643 return isa<LoadInst>(Val: I) || isa<StoreInst>(Val: I);
10644
10645 if (isa<LoadInst>(Val: I))
10646 return true;
10647
10648 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
10649 // require fences beacuse we can use amswap_db.[w/d].
10650 Type *Ty = I->getOperand(i: 0)->getType();
10651 if (isa<StoreInst>(Val: I) && Ty->isIntegerTy()) {
10652 unsigned Size = Ty->getIntegerBitWidth();
10653 return (Size == 8 || Size == 16);
10654 }
10655
10656 return false;
10657}
10658
10659EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
10660 LLVMContext &Context,
10661 EVT VT) const {
10662 if (!VT.isVector())
10663 return getPointerTy(DL);
10664 return VT.changeVectorElementTypeToInteger();
10665}
10666
10667bool LoongArchTargetLowering::canMergeStoresTo(
10668 unsigned AddressSpace, EVT MemVT, const MachineFunction &MF) const {
10669 // Do not merge to float value size (128 or 256 bits) if no implicit
10670 // float attribute is set.
10671 bool NoFloat = MF.getFunction().hasFnAttribute(Kind: Attribute::NoImplicitFloat);
10672 unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
10673 if (NoFloat)
10674 return MemVT.getSizeInBits() <= MaxIntSize;
10675
10676 // Make sure we don't merge greater than our maximum supported vector width.
10677 if (Subtarget.hasExtLASX())
10678 MaxIntSize = 256;
10679 else if (Subtarget.hasExtLSX())
10680 MaxIntSize = 128;
10681
10682 return MemVT.getSizeInBits() <= MaxIntSize;
10683}
10684
10685bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
10686 EVT VT = Y.getValueType();
10687
10688 if (VT.isVector())
10689 return Subtarget.hasExtLSX() && VT.isInteger();
10690
10691 return VT.isScalarInteger() && !isa<ConstantSDNode>(Val: Y);
10692}
10693
10694void LoongArchTargetLowering::getTgtMemIntrinsic(
10695 SmallVectorImpl<IntrinsicInfo> &Infos, const CallBase &I,
10696 MachineFunction &MF, unsigned Intrinsic) const {
10697 switch (Intrinsic) {
10698 default:
10699 return;
10700 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
10701 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
10702 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
10703 case Intrinsic::loongarch_masked_atomicrmw_nand_i32: {
10704 IntrinsicInfo Info;
10705 Info.opc = ISD::INTRINSIC_W_CHAIN;
10706 Info.memVT = MVT::i32;
10707 Info.ptrVal = I.getArgOperand(i: 0);
10708 Info.offset = 0;
10709 Info.align = Align(4);
10710 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
10711 MachineMemOperand::MOVolatile;
10712 Infos.push_back(Elt: Info);
10713 return;
10714 // TODO: Add more Intrinsics later.
10715 }
10716 }
10717}
10718
10719// When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8,
10720// atomicrmw and/or/xor operations with operands less than 32 bits cannot be
10721// expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent
10722// regression, we need to implement it manually.
10723void LoongArchTargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
10724 AtomicRMWInst::BinOp Op = AI->getOperation();
10725
10726 assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
10727 Op == AtomicRMWInst::And) &&
10728 "Unable to expand");
10729 unsigned MinWordSize = 4;
10730
10731 IRBuilder<> Builder(AI);
10732 LLVMContext &Ctx = Builder.getContext();
10733 const DataLayout &DL = AI->getDataLayout();
10734 Type *ValueType = AI->getType();
10735 Type *WordType = Type::getIntNTy(C&: Ctx, N: MinWordSize * 8);
10736
10737 Value *Addr = AI->getPointerOperand();
10738 PointerType *PtrTy = cast<PointerType>(Val: Addr->getType());
10739 IntegerType *IntTy = DL.getIndexType(C&: Ctx, AddressSpace: PtrTy->getAddressSpace());
10740
10741 Value *AlignedAddr = Builder.CreateIntrinsic(
10742 ID: Intrinsic::ptrmask, OverloadTypes: {PtrTy, IntTy},
10743 Args: {Addr, ConstantInt::get(Ty: IntTy, V: ~(uint64_t)(MinWordSize - 1))}, FMFSource: nullptr,
10744 Name: "AlignedAddr");
10745
10746 Value *AddrInt = Builder.CreatePtrToInt(V: Addr, DestTy: IntTy);
10747 Value *PtrLSB = Builder.CreateAnd(LHS: AddrInt, RHS: MinWordSize - 1, Name: "PtrLSB");
10748 Value *ShiftAmt = Builder.CreateShl(LHS: PtrLSB, RHS: 3);
10749 ShiftAmt = Builder.CreateTrunc(V: ShiftAmt, DestTy: WordType, Name: "ShiftAmt");
10750 Value *Mask = Builder.CreateShl(
10751 LHS: ConstantInt::get(Ty: WordType,
10752 V: (1 << (DL.getTypeStoreSize(Ty: ValueType) * 8)) - 1),
10753 RHS: ShiftAmt, Name: "Mask");
10754 Value *Inv_Mask = Builder.CreateNot(V: Mask, Name: "Inv_Mask");
10755 Value *ValOperand_Shifted =
10756 Builder.CreateShl(LHS: Builder.CreateZExt(V: AI->getValOperand(), DestTy: WordType),
10757 RHS: ShiftAmt, Name: "ValOperand_Shifted");
10758 Value *NewOperand;
10759 if (Op == AtomicRMWInst::And)
10760 NewOperand = Builder.CreateOr(LHS: ValOperand_Shifted, RHS: Inv_Mask, Name: "AndOperand");
10761 else
10762 NewOperand = ValOperand_Shifted;
10763
10764 AtomicRMWInst *NewAI =
10765 Builder.CreateAtomicRMW(Op, Ptr: AlignedAddr, Val: NewOperand, Align: Align(MinWordSize),
10766 Ordering: AI->getOrdering(), SSID: AI->getSyncScopeID());
10767
10768 Value *Shift = Builder.CreateLShr(LHS: NewAI, RHS: ShiftAmt, Name: "shifted");
10769 Value *Trunc = Builder.CreateTrunc(V: Shift, DestTy: ValueType, Name: "extracted");
10770 Value *FinalOldResult = Builder.CreateBitCast(V: Trunc, DestTy: ValueType);
10771 AI->replaceAllUsesWith(V: FinalOldResult);
10772 AI->eraseFromParent();
10773}
10774
10775TargetLowering::AtomicExpansionKind
10776LoongArchTargetLowering::shouldExpandAtomicRMWInIR(
10777 const AtomicRMWInst *AI) const {
10778 // TODO: Add more AtomicRMWInst that needs to be extended.
10779
10780 // Since floating-point operation requires a non-trivial set of data
10781 // operations, use CmpXChg to expand.
10782 if (AI->isFloatingPointOperation() ||
10783 AI->getOperation() == AtomicRMWInst::UIncWrap ||
10784 AI->getOperation() == AtomicRMWInst::UDecWrap ||
10785 AI->getOperation() == AtomicRMWInst::USubCond ||
10786 AI->getOperation() == AtomicRMWInst::USubSat)
10787 return AtomicExpansionKind::CmpXChg;
10788
10789 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
10790 (AI->getOperation() == AtomicRMWInst::Xchg ||
10791 AI->getOperation() == AtomicRMWInst::Add ||
10792 AI->getOperation() == AtomicRMWInst::Sub)) {
10793 return AtomicExpansionKind::None;
10794 }
10795
10796 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
10797 if (Subtarget.hasLAMCAS()) {
10798 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
10799 AI->getOperation() == AtomicRMWInst::Or ||
10800 AI->getOperation() == AtomicRMWInst::Xor))
10801 return AtomicExpansionKind::CustomExpand;
10802 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
10803 return AtomicExpansionKind::CmpXChg;
10804 }
10805
10806 if (Size == 8 || Size == 16)
10807 return AtomicExpansionKind::MaskedIntrinsic;
10808 return AtomicExpansionKind::None;
10809}
10810
10811static Intrinsic::ID
10812getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
10813 AtomicRMWInst::BinOp BinOp) {
10814 if (GRLen == 64) {
10815 switch (BinOp) {
10816 default:
10817 llvm_unreachable("Unexpected AtomicRMW BinOp");
10818 case AtomicRMWInst::Xchg:
10819 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
10820 case AtomicRMWInst::Add:
10821 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
10822 case AtomicRMWInst::Sub:
10823 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
10824 case AtomicRMWInst::Nand:
10825 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
10826 case AtomicRMWInst::UMax:
10827 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
10828 case AtomicRMWInst::UMin:
10829 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
10830 case AtomicRMWInst::Max:
10831 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
10832 case AtomicRMWInst::Min:
10833 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
10834 // TODO: support other AtomicRMWInst.
10835 }
10836 }
10837
10838 if (GRLen == 32) {
10839 switch (BinOp) {
10840 default:
10841 llvm_unreachable("Unexpected AtomicRMW BinOp");
10842 case AtomicRMWInst::Xchg:
10843 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
10844 case AtomicRMWInst::Add:
10845 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
10846 case AtomicRMWInst::Sub:
10847 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
10848 case AtomicRMWInst::Nand:
10849 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
10850 case AtomicRMWInst::UMax:
10851 return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
10852 case AtomicRMWInst::UMin:
10853 return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
10854 case AtomicRMWInst::Max:
10855 return Intrinsic::loongarch_masked_atomicrmw_max_i32;
10856 case AtomicRMWInst::Min:
10857 return Intrinsic::loongarch_masked_atomicrmw_min_i32;
10858 // TODO: support other AtomicRMWInst.
10859 }
10860 }
10861
10862 llvm_unreachable("Unexpected GRLen\n");
10863}
10864
10865TargetLowering::AtomicExpansionKind
10866LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
10867 const AtomicCmpXchgInst *CI) const {
10868
10869 if (Subtarget.hasLAMCAS())
10870 return AtomicExpansionKind::None;
10871
10872 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
10873 if (Size == 8 || Size == 16)
10874 return AtomicExpansionKind::MaskedIntrinsic;
10875 return AtomicExpansionKind::None;
10876}
10877
10878Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
10879 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
10880 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
10881 unsigned GRLen = Subtarget.getGRLen();
10882 AtomicOrdering FailOrd = CI->getFailureOrdering();
10883 Value *FailureOrdering =
10884 Builder.getIntN(N: Subtarget.getGRLen(), C: static_cast<uint64_t>(FailOrd));
10885 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
10886 if (GRLen == 64) {
10887 CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
10888 CmpVal = Builder.CreateSExt(V: CmpVal, DestTy: Builder.getInt64Ty());
10889 NewVal = Builder.CreateSExt(V: NewVal, DestTy: Builder.getInt64Ty());
10890 Mask = Builder.CreateSExt(V: Mask, DestTy: Builder.getInt64Ty());
10891 }
10892 Type *Tys[] = {AlignedAddr->getType()};
10893 Value *Result = Builder.CreateIntrinsic(
10894 ID: CmpXchgIntrID, OverloadTypes: Tys, Args: {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
10895 if (GRLen == 64)
10896 Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty());
10897 return Result;
10898}
10899
10900Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
10901 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
10902 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
10903 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
10904 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
10905 // mask, as this produces better code than the LL/SC loop emitted by
10906 // int_loongarch_masked_atomicrmw_xchg.
10907 if (AI->getOperation() == AtomicRMWInst::Xchg &&
10908 isa<ConstantInt>(Val: AI->getValOperand())) {
10909 ConstantInt *CVal = cast<ConstantInt>(Val: AI->getValOperand());
10910 if (CVal->isZero())
10911 return Builder.CreateAtomicRMW(Op: AtomicRMWInst::And, Ptr: AlignedAddr,
10912 Val: Builder.CreateNot(V: Mask, Name: "Inv_Mask"),
10913 Align: AI->getAlign(), Ordering: Ord);
10914 if (CVal->isMinusOne())
10915 return Builder.CreateAtomicRMW(Op: AtomicRMWInst::Or, Ptr: AlignedAddr, Val: Mask,
10916 Align: AI->getAlign(), Ordering: Ord);
10917 }
10918
10919 unsigned GRLen = Subtarget.getGRLen();
10920 Value *Ordering =
10921 Builder.getIntN(N: GRLen, C: static_cast<uint64_t>(AI->getOrdering()));
10922 Type *Tys[] = {AlignedAddr->getType()};
10923 Function *LlwOpScwLoop = Intrinsic::getOrInsertDeclaration(
10924 M: AI->getModule(),
10925 id: getIntrinsicForMaskedAtomicRMWBinOp(GRLen, BinOp: AI->getOperation()), OverloadTys: Tys);
10926
10927 if (GRLen == 64) {
10928 Incr = Builder.CreateSExt(V: Incr, DestTy: Builder.getInt64Ty());
10929 Mask = Builder.CreateSExt(V: Mask, DestTy: Builder.getInt64Ty());
10930 ShiftAmt = Builder.CreateSExt(V: ShiftAmt, DestTy: Builder.getInt64Ty());
10931 }
10932
10933 Value *Result;
10934
10935 // Must pass the shift amount needed to sign extend the loaded value prior
10936 // to performing a signed comparison for min/max. ShiftAmt is the number of
10937 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
10938 // is the number of bits to left+right shift the value in order to
10939 // sign-extend.
10940 if (AI->getOperation() == AtomicRMWInst::Min ||
10941 AI->getOperation() == AtomicRMWInst::Max) {
10942 const DataLayout &DL = AI->getDataLayout();
10943 unsigned ValWidth =
10944 DL.getTypeStoreSizeInBits(Ty: AI->getValOperand()->getType());
10945 Value *SextShamt =
10946 Builder.CreateSub(LHS: Builder.getIntN(N: GRLen, C: GRLen - ValWidth), RHS: ShiftAmt);
10947 Result = Builder.CreateCall(Callee: LlwOpScwLoop,
10948 Args: {AlignedAddr, Incr, Mask, SextShamt, Ordering});
10949 } else {
10950 Result =
10951 Builder.CreateCall(Callee: LlwOpScwLoop, Args: {AlignedAddr, Incr, Mask, Ordering});
10952 }
10953
10954 if (GRLen == 64)
10955 Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty());
10956 return Result;
10957}
10958
10959bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
10960 const MachineFunction &MF, EVT VT) const {
10961 VT = VT.getScalarType();
10962
10963 if (!VT.isSimple())
10964 return false;
10965
10966 switch (VT.getSimpleVT().SimpleTy) {
10967 case MVT::f32:
10968 case MVT::f64:
10969 return true;
10970 default:
10971 break;
10972 }
10973
10974 return false;
10975}
10976
10977Register LoongArchTargetLowering::getExceptionPointerRegister(
10978 const Constant *PersonalityFn) const {
10979 return LoongArch::R4;
10980}
10981
10982Register LoongArchTargetLowering::getExceptionSelectorRegister(
10983 const Constant *PersonalityFn) const {
10984 return LoongArch::R5;
10985}
10986
10987//===----------------------------------------------------------------------===//
10988// Target Optimization Hooks
10989//===----------------------------------------------------------------------===//
10990
10991static int getEstimateRefinementSteps(EVT VT,
10992 const LoongArchSubtarget &Subtarget) {
10993 // Feature FRECIPE instrucions relative accuracy is 2^-14.
10994 // IEEE float has 23 digits and double has 52 digits.
10995 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
10996 return RefinementSteps;
10997}
10998
10999static bool
11000isSupportedReciprocalEstimateType(EVT VT, const LoongArchSubtarget &Subtarget) {
11001 assert(Subtarget.hasFrecipe() &&
11002 "Reciprocal estimate queried on unsupported target");
11003
11004 if (!VT.isSimple())
11005 return false;
11006
11007 switch (VT.getSimpleVT().SimpleTy) {
11008 case MVT::f32:
11009 // f32 is the base type for reciprocal estimate instructions.
11010 return true;
11011
11012 case MVT::f64:
11013 return Subtarget.hasBasicD();
11014
11015 case MVT::v4f32:
11016 case MVT::v2f64:
11017 return Subtarget.hasExtLSX();
11018
11019 case MVT::v8f32:
11020 case MVT::v4f64:
11021 return Subtarget.hasExtLASX();
11022
11023 default:
11024 return false;
11025 }
11026}
11027
11028SDValue LoongArchTargetLowering::getSqrtEstimate(SDValue Operand,
11029 SelectionDAG &DAG, int Enabled,
11030 int &RefinementSteps,
11031 bool &UseOneConstNR,
11032 bool Reciprocal) const {
11033 assert(Enabled != ReciprocalEstimate::Disabled &&
11034 "Enabled should never be Disabled here");
11035
11036 if (!Subtarget.hasFrecipe())
11037 return SDValue();
11038
11039 SDLoc DL(Operand);
11040 EVT VT = Operand.getValueType();
11041
11042 // Check supported types.
11043 if (!isSupportedReciprocalEstimateType(VT, Subtarget))
11044 return SDValue();
11045
11046 // Handle refinement steps.
11047 if (RefinementSteps == ReciprocalEstimate::Unspecified)
11048 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
11049
11050 // LoongArch only has FRSQRTE which is 1.0 / sqrt(x).
11051 UseOneConstNR = false;
11052 SDValue Rsqrt = DAG.getNode(Opcode: LoongArchISD::FRSQRTE, DL, VT, Operand);
11053
11054 // If the caller wants 1.0 / sqrt(x), or if further refinement steps
11055 // are needed (which rely on the reciprocal form), return the raw reciprocal
11056 // estimate.
11057 if (Reciprocal || RefinementSteps > 0)
11058 return Rsqrt;
11059
11060 // Otherwise, return sqrt(x) by multiplying with the operand.
11061 return DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: Operand, N2: Rsqrt);
11062}
11063
11064SDValue LoongArchTargetLowering::getRecipEstimate(SDValue Operand,
11065 SelectionDAG &DAG,
11066 int Enabled,
11067 int &RefinementSteps) const {
11068 assert(Enabled != ReciprocalEstimate::Disabled &&
11069 "Enabled should never be Disabled here");
11070
11071 if (!Subtarget.hasFrecipe())
11072 return SDValue();
11073
11074 SDLoc DL(Operand);
11075 EVT VT = Operand.getValueType();
11076
11077 // Check supported types.
11078 if (!isSupportedReciprocalEstimateType(VT, Subtarget))
11079 return SDValue();
11080
11081 if (RefinementSteps == ReciprocalEstimate::Unspecified)
11082 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
11083
11084 // FRECIPE computes 1.0 / x.
11085 return DAG.getNode(Opcode: LoongArchISD::FRECIPE, DL, VT, Operand);
11086}
11087
11088//===----------------------------------------------------------------------===//
11089// LoongArch Inline Assembly Support
11090//===----------------------------------------------------------------------===//
11091
11092LoongArchTargetLowering::ConstraintType
11093LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
11094 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
11095 //
11096 // 'f': A floating-point register (if available).
11097 // 'k': A memory operand whose address is formed by a base register and
11098 // (optionally scaled) index register.
11099 // 'l': A signed 16-bit constant.
11100 // 'm': A memory operand whose address is formed by a base register and
11101 // offset that is suitable for use in instructions with the same
11102 // addressing mode as st.w and ld.w.
11103 // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg
11104 // instruction)
11105 // 'I': A signed 12-bit constant (for arithmetic instructions).
11106 // 'J': Integer zero.
11107 // 'K': An unsigned 12-bit constant (for logic instructions).
11108 // "ZB": An address that is held in a general-purpose register. The offset is
11109 // zero.
11110 // "ZC": A memory operand whose address is formed by a base register and
11111 // offset that is suitable for use in instructions with the same
11112 // addressing mode as ll.w and sc.w.
11113 if (Constraint.size() == 1) {
11114 switch (Constraint[0]) {
11115 default:
11116 break;
11117 case 'f':
11118 case 'q':
11119 return C_RegisterClass;
11120 case 'l':
11121 case 'I':
11122 case 'J':
11123 case 'K':
11124 return C_Immediate;
11125 case 'k':
11126 return C_Memory;
11127 }
11128 }
11129
11130 if (Constraint == "ZC" || Constraint == "ZB")
11131 return C_Memory;
11132
11133 // 'm' is handled here.
11134 return TargetLowering::getConstraintType(Constraint);
11135}
11136
11137InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
11138 StringRef ConstraintCode) const {
11139 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
11140 .Case(S: "k", Value: InlineAsm::ConstraintCode::k)
11141 .Case(S: "ZB", Value: InlineAsm::ConstraintCode::ZB)
11142 .Case(S: "ZC", Value: InlineAsm::ConstraintCode::ZC)
11143 .Default(Value: TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
11144}
11145
11146std::pair<unsigned, const TargetRegisterClass *>
11147LoongArchTargetLowering::getRegForInlineAsmConstraint(
11148 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
11149 // First, see if this is a constraint that directly corresponds to a LoongArch
11150 // register class.
11151 if (Constraint.size() == 1) {
11152 switch (Constraint[0]) {
11153 case 'r':
11154 // TODO: Support fixed vectors up to GRLen?
11155 if (VT.isVector())
11156 break;
11157 return std::make_pair(x: 0U, y: &LoongArch::GPRRegClass);
11158 case 'q':
11159 return std::make_pair(x: 0U, y: &LoongArch::GPRNoR0R1RegClass);
11160 case 'f':
11161 if (Subtarget.hasBasicF() && VT == MVT::f32)
11162 return std::make_pair(x: 0U, y: &LoongArch::FPR32RegClass);
11163 if (Subtarget.hasBasicD() && VT == MVT::f64)
11164 return std::make_pair(x: 0U, y: &LoongArch::FPR64RegClass);
11165 if (Subtarget.hasExtLSX() &&
11166 TRI->isTypeLegalForClass(RC: LoongArch::LSX128RegClass, T: VT))
11167 return std::make_pair(x: 0U, y: &LoongArch::LSX128RegClass);
11168 if (Subtarget.hasExtLASX() &&
11169 TRI->isTypeLegalForClass(RC: LoongArch::LASX256RegClass, T: VT))
11170 return std::make_pair(x: 0U, y: &LoongArch::LASX256RegClass);
11171 break;
11172 default:
11173 break;
11174 }
11175 }
11176
11177 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
11178 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
11179 // constraints while the official register name is prefixed with a '$'. So we
11180 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
11181 // before it being parsed. And TargetLowering::getRegForInlineAsmConstraint is
11182 // case insensitive, so no need to convert the constraint to upper case here.
11183 //
11184 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
11185 // decode the usage of register name aliases into their official names. And
11186 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
11187 // official register names.
11188 if (Constraint.starts_with(Prefix: "{$r") || Constraint.starts_with(Prefix: "{$f") ||
11189 Constraint.starts_with(Prefix: "{$vr") || Constraint.starts_with(Prefix: "{$xr")) {
11190 bool IsFP = Constraint[2] == 'f';
11191 std::pair<StringRef, StringRef> Temp = Constraint.split(Separator: '$');
11192 std::pair<unsigned, const TargetRegisterClass *> R;
11193 R = TargetLowering::getRegForInlineAsmConstraint(
11194 TRI, Constraint: join_items(Separator: "", Items&: Temp.first, Items&: Temp.second), VT);
11195 // Match those names to the widest floating point register type available.
11196 if (IsFP) {
11197 unsigned RegNo = R.first;
11198 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
11199 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
11200 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
11201 return std::make_pair(x&: DReg, y: &LoongArch::FPR64RegClass);
11202 }
11203 }
11204 }
11205 return R;
11206 }
11207
11208 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
11209}
11210
11211void LoongArchTargetLowering::LowerAsmOperandForConstraint(
11212 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
11213 SelectionDAG &DAG) const {
11214 // Currently only support length 1 constraints.
11215 if (Constraint.size() == 1) {
11216 switch (Constraint[0]) {
11217 case 'l':
11218 // Validate & create a 16-bit signed immediate operand.
11219 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) {
11220 uint64_t CVal = C->getSExtValue();
11221 if (isInt<16>(x: CVal))
11222 Ops.push_back(x: DAG.getSignedTargetConstant(Val: CVal, DL: SDLoc(Op),
11223 VT: Subtarget.getGRLenVT()));
11224 }
11225 return;
11226 case 'I':
11227 // Validate & create a 12-bit signed immediate operand.
11228 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) {
11229 uint64_t CVal = C->getSExtValue();
11230 if (isInt<12>(x: CVal))
11231 Ops.push_back(x: DAG.getSignedTargetConstant(Val: CVal, DL: SDLoc(Op),
11232 VT: Subtarget.getGRLenVT()));
11233 }
11234 return;
11235 case 'J':
11236 // Validate & create an integer zero operand.
11237 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op))
11238 if (C->getZExtValue() == 0)
11239 Ops.push_back(
11240 x: DAG.getTargetConstant(Val: 0, DL: SDLoc(Op), VT: Subtarget.getGRLenVT()));
11241 return;
11242 case 'K':
11243 // Validate & create a 12-bit unsigned immediate operand.
11244 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) {
11245 uint64_t CVal = C->getZExtValue();
11246 if (isUInt<12>(x: CVal))
11247 Ops.push_back(
11248 x: DAG.getTargetConstant(Val: CVal, DL: SDLoc(Op), VT: Subtarget.getGRLenVT()));
11249 }
11250 return;
11251 default:
11252 break;
11253 }
11254 }
11255 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
11256}
11257
11258#define GET_REGISTER_MATCHER
11259#include "LoongArchGenAsmMatcher.inc"
11260
11261Register
11262LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
11263 const MachineFunction &MF) const {
11264 std::pair<StringRef, StringRef> Name = StringRef(RegName).split(Separator: '$');
11265 std::string NewRegName = Name.second.str();
11266 Register Reg = MatchRegisterAltName(Name: NewRegName);
11267 if (!Reg)
11268 Reg = MatchRegisterName(Name: NewRegName);
11269 if (!Reg)
11270 return Reg;
11271 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
11272 if (!ReservedRegs.test(Idx: Reg))
11273 report_fatal_error(reason: Twine("Trying to obtain non-reserved register \"" +
11274 StringRef(RegName) + "\"."));
11275 return Reg;
11276}
11277
11278bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
11279 EVT VT, SDValue C) const {
11280 // TODO: Support vectors.
11281 if (!VT.isScalarInteger())
11282 return false;
11283
11284 // Omit the optimization if the data size exceeds GRLen.
11285 if (VT.getSizeInBits() > Subtarget.getGRLen())
11286 return false;
11287
11288 if (auto *ConstNode = dyn_cast<ConstantSDNode>(Val: C.getNode())) {
11289 const APInt &Imm = ConstNode->getAPIntValue();
11290 // Break MUL into (SLLI + ADD/SUB) or ALSL.
11291 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
11292 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
11293 return true;
11294 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
11295 if (ConstNode->hasOneUse() &&
11296 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
11297 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
11298 return true;
11299 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
11300 // in which the immediate has two set bits. Or Break (MUL x, imm)
11301 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
11302 // equals to (1 << s0) - (1 << s1).
11303 if (ConstNode->hasOneUse() && !(Imm.sge(RHS: -2048) && Imm.sle(RHS: 4095))) {
11304 unsigned Shifts = Imm.countr_zero();
11305 // Reject immediates which can be composed via a single LUI.
11306 if (Shifts >= 12)
11307 return false;
11308 // Reject multiplications can be optimized to
11309 // (SLLI (ALSL x, x, 1/2/3/4), s).
11310 APInt ImmPop = Imm.ashr(ShiftAmt: Shifts);
11311 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
11312 return false;
11313 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
11314 // since it needs one more instruction than other 3 cases.
11315 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
11316 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
11317 (ImmSmall - Imm).isPowerOf2())
11318 return true;
11319 }
11320 }
11321
11322 return false;
11323}
11324
11325bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
11326 const AddrMode &AM,
11327 Type *Ty, unsigned AS,
11328 Instruction *I) const {
11329 // LoongArch has four basic addressing modes:
11330 // 1. reg
11331 // 2. reg + 12-bit signed offset
11332 // 3. reg + 14-bit signed offset left-shifted by 2
11333 // 4. reg1 + reg2
11334 // TODO: Add more checks after support vector extension.
11335
11336 // No global is ever allowed as a base.
11337 if (AM.BaseGV)
11338 return false;
11339
11340 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
11341 // with `UAL` feature.
11342 if (!isInt<12>(x: AM.BaseOffs) &&
11343 !(isShiftedInt<14, 2>(x: AM.BaseOffs) && Subtarget.hasUAL()))
11344 return false;
11345
11346 switch (AM.Scale) {
11347 case 0:
11348 // "r+i" or just "i", depending on HasBaseReg.
11349 break;
11350 case 1:
11351 // "r+r+i" is not allowed.
11352 if (AM.HasBaseReg && AM.BaseOffs)
11353 return false;
11354 // Otherwise we have "r+r" or "r+i".
11355 break;
11356 case 2:
11357 // "2*r+r" or "2*r+i" is not allowed.
11358 if (AM.HasBaseReg || AM.BaseOffs)
11359 return false;
11360 // Allow "2*r" as "r+r".
11361 break;
11362 default:
11363 return false;
11364 }
11365
11366 return true;
11367}
11368
11369bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
11370 return isInt<12>(x: Imm);
11371}
11372
11373bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {
11374 return isInt<12>(x: Imm);
11375}
11376
11377bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
11378 // Zexts are free if they can be combined with a load.
11379 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
11380 // poorly with type legalization of compares preferring sext.
11381 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
11382 EVT MemVT = LD->getMemoryVT();
11383 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
11384 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
11385 LD->getExtensionType() == ISD::ZEXTLOAD))
11386 return true;
11387 }
11388
11389 return TargetLowering::isZExtFree(Val, VT2);
11390}
11391
11392bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,
11393 EVT DstVT) const {
11394 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
11395}
11396
11397bool LoongArchTargetLowering::signExtendConstant(const ConstantInt *CI) const {
11398 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(BitWidth: 32);
11399}
11400
11401bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
11402 // TODO: Support vectors.
11403 if (Y.getValueType().isVector())
11404 return false;
11405
11406 return !isa<ConstantSDNode>(Val: Y);
11407}
11408
11409ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
11410 // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
11411 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
11412}
11413
11414bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall(
11415 Type *Ty, bool IsSigned) const {
11416 if (Subtarget.is64Bit() && Ty->isIntegerTy(BitWidth: 32))
11417 return true;
11418
11419 return IsSigned;
11420}
11421
11422bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
11423 // Return false to suppress the unnecessary extensions if the LibCall
11424 // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
11425 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
11426 Type.getSizeInBits() < Subtarget.getGRLen()))
11427 return false;
11428 return true;
11429}
11430
11431// memcpy, and other memory intrinsics, typically tries to use wider load/store
11432// if the source/dest is aligned and the copy size is large enough. We therefore
11433// want to align such objects passed to memory intrinsics.
11434bool LoongArchTargetLowering::shouldAlignPointerArgs(CallInst *CI,
11435 unsigned &MinSize,
11436 Align &PrefAlign) const {
11437 if (!isa<MemIntrinsic>(Val: CI))
11438 return false;
11439
11440 if (Subtarget.is64Bit()) {
11441 MinSize = 8;
11442 PrefAlign = Align(8);
11443 } else {
11444 MinSize = 4;
11445 PrefAlign = Align(4);
11446 }
11447
11448 return true;
11449}
11450
11451TargetLoweringBase::LegalizeTypeAction
11452LoongArchTargetLowering::getPreferredVectorAction(MVT VT) const {
11453 if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
11454 VT.getVectorElementType() != MVT::i1)
11455 return TypeWidenVector;
11456
11457 return TargetLoweringBase::getPreferredVectorAction(VT);
11458}
11459
11460bool LoongArchTargetLowering::splitValueIntoRegisterParts(
11461 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
11462 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
11463 bool IsABIRegCopy = CC.has_value();
11464 EVT ValueVT = Val.getValueType();
11465
11466 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
11467 PartVT == MVT::f32) {
11468 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
11469 // nan, and cast to f32.
11470 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::i16, Operand: Val);
11471 Val = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i32, Operand: Val);
11472 Val = DAG.getNode(Opcode: ISD::OR, DL, VT: MVT::i32, N1: Val,
11473 N2: DAG.getConstant(Val: 0xFFFF0000, DL, VT: MVT::i32));
11474 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::f32, Operand: Val);
11475 Parts[0] = Val;
11476 return true;
11477 }
11478
11479 return false;
11480}
11481
11482SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
11483 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
11484 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
11485 bool IsABIRegCopy = CC.has_value();
11486
11487 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
11488 PartVT == MVT::f32) {
11489 SDValue Val = Parts[0];
11490
11491 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
11492 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::i32, Operand: Val);
11493 Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i16, Operand: Val);
11494 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ValueVT, Operand: Val);
11495 return Val;
11496 }
11497
11498 return SDValue();
11499}
11500
11501MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
11502 CallingConv::ID CC,
11503 EVT VT) const {
11504 // Use f32 to pass f16.
11505 if (VT == MVT::f16 && Subtarget.hasBasicF())
11506 return MVT::f32;
11507
11508 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
11509}
11510
11511unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
11512 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
11513 // Use f32 to pass f16.
11514 if (VT == MVT::f16 && Subtarget.hasBasicF())
11515 return 1;
11516
11517 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
11518}
11519
11520void LoongArchTargetLowering::computeKnownBitsForTargetNode(
11521 const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
11522 const SelectionDAG &DAG, unsigned Depth) const {
11523 unsigned Opc = Op.getOpcode();
11524 Known.resetAll();
11525 switch (Opc) {
11526 default:
11527 break;
11528 case LoongArchISD::VPICK_ZEXT_ELT: {
11529 assert(isa<VTSDNode>(Op->getOperand(2)) && "Unexpected operand!");
11530 EVT VT = cast<VTSDNode>(Val: Op->getOperand(Num: 2))->getVT();
11531 unsigned VTBits = VT.getScalarSizeInBits();
11532 assert(Known.getBitWidth() >= VTBits && "Unexpected width!");
11533 Known.Zero.setBitsFrom(VTBits);
11534 break;
11535 }
11536 }
11537}
11538
11539bool LoongArchTargetLowering::SimplifyDemandedBitsForTargetNode(
11540 SDValue Op, const APInt &OriginalDemandedBits,
11541 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
11542 unsigned Depth) const {
11543 EVT VT = Op.getValueType();
11544 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
11545 unsigned Opc = Op.getOpcode();
11546 switch (Opc) {
11547 default:
11548 break;
11549 case LoongArchISD::CRC_W_B_W:
11550 case LoongArchISD::CRC_W_H_W:
11551 case LoongArchISD::CRCC_W_B_W:
11552 case LoongArchISD::CRCC_W_H_W: {
11553 KnownBits KnownSrc;
11554 APInt DemandedSrcBits =
11555 APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: (Opc == LoongArchISD::CRC_W_B_W ||
11556 Opc == LoongArchISD::CRCC_W_B_W)
11557 ? 8
11558 : 16);
11559 return SimplifyDemandedBits(Op: Op.getOperand(i: 1), DemandedBits: DemandedSrcBits,
11560 DemandedElts: OriginalDemandedElts, Known&: KnownSrc, TLO, Depth: Depth + 1);
11561 }
11562 case LoongArchISD::VMSKLTZ:
11563 case LoongArchISD::XVMSKLTZ: {
11564 SDValue Src = Op.getOperand(i: 0);
11565 MVT SrcVT = Src.getSimpleValueType();
11566 unsigned SrcBits = SrcVT.getScalarSizeInBits();
11567 unsigned NumElts = SrcVT.getVectorNumElements();
11568
11569 // If we don't need the sign bits at all just return zero.
11570 if (OriginalDemandedBits.countr_zero() >= NumElts)
11571 return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: 0, DL: SDLoc(Op), VT));
11572
11573 // Only demand the vector elements of the sign bits we need.
11574 APInt KnownUndef, KnownZero;
11575 APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(width: NumElts);
11576 if (SimplifyDemandedVectorElts(Op: Src, DemandedEltMask: DemandedElts, KnownUndef, KnownZero,
11577 TLO, Depth: Depth + 1))
11578 return true;
11579
11580 Known.Zero = KnownZero.zext(width: BitWidth);
11581 Known.Zero.setHighBits(BitWidth - NumElts);
11582
11583 // [X]VMSKLTZ only uses the MSB from each vector element.
11584 KnownBits KnownSrc;
11585 APInt DemandedSrcBits = APInt::getSignMask(BitWidth: SrcBits);
11586 if (SimplifyDemandedBits(Op: Src, DemandedBits: DemandedSrcBits, DemandedElts, Known&: KnownSrc, TLO,
11587 Depth: Depth + 1))
11588 return true;
11589
11590 if (KnownSrc.One[SrcBits - 1])
11591 Known.One.setLowBits(NumElts);
11592 else if (KnownSrc.Zero[SrcBits - 1])
11593 Known.Zero.setLowBits(NumElts);
11594
11595 // Attempt to avoid multi-use ops if we don't need anything from it.
11596 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
11597 Op: Src, DemandedBits: DemandedSrcBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
11598 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: SDLoc(Op), VT, Operand: NewSrc));
11599 return false;
11600 }
11601 }
11602
11603 return TargetLowering::SimplifyDemandedBitsForTargetNode(
11604 Op, DemandedBits: OriginalDemandedBits, DemandedElts: OriginalDemandedElts, Known, TLO, Depth);
11605}
11606
11607bool LoongArchTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
11608 unsigned Opc = VecOp.getOpcode();
11609
11610 // Assume target opcodes can't be scalarized.
11611 // TODO - do we have any exceptions?
11612 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opcode: Opc))
11613 return false;
11614
11615 // If the vector op is not supported, try to convert to scalar.
11616 EVT VecVT = VecOp.getValueType();
11617 if (!isOperationLegalOrCustomOrPromote(Op: Opc, VT: VecVT))
11618 return true;
11619
11620 // If the vector op is supported, but the scalar op is not, the transform may
11621 // not be worthwhile.
11622 EVT ScalarVT = VecVT.getScalarType();
11623 return isOperationLegalOrCustomOrPromote(Op: Opc, VT: ScalarVT);
11624}
11625
11626bool LoongArchTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
11627 unsigned Index) const {
11628 if (!isOperationLegalOrCustom(Op: ISD::EXTRACT_SUBVECTOR, VT: ResVT))
11629 return false;
11630
11631 // Extract a 128-bit subvector from index 0 of a 256-bit vector is free.
11632 return Index == 0;
11633}
11634
11635bool LoongArchTargetLowering::isExtractVecEltCheap(EVT VT,
11636 unsigned Index) const {
11637 EVT EltVT = VT.getScalarType();
11638
11639 // Extract a scalar FP value from index 0 of a vector is free.
11640 return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
11641}
11642
11643bool LoongArchTargetLowering::hasInlineStackProbe(
11644 const MachineFunction &MF) const {
11645
11646 // If the function specifically requests inline stack probes, emit them.
11647 if (MF.getFunction().hasFnAttribute(Kind: "probe-stack"))
11648 return MF.getFunction().getFnAttribute(Kind: "probe-stack").getValueAsString() ==
11649 "inline-asm";
11650
11651 return false;
11652}
11653
11654unsigned LoongArchTargetLowering::getStackProbeSize(const MachineFunction &MF,
11655 Align StackAlign) const {
11656 // The default stack probe size is 4096 if the function has no
11657 // stack-probe-size attribute.
11658 const Function &Fn = MF.getFunction();
11659 unsigned StackProbeSize =
11660 Fn.getFnAttributeAsParsedInteger(Kind: "stack-probe-size", Default: 4096);
11661 // Round down to the stack alignment.
11662 StackProbeSize = alignDown(Value: StackProbeSize, Align: StackAlign.value());
11663 return StackProbeSize ? StackProbeSize : StackAlign.value();
11664}
11665
11666SDValue
11667LoongArchTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
11668 SelectionDAG &DAG) const {
11669 MachineFunction &MF = DAG.getMachineFunction();
11670 if (!hasInlineStackProbe(MF))
11671 return SDValue();
11672
11673 const MVT GRLenVT = Subtarget.getGRLenVT();
11674 // Get the inputs.
11675 SDValue Chain = Op.getOperand(i: 0);
11676 SDValue Size = Op.getOperand(i: 1);
11677
11678 const MaybeAlign Align =
11679 cast<ConstantSDNode>(Val: Op.getOperand(i: 2))->getMaybeAlignValue();
11680 const SDLoc dl(Op);
11681 const EVT VT = Op.getValueType();
11682
11683 // Construct the new SP value in a GPR.
11684 SDValue SP = DAG.getCopyFromReg(Chain, dl, Reg: LoongArch::R3, VT: GRLenVT);
11685 Chain = SP.getValue(R: 1);
11686 SP = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: GRLenVT, N1: SP, N2: Size);
11687 if (Align)
11688 SP = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: SP.getValue(R: 0),
11689 N2: DAG.getSignedConstant(Val: -Align->value(), DL: dl, VT));
11690
11691 // Set the real SP to the new value with a probing loop.
11692 Chain = DAG.getNode(Opcode: LoongArchISD::PROBED_ALLOCA, DL: dl, VT: MVT::Other, N1: Chain, N2: SP);
11693 return DAG.getMergeValues(Ops: {SP, Chain}, dl);
11694}
11695
11696MachineBasicBlock *
11697LoongArchTargetLowering::emitDynamicProbedAlloc(MachineInstr &MI,
11698 MachineBasicBlock *MBB) const {
11699 MachineFunction &MF = *MBB->getParent();
11700 MachineBasicBlock::iterator MBBI = MI.getIterator();
11701 DebugLoc DL = MBB->findDebugLoc(MBBI);
11702 const Register TargetReg = MI.getOperand(i: 0).getReg();
11703
11704 const LoongArchInstrInfo *TII = Subtarget.getInstrInfo();
11705 const bool IsLA64 = Subtarget.is64Bit();
11706 const Align StackAlign = Subtarget.getFrameLowering()->getStackAlign();
11707 const LoongArchTargetLowering *TLI = Subtarget.getTargetLowering();
11708 const uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
11709
11710 MachineFunction::iterator MBBInsertPoint = std::next(x: MBB->getIterator());
11711 MachineBasicBlock *const LoopTestMBB =
11712 MF.CreateMachineBasicBlock(BB: MBB->getBasicBlock());
11713 MF.insert(MBBI: MBBInsertPoint, MBB: LoopTestMBB);
11714 MachineBasicBlock *const ExitMBB =
11715 MF.CreateMachineBasicBlock(BB: MBB->getBasicBlock());
11716 MF.insert(MBBI: MBBInsertPoint, MBB: ExitMBB);
11717 const Register SPReg = LoongArch::R3;
11718 const Register ScratchReg =
11719 MF.getRegInfo().createVirtualRegister(RegClass: &LoongArch::GPRRegClass);
11720
11721 // ScratchReg = ProbeSize
11722 TII->movImm(MBB&: *MBB, MBBI, DL, DstReg: ScratchReg, Val: ProbeSize, Flag: MachineInstr::NoFlags);
11723
11724 // LoopTest:
11725 // sub.{w/d} $sp, $sp, ScratchReg
11726 BuildMI(BB&: *LoopTestMBB, I: LoopTestMBB->end(), MIMD: DL,
11727 MCID: TII->get(Opcode: IsLA64 ? LoongArch::SUB_D : LoongArch::SUB_W), DestReg: SPReg)
11728 .addReg(RegNo: SPReg)
11729 .addReg(RegNo: ScratchReg);
11730
11731 // st.{w/d} $zero, $sp, 0
11732 BuildMI(BB&: *LoopTestMBB, I: LoopTestMBB->end(), MIMD: DL,
11733 MCID: TII->get(Opcode: IsLA64 ? LoongArch::ST_D : LoongArch::ST_W))
11734 .addReg(RegNo: LoongArch::R0)
11735 .addReg(RegNo: SPReg)
11736 .addImm(Val: 0);
11737
11738 // bltu TargetReg, $sp, LoopTest
11739 BuildMI(BB&: *LoopTestMBB, I: LoopTestMBB->end(), MIMD: DL, MCID: TII->get(Opcode: LoongArch::BLTU))
11740 .addReg(RegNo: TargetReg)
11741 .addReg(RegNo: SPReg)
11742 .addMBB(MBB: LoopTestMBB);
11743
11744 // move $sp, TargetReg
11745 BuildMI(BB&: *ExitMBB, I: ExitMBB->end(), MIMD: DL, MCID: TII->get(Opcode: LoongArch::OR), DestReg: SPReg)
11746 .addReg(RegNo: TargetReg)
11747 .addReg(RegNo: LoongArch::R0);
11748
11749 ExitMBB->splice(Where: ExitMBB->end(), Other: MBB, From: std::next(x: MBBI), To: MBB->end());
11750 ExitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB);
11751
11752 LoopTestMBB->addSuccessor(Succ: ExitMBB);
11753 LoopTestMBB->addSuccessor(Succ: LoopTestMBB);
11754 MBB->addSuccessor(Succ: LoopTestMBB);
11755
11756 MI.eraseFromParent();
11757 MF.getInfo<LoongArchMachineFunctionInfo>()->setDynamicAllocation();
11758 return ExitMBB->begin()->getParent();
11759}
11760