1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "LoongArchISelLowering.h"
15#include "LoongArch.h"
16#include "LoongArchMachineFunctionInfo.h"
17#include "LoongArchRegisterInfo.h"
18#include "LoongArchSelectionDAGInfo.h"
19#include "LoongArchSubtarget.h"
20#include "MCTargetDesc/LoongArchBaseInfo.h"
21#include "MCTargetDesc/LoongArchMCTargetDesc.h"
22#include "MCTargetDesc/LoongArchMatInt.h"
23#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/Statistic.h"
25#include "llvm/ADT/StringExtras.h"
26#include "llvm/CodeGen/ISDOpcodes.h"
27#include "llvm/CodeGen/MachineInstrBuilder.h"
28#include "llvm/CodeGen/RuntimeLibcallUtil.h"
29#include "llvm/CodeGen/SelectionDAGNodes.h"
30#include "llvm/IR/IRBuilder.h"
31#include "llvm/IR/IntrinsicInst.h"
32#include "llvm/IR/IntrinsicsLoongArch.h"
33#include "llvm/Support/CodeGen.h"
34#include "llvm/Support/Debug.h"
35#include "llvm/Support/ErrorHandling.h"
36#include "llvm/Support/KnownBits.h"
37#include "llvm/Support/MathExtras.h"
38#include <llvm/Analysis/VectorUtils.h>
39
40using namespace llvm;
41
42#define DEBUG_TYPE "loongarch-isel-lowering"
43
44STATISTIC(NumTailCalls, "Number of tail calls");
45
46enum MaterializeFPImm {
47 NoMaterializeFPImm = 0,
48 MaterializeFPImm2Ins = 2,
49 MaterializeFPImm3Ins = 3,
50 MaterializeFPImm4Ins = 4,
51 MaterializeFPImm5Ins = 5,
52 MaterializeFPImm6Ins = 6
53};
54
55static cl::opt<MaterializeFPImm> MaterializeFPImmInsNum(
56 "loongarch-materialize-float-imm", cl::Hidden,
57 cl::desc("Maximum number of instructions used (including code sequence "
58 "to generate the value and moving the value to FPR) when "
59 "materializing floating-point immediates (default = 3)"),
60 cl::init(Val: MaterializeFPImm3Ins),
61 cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"),
62 clEnumValN(MaterializeFPImm2Ins, "2",
63 "Materialize FP immediate within 2 instructions"),
64 clEnumValN(MaterializeFPImm3Ins, "3",
65 "Materialize FP immediate within 3 instructions"),
66 clEnumValN(MaterializeFPImm4Ins, "4",
67 "Materialize FP immediate within 4 instructions"),
68 clEnumValN(MaterializeFPImm5Ins, "5",
69 "Materialize FP immediate within 5 instructions"),
70 clEnumValN(MaterializeFPImm6Ins, "6",
71 "Materialize FP immediate within 6 instructions "
72 "(behaves same as 5 on loongarch64)")));
73
74static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
75 cl::desc("Trap on integer division by zero."),
76 cl::init(Val: false));
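// Sketch of the intent: when set, lowering of integer division additionally
// emits a divisor-is-zero check that traps, trading a little code size and
// speed for early detection of division by zero.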
77
78LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
79 const LoongArchSubtarget &STI)
80 : TargetLowering(TM, STI), Subtarget(STI) {
81
82 MVT GRLenVT = Subtarget.getGRLenVT();
83
84 // Set up the register classes.
85
86 addRegisterClass(VT: GRLenVT, RC: &LoongArch::GPRRegClass);
87 if (Subtarget.hasBasicF())
88 addRegisterClass(VT: MVT::f32, RC: &LoongArch::FPR32RegClass);
89 if (Subtarget.hasBasicD())
90 addRegisterClass(VT: MVT::f64, RC: &LoongArch::FPR64RegClass);
91
92 static const MVT::SimpleValueType LSXVTs[] = {
93 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
94 static const MVT::SimpleValueType LASXVTs[] = {
95 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
96
97 if (Subtarget.hasExtLSX())
98 for (MVT VT : LSXVTs)
99 addRegisterClass(VT, RC: &LoongArch::LSX128RegClass);
100
101 if (Subtarget.hasExtLASX())
102 for (MVT VT : LASXVTs)
103 addRegisterClass(VT, RC: &LoongArch::LASX256RegClass);
104
105 // Set operations for LA32 and LA64.
106
107 setLoadExtAction(ExtTypes: {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, ValVT: GRLenVT,
108 MemVT: MVT::i1, Action: Promote);
109
110 setOperationAction(Op: ISD::SHL_PARTS, VT: GRLenVT, Action: Custom);
111 setOperationAction(Op: ISD::SRA_PARTS, VT: GRLenVT, Action: Custom);
112 setOperationAction(Op: ISD::SRL_PARTS, VT: GRLenVT, Action: Custom);
113 setOperationAction(Op: ISD::FP_TO_SINT, VT: GRLenVT, Action: Custom);
114 setOperationAction(Op: ISD::ROTL, VT: GRLenVT, Action: Expand);
115 setOperationAction(Op: ISD::CTPOP, VT: GRLenVT, Action: Expand);
116
117 setOperationAction(Ops: {ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
118 ISD::JumpTable, ISD::GlobalTLSAddress},
119 VT: GRLenVT, Action: Custom);
120
121 setOperationAction(Op: ISD::EH_DWARF_CFA, VT: GRLenVT, Action: Custom);
122
123 setOperationAction(Op: ISD::DYNAMIC_STACKALLOC, VT: GRLenVT, Action: Expand);
124 setOperationAction(Ops: {ISD::STACKSAVE, ISD::STACKRESTORE}, VT: MVT::Other, Action: Expand);
125 setOperationAction(Op: ISD::VASTART, VT: MVT::Other, Action: Custom);
126 setOperationAction(Ops: {ISD::VAARG, ISD::VACOPY, ISD::VAEND}, VT: MVT::Other, Action: Expand);
127
128 setOperationAction(Op: ISD::DEBUGTRAP, VT: MVT::Other, Action: Legal);
129 setOperationAction(Op: ISD::TRAP, VT: MVT::Other, Action: Legal);
130
131 setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::Other, Action: Custom);
132 setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::Other, Action: Custom);
133 setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::Other, Action: Custom);
134
135 setOperationAction(Op: ISD::PREFETCH, VT: MVT::Other, Action: Custom);
136
137 // BITREV/REVB requires the 32S feature.
138 if (STI.has32S()) {
139 // Expand bitreverse.i16 with native-width bitrev and shift for now, until
140 // we know which of sll and revb.2h is faster.
141 setOperationAction(Op: ISD::BITREVERSE, VT: MVT::i8, Action: Custom);
142 setOperationAction(Op: ISD::BITREVERSE, VT: GRLenVT, Action: Legal);
143
144 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
145 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
146 // and i32 could still be byte-swapped relatively cheaply.
147 setOperationAction(Op: ISD::BSWAP, VT: MVT::i16, Action: Custom);
148 } else {
149 setOperationAction(Op: ISD::BSWAP, VT: GRLenVT, Action: Expand);
150 setOperationAction(Op: ISD::CTTZ, VT: GRLenVT, Action: Expand);
151 setOperationAction(Op: ISD::CTLZ, VT: GRLenVT, Action: Expand);
152 setOperationAction(Op: ISD::ROTR, VT: GRLenVT, Action: Expand);
153 setOperationAction(Op: ISD::SELECT, VT: GRLenVT, Action: Custom);
154 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::i8, Action: Expand);
155 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::i16, Action: Expand);
156 }
157
158 setOperationAction(Op: ISD::BR_JT, VT: MVT::Other, Action: Expand);
159 setOperationAction(Op: ISD::BR_CC, VT: GRLenVT, Action: Expand);
160 setOperationAction(Op: ISD::BRCOND, VT: MVT::Other, Action: Custom);
161 setOperationAction(Op: ISD::SELECT_CC, VT: GRLenVT, Action: Expand);
162 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::i1, Action: Expand);
163 setOperationAction(Ops: {ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT: GRLenVT, Action: Expand);
164
165 setOperationAction(Op: ISD::FP_TO_UINT, VT: GRLenVT, Action: Custom);
166 setOperationAction(Op: ISD::UINT_TO_FP, VT: GRLenVT, Action: Expand);
167
168 // Set operations for LA64 only.
169
170 if (Subtarget.is64Bit()) {
171 setOperationAction(Op: ISD::ADD, VT: MVT::i32, Action: Custom);
172 setOperationAction(Op: ISD::SUB, VT: MVT::i32, Action: Custom);
173 setOperationAction(Op: ISD::SHL, VT: MVT::i32, Action: Custom);
174 setOperationAction(Op: ISD::SRA, VT: MVT::i32, Action: Custom);
175 setOperationAction(Op: ISD::SRL, VT: MVT::i32, Action: Custom);
176 setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::i32, Action: Custom);
177 setOperationAction(Op: ISD::BITCAST, VT: MVT::i32, Action: Custom);
178 setOperationAction(Op: ISD::ROTR, VT: MVT::i32, Action: Custom);
179 setOperationAction(Op: ISD::ROTL, VT: MVT::i32, Action: Custom);
180 setOperationAction(Op: ISD::CTTZ, VT: MVT::i32, Action: Custom);
181 setOperationAction(Op: ISD::CTLZ, VT: MVT::i32, Action: Custom);
182 setOperationAction(Op: ISD::EH_DWARF_CFA, VT: MVT::i32, Action: Custom);
183 setOperationAction(Op: ISD::READ_REGISTER, VT: MVT::i32, Action: Custom);
184 setOperationAction(Op: ISD::WRITE_REGISTER, VT: MVT::i32, Action: Custom);
185 setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::i32, Action: Custom);
186 setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::i32, Action: Custom);
187 setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::i32, Action: Custom);
188
189 setOperationAction(Op: ISD::BITREVERSE, VT: MVT::i32, Action: Custom);
190 setOperationAction(Op: ISD::BSWAP, VT: MVT::i32, Action: Custom);
191 setOperationAction(Ops: {ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, VT: MVT::i32,
192 Action: Custom);
193 setOperationAction(Op: ISD::LROUND, VT: MVT::i32, Action: Custom);
194 }
195
196 // Set operations for LA32 only.
197
198 if (!Subtarget.is64Bit()) {
199 setOperationAction(Op: ISD::READ_REGISTER, VT: MVT::i64, Action: Custom);
200 setOperationAction(Op: ISD::WRITE_REGISTER, VT: MVT::i64, Action: Custom);
201 setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::i64, Action: Custom);
202 setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::i64, Action: Custom);
203 setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::i64, Action: Custom);
204 if (Subtarget.hasBasicD())
205 setOperationAction(Op: ISD::BITCAST, VT: MVT::i64, Action: Custom);
206 }
207
208 setOperationAction(Op: ISD::ATOMIC_FENCE, VT: MVT::Other, Action: Custom);
209
210 static const ISD::CondCode FPCCToExpand[] = {
211 ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
212 ISD::SETGE, ISD::SETNE, ISD::SETGT};
213
214 // Set operations for 'F' feature.
215
216 if (Subtarget.hasBasicF()) {
217 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f32, MemVT: MVT::f16, Action: Expand);
218 setTruncStoreAction(ValVT: MVT::f32, MemVT: MVT::f16, Action: Expand);
219 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f32, MemVT: MVT::bf16, Action: Expand);
220 setTruncStoreAction(ValVT: MVT::f32, MemVT: MVT::bf16, Action: Expand);
221 setCondCodeAction(CCs: FPCCToExpand, VT: MVT::f32, Action: Expand);
222
223 setOperationAction(Op: ISD::ConstantFP, VT: MVT::f32, Action: Custom);
224 setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f32, Action: Expand);
225 setOperationAction(Op: ISD::BR_CC, VT: MVT::f32, Action: Expand);
226 setOperationAction(Op: ISD::FMA, VT: MVT::f32, Action: Legal);
227 setOperationAction(Op: ISD::FMINNUM_IEEE, VT: MVT::f32, Action: Legal);
228 setOperationAction(Op: ISD::FMINNUM, VT: MVT::f32, Action: Legal);
229 setOperationAction(Op: ISD::FMAXNUM_IEEE, VT: MVT::f32, Action: Legal);
230 setOperationAction(Op: ISD::FMAXNUM, VT: MVT::f32, Action: Legal);
231 setOperationAction(Op: ISD::FCANONICALIZE, VT: MVT::f32, Action: Legal);
232 setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::f32, Action: Legal);
233 setOperationAction(Op: ISD::STRICT_FSETCC, VT: MVT::f32, Action: Legal);
234 setOperationAction(Op: ISD::IS_FPCLASS, VT: MVT::f32, Action: Legal);
235 setOperationAction(Op: ISD::FSIN, VT: MVT::f32, Action: Expand);
236 setOperationAction(Op: ISD::FCOS, VT: MVT::f32, Action: Expand);
237 setOperationAction(Op: ISD::FSINCOS, VT: MVT::f32, Action: Expand);
238 setOperationAction(Op: ISD::FPOW, VT: MVT::f32, Action: Expand);
239 setOperationAction(Op: ISD::FREM, VT: MVT::f32, Action: LibCall);
240 setOperationAction(Op: ISD::FP16_TO_FP, VT: MVT::f32,
241 Action: Subtarget.isSoftFPABI() ? LibCall : Custom);
242 setOperationAction(Op: ISD::FP_TO_FP16, VT: MVT::f32,
243 Action: Subtarget.isSoftFPABI() ? LibCall : Custom);
244 setOperationAction(Op: ISD::BF16_TO_FP, VT: MVT::f32, Action: Custom);
245 setOperationAction(Op: ISD::FP_TO_BF16, VT: MVT::f32,
246 Action: Subtarget.isSoftFPABI() ? LibCall : Custom);
247
248 if (Subtarget.is64Bit())
249 setOperationAction(Op: ISD::FRINT, VT: MVT::f32, Action: Legal);
250
251 if (!Subtarget.hasBasicD()) {
252 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i32, Action: Custom);
253 if (Subtarget.is64Bit()) {
254 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i64, Action: Custom);
255 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i64, Action: Custom);
256 }
257 }
258 }
259
260 // Set operations for 'D' feature.
261
262 if (Subtarget.hasBasicD()) {
263 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f64, MemVT: MVT::f16, Action: Expand);
264 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f64, MemVT: MVT::f32, Action: Expand);
265 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f64, MemVT: MVT::bf16, Action: Expand);
266 setTruncStoreAction(ValVT: MVT::f64, MemVT: MVT::bf16, Action: Expand);
267 setTruncStoreAction(ValVT: MVT::f64, MemVT: MVT::f16, Action: Expand);
268 setTruncStoreAction(ValVT: MVT::f64, MemVT: MVT::f32, Action: Expand);
269 setCondCodeAction(CCs: FPCCToExpand, VT: MVT::f64, Action: Expand);
270
271 setOperationAction(Op: ISD::ConstantFP, VT: MVT::f64, Action: Custom);
272 setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f64, Action: Expand);
273 setOperationAction(Op: ISD::BR_CC, VT: MVT::f64, Action: Expand);
274 setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::f64, Action: Legal);
275 setOperationAction(Op: ISD::STRICT_FSETCC, VT: MVT::f64, Action: Legal);
276 setOperationAction(Op: ISD::FMA, VT: MVT::f64, Action: Legal);
277 setOperationAction(Op: ISD::FMINNUM_IEEE, VT: MVT::f64, Action: Legal);
278 setOperationAction(Op: ISD::FMINNUM, VT: MVT::f64, Action: Legal);
279 setOperationAction(Op: ISD::FMAXNUM_IEEE, VT: MVT::f64, Action: Legal);
280 setOperationAction(Op: ISD::FCANONICALIZE, VT: MVT::f64, Action: Legal);
281 setOperationAction(Op: ISD::FMAXNUM, VT: MVT::f64, Action: Legal);
282 setOperationAction(Op: ISD::IS_FPCLASS, VT: MVT::f64, Action: Legal);
283 setOperationAction(Op: ISD::FSIN, VT: MVT::f64, Action: Expand);
284 setOperationAction(Op: ISD::FCOS, VT: MVT::f64, Action: Expand);
285 setOperationAction(Op: ISD::FSINCOS, VT: MVT::f64, Action: Expand);
286 setOperationAction(Op: ISD::FPOW, VT: MVT::f64, Action: Expand);
287 setOperationAction(Op: ISD::FREM, VT: MVT::f64, Action: LibCall);
288 setOperationAction(Op: ISD::FP16_TO_FP, VT: MVT::f64, Action: Expand);
289 setOperationAction(Op: ISD::FP_TO_FP16, VT: MVT::f64,
290 Action: Subtarget.isSoftFPABI() ? LibCall : Custom);
291 setOperationAction(Op: ISD::BF16_TO_FP, VT: MVT::f64, Action: Custom);
292 setOperationAction(Op: ISD::FP_TO_BF16, VT: MVT::f64,
293 Action: Subtarget.isSoftFPABI() ? LibCall : Custom);
294
295 if (Subtarget.is64Bit())
296 setOperationAction(Op: ISD::FRINT, VT: MVT::f64, Action: Legal);
297 }
298
299 // Set operations for 'LSX' feature.
300
301 if (Subtarget.hasExtLSX()) {
302 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
303 // Expand all truncating stores and extending loads.
304 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
305 setTruncStoreAction(ValVT: VT, MemVT: InnerVT, Action: Expand);
306 setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand);
307 setLoadExtAction(ExtType: ISD::ZEXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand);
308 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand);
309 }
310 // By default everything must be expanded. Then we will selectively turn
311 // on ones that can be effectively codegen'd.
312 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
313 setOperationAction(Op, VT, Action: Expand);
314 }
315
316 for (MVT VT : LSXVTs) {
317 setOperationAction(Ops: {ISD::LOAD, ISD::STORE}, VT, Action: Legal);
318 setOperationAction(Op: ISD::BITCAST, VT, Action: Legal);
319 setOperationAction(Op: ISD::UNDEF, VT, Action: Legal);
320
321 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT, Action: Custom);
322 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT, Action: Legal);
323 setOperationAction(Op: ISD::BUILD_VECTOR, VT, Action: Custom);
324
325 setOperationAction(Op: ISD::SETCC, VT, Action: Legal);
326 setOperationAction(Op: ISD::VSELECT, VT, Action: Legal);
327 setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT, Action: Custom);
328 setOperationAction(Op: ISD::EXTRACT_SUBVECTOR, VT, Action: Legal);
329 }
330 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
331 setOperationAction(Ops: {ISD::ADD, ISD::SUB}, VT, Action: Legal);
332 setOperationAction(Ops: {ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
333 Action: Legal);
334 setOperationAction(Ops: {ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
335 VT, Action: Legal);
336 setOperationAction(Ops: {ISD::AND, ISD::OR, ISD::XOR}, VT, Action: Legal);
337 setOperationAction(Ops: {ISD::SHL, ISD::SRA, ISD::SRL}, VT, Action: Legal);
338 setOperationAction(Ops: {ISD::CTPOP, ISD::CTLZ}, VT, Action: Legal);
339 setOperationAction(Ops: {ISD::MULHS, ISD::MULHU}, VT, Action: Legal);
340 setCondCodeAction(
341 CCs: {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
342 Action: Expand);
343 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT, Action: Custom);
344 setOperationAction(Op: ISD::ABS, VT, Action: Legal);
345 setOperationAction(Op: ISD::ABDS, VT, Action: Legal);
346 setOperationAction(Op: ISD::ABDU, VT, Action: Legal);
347 setOperationAction(Op: ISD::SADDSAT, VT, Action: Legal);
348 setOperationAction(Op: ISD::SSUBSAT, VT, Action: Legal);
349 setOperationAction(Op: ISD::UADDSAT, VT, Action: Legal);
350 setOperationAction(Op: ISD::USUBSAT, VT, Action: Legal);
351 setOperationAction(Op: ISD::ROTL, VT, Action: Custom);
352 setOperationAction(Op: ISD::ROTR, VT, Action: Custom);
353 }
354 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
355 setOperationAction(Op: ISD::BITREVERSE, VT, Action: Custom);
356 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
357 setOperationAction(Op: ISD::BSWAP, VT, Action: Legal);
358 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
359 setOperationAction(Ops: {ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Action: Legal);
360 setOperationAction(Ops: {ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Action: Legal);
361 }
362 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
363 setOperationAction(Ops: {ISD::FADD, ISD::FSUB}, VT, Action: Legal);
364 setOperationAction(Ops: {ISD::FMUL, ISD::FDIV}, VT, Action: Legal);
365 setOperationAction(Op: ISD::FMA, VT, Action: Legal);
366 setOperationAction(Op: ISD::FSQRT, VT, Action: Legal);
367 setOperationAction(Op: ISD::FNEG, VT, Action: Legal);
368 setCondCodeAction(CCs: {ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
369 ISD::SETUGE, ISD::SETUGT},
370 VT, Action: Expand);
371 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT, Action: Legal);
372 setOperationAction(Op: ISD::FCEIL, VT, Action: Legal);
373 setOperationAction(Op: ISD::FFLOOR, VT, Action: Legal);
374 setOperationAction(Op: ISD::FTRUNC, VT, Action: Legal);
375 setOperationAction(Op: ISD::FROUNDEVEN, VT, Action: Legal);
376 setOperationAction(Op: ISD::FMINNUM, VT, Action: Legal);
377 setOperationAction(Op: ISD::FMAXNUM, VT, Action: Legal);
378 }
379 setOperationAction(Op: ISD::CTPOP, VT: GRLenVT, Action: Legal);
380 setOperationAction(Ops: ISD::FCEIL, VTs: {MVT::f32, MVT::f64}, Action: Legal);
381 setOperationAction(Ops: ISD::FFLOOR, VTs: {MVT::f32, MVT::f64}, Action: Legal);
382 setOperationAction(Ops: ISD::FTRUNC, VTs: {MVT::f32, MVT::f64}, Action: Legal);
383 setOperationAction(Ops: ISD::FROUNDEVEN, VTs: {MVT::f32, MVT::f64}, Action: Legal);
384
385 for (MVT VT :
386 {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
387 MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
388 setOperationAction(Op: ISD::TRUNCATE, VT, Action: Custom);
389 setOperationAction(Op: ISD::VECREDUCE_ADD, VT, Action: Custom);
390 setOperationAction(Op: ISD::VECREDUCE_AND, VT, Action: Custom);
391 setOperationAction(Op: ISD::VECREDUCE_OR, VT, Action: Custom);
392 setOperationAction(Op: ISD::VECREDUCE_XOR, VT, Action: Custom);
393 setOperationAction(Op: ISD::VECREDUCE_SMAX, VT, Action: Custom);
394 setOperationAction(Op: ISD::VECREDUCE_SMIN, VT, Action: Custom);
395 setOperationAction(Op: ISD::VECREDUCE_UMAX, VT, Action: Custom);
396 setOperationAction(Op: ISD::VECREDUCE_UMIN, VT, Action: Custom);
397 }
398 }
399
400 // Set operations for 'LASX' feature.
401
402 if (Subtarget.hasExtLASX()) {
403 for (MVT VT : LASXVTs) {
404 setOperationAction(Ops: {ISD::LOAD, ISD::STORE}, VT, Action: Legal);
405 setOperationAction(Op: ISD::BITCAST, VT, Action: Legal);
406 setOperationAction(Op: ISD::UNDEF, VT, Action: Legal);
407
408 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT, Action: Custom);
409 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT, Action: Custom);
410 setOperationAction(Op: ISD::BUILD_VECTOR, VT, Action: Custom);
411 setOperationAction(Op: ISD::CONCAT_VECTORS, VT, Action: Custom);
412 setOperationAction(Op: ISD::INSERT_SUBVECTOR, VT, Action: Legal);
413
414 setOperationAction(Op: ISD::SETCC, VT, Action: Custom);
415 setOperationAction(Op: ISD::VSELECT, VT, Action: Legal);
416 setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT, Action: Custom);
417 }
418 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
419 setOperationAction(Ops: {ISD::ADD, ISD::SUB}, VT, Action: Legal);
420 setOperationAction(Ops: {ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
421 Action: Legal);
422 setOperationAction(Ops: {ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
423 VT, Action: Legal);
424 setOperationAction(Ops: {ISD::AND, ISD::OR, ISD::XOR}, VT, Action: Legal);
425 setOperationAction(Ops: {ISD::SHL, ISD::SRA, ISD::SRL}, VT, Action: Legal);
426 setOperationAction(Ops: {ISD::CTPOP, ISD::CTLZ}, VT, Action: Legal);
427 setOperationAction(Ops: {ISD::MULHS, ISD::MULHU}, VT, Action: Legal);
428 setCondCodeAction(
429 CCs: {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
430 Action: Expand);
431 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT, Action: Custom);
432 setOperationAction(Op: ISD::ABS, VT, Action: Legal);
433 setOperationAction(Op: ISD::ABDS, VT, Action: Legal);
434 setOperationAction(Op: ISD::ABDU, VT, Action: Legal);
435 setOperationAction(Op: ISD::SADDSAT, VT, Action: Legal);
436 setOperationAction(Op: ISD::SSUBSAT, VT, Action: Legal);
437 setOperationAction(Op: ISD::UADDSAT, VT, Action: Legal);
438 setOperationAction(Op: ISD::USUBSAT, VT, Action: Legal);
439 setOperationAction(Op: ISD::VECREDUCE_ADD, VT, Action: Custom);
440 setOperationAction(Op: ISD::ROTL, VT, Action: Custom);
441 setOperationAction(Op: ISD::ROTR, VT, Action: Custom);
442 }
443 for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
444 setOperationAction(Op: ISD::BITREVERSE, VT, Action: Custom);
445 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
446 setOperationAction(Op: ISD::BSWAP, VT, Action: Legal);
447 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
448 setOperationAction(Ops: {ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Action: Legal);
449 setOperationAction(Ops: {ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Action: Legal);
450 }
451 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
452 setOperationAction(Ops: {ISD::FADD, ISD::FSUB}, VT, Action: Legal);
453 setOperationAction(Ops: {ISD::FMUL, ISD::FDIV}, VT, Action: Legal);
454 setOperationAction(Op: ISD::FMA, VT, Action: Legal);
455 setOperationAction(Op: ISD::FSQRT, VT, Action: Legal);
456 setOperationAction(Op: ISD::FNEG, VT, Action: Legal);
457 setCondCodeAction(CCs: {ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
458 ISD::SETUGE, ISD::SETUGT},
459 VT, Action: Expand);
460 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT, Action: Legal);
461 setOperationAction(Op: ISD::FCEIL, VT, Action: Legal);
462 setOperationAction(Op: ISD::FFLOOR, VT, Action: Legal);
463 setOperationAction(Op: ISD::FTRUNC, VT, Action: Legal);
464 setOperationAction(Op: ISD::FROUNDEVEN, VT, Action: Legal);
465 setOperationAction(Op: ISD::FMINNUM, VT, Action: Legal);
466 setOperationAction(Op: ISD::FMAXNUM, VT, Action: Legal);
467 }
468 }
469
470 // Set DAG combine for LA32 and LA64.
471 if (Subtarget.hasBasicF()) {
472 setTargetDAGCombine(ISD::SINT_TO_FP);
473 }
474
475 setTargetDAGCombine(ISD::AND);
476 setTargetDAGCombine(ISD::OR);
477 setTargetDAGCombine(ISD::SRL);
478 setTargetDAGCombine(ISD::SETCC);
479
480 // Set DAG combine for 'LSX' feature.
481
482 if (Subtarget.hasExtLSX()) {
483 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
484 setTargetDAGCombine(ISD::BITCAST);
485 }
486
487 // Compute derived properties from the register classes.
488 computeRegisterProperties(TRI: Subtarget.getRegisterInfo());
489
490 setStackPointerRegisterToSaveRestore(LoongArch::R3);
491
492 setBooleanContents(ZeroOrOneBooleanContent);
493 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
494
495 setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
496
497 setMinCmpXchgSizeInBits(32);
498
499 // Function alignments.
500 setMinFunctionAlignment(Align(4));
501 // Set preferred alignments.
502 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
503 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
504 setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
505
506 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
507 if (Subtarget.hasLAMCAS())
508 setMinCmpXchgSizeInBits(8);
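  // Sketch of the intent: with LAMCAS, an i8/i16 cmpxchg can be selected to
  // the native amcas.b/amcas.h instructions rather than being widened into a
  // 32-bit LL/SC loop.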
509
510 if (Subtarget.hasSCQ()) {
511 setMaxAtomicSizeInBitsSupported(128);
512 setOperationAction(Op: ISD::ATOMIC_CMP_SWAP, VT: MVT::i128, Action: Custom);
513 }
514
515 // Disable strict node mutation.
516 IsStrictFPEnabled = true;
517}
518
519bool LoongArchTargetLowering::isOffsetFoldingLegal(
520 const GlobalAddressSDNode *GA) const {
521 // In order to maximise the opportunity for common subexpression elimination,
522 // keep a separate ADD node for the global address offset instead of folding
523 // it in the global address node. Later peephole optimisations may choose to
524 // fold it back in when profitable.
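  // Illustrative example (actual codegen may differ): with folding disabled,
  //   load i32, ptr getelementptr (i8, ptr @g, i64 4)
  //   load i32, ptr getelementptr (i8, ptr @g, i64 8)
  // can share one materialization of @g's address and differ only in a small
  // trailing add, rather than materializing @g+4 and @g+8 independently.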
525 return false;
526}
527
528SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
529 SelectionDAG &DAG) const {
530 switch (Op.getOpcode()) {
531 case ISD::ATOMIC_FENCE:
532 return lowerATOMIC_FENCE(Op, DAG);
533 case ISD::EH_DWARF_CFA:
534 return lowerEH_DWARF_CFA(Op, DAG);
535 case ISD::GlobalAddress:
536 return lowerGlobalAddress(Op, DAG);
537 case ISD::GlobalTLSAddress:
538 return lowerGlobalTLSAddress(Op, DAG);
539 case ISD::INTRINSIC_WO_CHAIN:
540 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
541 case ISD::INTRINSIC_W_CHAIN:
542 return lowerINTRINSIC_W_CHAIN(Op, DAG);
543 case ISD::INTRINSIC_VOID:
544 return lowerINTRINSIC_VOID(Op, DAG);
545 case ISD::BlockAddress:
546 return lowerBlockAddress(Op, DAG);
547 case ISD::JumpTable:
548 return lowerJumpTable(Op, DAG);
549 case ISD::SHL_PARTS:
550 return lowerShiftLeftParts(Op, DAG);
551 case ISD::SRA_PARTS:
552 return lowerShiftRightParts(Op, DAG, IsSRA: true);
553 case ISD::SRL_PARTS:
554 return lowerShiftRightParts(Op, DAG, IsSRA: false);
555 case ISD::ConstantPool:
556 return lowerConstantPool(Op, DAG);
557 case ISD::FP_TO_SINT:
558 return lowerFP_TO_SINT(Op, DAG);
559 case ISD::BITCAST:
560 return lowerBITCAST(Op, DAG);
561 case ISD::UINT_TO_FP:
562 return lowerUINT_TO_FP(Op, DAG);
563 case ISD::SINT_TO_FP:
564 return lowerSINT_TO_FP(Op, DAG);
565 case ISD::VASTART:
566 return lowerVASTART(Op, DAG);
567 case ISD::FRAMEADDR:
568 return lowerFRAMEADDR(Op, DAG);
569 case ISD::RETURNADDR:
570 return lowerRETURNADDR(Op, DAG);
571 case ISD::WRITE_REGISTER:
572 return lowerWRITE_REGISTER(Op, DAG);
573 case ISD::INSERT_VECTOR_ELT:
574 return lowerINSERT_VECTOR_ELT(Op, DAG);
575 case ISD::EXTRACT_VECTOR_ELT:
576 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
577 case ISD::BUILD_VECTOR:
578 return lowerBUILD_VECTOR(Op, DAG);
579 case ISD::CONCAT_VECTORS:
580 return lowerCONCAT_VECTORS(Op, DAG);
581 case ISD::VECTOR_SHUFFLE:
582 return lowerVECTOR_SHUFFLE(Op, DAG);
583 case ISD::BITREVERSE:
584 return lowerBITREVERSE(Op, DAG);
585 case ISD::SCALAR_TO_VECTOR:
586 return lowerSCALAR_TO_VECTOR(Op, DAG);
587 case ISD::PREFETCH:
588 return lowerPREFETCH(Op, DAG);
589 case ISD::SELECT:
590 return lowerSELECT(Op, DAG);
591 case ISD::BRCOND:
592 return lowerBRCOND(Op, DAG);
593 case ISD::FP_TO_FP16:
594 return lowerFP_TO_FP16(Op, DAG);
595 case ISD::FP16_TO_FP:
596 return lowerFP16_TO_FP(Op, DAG);
597 case ISD::FP_TO_BF16:
598 return lowerFP_TO_BF16(Op, DAG);
599 case ISD::BF16_TO_FP:
600 return lowerBF16_TO_FP(Op, DAG);
601 case ISD::VECREDUCE_ADD:
602 return lowerVECREDUCE_ADD(Op, DAG);
603 case ISD::ROTL:
604 case ISD::ROTR:
605 return lowerRotate(Op, DAG);
606 case ISD::VECREDUCE_AND:
607 case ISD::VECREDUCE_OR:
608 case ISD::VECREDUCE_XOR:
609 case ISD::VECREDUCE_SMAX:
610 case ISD::VECREDUCE_SMIN:
611 case ISD::VECREDUCE_UMAX:
612 case ISD::VECREDUCE_UMIN:
613 return lowerVECREDUCE(Op, DAG);
614 case ISD::ConstantFP:
615 return lowerConstantFP(Op, DAG);
616 case ISD::SETCC:
617 return lowerSETCC(Op, DAG);
618 }
619 return SDValue();
620}
621
622// Helper to attempt to return a cheaper, bit-inverted version of \p V.
623static SDValue isNOT(SDValue V, SelectionDAG &DAG) {
624 // TODO: don't always ignore oneuse constraints.
625 V = peekThroughBitcasts(V);
626 EVT VT = V.getValueType();
627
628 // Match not(xor X, -1) -> X.
629 if (V.getOpcode() == ISD::XOR &&
630 (ISD::isBuildVectorAllOnes(N: V.getOperand(i: 1).getNode()) ||
631 isAllOnesConstant(V: V.getOperand(i: 1))))
632 return V.getOperand(i: 0);
633
634 // Match not(extract_subvector(not(X))) -> extract_subvector(X).
635 if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
636 (isNullConstant(V: V.getOperand(i: 1)) || V.getOperand(i: 0).hasOneUse())) {
637 if (SDValue Not = isNOT(V: V.getOperand(i: 0), DAG)) {
638 Not = DAG.getBitcast(VT: V.getOperand(i: 0).getValueType(), V: Not);
639 return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL: SDLoc(Not), VT, N1: Not,
640 N2: V.getOperand(i: 1));
641 }
642 }
643
644 // Match not(SplatVector(not(X))) -> SplatVector(X).
645 if (V.getOpcode() == ISD::BUILD_VECTOR) {
646 if (SDValue SplatValue =
647 cast<BuildVectorSDNode>(Val: V.getNode())->getSplatValue()) {
648 if (!V->isOnlyUserOf(N: SplatValue.getNode()))
649 return SDValue();
650
651 if (SDValue Not = isNOT(V: SplatValue, DAG)) {
652 Not = DAG.getBitcast(VT: V.getOperand(i: 0).getValueType(), V: Not);
653 return DAG.getSplat(VT, DL: SDLoc(Not), Op: Not);
654 }
655 }
656 }
657
658 // Match not(or(not(X),not(Y))) -> and(X, Y).
659 if (V.getOpcode() == ISD::OR && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
660 V.getOperand(i: 0).hasOneUse() && V.getOperand(i: 1).hasOneUse()) {
661 // TODO: Handle cases with single NOT operand -> VANDN
662 if (SDValue Op1 = isNOT(V: V.getOperand(i: 1), DAG))
663 if (SDValue Op0 = isNOT(V: V.getOperand(i: 0), DAG))
664 return DAG.getNode(Opcode: ISD::AND, DL: SDLoc(V), VT, N1: DAG.getBitcast(VT, V: Op0),
665 N2: DAG.getBitcast(VT, V: Op1));
666 }
667
668 // TODO: Add more matching patterns. Such as,
669 // not(concat_vectors(not(X), not(Y))) -> concat_vectors(X, Y).
670 // not(slt(C, X)) -> slt(X - 1, C)
671
672 return SDValue();
673}
674
675SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op,
676 SelectionDAG &DAG) const {
677 EVT VT = Op.getValueType();
678 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Val&: Op);
679 const APFloat &FPVal = CFP->getValueAPF();
680 SDLoc DL(CFP);
681
682 assert((VT == MVT::f32 && Subtarget.hasBasicF()) ||
683 (VT == MVT::f64 && Subtarget.hasBasicD()));
684
685 // If the value is +0.0 or -0.0, leave it to the default handling.
686 if (FPVal.isZero())
687 return SDValue();
688
689 // If LSX is enabled, use the cheaper 'vldi' instruction if possible.
690 if (isFPImmVLDILegal(Imm: FPVal, VT))
691 return SDValue();
692
693 // Construct as integer, and move to float register.
694 APInt INTVal = FPVal.bitcastToAPInt();
695
696 // If more than MaterializeFPImmInsNum instructions would be needed to
697 // generate INTVal and move it to a floating-point register, fall back to a
698 // floating-point load from the constant pool.
699 auto Seq = LoongArchMatInt::generateInstSeq(Val: INTVal.getSExtValue());
700 int InsNum = Seq.size() + ((VT == MVT::f64 && !Subtarget.is64Bit()) ? 2 : 1);
701 if (InsNum > MaterializeFPImmInsNum && !FPVal.isExactlyValue(V: +1.0))
702 return SDValue();
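  // A minimal sketch of the intended fast path (assuming LA64): for f32 1.5,
  // whose bit pattern is 0x3fc00000, the materialization could look like
  //   lu12i.w    $a0, 261120      # build the bits in a GPR (0x3fc00 << 12)
  //   movgr2fr.w $fa0, $a0        # move them into an FP register
  // which beats a constant-pool load whenever the sequence stays within
  // MaterializeFPImmInsNum instructions.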
703
704 switch (VT.getSimpleVT().SimpleTy) {
705 default:
706 llvm_unreachable("Unexpected floating point type!");
707 break;
708 case MVT::f32: {
709 SDValue NewVal = DAG.getConstant(Val: INTVal, DL, VT: MVT::i32);
710 if (Subtarget.is64Bit())
711 NewVal = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: NewVal);
712 return DAG.getNode(Opcode: Subtarget.is64Bit() ? LoongArchISD::MOVGR2FR_W_LA64
713 : LoongArchISD::MOVGR2FR_W,
714 DL, VT, Operand: NewVal);
715 }
716 case MVT::f64: {
717 if (Subtarget.is64Bit()) {
718 SDValue NewVal = DAG.getConstant(Val: INTVal, DL, VT: MVT::i64);
719 return DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_D, DL, VT, Operand: NewVal);
720 }
721 SDValue Lo = DAG.getConstant(Val: INTVal.trunc(width: 32), DL, VT: MVT::i32);
722 SDValue Hi = DAG.getConstant(Val: INTVal.lshr(shiftAmt: 32).trunc(width: 32), DL, VT: MVT::i32);
723 return DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_D_LO_HI, DL, VT, N1: Lo, N2: Hi);
724 }
725 }
726
727 return SDValue();
728}
729
730// Ensure SETCC result and operand have the same bit width; isel does not
731// support mismatched widths.
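// For instance (hypothetical types): with v8i32 operands, a setcc node whose
// result type is v8i16 is rebuilt as a v8i32 setcc and then truncated back to
// v8i16; a result type wider than the natural setcc type is sign-extended
// instead.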
732SDValue LoongArchTargetLowering::lowerSETCC(SDValue Op,
733 SelectionDAG &DAG) const {
734 SDLoc DL(Op);
735 EVT ResultVT = Op.getValueType();
736 EVT OperandVT = Op.getOperand(i: 0).getValueType();
737
738 EVT SetCCResultVT =
739 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: OperandVT);
740
741 if (ResultVT == SetCCResultVT)
742 return Op;
743
744 assert(Op.getOperand(0).getValueType() == Op.getOperand(1).getValueType() &&
745 "SETCC operands must have the same type!");
746
747 SDValue SetCCNode =
748 DAG.getNode(Opcode: ISD::SETCC, DL, VT: SetCCResultVT, N1: Op.getOperand(i: 0),
749 N2: Op.getOperand(i: 1), N3: Op.getOperand(i: 2));
750
751 if (ResultVT.bitsGT(VT: SetCCResultVT))
752 SetCCNode = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: ResultVT, Operand: SetCCNode);
753 else if (ResultVT.bitsLT(VT: SetCCResultVT))
754 SetCCNode = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: ResultVT, Operand: SetCCNode);
755
756 return SetCCNode;
757}
758
759// Lower vecreduce_add using vhaddw instructions.
760// For Example:
761// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
762// can be lowered to:
763// VHADDW_D_W vr0, vr0, vr0
764// VHADDW_Q_D vr0, vr0, vr0
765// VPICKVE2GR_D a0, vr0, 0
766// ADDI_W a0, a0, 0
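// For a 256-bit LASX source (e.g. v8i32), a sketch of the code below: the same
// VHADDW ladder runs on the full 256-bit register, then an extra XVPERMI + ADD
// folds the upper 128-bit half into the lower half before the final extract.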
767SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
768 SelectionDAG &DAG) const {
769
770 SDLoc DL(Op);
771 MVT OpVT = Op.getSimpleValueType();
772 SDValue Val = Op.getOperand(i: 0);
773
774 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
775 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
776 unsigned ResBits = OpVT.getScalarSizeInBits();
777
778 unsigned LegalVecSize = 128;
779 bool isLASX256Vector =
780 Subtarget.hasExtLASX() && Val.getValueSizeInBits() == 256;
781
782 // Keep widening the operand until its type is legal.
783 while (!isTypeLegal(VT: Val.getSimpleValueType())) {
784 Val = DAG.WidenVector(N: Val, DL);
785 }
786
787 // NumEles drives the iteration count; v4i32 for LSX and v8i32 for LASX
788 // should take the same number of iterations.
789 if (isLASX256Vector) {
790 NumEles /= 2;
791 LegalVecSize = 256;
792 }
793
794 for (unsigned i = 1; i < NumEles; i *= 2, EleBits *= 2) {
795 MVT IntTy = MVT::getIntegerVT(BitWidth: EleBits);
796 MVT VecTy = MVT::getVectorVT(VT: IntTy, NumElements: LegalVecSize / EleBits);
797 Val = DAG.getNode(Opcode: LoongArchISD::VHADDW, DL, VT: VecTy, N1: Val, N2: Val);
798 }
799
800 if (isLASX256Vector) {
801 SDValue Tmp = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: Val,
802 N2: DAG.getConstant(Val: 2, DL, VT: Subtarget.getGRLenVT()));
803 Val = DAG.getNode(Opcode: ISD::ADD, DL, VT: MVT::v4i64, N1: Tmp, N2: Val);
804 }
805
806 Val = DAG.getBitcast(VT: MVT::getVectorVT(VT: OpVT, NumElements: LegalVecSize / ResBits), V: Val);
807 return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: OpVT, N1: Val,
808 N2: DAG.getConstant(Val: 0, DL, VT: Subtarget.getGRLenVT()));
809}
810
811// Lower vecreduce_and/or/xor/[s/u]max/[s/u]min.
812// For Example:
813// call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
814// can be lowered to:
815// VBSRL_V vr1, vr0, 8
816// VMAX_W vr0, vr1, vr0
817// VBSRL_V vr1, vr0, 4
818// VMAX_W vr0, vr1, vr0
819// VPICKVE2GR_W a0, vr0, 0
820// For a 256-bit vector, the operation is illegal and is split into two
821// 128-bit vectors by default, which are then processed here.
822SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
823 SelectionDAG &DAG) const {
824 SDLoc DL(Op);
825
826 MVT OpVT = Op.getSimpleValueType();
827 SDValue Val = Op.getOperand(i: 0);
828
829 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
830 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
831
832 // Keep widening the operand until its type is legal.
833 while (!isTypeLegal(VT: Val.getSimpleValueType())) {
834 Val = DAG.WidenVector(N: Val, DL);
835 }
836
837 unsigned Opcode = ISD::getVecReduceBaseOpcode(VecReduceOpcode: Op.getOpcode());
838 MVT VecTy = Val.getSimpleValueType();
839 MVT GRLenVT = Subtarget.getGRLenVT();
840
841 for (int i = NumEles; i > 1; i /= 2) {
842 SDValue ShiftAmt = DAG.getConstant(Val: i * EleBits / 16, DL, VT: GRLenVT);
843 SDValue Tmp = DAG.getNode(Opcode: LoongArchISD::VBSRL, DL, VT: VecTy, N1: Val, N2: ShiftAmt);
844 Val = DAG.getNode(Opcode, DL, VT: VecTy, N1: Tmp, N2: Val);
845 }
846
847 return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: OpVT, N1: Val,
848 N2: DAG.getConstant(Val: 0, DL, VT: GRLenVT));
849}
850
851SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
852 SelectionDAG &DAG) const {
853 unsigned IsData = Op.getConstantOperandVal(i: 4);
854
855 // We don't support non-data prefetch.
856 // Just preserve the chain.
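  // For example, @llvm.prefetch(%p, /*rw=*/0, /*locality=*/3, /*cache type=*/0)
  // is an instruction prefetch and is simply dropped, keeping only the chain.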
857 if (!IsData)
858 return Op.getOperand(i: 0);
859
860 return Op;
861}
862
863SDValue LoongArchTargetLowering::lowerRotate(SDValue Op,
864 SelectionDAG &DAG) const {
865 MVT VT = Op.getSimpleValueType();
866 assert(VT.isVector() && "Unexpected type");
867
868 SDLoc DL(Op);
869 SDValue R = Op.getOperand(i: 0);
870 SDValue Amt = Op.getOperand(i: 1);
871 unsigned Opcode = Op.getOpcode();
872 unsigned EltSizeInBits = VT.getScalarSizeInBits();
873
874 auto checkCstSplat = [](SDValue V, APInt &CstSplatValue) {
875 if (V.getOpcode() != ISD::BUILD_VECTOR)
876 return false;
877 if (SDValue SplatValue =
878 cast<BuildVectorSDNode>(Val: V.getNode())->getSplatValue()) {
879 if (auto *C = dyn_cast<ConstantSDNode>(Val&: SplatValue)) {
880 CstSplatValue = C->getAPIntValue();
881 return true;
882 }
883 }
884 return false;
885 };
886
887 // Check for constant splat rotation amount.
888 APInt CstSplatValue;
889 bool IsCstSplat = checkCstSplat(Amt, CstSplatValue);
890 bool isROTL = Opcode == ISD::ROTL;
891
892 // Check for splat rotate by zero.
893 if (IsCstSplat && CstSplatValue.urem(RHS: EltSizeInBits) == 0)
894 return R;
895
896 // LoongArch targets always prefer ISD::ROTR.
897 if (isROTL) {
898 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
899 return DAG.getNode(Opcode: ISD::ROTR, DL, VT, N1: R,
900 N2: DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Zero, N2: Amt));
901 }
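  // Sketch of the net effect: a splatted (rotl v4i32 %x, 5) becomes
  // (rotr %x, (sub 0, 5)) here; when that ROTR is custom-lowered in turn, the
  // constant-splat path below reduces the amount modulo the element width,
  // effectively producing (rotr %x, 27).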
902
903 // Rotate by an immediate.
904 if (IsCstSplat) {
905 // ISD::ROTR: Attempt to rotate by a positive immediate.
906 SDValue Bits = DAG.getConstant(Val: EltSizeInBits, DL, VT);
907 if (SDValue Urem =
908 DAG.FoldConstantArithmetic(Opcode: ISD::UREM, DL, VT, Ops: {Amt, Bits}))
909 return DAG.getNode(Opcode, DL, VT, N1: R, N2: Urem);
910 }
911
912 return Op;
913}
914
915// Return true if Val is equal to (setcc LHS, RHS, CC).
916// Return false if Val is the inverse of (setcc LHS, RHS, CC).
917// Otherwise, return std::nullopt.
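// For example, matchSetCC(a, b, setlt, (setcc a, b, setlt)) returns true,
// matchSetCC(a, b, setge, (setcc a, b, setlt)) returns false (inverse), and
// unrelated operands yield std::nullopt.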
918static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
919 ISD::CondCode CC, SDValue Val) {
920 assert(Val->getOpcode() == ISD::SETCC);
921 SDValue LHS2 = Val.getOperand(i: 0);
922 SDValue RHS2 = Val.getOperand(i: 1);
923 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val: Val.getOperand(i: 2))->get();
924
925 if (LHS == LHS2 && RHS == RHS2) {
926 if (CC == CC2)
927 return true;
928 if (CC == ISD::getSetCCInverse(Operation: CC2, Type: LHS2.getValueType()))
929 return false;
930 } else if (LHS == RHS2 && RHS == LHS2) {
931 CC2 = ISD::getSetCCSwappedOperands(Operation: CC2);
932 if (CC == CC2)
933 return true;
934 if (CC == ISD::getSetCCInverse(Operation: CC2, Type: LHS2.getValueType()))
935 return false;
936 }
937
938 return std::nullopt;
939}
940
941static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
942 const LoongArchSubtarget &Subtarget) {
943 SDValue CondV = N->getOperand(Num: 0);
944 SDValue TrueV = N->getOperand(Num: 1);
945 SDValue FalseV = N->getOperand(Num: 2);
946 MVT VT = N->getSimpleValueType(ResNo: 0);
947 SDLoc DL(N);
948
949 // (select c, -1, y) -> -c | y
950 if (isAllOnesConstant(V: TrueV)) {
951 SDValue Neg = DAG.getNegative(Val: CondV, DL, VT);
952 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Neg, N2: DAG.getFreeze(V: FalseV));
953 }
954 // (select c, y, -1) -> (c-1) | y
955 if (isAllOnesConstant(V: FalseV)) {
956 SDValue Neg =
957 DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: CondV, N2: DAG.getAllOnesConstant(DL, VT));
958 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Neg, N2: DAG.getFreeze(V: TrueV));
959 }
960
961 // (select c, 0, y) -> (c-1) & y
962 if (isNullConstant(V: TrueV)) {
963 SDValue Neg =
964 DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: CondV, N2: DAG.getAllOnesConstant(DL, VT));
965 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Neg, N2: DAG.getFreeze(V: FalseV));
966 }
967 // (select c, y, 0) -> -c & y
968 if (isNullConstant(V: FalseV)) {
969 SDValue Neg = DAG.getNegative(Val: CondV, DL, VT);
970 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Neg, N2: DAG.getFreeze(V: TrueV));
971 }
972
973 // select c, ~x, x --> xor -c, x
974 if (isa<ConstantSDNode>(Val: TrueV) && isa<ConstantSDNode>(Val: FalseV)) {
975 const APInt &TrueVal = TrueV->getAsAPIntVal();
976 const APInt &FalseVal = FalseV->getAsAPIntVal();
977 if (~TrueVal == FalseVal) {
978 SDValue Neg = DAG.getNegative(Val: CondV, DL, VT);
979 return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Neg, N2: FalseV);
980 }
981 }
982
983 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
984 // when both truev and falsev are also setcc.
985 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
986 FalseV.getOpcode() == ISD::SETCC) {
987 SDValue LHS = CondV.getOperand(i: 0);
988 SDValue RHS = CondV.getOperand(i: 1);
989 ISD::CondCode CC = cast<CondCodeSDNode>(Val: CondV.getOperand(i: 2))->get();
990
991 // (select x, x, y) -> x | y
992 // (select !x, x, y) -> x & y
993 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, Val: TrueV)) {
994 return DAG.getNode(Opcode: *MatchResult ? ISD::OR : ISD::AND, DL, VT, N1: TrueV,
995 N2: DAG.getFreeze(V: FalseV));
996 }
997 // (select x, y, x) -> x & y
998 // (select !x, y, x) -> x | y
999 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, Val: FalseV)) {
1000 return DAG.getNode(Opcode: *MatchResult ? ISD::AND : ISD::OR, DL, VT,
1001 N1: DAG.getFreeze(V: TrueV), N2: FalseV);
1002 }
1003 }
1004
1005 return SDValue();
1006}
1007
1008// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
1009// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
1010// For now we only consider the transformation profitable if `binOp(c0, c1)`
1011// ends up being `0` or `-1`. In such cases we can replace `select` with `and`.
1012// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
1013// than `c0`?
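// Illustrative example: (and (select cond, x, 0xfff), 0x1000) can become
// (select cond, (and x, 0x1000), 0) because (and 0xfff, 0x1000) == 0, and a
// select against 0 is then cheap to lower (see combineSelectToBinOp).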
1014static SDValue
1015foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
1016 const LoongArchSubtarget &Subtarget) {
1017 unsigned SelOpNo = 0;
1018 SDValue Sel = BO->getOperand(Num: 0);
1019 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
1020 SelOpNo = 1;
1021 Sel = BO->getOperand(Num: 1);
1022 }
1023
1024 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1025 return SDValue();
1026
1027 unsigned ConstSelOpNo = 1;
1028 unsigned OtherSelOpNo = 2;
1029 if (!isa<ConstantSDNode>(Val: Sel->getOperand(Num: ConstSelOpNo))) {
1030 ConstSelOpNo = 2;
1031 OtherSelOpNo = 1;
1032 }
1033 SDValue ConstSelOp = Sel->getOperand(Num: ConstSelOpNo);
1034 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(Val&: ConstSelOp);
1035 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
1036 return SDValue();
1037
1038 SDValue ConstBinOp = BO->getOperand(Num: SelOpNo ^ 1);
1039 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(Val&: ConstBinOp);
1040 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
1041 return SDValue();
1042
1043 SDLoc DL(Sel);
1044 EVT VT = BO->getValueType(ResNo: 0);
1045
1046 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
1047 if (SelOpNo == 1)
1048 std::swap(a&: NewConstOps[0], b&: NewConstOps[1]);
1049
1050 SDValue NewConstOp =
1051 DAG.FoldConstantArithmetic(Opcode: BO->getOpcode(), DL, VT, Ops: NewConstOps);
1052 if (!NewConstOp)
1053 return SDValue();
1054
1055 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
1056 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
1057 return SDValue();
1058
1059 SDValue OtherSelOp = Sel->getOperand(Num: OtherSelOpNo);
1060 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
1061 if (SelOpNo == 1)
1062 std::swap(a&: NewNonConstOps[0], b&: NewNonConstOps[1]);
1063 SDValue NewNonConstOp = DAG.getNode(Opcode: BO->getOpcode(), DL, VT, Ops: NewNonConstOps);
1064
1065 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
1066 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
1067 return DAG.getSelect(DL, VT, Cond: Sel.getOperand(i: 0), LHS: NewT, RHS: NewF);
1068}
1069
1070// Changes the condition code and swaps operands if necessary, so the SetCC
1071// operation matches one of the comparisons supported directly by branches
1072// in the LoongArch ISA. May adjust compares to favor compare with 0 over
1073// compare with 1/-1.
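// For example (LA64 sketch): (seteq (and x, 0x1000), 0) has a mask that does
// not fit ANDI's signed 12-bit immediate, so it is rewritten below as
// (setge (shl x, 51), 0), moving the tested bit into the sign position.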
1074static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
1075 ISD::CondCode &CC, SelectionDAG &DAG) {
1076 // If this is a single bit test that can't be handled by ANDI, shift the
1077 // bit to be tested to the MSB and perform a signed compare with 0.
1078 if (isIntEqualitySetCC(Code: CC) && isNullConstant(V: RHS) &&
1079 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
1080 isa<ConstantSDNode>(Val: LHS.getOperand(i: 1))) {
1081 uint64_t Mask = LHS.getConstantOperandVal(i: 1);
1082 if ((isPowerOf2_64(Value: Mask) || isMask_64(Value: Mask)) && !isInt<12>(x: Mask)) {
1083 unsigned ShAmt = 0;
1084 if (isPowerOf2_64(Value: Mask)) {
1085 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
1086 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Value: Mask);
1087 } else {
1088 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Value: Mask);
1089 }
1090
1091 LHS = LHS.getOperand(i: 0);
1092 if (ShAmt != 0)
1093 LHS = DAG.getNode(Opcode: ISD::SHL, DL, VT: LHS.getValueType(), N1: LHS,
1094 N2: DAG.getConstant(Val: ShAmt, DL, VT: LHS.getValueType()));
1095 return;
1096 }
1097 }
1098
1099 if (auto *RHSC = dyn_cast<ConstantSDNode>(Val&: RHS)) {
1100 int64_t C = RHSC->getSExtValue();
1101 switch (CC) {
1102 default:
1103 break;
1104 case ISD::SETGT:
1105 // Convert X > -1 to X >= 0.
1106 if (C == -1) {
1107 RHS = DAG.getConstant(Val: 0, DL, VT: RHS.getValueType());
1108 CC = ISD::SETGE;
1109 return;
1110 }
1111 break;
1112 case ISD::SETLT:
1113 // Convert X < 1 to 0 >= X.
1114 if (C == 1) {
1115 RHS = LHS;
1116 LHS = DAG.getConstant(Val: 0, DL, VT: RHS.getValueType());
1117 CC = ISD::SETGE;
1118 return;
1119 }
1120 break;
1121 }
1122 }
1123
1124 switch (CC) {
1125 default:
1126 break;
1127 case ISD::SETGT:
1128 case ISD::SETLE:
1129 case ISD::SETUGT:
1130 case ISD::SETULE:
1131 CC = ISD::getSetCCSwappedOperands(Operation: CC);
1132 std::swap(a&: LHS, b&: RHS);
1133 break;
1134 }
1135}
1136
1137SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
1138 SelectionDAG &DAG) const {
1139 SDValue CondV = Op.getOperand(i: 0);
1140 SDValue TrueV = Op.getOperand(i: 1);
1141 SDValue FalseV = Op.getOperand(i: 2);
1142 SDLoc DL(Op);
1143 MVT VT = Op.getSimpleValueType();
1144 MVT GRLenVT = Subtarget.getGRLenVT();
1145
1146 if (SDValue V = combineSelectToBinOp(N: Op.getNode(), DAG, Subtarget))
1147 return V;
1148
1149 if (Op.hasOneUse()) {
1150 unsigned UseOpc = Op->user_begin()->getOpcode();
1151 if (isBinOp(Opcode: UseOpc) && DAG.isSafeToSpeculativelyExecute(Opcode: UseOpc)) {
1152 SDNode *BinOp = *Op->user_begin();
1153 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(BO: *Op->user_begin(),
1154 DAG, Subtarget)) {
1155 DAG.ReplaceAllUsesWith(From: BinOp, To: &NewSel);
1156 // The opcode check is necessary because foldBinOpIntoSelectIfProfitable
1157 // may return a constant node and cause a crash in lowerSELECT.
1158 if (NewSel.getOpcode() == ISD::SELECT)
1159 return lowerSELECT(Op: NewSel, DAG);
1160 return NewSel;
1161 }
1162 }
1163 }
1164
1165 // If the condition is not an integer SETCC which operates on GRLenVT, we need
1166 // to emit a LoongArchISD::SELECT_CC comparing the condition to zero. i.e.:
1167 // (select condv, truev, falsev)
1168 // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
1169 if (CondV.getOpcode() != ISD::SETCC ||
1170 CondV.getOperand(i: 0).getSimpleValueType() != GRLenVT) {
1171 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: GRLenVT);
1172 SDValue SetNE = DAG.getCondCode(Cond: ISD::SETNE);
1173
1174 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
1175
1176 return DAG.getNode(Opcode: LoongArchISD::SELECT_CC, DL, VT, Ops);
1177 }
1178
1179 // If the CondV is the output of a SETCC node which operates on GRLenVT
1180 // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
1181 // to take advantage of the integer compare+branch instructions. i.e.:
1182 //   (select (setcc lhs, rhs, cc), truev, falsev)
1183 //   -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
1184 SDValue LHS = CondV.getOperand(i: 0);
1185 SDValue RHS = CondV.getOperand(i: 1);
1186 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: CondV.getOperand(i: 2))->get();
1187
1188 // Special case for a select of 2 constants that have a difference of 1.
1189 // Normally this is done by DAGCombine, but if the select is introduced by
1190 // type legalization or op legalization, we miss it. Restricting to SETLT
1191 // case for now because that is what signed saturating add/sub need.
1192 // FIXME: We don't need the condition to be SETLT or even a SETCC,
1193 // but we would probably want to swap the true/false values if the condition
1194 // is SETGE/SETLE to avoid an XORI.
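  // For instance, (select (setlt a, b), 5, 4) becomes (add (setlt a, b), 4)
  // and (select (setlt a, b), 4, 5) becomes (sub 5, (setlt a, b)), using the
  // fact that the setcc result is 0 or 1.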
1195 if (isa<ConstantSDNode>(Val: TrueV) && isa<ConstantSDNode>(Val: FalseV) &&
1196 CCVal == ISD::SETLT) {
1197 const APInt &TrueVal = TrueV->getAsAPIntVal();
1198 const APInt &FalseVal = FalseV->getAsAPIntVal();
1199 if (TrueVal - 1 == FalseVal)
1200 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: CondV, N2: FalseV);
1201 if (TrueVal + 1 == FalseVal)
1202 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: FalseV, N2: CondV);
1203 }
1204
1205 translateSetCCForBranch(DL, LHS, RHS, CC&: CCVal, DAG);
1206 // 1 < x ? x : 1 -> 0 < x ? x : 1
1207 if (isOneConstant(V: LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
1208 RHS == TrueV && LHS == FalseV) {
1209 LHS = DAG.getConstant(Val: 0, DL, VT);
1210 // 0 <u x is the same as x != 0.
1211 if (CCVal == ISD::SETULT) {
1212 std::swap(a&: LHS, b&: RHS);
1213 CCVal = ISD::SETNE;
1214 }
1215 }
1216
1217 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
1218 if (isAllOnesConstant(V: RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
1219 RHS == FalseV) {
1220 RHS = DAG.getConstant(Val: 0, DL, VT);
1221 }
1222
1223 SDValue TargetCC = DAG.getCondCode(Cond: CCVal);
1224
1225 if (isa<ConstantSDNode>(Val: TrueV) && !isa<ConstantSDNode>(Val: FalseV)) {
1226 // (select (setcc lhs, rhs, CC), constant, falsev)
1227 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
1228 std::swap(a&: TrueV, b&: FalseV);
1229 TargetCC = DAG.getCondCode(Cond: ISD::getSetCCInverse(Operation: CCVal, Type: LHS.getValueType()));
1230 }
1231
1232 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
1233 return DAG.getNode(Opcode: LoongArchISD::SELECT_CC, DL, VT, Ops);
1234}
1235
1236SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op,
1237 SelectionDAG &DAG) const {
1238 SDValue CondV = Op.getOperand(i: 1);
1239 SDLoc DL(Op);
1240 MVT GRLenVT = Subtarget.getGRLenVT();
1241
1242 if (CondV.getOpcode() == ISD::SETCC) {
1243 if (CondV.getOperand(i: 0).getValueType() == GRLenVT) {
1244 SDValue LHS = CondV.getOperand(i: 0);
1245 SDValue RHS = CondV.getOperand(i: 1);
1246 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: CondV.getOperand(i: 2))->get();
1247
1248 translateSetCCForBranch(DL, LHS, RHS, CC&: CCVal, DAG);
1249
1250 SDValue TargetCC = DAG.getCondCode(Cond: CCVal);
1251 return DAG.getNode(Opcode: LoongArchISD::BR_CC, DL, VT: Op.getValueType(),
1252 N1: Op.getOperand(i: 0), N2: LHS, N3: RHS, N4: TargetCC,
1253 N5: Op.getOperand(i: 2));
1254 } else if (CondV.getOperand(i: 0).getValueType().isFloatingPoint()) {
1255 return DAG.getNode(Opcode: LoongArchISD::BRCOND, DL, VT: Op.getValueType(),
1256 N1: Op.getOperand(i: 0), N2: CondV, N3: Op.getOperand(i: 2));
1257 }
1258 }
1259
1260 return DAG.getNode(Opcode: LoongArchISD::BR_CC, DL, VT: Op.getValueType(),
1261 N1: Op.getOperand(i: 0), N2: CondV, N3: DAG.getConstant(Val: 0, DL, VT: GRLenVT),
1262 N4: DAG.getCondCode(Cond: ISD::SETNE), N5: Op.getOperand(i: 2));
1263}
1264
1265SDValue
1266LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
1267 SelectionDAG &DAG) const {
1268 SDLoc DL(Op);
1269 MVT OpVT = Op.getSimpleValueType();
1270
1271 SDValue Vector = DAG.getUNDEF(VT: OpVT);
1272 SDValue Val = Op.getOperand(i: 0);
1273 SDValue Idx = DAG.getConstant(Val: 0, DL, VT: Subtarget.getGRLenVT());
1274
1275 return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: OpVT, N1: Vector, N2: Val, N3: Idx);
1276}
1277
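// Rough sketch of the strategy below: bitcast the source to v2i64/v4i64,
// bit-reverse each 64-bit element (BITREV_8B for byte vectors, generic
// BITREVERSE otherwise), and for element types wider than i8 shuffle the
// sub-elements back into their original order afterwards.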
1278SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
1279 SelectionDAG &DAG) const {
1280 EVT ResTy = Op->getValueType(ResNo: 0);
1281 SDValue Src = Op->getOperand(Num: 0);
1282 SDLoc DL(Op);
1283
1284 // LoongArchISD::BITREV_8B is not supported on LA32.
1285 if (!Subtarget.is64Bit() && (ResTy == MVT::v16i8 || ResTy == MVT::v32i8))
1286 return SDValue();
1287
1288 EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
1289 unsigned int OrigEltNum = ResTy.getVectorNumElements();
1290 unsigned int NewEltNum = NewVT.getVectorNumElements();
1291
1292 SDValue NewSrc = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: NewVT, Operand: Src);
1293
1294 SmallVector<SDValue, 8> Ops;
1295 for (unsigned int i = 0; i < NewEltNum; i++) {
1296 SDValue Op = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: MVT::i64, N1: NewSrc,
1297 N2: DAG.getConstant(Val: i, DL, VT: Subtarget.getGRLenVT()));
1298 unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
1299 ? (unsigned)LoongArchISD::BITREV_8B
1300 : (unsigned)ISD::BITREVERSE;
1301 Ops.push_back(Elt: DAG.getNode(Opcode: RevOp, DL, VT: MVT::i64, Operand: Op));
1302 }
1303 SDValue Res =
1304 DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ResTy, Operand: DAG.getBuildVector(VT: NewVT, DL, Ops));
1305
1306 switch (ResTy.getSimpleVT().SimpleTy) {
1307 default:
1308 return SDValue();
1309 case MVT::v16i8:
1310 case MVT::v32i8:
1311 return Res;
1312 case MVT::v8i16:
1313 case MVT::v16i16:
1314 case MVT::v4i32:
1315 case MVT::v8i32: {
1316 SmallVector<int, 32> Mask;
1317 for (unsigned int i = 0; i < NewEltNum; i++)
1318 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
1319 Mask.push_back(Elt: j + (OrigEltNum / NewEltNum) * i);
1320 return DAG.getVectorShuffle(VT: ResTy, dl: DL, N1: Res, N2: DAG.getUNDEF(VT: ResTy), Mask);
1321 }
1322 }
1323}
1324
1325// Widen element type to get a new mask value (if possible).
1326// For example:
1327// shufflevector <4 x i32> %a, <4 x i32> %b,
1328// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
1329// is equivalent to:
1330// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
1331// can be lowered to:
1332// VPACKOD_D vr0, vr0, vr1
1333static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1334 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1335 unsigned EltBits = VT.getScalarSizeInBits();
1336
1337 if (EltBits > 32 || EltBits == 1)
1338 return SDValue();
1339
1340 SmallVector<int, 8> NewMask;
1341 if (widenShuffleMaskElts(M: Mask, NewMask)) {
1342 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(BitWidth: EltBits * 2)
1343 : MVT::getIntegerVT(BitWidth: EltBits * 2);
1344 MVT NewVT = MVT::getVectorVT(VT: NewEltVT, NumElements: VT.getVectorNumElements() / 2);
1345 if (DAG.getTargetLoweringInfo().isTypeLegal(VT: NewVT)) {
1346 SDValue NewV1 = DAG.getBitcast(VT: NewVT, V: V1);
1347 SDValue NewV2 = DAG.getBitcast(VT: NewVT, V: V2);
1348 return DAG.getBitcast(
1349 VT, V: DAG.getVectorShuffle(VT: NewVT, dl: DL, N1: NewV1, N2: NewV2, Mask: NewMask));
1350 }
1351 }
1352
1353 return SDValue();
1354}
1355
1356/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
1357/// instructions.
1358// The function matches elements from one of the input vectors shuffled to the
1359// left or right with zeroable elements 'shifted in'. It handles both the
1360// strictly bit-wise element shifts and the byte shift across an entire 128-bit
1361// lane.
1362// Mostly copied from X86.
1363static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
1364 unsigned ScalarSizeInBits, ArrayRef<int> Mask,
1365 int MaskOffset, const APInt &Zeroable) {
1366 int Size = Mask.size();
1367 unsigned SizeInBits = Size * ScalarSizeInBits;
1368
1369 auto CheckZeros = [&](int Shift, int Scale, bool Left) {
1370 for (int i = 0; i < Size; i += Scale)
1371 for (int j = 0; j < Shift; ++j)
1372 if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
1373 return false;
1374
1375 return true;
1376 };
1377
1378 auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
1379 int Step = 1) {
1380 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
1381 if (!(Mask[i] == -1 || Mask[i] == Low))
1382 return false;
1383 return true;
1384 };
1385
1386 auto MatchShift = [&](int Shift, int Scale, bool Left) {
1387 for (int i = 0; i != Size; i += Scale) {
1388 unsigned Pos = Left ? i + Shift : i;
1389 unsigned Low = Left ? i : i + Shift;
1390 unsigned Len = Scale - Shift;
1391 if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
1392 return -1;
1393 }
1394
1395 int ShiftEltBits = ScalarSizeInBits * Scale;
1396 bool ByteShift = ShiftEltBits > 64;
1397 Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
1398 : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
1399 int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);
1400
1401 // Normalize the scale for byte shifts to still produce an i64 element
1402 // type.
1403 Scale = ByteShift ? Scale / 2 : Scale;
1404
1405 // We need to round trip through the appropriate type for the shift.
1406 MVT ShiftSVT = MVT::getIntegerVT(BitWidth: ScalarSizeInBits * Scale);
1407 ShiftVT = ByteShift ? MVT::getVectorVT(VT: MVT::i8, NumElements: SizeInBits / 8)
1408 : MVT::getVectorVT(VT: ShiftSVT, NumElements: Size / Scale);
1409 return (int)ShiftAmt;
1410 };
1411
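// Search over power-of-two group sizes (up to a full 128-bit lane) and every
// shift amount within a group, in both directions, and return the first
// combination whose vacated positions are zeroable and whose remaining
// elements form a sequential run.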
1412 unsigned MaxWidth = 128;
1413 for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
1414 for (int Shift = 1; Shift != Scale; ++Shift)
1415 for (bool Left : {true, false})
1416 if (CheckZeros(Shift, Scale, Left)) {
1417 int ShiftAmt = MatchShift(Shift, Scale, Left);
1418 if (0 < ShiftAmt)
1419 return ShiftAmt;
1420 }
1421
1422 // no match
1423 return -1;
1424}
1425
1426/// Lower VECTOR_SHUFFLE as shift (if possible).
1427///
1428/// For example:
1429/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1430/// <4 x i32> <i32 4, i32 0, i32 1, i32 2>
1431/// is lowered to:
1432/// (VBSLL_V $v0, $v0, 4)
1433///
1434/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1435/// <4 x i32> <i32 4, i32 0, i32 4, i32 2>
1436/// is lowered to:
1437/// (VSLLI_D $v0, $v0, 32)
1438static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef<int> Mask,
1439 MVT VT, SDValue V1, SDValue V2,
1440 SelectionDAG &DAG,
1441 const LoongArchSubtarget &Subtarget,
1442 const APInt &Zeroable) {
1443 int Size = Mask.size();
1444 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
1445
1446 MVT ShiftVT;
1447 SDValue V = V1;
1448 unsigned Opcode;
1449
1450 // Try to match shuffle against V1 shift.
1451 int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, ScalarSizeInBits: VT.getScalarSizeInBits(),
1452 Mask, MaskOffset: 0, Zeroable);
1453
1454 // If V1 failed, try to match shuffle against V2 shift.
1455 if (ShiftAmt < 0) {
1456 ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, ScalarSizeInBits: VT.getScalarSizeInBits(),
1457 Mask, MaskOffset: Size, Zeroable);
1458 V = V2;
1459 }
1460
1461 if (ShiftAmt < 0)
1462 return SDValue();
1463
1464 assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
1465 "Illegal integer vector type");
1466 V = DAG.getBitcast(VT: ShiftVT, V);
1467 V = DAG.getNode(Opcode, DL, VT: ShiftVT, N1: V,
1468 N2: DAG.getConstant(Val: ShiftAmt, DL, VT: Subtarget.getGRLenVT()));
1469 return DAG.getBitcast(VT, V);
1470}
1471
1472/// Determine whether a range fits a regular pattern of values.
1473/// This function accounts for the possibility of jumping over the End iterator.
1474template <typename ValType>
1475static bool
1476fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
1477 unsigned CheckStride,
1478 typename SmallVectorImpl<ValType>::const_iterator End,
1479 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
1480 auto &I = Begin;
1481
1482 while (I != End) {
1483 if (*I != -1 && *I != ExpectedIndex)
1484 return false;
1485 ExpectedIndex += ExpectedIndexStride;
1486
1487 // Incrementing past End is undefined behaviour so we must increment one
1488 // step at a time and check for End at each step.
1489 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
1490 ; // Empty loop body.
1491 }
1492 return true;
1493}
1494
1495/// Compute whether each element of a shuffle is zeroable.
1496///
1497/// A "zeroable" vector shuffle element is one which can be lowered to zero.
1498static void computeZeroableShuffleElements(ArrayRef<int> Mask, SDValue V1,
1499 SDValue V2, APInt &KnownUndef,
1500 APInt &KnownZero) {
1501 int Size = Mask.size();
1502 KnownUndef = KnownZero = APInt::getZero(numBits: Size);
1503
1504 V1 = peekThroughBitcasts(V: V1);
1505 V2 = peekThroughBitcasts(V: V2);
1506
1507 bool V1IsZero = ISD::isBuildVectorAllZeros(N: V1.getNode());
1508 bool V2IsZero = ISD::isBuildVectorAllZeros(N: V2.getNode());
1509
1510 int VectorSizeInBits = V1.getValueSizeInBits();
1511 int ScalarSizeInBits = VectorSizeInBits / Size;
1512 assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
1513 (void)ScalarSizeInBits;
1514
1515 for (int i = 0; i < Size; ++i) {
1516 int M = Mask[i];
1517 if (M < 0) {
1518 KnownUndef.setBit(i);
1519 continue;
1520 }
1521 if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
1522 KnownZero.setBit(i);
1523 continue;
1524 }
1525 }
1526}
1527
1528/// Test whether a shuffle mask is equivalent within each sub-lane.
1529///
1530/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
1531/// non-trivial to compute in the face of undef lanes. The representation is
1532/// suitable for use with existing 128-bit shuffles as entries from the second
1533/// vector have been remapped to [LaneSize, 2*LaneSize).
1534static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
1535 ArrayRef<int> Mask,
1536 SmallVectorImpl<int> &RepeatedMask) {
1537 auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
1538 RepeatedMask.assign(NumElts: LaneSize, Elt: -1);
1539 int Size = Mask.size();
1540 for (int i = 0; i < Size; ++i) {
1541 assert(Mask[i] == -1 || Mask[i] >= 0);
1542 if (Mask[i] < 0)
1543 continue;
1544 if ((Mask[i] % Size) / LaneSize != i / LaneSize)
1545 // This entry crosses lanes, so there is no way to model this shuffle.
1546 return false;
1547
1548 // Ok, handle the in-lane shuffles by detecting if and when they repeat.
1549 // Adjust second vector indices to start at LaneSize instead of Size.
1550 int LocalM =
1551 Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
1552 if (RepeatedMask[i % LaneSize] < 0)
1553 // This is the first non-undef entry in this slot of a 128-bit lane.
1554 RepeatedMask[i % LaneSize] = LocalM;
1555 else if (RepeatedMask[i % LaneSize] != LocalM)
1556 // Found a mismatch with the repeated mask.
1557 return false;
1558 }
1559 return true;
1560}
1561
1562/// Attempts to match vector shuffle as byte rotation.
1563static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
1564 ArrayRef<int> Mask) {
1565
1566 SDValue Lo, Hi;
1567 SmallVector<int, 16> RepeatedMask;
1568
1569 if (!isRepeatedShuffleMask(LaneSizeInBits: 128, VT, Mask, RepeatedMask))
1570 return -1;
1571
1572 int NumElts = RepeatedMask.size();
1573 int Rotation = 0;
1574 int Scale = 16 / NumElts;
1575
1576 for (int i = 0; i < NumElts; ++i) {
1577 int M = RepeatedMask[i];
1578 assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
1579 "Unexpected mask index.");
1580 if (M < 0)
1581 continue;
1582
1583 // Determine where a rotated vector would have started.
1584 int StartIdx = i - (M % NumElts);
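// The identity rotation (a start index of zero) is not interesting; give up
// so that cheaper lowerings can handle this case.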
1585 if (StartIdx == 0)
1586 return -1;
1587
1588 // If we found the tail of a vector the rotation must be the missing
1589 // front. If we found the head of a vector, it must be how much of the
1590 // head.
1591 int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
1592
1593 if (Rotation == 0)
1594 Rotation = CandidateRotation;
1595 else if (Rotation != CandidateRotation)
1596 return -1;
1597
1598 // Compute which value this mask is pointing at.
1599 SDValue MaskV = M < NumElts ? V1 : V2;
1600
1601 // Compute which of the two target values this index should be assigned
1602 // to. This reflects whether the high elements are remaining or the low
1603 // elements are remaining.
1604 SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
1605
1606 // Either set up this value if we've not encountered it before, or check
1607 // that it remains consistent.
1608 if (!TargetV)
1609 TargetV = MaskV;
1610 else if (TargetV != MaskV)
1611 return -1;
1612 }
1613
1614 // Check that we successfully analyzed the mask, and normalize the results.
1615 assert(Rotation != 0 && "Failed to locate a viable rotation!");
1616 assert((Lo || Hi) && "Failed to find a rotated input vector!");
1617 if (!Lo)
1618 Lo = Hi;
1619 else if (!Hi)
1620 Hi = Lo;
1621
1622 V1 = Lo;
1623 V2 = Hi;
1624
1625 return Rotation * Scale;
1626}
1627
1628/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
1629///
1630/// For example:
1631/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
1632/// <2 x i32> <i32 3, i32 0>
1633/// is lowered to:
1634/// (VBSRL_V $v1, $v1, 8)
1635/// (VBSLL_V $v0, $v0, 8)
1636/// (VOR_V $v0, $V0, $v1)
1637static SDValue
1638lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1639 SDValue V1, SDValue V2, SelectionDAG &DAG,
1640 const LoongArchSubtarget &Subtarget) {
1641
1642 SDValue Lo = V1, Hi = V2;
1643 int ByteRotation = matchShuffleAsByteRotate(VT, V1&: Lo, V2&: Hi, Mask);
1644 if (ByteRotation <= 0)
1645 return SDValue();
1646
1647 MVT ByteVT = MVT::getVectorVT(VT: MVT::i8, NumElements: VT.getSizeInBits() / 8);
1648 Lo = DAG.getBitcast(VT: ByteVT, V: Lo);
1649 Hi = DAG.getBitcast(VT: ByteVT, V: Hi);
1650
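// Emulate the rotation with a pair of byte shifts within each 128-bit lane:
// shift Lo left and Hi right so that, OR'ed together, they form the rotated
// result.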
1651 int LoByteShift = 16 - ByteRotation;
1652 int HiByteShift = ByteRotation;
1653 MVT GRLenVT = Subtarget.getGRLenVT();
1654
1655 SDValue LoShift = DAG.getNode(Opcode: LoongArchISD::VBSLL, DL, VT: ByteVT, N1: Lo,
1656 N2: DAG.getConstant(Val: LoByteShift, DL, VT: GRLenVT));
1657 SDValue HiShift = DAG.getNode(Opcode: LoongArchISD::VBSRL, DL, VT: ByteVT, N1: Hi,
1658 N2: DAG.getConstant(Val: HiByteShift, DL, VT: GRLenVT));
1659 return DAG.getBitcast(VT, V: DAG.getNode(Opcode: ISD::OR, DL, VT: ByteVT, N1: LoShift, N2: HiShift));
1660}
1661
1662/// Lower VECTOR_SHUFFLE as ZERO_EXTEND or ANY_EXTEND (if possible).
1663///
1664/// For example:
1665/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1666/// <4 x i32> <i32 0, i32 4, i32 1, i32 4>
1667/// %3 = bitcast <4 x i32> %2 to <2 x i64>
1668/// is lowered to:
1669/// (VREPLI $v1, 0)
1670/// (VILVL $v0, $v1, $v0)
1671static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL,
1672 ArrayRef<int> Mask, MVT VT,
1673 SDValue V1, SDValue V2,
1674 SelectionDAG &DAG,
1675 const APInt &Zeroable) {
1676 int Bits = VT.getSizeInBits();
1677 int EltBits = VT.getScalarSizeInBits();
1678 int NumElements = VT.getVectorNumElements();
1679
1680 if (Zeroable.isAllOnes())
1681 return DAG.getConstant(Val: 0, DL, VT);
1682
1683 // Define a helper function to check a particular ext-scale and lower to it if
1684 // valid.
1685 auto Lower = [&](int Scale) -> SDValue {
1686 SDValue InputV;
1687 bool AnyExt = true;
1688 int Offset = 0;
1689 for (int i = 0; i < NumElements; i++) {
1690 int M = Mask[i];
1691 if (M < 0)
1692 continue;
1693 if (i % Scale != 0) {
1694 // Each of the extended elements needs to be zeroable.
1695 if (!Zeroable[i])
1696 return SDValue();
1697
1698 AnyExt = false;
1699 continue;
1700 }
1701
1702 // The base elements need to be consecutive indices into the same
1703 // input vector.
1704 SDValue V = M < NumElements ? V1 : V2;
1705 M = M % NumElements;
1706 if (!InputV) {
1707 InputV = V;
1708 Offset = M - (i / Scale);
1709
1710 // This offset can't be handled.
1711 if (Offset % (NumElements / Scale))
1712 return SDValue();
1713 } else if (InputV != V)
1714 return SDValue();
1715
1716 if (M != (Offset + (i / Scale)))
1717 return SDValue(); // Non-consecutive strided elements.
1718 }
1719
1720 // If we fail to find an input, we have a zero-shuffle which should always
1721 // have already been handled.
1722 if (!InputV)
1723 return SDValue();
1724
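// Repeatedly interleave the input with zero (zero-extension) or a frozen
// copy of itself (any-extension), doubling the element width on each step
// until the requested extension scale is reached.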
1725 do {
1726 unsigned VilVLoHi = LoongArchISD::VILVL;
1727 if (Offset >= (NumElements / 2)) {
1728 VilVLoHi = LoongArchISD::VILVH;
1729 Offset -= (NumElements / 2);
1730 }
1731
1732 MVT InputVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: EltBits), NumElements);
1733 SDValue Ext =
1734 AnyExt ? DAG.getFreeze(V: InputV) : DAG.getConstant(Val: 0, DL, VT: InputVT);
1735 InputV = DAG.getBitcast(VT: InputVT, V: InputV);
1736 InputV = DAG.getNode(Opcode: VilVLoHi, DL, VT: InputVT, N1: Ext, N2: InputV);
1737 Scale /= 2;
1738 EltBits *= 2;
1739 NumElements /= 2;
1740 } while (Scale > 1);
1741 return DAG.getBitcast(VT, V: InputV);
1742 };
1743
1744 // Each iteration, try extending the elements half as much, but into twice as
1745 // many elements.
1746 for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
1747 NumExtElements *= 2) {
1748 if (SDValue V = Lower(NumElements / NumExtElements))
1749 return V;
1750 }
1751 return SDValue();
1752}
1753
1754/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
1755///
1756/// VREPLVEI performs vector broadcast based on an element specified by an
1757/// integer immediate, with its mask being similar to:
1758/// <x, x, x, ...>
1759/// where x is any valid index.
1760///
1761/// When undef's appear in the mask they are treated as if they were whatever
1762/// value is necessary in order to fit the above form.
1763static SDValue
1764lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1765 SDValue V1, SelectionDAG &DAG,
1766 const LoongArchSubtarget &Subtarget) {
1767 int SplatIndex = -1;
1768 for (const auto &M : Mask) {
1769 if (M != -1) {
1770 SplatIndex = M;
1771 break;
1772 }
1773 }
1774
1775 if (SplatIndex == -1)
1776 return DAG.getUNDEF(VT);
1777
1778 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1779 if (fitsRegularPattern<int>(Begin: Mask.begin(), CheckStride: 1, End: Mask.end(), ExpectedIndex: SplatIndex, ExpectedIndexStride: 0)) {
1780 return DAG.getNode(Opcode: LoongArchISD::VREPLVEI, DL, VT, N1: V1,
1781 N2: DAG.getConstant(Val: SplatIndex, DL, VT: Subtarget.getGRLenVT()));
1782 }
1783
1784 return SDValue();
1785}
1786
1787/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
1788///
1789/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
1790/// elements according to a <4 x i2> constant (encoded as an integer immediate).
1791///
1792/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
1793/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
1794/// When undef's appear they are treated as if they were whatever value is
1795/// necessary in order to fit the above forms.
1796///
1797/// For example:
1798/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
1799/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
1800/// i32 7, i32 6, i32 5, i32 4>
1801/// is lowered to:
1802/// (VSHUF4I_H $v0, $v1, 27)
1803/// where the 27 comes from:
1804/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
1805static SDValue
1806lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1807 SDValue V1, SDValue V2, SelectionDAG &DAG,
1808 const LoongArchSubtarget &Subtarget) {
1809
1810 unsigned SubVecSize = 4;
1811 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1812 SubVecSize = 2;
1813
1814 int SubMask[4] = {-1, -1, -1, -1};
1815 for (unsigned i = 0; i < SubVecSize; ++i) {
1816 for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
1817 int M = Mask[j];
1818
1819 // Convert from vector index to 4-element subvector index
1820 // If an index refers to an element outside of the subvector then give up
1821 if (M != -1) {
1822 M -= 4 * (j / SubVecSize);
1823 if (M < 0 || M >= 4)
1824 return SDValue();
1825 }
1826
1827 // If the mask has an undef, replace it with the current index.
1828 // Note that it might still be undef if the current index is also undef
1829 if (SubMask[i] == -1)
1830 SubMask[i] = M;
1831 // Check that non-undef values are the same as in the mask. If they
1832 // aren't then give up
1833 else if (M != -1 && M != SubMask[i])
1834 return SDValue();
1835 }
1836 }
1837
1838 // Calculate the immediate. Replace any remaining undefs with zero
1839 int Imm = 0;
1840 for (int i = SubVecSize - 1; i >= 0; --i) {
1841 int M = SubMask[i];
1842
1843 if (M == -1)
1844 M = 0;
1845
1846 Imm <<= 2;
1847 Imm |= M & 0x3;
1848 }
1849
1850 MVT GRLenVT = Subtarget.getGRLenVT();
1851
1852 // Return vshuf4i.d
1853 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1854 return DAG.getNode(Opcode: LoongArchISD::VSHUF4I_D, DL, VT, N1: V1, N2: V2,
1855 N3: DAG.getConstant(Val: Imm, DL, VT: GRLenVT));
1856
1857 return DAG.getNode(Opcode: LoongArchISD::VSHUF4I, DL, VT, N1: V1,
1858 N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT));
1859}
1860
1861/// Lower VECTOR_SHUFFLE whose result is the reversed source vector.
1862///
1863/// It is possible to optimize a VECTOR_SHUFFLE that performs a vector
1864/// reverse, i.e. whose mask looks like:
1865/// <7, 6, 5, 4, 3, 2, 1, 0>
1866///
1867/// When undef's appear in the mask they are treated as if they were whatever
1868/// value is necessary in order to fit the above forms.
1869static SDValue
1870lowerVECTOR_SHUFFLE_IsReverse(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1871 SDValue V1, SelectionDAG &DAG,
1872 const LoongArchSubtarget &Subtarget) {
1873 // Only vectors with i8/i16 elements, which cannot match other patterns
1874 // directly, need this treatment.
1875 if (VT != MVT::v16i8 && VT != MVT::v8i16 && VT != MVT::v32i8 &&
1876 VT != MVT::v16i16)
1877 return SDValue();
1878
1879 if (!ShuffleVectorInst::isReverseMask(Mask, NumSrcElts: Mask.size()))
1880 return SDValue();
1881
1882 int WidenNumElts = VT.getVectorNumElements() / 4;
1883 SmallVector<int, 16> WidenMask(WidenNumElts, -1);
1884 for (int i = 0; i < WidenNumElts; ++i)
1885 WidenMask[i] = WidenNumElts - 1 - i;
1886
1887 MVT WidenVT = MVT::getVectorVT(
1888 VT: VT.getVectorElementType() == MVT::i8 ? MVT::i32 : MVT::i64, NumElements: WidenNumElts);
1889 SDValue NewV1 = DAG.getBitcast(VT: WidenVT, V: V1);
1890 SDValue WidenRev = DAG.getVectorShuffle(VT: WidenVT, dl: DL, N1: NewV1,
1891 N2: DAG.getUNDEF(VT: WidenVT), Mask: WidenMask);
1892
1893 return DAG.getNode(Opcode: LoongArchISD::VSHUF4I, DL, VT,
1894 N1: DAG.getBitcast(VT, V: WidenRev),
1895 N2: DAG.getConstant(Val: 27, DL, VT: Subtarget.getGRLenVT()));
1896}
1897
1898/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
1899///
1900/// VPACKEV interleaves the even elements from each vector.
1901///
1902/// It is possible to lower into VPACKEV when the mask consists of two of the
1903/// following forms interleaved:
1904/// <0, 2, 4, ...>
1905/// <n, n+2, n+4, ...>
1906/// where n is the number of elements in the vector.
1907/// For example:
1908/// <0, 0, 2, 2, 4, 4, ...>
1909/// <0, n, 2, n+2, 4, n+4, ...>
1910///
1911/// When undef's appear in the mask they are treated as if they were whatever
1912/// value is necessary in order to fit the above forms.
1913static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
1914 MVT VT, SDValue V1, SDValue V2,
1915 SelectionDAG &DAG) {
1916
1917 const auto &Begin = Mask.begin();
1918 const auto &End = Mask.end();
1919 SDValue OriV1 = V1, OriV2 = V2;
1920
1921 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 2))
1922 V1 = OriV1;
1923 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2))
1924 V1 = OriV2;
1925 else
1926 return SDValue();
1927
1928 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 2))
1929 V2 = OriV1;
1930 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2))
1931 V2 = OriV2;
1932 else
1933 return SDValue();
1934
1935 return DAG.getNode(Opcode: LoongArchISD::VPACKEV, DL, VT, N1: V2, N2: V1);
1936}
1937
1938/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
1939///
1940/// VPACKOD interleaves the odd elements from each vector.
1941///
1942/// It is possible to lower into VPACKOD when the mask consists of two of the
1943/// following forms interleaved:
1944/// <1, 3, 5, ...>
1945/// <n+1, n+3, n+5, ...>
1946/// where n is the number of elements in the vector.
1947/// For example:
1948/// <1, 1, 3, 3, 5, 5, ...>
1949/// <1, n+1, 3, n+3, 5, n+5, ...>
1950///
1951/// When undef's appear in the mask they are treated as if they were whatever
1952/// value is necessary in order to fit the above forms.
1953static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
1954 MVT VT, SDValue V1, SDValue V2,
1955 SelectionDAG &DAG) {
1956
1957 const auto &Begin = Mask.begin();
1958 const auto &End = Mask.end();
1959 SDValue OriV1 = V1, OriV2 = V2;
1960
1961 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 1, ExpectedIndexStride: 2))
1962 V1 = OriV1;
1963 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2))
1964 V1 = OriV2;
1965 else
1966 return SDValue();
1967
1968 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 1, ExpectedIndexStride: 2))
1969 V2 = OriV1;
1970 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2))
1971 V2 = OriV2;
1972 else
1973 return SDValue();
1974
1975 return DAG.getNode(Opcode: LoongArchISD::VPACKOD, DL, VT, N1: V2, N2: V1);
1976}
1977
1978/// Lower VECTOR_SHUFFLE into VILVH (if possible).
1979///
1980/// VILVH interleaves consecutive elements from the left (highest-indexed) half
1981/// of each vector.
1982///
1983/// It is possible to lower into VILVH when the mask consists of two of the
1984/// following forms interleaved:
1985/// <x, x+1, x+2, ...>
1986/// <n+x, n+x+1, n+x+2, ...>
1987/// where n is the number of elements in the vector and x is half n.
1988/// For example:
1989/// <x, x, x+1, x+1, x+2, x+2, ...>
1990/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
1991///
1992/// When undef's appear in the mask they are treated as if they were whatever
1993/// value is necessary in order to fit the above forms.
1994static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef<int> Mask,
1995 MVT VT, SDValue V1, SDValue V2,
1996 SelectionDAG &DAG) {
1997
1998 const auto &Begin = Mask.begin();
1999 const auto &End = Mask.end();
2000 unsigned HalfSize = Mask.size() / 2;
2001 SDValue OriV1 = V1, OriV2 = V2;
2002
2003 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1))
2004 V1 = OriV1;
2005 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 1))
2006 V1 = OriV2;
2007 else
2008 return SDValue();
2009
2010 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1))
2011 V2 = OriV1;
2012 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size() + HalfSize,
2013 ExpectedIndexStride: 1))
2014 V2 = OriV2;
2015 else
2016 return SDValue();
2017
2018 return DAG.getNode(Opcode: LoongArchISD::VILVH, DL, VT, N1: V2, N2: V1);
2019}
2020
2021/// Lower VECTOR_SHUFFLE into VILVL (if possible).
2022///
2023/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
2024/// of each vector.
2025///
2026/// It is possible to lower into VILVL when the mask consists of two of the
2027/// following forms interleaved:
2028/// <0, 1, 2, ...>
2029/// <n, n+1, n+2, ...>
2030/// where n is the number of elements in the vector.
2031/// For example:
2032/// <0, 0, 1, 1, 2, 2, ...>
2033/// <0, n, 1, n+1, 2, n+2, ...>
2034///
2035/// When undef's appear in the mask they are treated as if they were whatever
2036/// value is necessary in order to fit the above forms.
2037static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask,
2038 MVT VT, SDValue V1, SDValue V2,
2039 SelectionDAG &DAG) {
2040
2041 const auto &Begin = Mask.begin();
2042 const auto &End = Mask.end();
2043 SDValue OriV1 = V1, OriV2 = V2;
2044
2045 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 1))
2046 V1 = OriV1;
2047 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 1))
2048 V1 = OriV2;
2049 else
2050 return SDValue();
2051
2052 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 1))
2053 V2 = OriV1;
2054 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 1))
2055 V2 = OriV2;
2056 else
2057 return SDValue();
2058
2059 return DAG.getNode(Opcode: LoongArchISD::VILVL, DL, VT, N1: V2, N2: V1);
2060}
2061
2062/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
2063///
2064/// VPICKEV copies the even elements of each vector into the result vector.
2065///
2066/// It is possible to lower into VPICKEV when the mask consists of two of the
2067/// following forms concatenated:
2068/// <0, 2, 4, ...>
2069/// <n, n+2, n+4, ...>
2070/// where n is the number of elements in the vector.
2071/// For example:
2072/// <0, 2, 4, ..., 0, 2, 4, ...>
2073/// <0, 2, 4, ..., n, n+2, n+4, ...>
2074///
2075/// When undef's appear in the mask they are treated as if they were whatever
2076/// value is necessary in order to fit the above forms.
2077static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
2078 MVT VT, SDValue V1, SDValue V2,
2079 SelectionDAG &DAG) {
2080
2081 const auto &Begin = Mask.begin();
2082 const auto &Mid = Mask.begin() + Mask.size() / 2;
2083 const auto &End = Mask.end();
2084 SDValue OriV1 = V1, OriV2 = V2;
2085
2086 if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: 0, ExpectedIndexStride: 2))
2087 V1 = OriV1;
2088 else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2))
2089 V1 = OriV2;
2090 else
2091 return SDValue();
2092
2093 if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: 0, ExpectedIndexStride: 2))
2094 V2 = OriV1;
2095 else if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2))
2096 V2 = OriV2;
2098 else
2099 return SDValue();
2100
2101 return DAG.getNode(Opcode: LoongArchISD::VPICKEV, DL, VT, N1: V2, N2: V1);
2102}
2103
2104/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
2105///
2106/// VPICKOD copies the odd elements of each vector into the result vector.
2107///
2108/// It is possible to lower into VPICKOD when the mask consists of two of the
2109/// following forms concatenated:
2110/// <1, 3, 5, ...>
2111/// <n+1, n+3, n+5, ...>
2112/// where n is the number of elements in the vector.
2113/// For example:
2114/// <1, 3, 5, ..., 1, 3, 5, ...>
2115/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
2116///
2117/// When undef's appear in the mask they are treated as if they were whatever
2118/// value is necessary in order to fit the above forms.
2119static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
2120 MVT VT, SDValue V1, SDValue V2,
2121 SelectionDAG &DAG) {
2122
2123 const auto &Begin = Mask.begin();
2124 const auto &Mid = Mask.begin() + Mask.size() / 2;
2125 const auto &End = Mask.end();
2126 SDValue OriV1 = V1, OriV2 = V2;
2127
2128 if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: 1, ExpectedIndexStride: 2))
2129 V1 = OriV1;
2130 else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2))
2131 V1 = OriV2;
2132 else
2133 return SDValue();
2134
2135 if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: 1, ExpectedIndexStride: 2))
2136 V2 = OriV1;
2137 else if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2))
2138 V2 = OriV2;
2139 else
2140 return SDValue();
2141
2142 return DAG.getNode(Opcode: LoongArchISD::VPICKOD, DL, VT, N1: V2, N2: V1);
2143}
2144
2145/// Lower VECTOR_SHUFFLE into VSHUF.
2146///
2147/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
2148/// adding it as an operand to the resulting VSHUF.
2149static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
2150 MVT VT, SDValue V1, SDValue V2,
2151 SelectionDAG &DAG,
2152 const LoongArchSubtarget &Subtarget) {
2153
2154 SmallVector<SDValue, 16> Ops;
2155 for (auto M : Mask)
2156 Ops.push_back(Elt: DAG.getSignedConstant(Val: M, DL, VT: Subtarget.getGRLenVT()));
2157
2158 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2159 SDValue MaskVec = DAG.getBuildVector(VT: MaskVecTy, DL, Ops);
2160
2161 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
2162 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
2163 // VSHUF concatenates the vectors in a bitwise fashion:
2164 // <0b00, 0b01> + <0b10, 0b11> ->
2165 // 0b0100 + 0b1110 -> 0b01001110
2166 // <0b10, 0b11, 0b00, 0b01>
2167 // We must therefore swap the operands to get the correct result.
2168 return DAG.getNode(Opcode: LoongArchISD::VSHUF, DL, VT, N1: MaskVec, N2: V2, N3: V1);
2169}
2170
2171/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
2172///
2173/// This routine breaks down the specific type of 128-bit shuffle and
2174/// dispatches to the lowering routines accordingly.
2175static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2176 SDValue V1, SDValue V2, SelectionDAG &DAG,
2177 const LoongArchSubtarget &Subtarget) {
2178 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
2179 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
2180 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
2181 "Vector type is unsupported for lsx!");
2182 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
2183 "Two operands have different types!");
2184 assert(VT.getVectorNumElements() == Mask.size() &&
2185 "Unexpected mask size for shuffle!");
2186 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2187
2188 APInt KnownUndef, KnownZero;
2189 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2190 APInt Zeroable = KnownUndef | KnownZero;
2191
2192 SDValue Result;
2193 // TODO: Add more comparison patterns.
2194 if (V2.isUndef()) {
2195 if ((Result =
2196 lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2197 return Result;
2198 if ((Result =
2199 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2200 return Result;
2201 if ((Result =
2202 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
2203 return Result;
2204
2205 // TODO: This commented-out assignment may be enabled in the future to
2206 // better match the pattern for instruction selection.
2207 /* V2 = V1; */
2208 }
2209
2210 // For better performance, it is recommended not to change the order in
2211 // which these patterns are tried.
2212 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
2213 return Result;
2214 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
2215 return Result;
2216 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
2217 return Result;
2218 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
2219 return Result;
2220 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
2221 return Result;
2222 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
2223 return Result;
2224 if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
2225 (Result =
2226 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2227 return Result;
2228 if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
2229 Zeroable)))
2230 return Result;
2231 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2232 Zeroable)))
2233 return Result;
2234 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2235 Subtarget)))
2236 return Result;
2237 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2238 return NewShuffle;
2239 if ((Result =
2240 lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2241 return Result;
2242 return SDValue();
2243}
2244
2245/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
2246///
2247/// It is an XVREPLVEI when the mask is:
2248/// <x, x, x, ..., x+n, x+n, x+n, ...>
2249/// where x appears n times and n is half the number of vector elements.
2250///
2251/// When undef's appear in the mask they are treated as if they were whatever
2252/// value is necessary in order to fit the above form.
2253static SDValue
2254lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2255 SDValue V1, SelectionDAG &DAG,
2256 const LoongArchSubtarget &Subtarget) {
2257 int SplatIndex = -1;
2258 for (const auto &M : Mask) {
2259 if (M != -1) {
2260 SplatIndex = M;
2261 break;
2262 }
2263 }
2264
2265 if (SplatIndex == -1)
2266 return DAG.getUNDEF(VT);
2267
2268 const auto &Begin = Mask.begin();
2269 const auto &End = Mask.end();
2270 int HalfSize = Mask.size() / 2;
2271
2272 if (SplatIndex >= HalfSize)
2273 return SDValue();
2274
2275 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
2276 if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: End - HalfSize, ExpectedIndex: SplatIndex, ExpectedIndexStride: 0) &&
2277 fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 1, End, ExpectedIndex: SplatIndex + HalfSize,
2278 ExpectedIndexStride: 0)) {
2279 return DAG.getNode(Opcode: LoongArchISD::VREPLVEI, DL, VT, N1: V1,
2280 N2: DAG.getConstant(Val: SplatIndex, DL, VT: Subtarget.getGRLenVT()));
2281 }
2282
2283 return SDValue();
2284}
2285
2286/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
2287static SDValue
2288lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2289 SDValue V1, SDValue V2, SelectionDAG &DAG,
2290 const LoongArchSubtarget &Subtarget) {
2291 // When the mask size is less than or equal to 4, lower-cost instructions
2292 // may be used.
2293 if (Mask.size() <= 4)
2294 return SDValue();
2295 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
2296}
2297
2298/// Lower VECTOR_SHUFFLE into XVPERMI (if possible).
2299static SDValue
2300lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2301 SDValue V1, SelectionDAG &DAG,
2302 const LoongArchSubtarget &Subtarget) {
2303 // Only consider XVPERMI_D.
2304 if (Mask.size() != 4 || (VT != MVT::v4i64 && VT != MVT::v4f64))
2305 return SDValue();
2306
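// Encode the mask as the XVPERMI_D immediate: two bits per result element
// selecting one of the four 64-bit source elements. Undef entries default
// to element 0.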
2307 unsigned MaskImm = 0;
2308 for (unsigned i = 0; i < Mask.size(); ++i) {
2309 if (Mask[i] == -1)
2310 continue;
2311 MaskImm |= Mask[i] << (i * 2);
2312 }
2313
2314 return DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT, N1: V1,
2315 N2: DAG.getConstant(Val: MaskImm, DL, VT: Subtarget.getGRLenVT()));
2316}
2317
2318/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
2319static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask,
2320 MVT VT, SDValue V1, SelectionDAG &DAG,
2321 const LoongArchSubtarget &Subtarget) {
2322 // LoongArch LASX only has XVPERM_W.
2323 if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
2324 return SDValue();
2325
2326 unsigned NumElts = VT.getVectorNumElements();
2327 unsigned HalfSize = NumElts / 2;
2328 bool FrontLo = true, FrontHi = true;
2329 bool BackLo = true, BackHi = true;
2330
2331 auto inRange = [](int val, int low, int high) {
2332 return (val == -1) || (val >= low && val < high);
2333 };
2334
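// Record whether each half of the result mask reads exclusively from the
// low half or the high half of the source vector.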
2335 for (unsigned i = 0; i < HalfSize; ++i) {
2336 int Fronti = Mask[i];
2337 int Backi = Mask[i + HalfSize];
2338
2339 FrontLo &= inRange(Fronti, 0, HalfSize);
2340 FrontHi &= inRange(Fronti, HalfSize, NumElts);
2341 BackLo &= inRange(Backi, 0, HalfSize);
2342 BackHi &= inRange(Backi, HalfSize, NumElts);
2343 }
2344
2345 // If both the lower and upper 128-bit parts access only one half of the
2346 // source vector (either the lower or the upper half), avoid using xvperm.w,
2347 // whose latency (3) is higher than that of xvshuf (1) plus xvori (1).
2348 if ((FrontLo || FrontHi) && (BackLo || BackHi))
2349 return SDValue();
2350
2351 SmallVector<SDValue, 8> Masks;
2352 MVT GRLenVT = Subtarget.getGRLenVT();
2353 for (unsigned i = 0; i < NumElts; ++i)
2354 Masks.push_back(Elt: Mask[i] == -1 ? DAG.getUNDEF(VT: GRLenVT)
2355 : DAG.getConstant(Val: Mask[i], DL, VT: GRLenVT));
2356 SDValue MaskVec = DAG.getBuildVector(VT: MVT::v8i32, DL, Ops: Masks);
2357
2358 return DAG.getNode(Opcode: LoongArchISD::XVPERM, DL, VT, N1: V1, N2: MaskVec);
2359}
2360
2361/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
2362static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
2363 MVT VT, SDValue V1, SDValue V2,
2364 SelectionDAG &DAG) {
2365 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
2366}
2367
2368/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
2369static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
2370 MVT VT, SDValue V1, SDValue V2,
2371 SelectionDAG &DAG) {
2372 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
2373}
2374
2375/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
2376static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
2377 MVT VT, SDValue V1, SDValue V2,
2378 SelectionDAG &DAG) {
2379
2380 const auto &Begin = Mask.begin();
2381 const auto &End = Mask.end();
2382 unsigned HalfSize = Mask.size() / 2;
2383 unsigned LeftSize = HalfSize / 2;
2384 SDValue OriV1 = V1, OriV2 = V2;
2385
2386 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize, ExpectedIndex: HalfSize - LeftSize,
2387 ExpectedIndexStride: 1) &&
2388 fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize + LeftSize, ExpectedIndexStride: 1))
2389 V1 = OriV1;
2390 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize,
2391 ExpectedIndex: Mask.size() + HalfSize - LeftSize, ExpectedIndexStride: 1) &&
2392 fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End,
2393 ExpectedIndex: Mask.size() + HalfSize + LeftSize, ExpectedIndexStride: 1))
2394 V1 = OriV2;
2395 else
2396 return SDValue();
2397
2398 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize, ExpectedIndex: HalfSize - LeftSize,
2399 ExpectedIndexStride: 1) &&
2400 fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize + LeftSize,
2401 ExpectedIndexStride: 1))
2402 V2 = OriV1;
2403 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize,
2404 ExpectedIndex: Mask.size() + HalfSize - LeftSize, ExpectedIndexStride: 1) &&
2405 fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End,
2406 ExpectedIndex: Mask.size() + HalfSize + LeftSize, ExpectedIndexStride: 1))
2407 V2 = OriV2;
2408 else
2409 return SDValue();
2410
2411 return DAG.getNode(Opcode: LoongArchISD::VILVH, DL, VT, N1: V2, N2: V1);
2412}
2413
2414/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
2415static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
2416 MVT VT, SDValue V1, SDValue V2,
2417 SelectionDAG &DAG) {
2418
2419 const auto &Begin = Mask.begin();
2420 const auto &End = Mask.end();
2421 unsigned HalfSize = Mask.size() / 2;
2422 SDValue OriV1 = V1, OriV2 = V2;
2423
2424 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize, ExpectedIndex: 0, ExpectedIndexStride: 1) &&
2425 fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1))
2426 V1 = OriV1;
2427 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize, ExpectedIndex: Mask.size(), ExpectedIndexStride: 1) &&
2428 fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End,
2429 ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 1))
2430 V1 = OriV2;
2431 else
2432 return SDValue();
2433
2434 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize, ExpectedIndex: 0, ExpectedIndexStride: 1) &&
2435 fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1))
2436 V2 = OriV1;
2437 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize, ExpectedIndex: Mask.size(),
2438 ExpectedIndexStride: 1) &&
2439 fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End,
2440 ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 1))
2441 V2 = OriV2;
2442 else
2443 return SDValue();
2444
2445 return DAG.getNode(Opcode: LoongArchISD::VILVL, DL, VT, N1: V2, N2: V1);
2446}
2447
2448/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
2449static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
2450 MVT VT, SDValue V1, SDValue V2,
2451 SelectionDAG &DAG) {
2452
2453 const auto &Begin = Mask.begin();
2454 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2455 const auto &Mid = Mask.begin() + Mask.size() / 2;
2456 const auto &RightMid = Mask.end() - Mask.size() / 4;
2457 const auto &End = Mask.end();
2458 unsigned HalfSize = Mask.size() / 2;
2459 SDValue OriV1 = V1, OriV2 = V2;
2460
2461 if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: 0, ExpectedIndexStride: 2) &&
2462 fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: HalfSize, ExpectedIndexStride: 2))
2463 V1 = OriV1;
2464 else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2) &&
2465 fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 2))
2466 V1 = OriV2;
2467 else
2468 return SDValue();
2469
2470 if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: 0, ExpectedIndexStride: 2) &&
2471 fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 2))
2472 V2 = OriV1;
2473 else if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2) &&
2474 fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 2))
2475 V2 = OriV2;
2477 else
2478 return SDValue();
2479
2480 return DAG.getNode(Opcode: LoongArchISD::VPICKEV, DL, VT, N1: V2, N2: V1);
2481}
2482
2483/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
2484static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
2485 MVT VT, SDValue V1, SDValue V2,
2486 SelectionDAG &DAG) {
2487
2488 const auto &Begin = Mask.begin();
2489 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2490 const auto &Mid = Mask.begin() + Mask.size() / 2;
2491 const auto &RightMid = Mask.end() - Mask.size() / 4;
2492 const auto &End = Mask.end();
2493 unsigned HalfSize = Mask.size() / 2;
2494 SDValue OriV1 = V1, OriV2 = V2;
2495
2496 if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: 1, ExpectedIndexStride: 2) &&
2497 fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: HalfSize + 1, ExpectedIndexStride: 2))
2498 V1 = OriV1;
2499 else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2) &&
2500 fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: Mask.size() + HalfSize + 1,
2501 ExpectedIndexStride: 2))
2502 V1 = OriV2;
2503 else
2504 return SDValue();
2505
2506 if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: 1, ExpectedIndexStride: 2) &&
2507 fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: HalfSize + 1, ExpectedIndexStride: 2))
2508 V2 = OriV1;
2509 else if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2) &&
2510 fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: Mask.size() + HalfSize + 1,
2511 ExpectedIndexStride: 2))
2512 V2 = OriV2;
2513 else
2514 return SDValue();
2515
2516 return DAG.getNode(Opcode: LoongArchISD::VPICKOD, DL, VT, N1: V2, N2: V1);
2517}
2518
2519/// Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
2520static SDValue
2521lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2522 SDValue V1, SDValue V2, SelectionDAG &DAG,
2523 const LoongArchSubtarget &Subtarget) {
2524 // LoongArch LASX only supports xvinsve0.{w/d}.
2525 if (VT != MVT::v8i32 && VT != MVT::v8f32 && VT != MVT::v4i64 &&
2526 VT != MVT::v4f64)
2527 return SDValue();
2528
2529 MVT GRLenVT = Subtarget.getGRLenVT();
2530 int MaskSize = Mask.size();
2531 assert(MaskSize == (int)VT.getVectorNumElements() && "Unexpected mask size");
2532
2533 // Check if exactly one element of the Mask is replaced by 'Replaced', while
2534 // all other elements are either 'Base + i' or undef (-1). On success, return
2535 // the index of the replaced element. Otherwise, just return -1.
2536 auto checkReplaceOne = [&](int Base, int Replaced) -> int {
2537 int Idx = -1;
2538 for (int i = 0; i < MaskSize; ++i) {
2539 if (Mask[i] == Base + i || Mask[i] == -1)
2540 continue;
2541 if (Mask[i] != Replaced)
2542 return -1;
2543 if (Idx == -1)
2544 Idx = i;
2545 else
2546 return -1;
2547 }
2548 return Idx;
2549 };
2550
2551 // Case 1: the lowest element of V2 replaces one element in V1.
2552 int Idx = checkReplaceOne(0, MaskSize);
2553 if (Idx != -1)
2554 return DAG.getNode(Opcode: LoongArchISD::XVINSVE0, DL, VT, N1: V1, N2: V2,
2555 N3: DAG.getConstant(Val: Idx, DL, VT: GRLenVT));
2556
2557 // Case 2: the lowest element of V1 replaces one element in V2.
2558 Idx = checkReplaceOne(MaskSize, 0);
2559 if (Idx != -1)
2560 return DAG.getNode(Opcode: LoongArchISD::XVINSVE0, DL, VT, N1: V2, N2: V1,
2561 N3: DAG.getConstant(Val: Idx, DL, VT: GRLenVT));
2562
2563 return SDValue();
2564}
2565
2566/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
2567static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
2568 MVT VT, SDValue V1, SDValue V2,
2569 SelectionDAG &DAG) {
2570
2571 int MaskSize = Mask.size();
2572 int HalfSize = Mask.size() / 2;
2573 const auto &Begin = Mask.begin();
2574 const auto &Mid = Mask.begin() + HalfSize;
2575 const auto &End = Mask.end();
2576
2577 // VECTOR_SHUFFLE concatenates the vectors:
2578 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
2579 // shuffling ->
2580 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
2581 //
2582 // XVSHUF concatenates the vectors:
2583 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
2584 // shuffling ->
2585 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
2586 SmallVector<SDValue, 8> MaskAlloc;
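// Convert the first half of the mask, which may reference only the low
// 128-bit halves of the two sources, into XVSHUF's element numbering; any
// other index cannot be matched.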
2587 for (auto it = Begin; it < Mid; it++) {
2588 if (*it < 0) // UNDEF
2589 MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: 0, DL, VT: MVT::i64));
2590 else if ((*it >= 0 && *it < HalfSize) ||
2591 (*it >= MaskSize && *it < MaskSize + HalfSize)) {
2592 int M = *it < HalfSize ? *it : *it - HalfSize;
2593 MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: M, DL, VT: MVT::i64));
2594 } else
2595 return SDValue();
2596 }
2597 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
2598
2599 for (auto it = Mid; it < End; it++) {
2600 if (*it < 0) // UNDEF
2601 MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: 0, DL, VT: MVT::i64));
2602 else if ((*it >= HalfSize && *it < MaskSize) ||
2603 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
2604 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
2605 MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: M, DL, VT: MVT::i64));
2606 } else
2607 return SDValue();
2608 }
2609 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
2610
2611 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2612 SDValue MaskVec = DAG.getBuildVector(VT: MaskVecTy, DL, Ops: MaskAlloc);
2613 return DAG.getNode(Opcode: LoongArchISD::VSHUF, DL, VT, N1: MaskVec, N2: V2, N3: V1);
2614}
2615
2616/// Shuffle vectors by lane to generate more optimized instructions.
2617/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
2618///
2619/// Therefore, all cases except the following four are regarded as
2620/// cross-lane shuffles, for which optimization is relatively limited.
2621///
2622/// - Shuffle high, low lanes of the two input vectors
2623/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
2624/// - Shuffle low, high lanes of the two input vectors
2625/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
2626/// - Shuffle low, low lanes of the two input vectors
2627/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
2628/// - Shuffle high, high lanes of the two input vectors
2629/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
2630///
2631/// The first case is the closest to LoongArch instructions and the other
2632/// cases need to be converted to it for processing.
2633///
2634/// This function will return true for the last three cases above and will
2635/// modify V1, V2 and Mask. Otherwise, return false for the first case and
2636/// cross-lane shuffle cases.
2637static bool canonicalizeShuffleVectorByLane(
2638 const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
2639 SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
2640
2641 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
2642
2643 int MaskSize = Mask.size();
2644 int HalfSize = Mask.size() / 2;
2645 MVT GRLenVT = Subtarget.getGRLenVT();
2646
2647 HalfMaskType preMask = None, postMask = None;
2648
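// Classify each half of the mask: HighLaneTy if it reads only from the first
// half of either input, LowLaneTy if it reads only from the second half.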
2649 if (std::all_of(first: Mask.begin(), last: Mask.begin() + HalfSize, pred: [&](int M) {
2650 return M < 0 || (M >= 0 && M < HalfSize) ||
2651 (M >= MaskSize && M < MaskSize + HalfSize);
2652 }))
2653 preMask = HighLaneTy;
2654 else if (std::all_of(first: Mask.begin(), last: Mask.begin() + HalfSize, pred: [&](int M) {
2655 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2656 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2657 }))
2658 preMask = LowLaneTy;
2659
2660 if (std::all_of(first: Mask.begin() + HalfSize, last: Mask.end(), pred: [&](int M) {
2661 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2662 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2663 }))
2664 postMask = LowLaneTy;
2665 else if (std::all_of(first: Mask.begin() + HalfSize, last: Mask.end(), pred: [&](int M) {
2666 return M < 0 || (M >= 0 && M < HalfSize) ||
2667 (M >= MaskSize && M < MaskSize + HalfSize);
2668 }))
2669 postMask = HighLaneTy;
2670
2671 // The pre-half of mask is high lane type, and the post-half of mask
2672 // is low lane type, which is closest to the LoongArch instructions.
2673 //
2674 // Note: In the LoongArch architecture, the high lane of the mask corresponds
2675 // to the lower 128 bits of the vector register, and the low lane of the mask
2676 // corresponds to the higher 128 bits of the vector register.
2677 if (preMask == HighLaneTy && postMask == LowLaneTy) {
2678 return false;
2679 }
2680 if (preMask == LowLaneTy && postMask == HighLaneTy) {
2681 V1 = DAG.getBitcast(VT: MVT::v4i64, V: V1);
2682 V1 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V1,
2683 N2: DAG.getConstant(Val: 0b01001110, DL, VT: GRLenVT));
2684 V1 = DAG.getBitcast(VT, V: V1);
2685
2686 if (!V2.isUndef()) {
2687 V2 = DAG.getBitcast(VT: MVT::v4i64, V: V2);
2688 V2 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V2,
2689 N2: DAG.getConstant(Val: 0b01001110, DL, VT: GRLenVT));
2690 V2 = DAG.getBitcast(VT, V: V2);
2691 }
2692
2693 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2694 *it = *it < 0 ? *it : *it - HalfSize;
2695 }
2696 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2697 *it = *it < 0 ? *it : *it + HalfSize;
2698 }
2699 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
2700 V1 = DAG.getBitcast(VT: MVT::v4i64, V: V1);
2701 V1 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V1,
2702 N2: DAG.getConstant(Val: 0b11101110, DL, VT: GRLenVT));
2703 V1 = DAG.getBitcast(VT, V: V1);
2704
2705 if (!V2.isUndef()) {
2706 V2 = DAG.getBitcast(VT: MVT::v4i64, V: V2);
2707 V2 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V2,
2708 N2: DAG.getConstant(Val: 0b11101110, DL, VT: GRLenVT));
2709 V2 = DAG.getBitcast(VT, V: V2);
2710 }
2711
2712 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2713 *it = *it < 0 ? *it : *it - HalfSize;
2714 }
2715 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
2716 V1 = DAG.getBitcast(VT: MVT::v4i64, V: V1);
2717 V1 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V1,
2718 N2: DAG.getConstant(Val: 0b01000100, DL, VT: GRLenVT));
2719 V1 = DAG.getBitcast(VT, V: V1);
2720
2721 if (!V2.isUndef()) {
2722 V2 = DAG.getBitcast(VT: MVT::v4i64, V: V2);
2723 V2 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V2,
2724 N2: DAG.getConstant(Val: 0b01000100, DL, VT: GRLenVT));
2725 V2 = DAG.getBitcast(VT, V: V2);
2726 }
2727
2728 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2729 *it = *it < 0 ? *it : *it + HalfSize;
2730 }
2731 } else { // cross-lane
2732 return false;
2733 }
2734
2735 return true;
2736}
2737
2738/// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
2739/// Only for 256-bit vector.
2740///
2741/// For example:
2742/// %2 = shufflevector <4 x i64> %0, <4 x i64> poison,
2743/// <4 x i32> <i32 0, i32 3, i32 2, i32 0>
2744/// is lowered to:
2745/// (XVPERMI $xr2, $xr0, 78)
2746/// (XVSHUF $xr1, $xr2, $xr0)
2747/// (XVORI $xr0, $xr1, 0)
2748static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL,
2749 ArrayRef<int> Mask,
2750 MVT VT, SDValue V1,
2751 SDValue V2,
2752 SelectionDAG &DAG) {
2753 assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
2754 int Size = Mask.size();
2755 int LaneSize = Size / 2;
2756
2757 bool LaneCrossing[2] = {false, false};
2758 for (int i = 0; i < Size; ++i)
2759 if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
2760 LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
2761
2762 // If no element crosses a 128-bit lane, there is nothing to do here.
2763 if (!LaneCrossing[0] && !LaneCrossing[1])
2764 return SDValue();
2765
2766 SmallVector<int> InLaneMask;
2767 InLaneMask.assign(in_start: Mask.begin(), in_end: Mask.end());
2768 for (int i = 0; i < Size; ++i) {
2769 int &M = InLaneMask[i];
2770 if (M < 0)
2771 continue;
2772 if (((M % Size) / LaneSize) != (i / LaneSize))
2773 M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
2774 }
2775
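// Swap the two 128-bit lanes of V1, then shuffle V1 against the swapped copy
// using the adjusted in-lane mask; elements that originally crossed a lane
// are now taken from the flipped operand.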
2776 SDValue Flipped = DAG.getBitcast(VT: MVT::v4i64, V: V1);
2777 Flipped = DAG.getVectorShuffle(VT: MVT::v4i64, dl: DL, N1: Flipped,
2778 N2: DAG.getUNDEF(VT: MVT::v4i64), Mask: {2, 3, 0, 1});
2779 Flipped = DAG.getBitcast(VT, V: Flipped);
2780 return DAG.getVectorShuffle(VT, dl: DL, N1: V1, N2: Flipped, Mask: InLaneMask);
2781}
2782
2783/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
2784///
2785/// This routine breaks down the specific type of 256-bit shuffle and
2786/// dispatches to the lowering routines accordingly.
2787static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2788 SDValue V1, SDValue V2, SelectionDAG &DAG,
2789 const LoongArchSubtarget &Subtarget) {
2790 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
2791 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
2792 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
2793 "Vector type is unsupported for lasx!");
2794 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
2795 "Two operands have different types!");
2796 assert(VT.getVectorNumElements() == Mask.size() &&
2797 "Unexpected mask size for shuffle!");
2798 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2799 assert(Mask.size() >= 4 && "Mask size is less than 4.");
2800
2801 APInt KnownUndef, KnownZero;
2802 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2803 APInt Zeroable = KnownUndef | KnownZero;
2804
2805 SDValue Result;
2806 // TODO: Add more comparison patterns.
2807 if (V2.isUndef()) {
2808 if ((Result =
2809 lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2810 return Result;
2811 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
2812 Subtarget)))
2813 return Result;
2814 // Try to widen vectors to gain more optimization opportunities.
2815 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2816 return NewShuffle;
2817 if ((Result =
2818 lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, DAG, Subtarget)))
2819 return Result;
2820 if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
2821 return Result;
2822 if ((Result =
2823 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
2824 return Result;
2825
2826 // TODO: The commented-out assignment below may be enabled in the future to
2827 // better match the pattern for instruction selection.
2828 /* V2 = V1; */
2829 }
2830
2831 // It is recommended not to change the pattern comparison order for better
2832 // performance.
2833 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask, VT, V1, V2, DAG)))
2834 return Result;
2835 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask, VT, V1, V2, DAG)))
2836 return Result;
2837 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask, VT, V1, V2, DAG)))
2838 return Result;
2839 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask, VT, V1, V2, DAG)))
2840 return Result;
2841 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask, VT, V1, V2, DAG)))
2842 return Result;
2843 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask, VT, V1, V2, DAG)))
2844 return Result;
2845 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2846 Zeroable)))
2847 return Result;
2848 if ((Result =
2849 lowerVECTOR_SHUFFLE_XVINSVE0(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2850 return Result;
2851 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2852 Subtarget)))
2853 return Result;
2854
2855 // Canonicalize shuffles that do not need to cross lanes, then retry lowering.
2856 SmallVector<int> NewMask(Mask);
2857 if (canonicalizeShuffleVectorByLane(DL, Mask: NewMask, VT, V1, V2, DAG, Subtarget))
2858 return lower256BitShuffle(DL, Mask: NewMask, VT, V1, V2, DAG, Subtarget);
2859
2860 // FIXME: Handling the remaining cases earlier can degrade performance
2861 // in some situations. Further analysis is required to enable more
2862 // effective optimizations.
2863 if (V2.isUndef()) {
2864 if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, Mask: NewMask, VT,
2865 V1, V2, DAG)))
2866 return Result;
2867 }
2868
2869 if (SDValue NewShuffle = widenShuffleMask(DL, Mask: NewMask, VT, V1, V2, DAG))
2870 return NewShuffle;
2871 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, Mask: NewMask, VT, V1, V2, DAG)))
2872 return Result;
2873
2874 return SDValue();
2875}
2876
2877SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
2878 SelectionDAG &DAG) const {
2879 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Val&: Op);
2880 ArrayRef<int> OrigMask = SVOp->getMask();
2881 SDValue V1 = Op.getOperand(i: 0);
2882 SDValue V2 = Op.getOperand(i: 1);
2883 MVT VT = Op.getSimpleValueType();
2884 int NumElements = VT.getVectorNumElements();
2885 SDLoc DL(Op);
2886
2887 bool V1IsUndef = V1.isUndef();
2888 bool V2IsUndef = V2.isUndef();
2889 if (V1IsUndef && V2IsUndef)
2890 return DAG.getUNDEF(VT);
2891
2892 // When we create a shuffle node we put the UNDEF node in the second operand,
2893 // but in some cases the first operand may be transformed to UNDEF.
2894 // In this case we should just commute the node.
2895 if (V1IsUndef)
2896 return DAG.getCommutedVectorShuffle(SV: *SVOp);
2897
2898 // Check for non-undef masks pointing at an undef vector and make the masks
2899 // undef as well. This makes it easier to match the shuffle based solely on
2900 // the mask.
2901 if (V2IsUndef &&
2902 any_of(Range&: OrigMask, P: [NumElements](int M) { return M >= NumElements; })) {
2903 SmallVector<int, 8> NewMask(OrigMask);
2904 for (int &M : NewMask)
2905 if (M >= NumElements)
2906 M = -1;
2907 return DAG.getVectorShuffle(VT, dl: DL, N1: V1, N2: V2, Mask: NewMask);
2908 }
2909
2910 // Check for illegal shuffle mask element index values.
2911 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
2912 (void)MaskUpperLimit;
2913 assert(llvm::all_of(OrigMask,
2914 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
2915 "Out of bounds shuffle index");
2916
2917 // For each vector width, delegate to a specialized lowering routine.
2918 if (VT.is128BitVector())
2919 return lower128BitShuffle(DL, Mask: OrigMask, VT, V1, V2, DAG, Subtarget);
2920
2921 if (VT.is256BitVector())
2922 return lower256BitShuffle(DL, Mask: OrigMask, VT, V1, V2, DAG, Subtarget);
2923
2924 return SDValue();
2925}
2926
2927SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
2928 SelectionDAG &DAG) const {
2929 // Custom lower to ensure the libcall return is passed in an FPR on hard
2930 // float ABIs.
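// Illustrative sketch (assumptions noted, not from the original comments): an
// fN -> f16 conversion becomes a call to the matching FPROUND libcall
// (typically __truncsfhf2 for f32 sources); the f32-typed result is then moved
// out of the FPR with MOVFR2GR_S_LA64 on LA64 or bitcast to i32 on LA32.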
2931 SDLoc DL(Op);
2932 MakeLibCallOptions CallOptions;
2933 SDValue Op0 = Op.getOperand(i: 0);
2934 SDValue Chain = SDValue();
2935 RTLIB::Libcall LC = RTLIB::getFPROUND(OpVT: Op0.getValueType(), RetVT: MVT::f16);
2936 SDValue Res;
2937 std::tie(args&: Res, args&: Chain) =
2938 makeLibCall(DAG, LC, RetVT: MVT::f32, Ops: Op0, CallOptions, dl: DL, Chain);
2939 if (Subtarget.is64Bit())
2940 return DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Res);
2941 return DAG.getBitcast(VT: MVT::i32, V: Res);
2942}
2943
2944SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
2945 SelectionDAG &DAG) const {
2946 // Custom lower to ensure the libcall argument is passed in an FPR on hard
2947 // float ABIs.
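// Illustrative sketch (assumptions noted, not from the original comments): the
// incoming f16 bits are first placed into an FPR (MOVGR2FR_W_LA64 on LA64, a
// bitcast to f32 on LA32) so that the FPEXT_F16_F32 libcall (typically
// __extendhfsf2) receives its argument, and returns the extended f32, in FPRs.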
2948 SDLoc DL(Op);
2949 MakeLibCallOptions CallOptions;
2950 SDValue Op0 = Op.getOperand(i: 0);
2951 SDValue Chain = SDValue();
2952 SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64,
2953 DL, VT: MVT::f32, Operand: Op0)
2954 : DAG.getBitcast(VT: MVT::f32, V: Op0);
2955 SDValue Res;
2956 std::tie(args&: Res, args&: Chain) = makeLibCall(DAG, LC: RTLIB::FPEXT_F16_F32, RetVT: MVT::f32, Ops: Arg,
2957 CallOptions, dl: DL, Chain);
2958 return Res;
2959}
2960
2961SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
2962 SelectionDAG &DAG) const {
2963 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2964 SDLoc DL(Op);
2965 MakeLibCallOptions CallOptions;
2966 RTLIB::Libcall LC =
2967 RTLIB::getFPROUND(OpVT: Op.getOperand(i: 0).getValueType(), RetVT: MVT::bf16);
2968 SDValue Res =
2969 makeLibCall(DAG, LC, RetVT: MVT::f32, Ops: Op.getOperand(i: 0), CallOptions, dl: DL).first;
2970 if (Subtarget.is64Bit())
2971 return DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Res);
2972 return DAG.getBitcast(VT: MVT::i32, V: Res);
2973}
2974
2975SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
2976 SelectionDAG &DAG) const {
2977 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2978 MVT VT = Op.getSimpleValueType();
2979 SDLoc DL(Op);
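// A bf16 value is the high 16 bits of an f32 with the same sign, exponent and
// truncated mantissa, so shifting the integer bits left by 16 and moving them
// into an FPR reproduces the value exactly as an f32.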
2980 Op = DAG.getNode(
2981 Opcode: ISD::SHL, DL, VT: Op.getOperand(i: 0).getValueType(), N1: Op.getOperand(i: 0),
2982 N2: DAG.getShiftAmountConstant(Val: 16, VT: Op.getOperand(i: 0).getValueType(), DL));
2983 SDValue Res = Subtarget.is64Bit() ? DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64,
2984 DL, VT: MVT::f32, Operand: Op)
2985 : DAG.getBitcast(VT: MVT::f32, V: Op);
2986 if (VT != MVT::f32)
2987 return DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT, Operand: Res);
2988 return Res;
2989}
2990
2991// Lower BUILD_VECTOR as broadcast load (if possible).
2992// For example:
2993// %a = load i8, ptr %ptr
2994// %b = build_vector %a, %a, %a, %a
2995// is lowered to:
2996// (VLDREPL_B $a0, 0)
2997static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
2998 const SDLoc &DL,
2999 SelectionDAG &DAG) {
3000 MVT VT = BVOp->getSimpleValueType(ResNo: 0);
3001 int NumOps = BVOp->getNumOperands();
3002
3003 assert((VT.is128BitVector() || VT.is256BitVector()) &&
3004 "Unsupported vector type for broadcast.");
3005
3006 SDValue IdentitySrc;
3007 bool IsIdentity = true;
3008
3009 for (int i = 0; i != NumOps; i++) {
3010 SDValue Op = BVOp->getOperand(Num: i);
3011 if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
3012 IsIdentity = false;
3013 break;
3014 }
3015 IdentitySrc = BVOp->getOperand(Num: 0);
3016 }
3017
3018 // Make sure that this load is valid and only has one user.
3019 if (!IsIdentity || !IdentitySrc || !BVOp->isOnlyUserOf(N: IdentitySrc.getNode()))
3020 return SDValue();
3021
3022 auto *LN = cast<LoadSDNode>(Val&: IdentitySrc);
3023 auto ExtType = LN->getExtensionType();
3024
3025 if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
3026 VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
3027 // Indexed loads and stores are not supported on LoongArch.
3028 assert(LN->isUnindexed() && "Unexpected indexed load.");
3029
3030 SDVTList Tys = DAG.getVTList(VT1: VT, VT2: MVT::Other);
3031 // The offset operand of an unindexed load is always undefined, so there is
3032 // no need to pass it to VLDREPL.
3033 SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
3034 SDValue BCast = DAG.getNode(Opcode: LoongArchISD::VLDREPL, DL, VTList: Tys, Ops);
3035 DAG.ReplaceAllUsesOfValueWith(From: SDValue(LN, 1), To: BCast.getValue(R: 1));
3036 return BCast;
3037 }
3038 return SDValue();
3039}
3040
3041// Sequentially insert elements from Ops into Vector, from low to high indices.
3042// Note: Ops can have fewer elements than Vector.
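// Illustrative example (not from the original comments): with
// Ops = {a, undef, c} and a v4i32 ResTy, this emits SCALAR_TO_VECTOR for 'a'
// followed by a single INSERT_VECTOR_ELT placing 'c' at index 2; the undef
// entry and the unfilled index 3 are simply not written.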
3043static void fillVector(ArrayRef<SDValue> Ops, SelectionDAG &DAG, SDLoc DL,
3044 const LoongArchSubtarget &Subtarget, SDValue &Vector,
3045 EVT ResTy) {
3046 assert(Ops.size() <= ResTy.getVectorNumElements());
3047
3048 SDValue Op0 = Ops[0];
3049 if (!Op0.isUndef())
3050 Vector = DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL, VT: ResTy, Operand: Op0);
3051 for (unsigned i = 1; i < Ops.size(); ++i) {
3052 SDValue Opi = Ops[i];
3053 if (Opi.isUndef())
3054 continue;
3055 Vector = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: ResTy, N1: Vector, N2: Opi,
3056 N3: DAG.getConstant(Val: i, DL, VT: Subtarget.getGRLenVT()));
3057 }
3058}
3059
3060// Build a ResTy subvector from Node, taking NumElts elements starting at index
3061// 'first'.
3062static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node,
3063 SelectionDAG &DAG, SDLoc DL,
3064 const LoongArchSubtarget &Subtarget,
3065 EVT ResTy, unsigned first) {
3066 unsigned NumElts = ResTy.getVectorNumElements();
3067
3068 assert(first + NumElts <= Node->getSimpleValueType(0).getVectorNumElements());
3069
3070 SmallVector<SDValue, 16> Ops(Node->op_begin() + first,
3071 Node->op_begin() + first + NumElts);
3072 SDValue Vector = DAG.getUNDEF(VT: ResTy);
3073 fillVector(Ops, DAG, DL, Subtarget, Vector, ResTy);
3074 return Vector;
3075}
3076
3077SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
3078 SelectionDAG &DAG) const {
3079 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Val&: Op);
3080 MVT VT = Node->getSimpleValueType(ResNo: 0);
3081 EVT ResTy = Op->getValueType(ResNo: 0);
3082 unsigned NumElts = ResTy.getVectorNumElements();
3083 SDLoc DL(Op);
3084 APInt SplatValue, SplatUndef;
3085 unsigned SplatBitSize;
3086 bool HasAnyUndefs;
3087 bool IsConstant = false;
3088 bool UseSameConstant = true;
3089 SDValue ConstantValue;
3090 bool Is128Vec = ResTy.is128BitVector();
3091 bool Is256Vec = ResTy.is256BitVector();
3092
3093 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
3094 (!Subtarget.hasExtLASX() || !Is256Vec))
3095 return SDValue();
3096
3097 if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(BVOp: Node, DL, DAG))
3098 return Result;
3099
3100 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
3101 /*MinSplatBits=*/8) &&
3102 SplatBitSize <= 64) {
3103 // We can only cope with 8, 16, 32, or 64-bit elements.
3104 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
3105 SplatBitSize != 64)
3106 return SDValue();
3107
3108 if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
3109 // We can only handle 64-bit elements that are within
3110 // the signed 10-bit range or match vldi patterns on 32-bit targets.
3111 // See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
3112 if (!SplatValue.isSignedIntN(N: 10) &&
3113 !isImmVLDILegalForMode1(SplatValue, SplatBitSize).first)
3114 return SDValue();
3115 if ((Is128Vec && ResTy == MVT::v4i32) ||
3116 (Is256Vec && ResTy == MVT::v8i32))
3117 return Op;
3118 }
3119
3120 EVT ViaVecTy;
3121
3122 switch (SplatBitSize) {
3123 default:
3124 return SDValue();
3125 case 8:
3126 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
3127 break;
3128 case 16:
3129 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
3130 break;
3131 case 32:
3132 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
3133 break;
3134 case 64:
3135 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
3136 break;
3137 }
3138
3139 // SelectionDAG::getConstant will promote SplatValue appropriately.
3140 SDValue Result = DAG.getConstant(Val: SplatValue, DL, VT: ViaVecTy);
3141
3142 // Bitcast to the type we originally wanted.
3143 if (ViaVecTy != ResTy)
3144 Result = DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(Node), VT: ResTy, Operand: Result);
3145
3146 return Result;
3147 }
3148
3149 if (DAG.isSplatValue(V: Op, /*AllowUndefs=*/false))
3150 return Op;
3151
3152 for (unsigned i = 0; i < NumElts; ++i) {
3153 SDValue Opi = Node->getOperand(Num: i);
3154 if (isIntOrFPConstant(V: Opi)) {
3155 IsConstant = true;
3156 if (!ConstantValue.getNode())
3157 ConstantValue = Opi;
3158 else if (ConstantValue != Opi)
3159 UseSameConstant = false;
3160 }
3161 }
3162
3163 // If the type of BUILD_VECTOR is v2f64, custom legalizing it has no benefit.
3164 if (IsConstant && UseSameConstant && ResTy != MVT::v2f64) {
3165 SDValue Result = DAG.getSplatBuildVector(VT: ResTy, DL, Op: ConstantValue);
3166 for (unsigned i = 0; i < NumElts; ++i) {
3167 SDValue Opi = Node->getOperand(Num: i);
3168 if (!isIntOrFPConstant(V: Opi))
3169 Result = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: ResTy, N1: Result, N2: Opi,
3170 N3: DAG.getConstant(Val: i, DL, VT: Subtarget.getGRLenVT()));
3171 }
3172 return Result;
3173 }
3174
3175 if (!IsConstant) {
3176 // If the BUILD_VECTOR has a repeated pattern, use INSERT_VECTOR_ELT to fill
3177 // the sub-sequence of the vector and then broadcast the sub-sequence.
3178 //
3179 // TODO: If the BUILD_VECTOR contains undef elements, consider falling
3180 // back to use INSERT_VECTOR_ELT to materialize the vector, because it
3181 // generates worse code in some cases. This could be further optimized
3182 // with more consideration.
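// Illustrative example (not from the original comments): a v8i32 BUILD_VECTOR
// <a, b, a, b, a, b, a, b> has the repeated sequence {a, b}; the pair is
// inserted into the low 64 bits of the fill vector and then broadcast across
// the 256-bit result with XVREPLVE0, viewing the vector as v4i64.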
3183 SmallVector<SDValue> Sequence;
3184 BitVector UndefElements;
3185 if (Node->getRepeatedSequence(Sequence, UndefElements: &UndefElements) &&
3186 UndefElements.count() == 0) {
3187 // Use LSX instructions to fill the sub-sequence of a 256-bit vector,
3188 // because the high part can simply be treated as undef.
3189 SDValue Vector = DAG.getUNDEF(VT: ResTy);
3190 EVT FillTy = Is256Vec
3191 ? ResTy.getHalfNumVectorElementsVT(Context&: *DAG.getContext())
3192 : ResTy;
3193 SDValue FillVec =
3194 Is256Vec ? DAG.getExtractSubvector(DL, VT: FillTy, Vec: Vector, Idx: 0) : Vector;
3195
3196 fillVector(Ops: Sequence, DAG, DL, Subtarget, Vector&: FillVec, ResTy: FillTy);
3197
3198 unsigned SeqLen = Sequence.size();
3199 unsigned SplatLen = NumElts / SeqLen;
3200 MVT SplatEltTy = MVT::getIntegerVT(BitWidth: VT.getScalarSizeInBits() * SeqLen);
3201 MVT SplatTy = MVT::getVectorVT(VT: SplatEltTy, NumElements: SplatLen);
3202
3203 // If the size of the sub-sequence is half of a 256-bit vector, bitcast the
3204 // vector to v4i64 in order to match the pattern of XVREPLVE0Q.
3205 if (SplatEltTy == MVT::i128)
3206 SplatTy = MVT::v4i64;
3207
3208 SDValue SplatVec;
3209 SDValue SrcVec = DAG.getBitcast(
3210 VT: SplatTy,
3211 V: Is256Vec ? DAG.getInsertSubvector(DL, Vec: Vector, SubVec: FillVec, Idx: 0) : FillVec);
3212 if (Is256Vec) {
3213 SplatVec =
3214 DAG.getNode(Opcode: (SplatEltTy == MVT::i128) ? LoongArchISD::XVREPLVE0Q
3215 : LoongArchISD::XVREPLVE0,
3216 DL, VT: SplatTy, Operand: SrcVec);
3217 } else {
3218 SplatVec = DAG.getNode(Opcode: LoongArchISD::VREPLVEI, DL, VT: SplatTy, N1: SrcVec,
3219 N2: DAG.getConstant(Val: 0, DL, VT: Subtarget.getGRLenVT()));
3220 }
3221
3222 return DAG.getBitcast(VT: ResTy, V: SplatVec);
3223 }
3224
3225 // Use INSERT_VECTOR_ELT operations rather than expanding to stores, because
3226 // going through memory is much slower.
3227 //
3228 // For 256-bit vectors, normally split into two halves and concatenate.
3229 // Special case: for v8i32/v8f32/v4i64/v4f64, if the upper half has only
3230 // one non-undef element, skip splitting to avoid a worse result.
3231 if (ResTy == MVT::v8i32 || ResTy == MVT::v8f32 || ResTy == MVT::v4i64 ||
3232 ResTy == MVT::v4f64) {
3233 unsigned NonUndefCount = 0;
3234 for (unsigned i = NumElts / 2; i < NumElts; ++i) {
3235 if (!Node->getOperand(Num: i).isUndef()) {
3236 ++NonUndefCount;
3237 if (NonUndefCount > 1)
3238 break;
3239 }
3240 }
3241 if (NonUndefCount == 1)
3242 return fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, ResTy, first: 0);
3243 }
3244
3245 EVT VecTy =
3246 Is256Vec ? ResTy.getHalfNumVectorElementsVT(Context&: *DAG.getContext()) : ResTy;
3247 SDValue Vector =
3248 fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, ResTy: VecTy, first: 0);
3249
3250 if (Is128Vec)
3251 return Vector;
3252
3253 SDValue VectorHi = fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget,
3254 ResTy: VecTy, first: NumElts / 2);
3255
3256 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: ResTy, N1: Vector, N2: VectorHi);
3257 }
3258
3259 return SDValue();
3260}
3261
3262SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
3263 SelectionDAG &DAG) const {
3264 SDLoc DL(Op);
3265 MVT ResVT = Op.getSimpleValueType();
3266 assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);
3267
3268 unsigned NumOperands = Op.getNumOperands();
3269 unsigned NumFreezeUndef = 0;
3270 unsigned NumZero = 0;
3271 unsigned NumNonZero = 0;
3272 unsigned NonZeros = 0;
3273 SmallSet<SDValue, 4> Undefs;
3274 for (unsigned i = 0; i != NumOperands; ++i) {
3275 SDValue SubVec = Op.getOperand(i);
3276 if (SubVec.isUndef())
3277 continue;
3278 if (ISD::isFreezeUndef(N: SubVec.getNode())) {
3279 // If the freeze(undef) has multiple uses then we must fold to zero.
3280 if (SubVec.hasOneUse()) {
3281 ++NumFreezeUndef;
3282 } else {
3283 ++NumZero;
3284 Undefs.insert(V: SubVec);
3285 }
3286 } else if (ISD::isBuildVectorAllZeros(N: SubVec.getNode()))
3287 ++NumZero;
3288 else {
3289 assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
3290 NonZeros |= 1 << i;
3291 ++NumNonZero;
3292 }
3293 }
3294
3295 // If we have more than 2 non-zeros, build each half separately.
3296 if (NumNonZero > 2) {
3297 MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
3298 ArrayRef<SDUse> Ops = Op->ops();
3299 SDValue Lo = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: HalfVT,
3300 Ops: Ops.slice(N: 0, M: NumOperands / 2));
3301 SDValue Hi = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: HalfVT,
3302 Ops: Ops.slice(N: NumOperands / 2));
3303 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: ResVT, N1: Lo, N2: Hi);
3304 }
3305
3306 // Otherwise, build it up through insert_subvectors.
3307 SDValue Vec = NumZero ? DAG.getConstant(Val: 0, DL, VT: ResVT)
3308 : (NumFreezeUndef ? DAG.getFreeze(V: DAG.getUNDEF(VT: ResVT))
3309 : DAG.getUNDEF(VT: ResVT));
3310
3311 // Replace multi-use freeze(undef) operands with zero vectors.
3312 for (SDValue U : Undefs)
3313 DAG.ReplaceAllUsesWith(From: U, To: DAG.getConstant(Val: 0, DL, VT: U.getSimpleValueType()));
3314
3315 MVT SubVT = Op.getOperand(i: 0).getSimpleValueType();
3316 unsigned NumSubElems = SubVT.getVectorNumElements();
3317 for (unsigned i = 0; i != NumOperands; ++i) {
3318 if ((NonZeros & (1 << i)) == 0)
3319 continue;
3320
3321 Vec = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: ResVT, N1: Vec, N2: Op.getOperand(i),
3322 N3: DAG.getVectorIdxConstant(Val: i * NumSubElems, DL));
3323 }
3324
3325 return Vec;
3326}
3327
3328SDValue
3329LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3330 SelectionDAG &DAG) const {
3331 MVT EltVT = Op.getSimpleValueType();
3332 SDValue Vec = Op->getOperand(Num: 0);
3333 EVT VecTy = Vec->getValueType(ResNo: 0);
3334 SDValue Idx = Op->getOperand(Num: 1);
3335 SDLoc DL(Op);
3336 MVT GRLenVT = Subtarget.getGRLenVT();
3337
3338 assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");
3339
3340 if (isa<ConstantSDNode>(Val: Idx))
3341 return Op;
3342
3343 switch (VecTy.getSimpleVT().SimpleTy) {
3344 default:
3345 llvm_unreachable("Unexpected type");
3346 case MVT::v32i8:
3347 case MVT::v16i16:
3348 case MVT::v4i64:
3349 case MVT::v4f64: {
3350 // Extract the high half subvector and place it in the low half of a new
3351 // vector. It doesn't matter what the high half of the new vector is.
3352 EVT HalfTy = VecTy.getHalfNumVectorElementsVT(Context&: *DAG.getContext());
3353 SDValue VecHi =
3354 DAG.getExtractSubvector(DL, VT: HalfTy, Vec, Idx: HalfTy.getVectorNumElements());
3355 SDValue TmpVec =
3356 DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: VecTy, N1: DAG.getUNDEF(VT: VecTy),
3357 N2: VecHi, N3: DAG.getConstant(Val: 0, DL, VT: GRLenVT));
3358
3359 // Shuffle the original Vec and the TmpVec using MaskVec; the lowest element
3360 // of MaskVec is Idx, the rest do not matter. ResVec[0] will hold the
3361 // desired element.
3362 SDValue IdxCp =
3363 Subtarget.is64Bit()
3364 ? DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64, DL, VT: MVT::f32, Operand: Idx)
3365 : DAG.getBitcast(VT: MVT::f32, V: Idx);
3366 SDValue IdxVec = DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL, VT: MVT::v8f32, Operand: IdxCp);
3367 SDValue MaskVec =
3368 DAG.getBitcast(VT: (VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, V: IdxVec);
3369 SDValue ResVec =
3370 DAG.getNode(Opcode: LoongArchISD::VSHUF, DL, VT: VecTy, N1: MaskVec, N2: TmpVec, N3: Vec);
3371
3372 return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: EltVT, N1: ResVec,
3373 N2: DAG.getConstant(Val: 0, DL, VT: GRLenVT));
3374 }
3375 case MVT::v8i32:
3376 case MVT::v8f32: {
3377 SDValue SplatIdx = DAG.getSplatBuildVector(VT: MVT::v8i32, DL, Op: Idx);
3378 SDValue SplatValue =
3379 DAG.getNode(Opcode: LoongArchISD::XVPERM, DL, VT: VecTy, N1: Vec, N2: SplatIdx);
3380
3381 return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: EltVT, N1: SplatValue,
3382 N2: DAG.getConstant(Val: 0, DL, VT: GRLenVT));
3383 }
3384 }
3385}
3386
3387SDValue
3388LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3389 SelectionDAG &DAG) const {
3390 MVT VT = Op.getSimpleValueType();
3391 MVT EltVT = VT.getVectorElementType();
3392 unsigned NumElts = VT.getVectorNumElements();
3393 unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
3394 SDLoc DL(Op);
3395 SDValue Op0 = Op.getOperand(i: 0);
3396 SDValue Op1 = Op.getOperand(i: 1);
3397 SDValue Op2 = Op.getOperand(i: 2);
3398
3399 if (isa<ConstantSDNode>(Val: Op2))
3400 return Op;
3401
3402 MVT IdxTy = MVT::getIntegerVT(BitWidth: EltSizeInBits);
3403 MVT IdxVTy = MVT::getVectorVT(VT: IdxTy, NumElements: NumElts);
3404
3405 if (!isTypeLegal(VT) || !isTypeLegal(VT: IdxVTy))
3406 return SDValue();
3407
3408 SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op: Op1);
3409 SmallVector<SDValue, 32> RawIndices;
3410 SDValue SplatIdx;
3411 SDValue Indices;
3412
3413 if (!Subtarget.is64Bit() && IdxTy == MVT::i64) {
3414 MVT PairVTy = MVT::getVectorVT(VT: MVT::i32, NumElements: NumElts * 2);
3415 for (unsigned i = 0; i < NumElts; ++i) {
3416 RawIndices.push_back(Elt: Op2);
3417 RawIndices.push_back(Elt: DAG.getConstant(Val: 0, DL, VT: MVT::i32));
3418 }
3419 SplatIdx = DAG.getBuildVector(VT: PairVTy, DL, Ops: RawIndices);
3420 SplatIdx = DAG.getBitcast(VT: IdxVTy, V: SplatIdx);
3421
3422 RawIndices.clear();
3423 for (unsigned i = 0; i < NumElts; ++i) {
3424 RawIndices.push_back(Elt: DAG.getConstant(Val: i, DL, VT: MVT::i32));
3425 RawIndices.push_back(Elt: DAG.getConstant(Val: 0, DL, VT: MVT::i32));
3426 }
3427 Indices = DAG.getBuildVector(VT: PairVTy, DL, Ops: RawIndices);
3428 Indices = DAG.getBitcast(VT: IdxVTy, V: Indices);
3429 } else {
3430 SplatIdx = DAG.getSplatBuildVector(VT: IdxVTy, DL, Op: Op2);
3431
3432 for (unsigned i = 0; i < NumElts; ++i)
3433 RawIndices.push_back(Elt: DAG.getConstant(Val: i, DL, VT: Subtarget.getGRLenVT()));
3434 Indices = DAG.getBuildVector(VT: IdxVTy, DL, Ops: RawIndices);
3435 }
3436
3437 // insert vec, elt, idx
3438 // =>
3439 // select (splatidx == {0,1,2...}) ? splatelt : vec
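// Illustrative example (not from the original comments): inserting Elt at a
// dynamic index 2 into a v4i32 vector gives SplatIdx = {2,2,2,2} and
// Indices = {0,1,2,3}; the SETEQ produces the mask {0,0,-1,0}, so the VSELECT
// takes SplatElt only in lane 2 and the original vector elsewhere.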
3440 SDValue SelectCC =
3441 DAG.getSetCC(DL, VT: IdxVTy, LHS: SplatIdx, RHS: Indices, Cond: ISD::CondCode::SETEQ);
3442 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT, N1: SelectCC, N2: SplatElt, N3: Op0);
3443}
3444
3445SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3446 SelectionDAG &DAG) const {
3447 SDLoc DL(Op);
3448 SyncScope::ID FenceSSID =
3449 static_cast<SyncScope::ID>(Op.getConstantOperandVal(i: 2));
3450
3451 // singlethread fences only synchronize with signal handlers on the same
3452 // thread and thus only need to preserve instruction order, not actually
3453 // enforce memory ordering.
3454 if (FenceSSID == SyncScope::SingleThread)
3455 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3456 return DAG.getNode(Opcode: ISD::MEMBARRIER, DL, VT: MVT::Other, Operand: Op.getOperand(i: 0));
3457
3458 return Op;
3459}
3460
3461SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
3462 SelectionDAG &DAG) const {
3463
3464 if (Subtarget.is64Bit() && Op.getOperand(i: 2).getValueType() == MVT::i32) {
3465 DAG.getContext()->emitError(
3466 ErrorStr: "On LA64, only 64-bit registers can be written.");
3467 return Op.getOperand(i: 0);
3468 }
3469
3470 if (!Subtarget.is64Bit() && Op.getOperand(i: 2).getValueType() == MVT::i64) {
3471 DAG.getContext()->emitError(
3472 ErrorStr: "On LA32, only 32-bit registers can be written.");
3473 return Op.getOperand(i: 0);
3474 }
3475
3476 return Op;
3477}
3478
3479SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
3480 SelectionDAG &DAG) const {
3481 if (!isa<ConstantSDNode>(Val: Op.getOperand(i: 0))) {
3482 DAG.getContext()->emitError(ErrorStr: "argument to '__builtin_frame_address' must "
3483 "be a constant integer");
3484 return SDValue();
3485 }
3486
3487 MachineFunction &MF = DAG.getMachineFunction();
3488 MF.getFrameInfo().setFrameAddressIsTaken(true);
3489 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
3490 EVT VT = Op.getValueType();
3491 SDLoc DL(Op);
3492 SDValue FrameAddr = DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: DL, Reg: FrameReg, VT);
3493 unsigned Depth = Op.getConstantOperandVal(i: 0);
3494 int GRLenInBytes = Subtarget.getGRLen() / 8;
3495
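// Each iteration loads the caller's frame pointer, which this lowering
// assumes was spilled at (frame address - 2 * GRLenInBytes), just below the
// saved return address.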
3496 while (Depth--) {
3497 int Offset = -(GRLenInBytes * 2);
3498 SDValue Ptr = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: FrameAddr,
3499 N2: DAG.getSignedConstant(Val: Offset, DL, VT));
3500 FrameAddr =
3501 DAG.getLoad(VT, dl: DL, Chain: DAG.getEntryNode(), Ptr, PtrInfo: MachinePointerInfo());
3502 }
3503 return FrameAddr;
3504}
3505
3506SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
3507 SelectionDAG &DAG) const {
3508 // Currently we only support lowering the return address for the current frame.
3509 if (Op.getConstantOperandVal(i: 0) != 0) {
3510 DAG.getContext()->emitError(
3511 ErrorStr: "return address can only be determined for the current frame");
3512 return SDValue();
3513 }
3514
3515 MachineFunction &MF = DAG.getMachineFunction();
3516 MF.getFrameInfo().setReturnAddressIsTaken(true);
3517 MVT GRLenVT = Subtarget.getGRLenVT();
3518
3519 // Return the value of the return address register, marking it an implicit
3520 // live-in.
3521 Register Reg = MF.addLiveIn(PReg: Subtarget.getRegisterInfo()->getRARegister(),
3522 RC: getRegClassFor(VT: GRLenVT));
3523 return DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: SDLoc(Op), Reg, VT: GRLenVT);
3524}
3525
3526SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
3527 SelectionDAG &DAG) const {
3528 MachineFunction &MF = DAG.getMachineFunction();
3529 auto Size = Subtarget.getGRLen() / 8;
3530 auto FI = MF.getFrameInfo().CreateFixedObject(Size, SPOffset: 0, IsImmutable: false);
3531 return DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout()));
3532}
3533
3534SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
3535 SelectionDAG &DAG) const {
3536 MachineFunction &MF = DAG.getMachineFunction();
3537 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
3538
3539 SDLoc DL(Op);
3540 SDValue FI = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(),
3541 VT: getPointerTy(DL: MF.getDataLayout()));
3542
3543 // vastart just stores the address of the VarArgsFrameIndex slot into the
3544 // memory location argument.
3545 const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue();
3546 return DAG.getStore(Chain: Op.getOperand(i: 0), dl: DL, Val: FI, Ptr: Op.getOperand(i: 1),
3547 PtrInfo: MachinePointerInfo(SV));
3548}
3549
3550SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
3551 SelectionDAG &DAG) const {
3552 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3553 !Subtarget.hasBasicD() && "unexpected target features");
3554
3555 SDLoc DL(Op);
3556 SDValue Op0 = Op.getOperand(i: 0);
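// When the operand is provably a zero-extended narrow value (masked by an
// AND, produced by a BSTRPICK of fewer than 32 bits starting at bit 0, or
// carrying a sub-i32 AssertZext), keep the generic lowering; the soft-float
// libcall below is only needed for the general case.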
3557 if (Op0->getOpcode() == ISD::AND) {
3558 auto *C = dyn_cast<ConstantSDNode>(Val: Op0.getOperand(i: 1));
3559 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
3560 return Op;
3561 }
3562
3563 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
3564 Op0.getConstantOperandVal(i: 1) < UINT64_C(0X1F) &&
3565 Op0.getConstantOperandVal(i: 2) == UINT64_C(0))
3566 return Op;
3567
3568 if (Op0.getOpcode() == ISD::AssertZext &&
3569 dyn_cast<VTSDNode>(Val: Op0.getOperand(i: 1))->getVT().bitsLT(VT: MVT::i32))
3570 return Op;
3571
3572 EVT OpVT = Op0.getValueType();
3573 EVT RetVT = Op.getValueType();
3574 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
3575 MakeLibCallOptions CallOptions;
3576 CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT);
3577 SDValue Chain = SDValue();
3578 SDValue Result;
3579 std::tie(args&: Result, args&: Chain) =
3580 makeLibCall(DAG, LC, RetVT: Op.getValueType(), Ops: Op0, CallOptions, dl: DL, Chain);
3581 return Result;
3582}
3583
3584SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
3585 SelectionDAG &DAG) const {
3586 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3587 !Subtarget.hasBasicD() && "unexpected target features");
3588
3589 SDLoc DL(Op);
3590 SDValue Op0 = Op.getOperand(i: 0);
3591
3592 if ((Op0.getOpcode() == ISD::AssertSext ||
3593 Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) &&
3594 dyn_cast<VTSDNode>(Val: Op0.getOperand(i: 1))->getVT().bitsLE(VT: MVT::i32))
3595 return Op;
3596
3597 EVT OpVT = Op0.getValueType();
3598 EVT RetVT = Op.getValueType();
3599 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
3600 MakeLibCallOptions CallOptions;
3601 CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT);
3602 SDValue Chain = SDValue();
3603 SDValue Result;
3604 std::tie(args&: Result, args&: Chain) =
3605 makeLibCall(DAG, LC, RetVT: Op.getValueType(), Ops: Op0, CallOptions, dl: DL, Chain);
3606 return Result;
3607}
3608
3609SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
3610 SelectionDAG &DAG) const {
3611
3612 SDLoc DL(Op);
3613 EVT VT = Op.getValueType();
3614 SDValue Op0 = Op.getOperand(i: 0);
3615 EVT Op0VT = Op0.getValueType();
3616
3617 if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
3618 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
3619 SDValue NewOp0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op0);
3620 return DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64, DL, VT: MVT::f32, Operand: NewOp0);
3621 }
3622 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
3623 SDValue Lo, Hi;
3624 std::tie(args&: Lo, args&: Hi) = DAG.SplitScalar(N: Op0, DL, LoVT: MVT::i32, HiVT: MVT::i32);
3625 return DAG.getNode(Opcode: LoongArchISD::BUILD_PAIR_F64, DL, VT: MVT::f64, N1: Lo, N2: Hi);
3626 }
3627 return Op;
3628}
3629
3630SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
3631 SelectionDAG &DAG) const {
3632
3633 SDLoc DL(Op);
3634 SDValue Op0 = Op.getOperand(i: 0);
3635
3636 if (Op0.getValueType() == MVT::f16)
3637 Op0 = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: MVT::f32, Operand: Op0);
3638
3639 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
3640 !Subtarget.hasBasicD()) {
3641 SDValue Dst = DAG.getNode(Opcode: LoongArchISD::FTINT, DL, VT: MVT::f32, Operand: Op0);
3642 return DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Dst);
3643 }
3644
3645 EVT FPTy = EVT::getFloatingPointVT(BitWidth: Op.getValueSizeInBits());
3646 SDValue Trunc = DAG.getNode(Opcode: LoongArchISD::FTINT, DL, VT: FPTy, Operand: Op0);
3647 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: Op.getValueType(), Operand: Trunc);
3648}
3649
3650static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
3651 SelectionDAG &DAG, unsigned Flags) {
3652 return DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: Flags);
3653}
3654
3655static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
3656 SelectionDAG &DAG, unsigned Flags) {
3657 return DAG.getTargetBlockAddress(BA: N->getBlockAddress(), VT: Ty, Offset: N->getOffset(),
3658 TargetFlags: Flags);
3659}
3660
3661static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
3662 SelectionDAG &DAG, unsigned Flags) {
3663 return DAG.getTargetConstantPool(C: N->getConstVal(), VT: Ty, Align: N->getAlign(),
3664 Offset: N->getOffset(), TargetFlags: Flags);
3665}
3666
3667static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
3668 SelectionDAG &DAG, unsigned Flags) {
3669 return DAG.getTargetJumpTable(JTI: N->getIndex(), VT: Ty, TargetFlags: Flags);
3670}
3671
3672template <class NodeTy>
3673SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
3674 CodeModel::Model M,
3675 bool IsLocal) const {
3676 SDLoc DL(N);
3677 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
3678 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
3679 SDValue Load;
3680
3681 switch (M) {
3682 default:
3683 report_fatal_error(reason: "Unsupported code model");
3684
3685 case CodeModel::Large: {
3686 assert(Subtarget.is64Bit() && "Large code model requires LA64");
3687
3688 // This is not actually used, but is necessary for successfully matching
3689 // the PseudoLA_*_LARGE nodes.
3690 SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty);
3691 if (IsLocal) {
3692 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
3693 // eventually becomes the desired 5-insn code sequence.
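// Roughly (an illustrative sketch; exact register choices and relocation
// operator spellings are assumed, not taken from the original comments):
//   pcalau12i $dst, %pc_hi20(sym)
//   addi.d    $tmp, $zero, %pc_lo12(sym)
//   lu32i.d   $tmp, %pc64_lo20(sym)
//   lu52i.d   $tmp, %pc64_hi12(sym)
//   add.d     $dst, $dst, $tmp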
3694 Load = SDValue(DAG.getMachineNode(Opcode: LoongArch::PseudoLA_PCREL_LARGE, dl: DL, VT: Ty,
3695 Op1: Tmp, Op2: Addr),
3696 0);
3697 } else {
3698 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
3699 // eventually becomes the desired 5-insn code sequence.
3700 Load = SDValue(
3701 DAG.getMachineNode(Opcode: LoongArch::PseudoLA_GOT_LARGE, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr),
3702 0);
3703 }
3704 break;
3705 }
3706
3707 case CodeModel::Small:
3708 case CodeModel::Medium:
3709 if (IsLocal) {
3710 // This generates the pattern (PseudoLA_PCREL sym), which
3711 //
3712 // for la32r expands to:
3713 // (addi.w (pcaddu12i %pcadd_hi20(sym)) %pcadd_lo12(.Lpcadd_hi)).
3714 //
3715 // for la32s and la64 expands to:
3716 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
3717 Load = SDValue(
3718 DAG.getMachineNode(Opcode: LoongArch::PseudoLA_PCREL, dl: DL, VT: Ty, Op1: Addr), 0);
3719 } else {
3720 // This generates the pattern (PseudoLA_GOT sym), which
3721 //
3722 // for la32r expands to:
3723 // (ld.w (pcaddu12i %got_pcadd_hi20(sym)) %pcadd_lo12(.Lpcadd_hi)).
3724 //
3725 // for la32s and la64 expands to:
3726 // (ld.w/d (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
3727 Load =
3728 SDValue(DAG.getMachineNode(Opcode: LoongArch::PseudoLA_GOT, dl: DL, VT: Ty, Op1: Addr), 0);
3729 }
3730 }
3731
3732 if (!IsLocal) {
3733 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3734 MachineFunction &MF = DAG.getMachineFunction();
3735 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3736 PtrInfo: MachinePointerInfo::getGOT(MF),
3737 f: MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3738 MachineMemOperand::MOInvariant,
3739 MemTy: LLT(Ty.getSimpleVT()), base_alignment: Align(Ty.getFixedSizeInBits() / 8));
3740 DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Load.getNode()), NewMemRefs: {MemOp});
3741 }
3742
3743 return Load;
3744}
3745
3746SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
3747 SelectionDAG &DAG) const {
3748 return getAddr(N: cast<BlockAddressSDNode>(Val&: Op), DAG,
3749 M: DAG.getTarget().getCodeModel());
3750}
3751
3752SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
3753 SelectionDAG &DAG) const {
3754 return getAddr(N: cast<JumpTableSDNode>(Val&: Op), DAG,
3755 M: DAG.getTarget().getCodeModel());
3756}
3757
3758SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
3759 SelectionDAG &DAG) const {
3760 return getAddr(N: cast<ConstantPoolSDNode>(Val&: Op), DAG,
3761 M: DAG.getTarget().getCodeModel());
3762}
3763
3764SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
3765 SelectionDAG &DAG) const {
3766 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Val&: Op);
3767 assert(N->getOffset() == 0 && "unexpected offset in global node");
3768 auto CM = DAG.getTarget().getCodeModel();
3769 const GlobalValue *GV = N->getGlobal();
3770
3771 if (GV->isDSOLocal() && isa<GlobalVariable>(Val: GV)) {
3772 if (auto GCM = dyn_cast<GlobalVariable>(Val: GV)->getCodeModel())
3773 CM = *GCM;
3774 }
3775
3776 return getAddr(N, DAG, M: CM, IsLocal: GV->isDSOLocal());
3777}
3778
3779SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
3780 SelectionDAG &DAG,
3781 unsigned Opc, bool UseGOT,
3782 bool Large) const {
3783 SDLoc DL(N);
3784 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
3785 MVT GRLenVT = Subtarget.getGRLenVT();
3786
3787 // This is not actually used, but is necessary for successfully matching the
3788 // PseudoLA_*_LARGE nodes.
3789 SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty);
3790 SDValue Addr = DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: 0);
3791
3792 // Only IE needs an extra argument for large code model.
3793 SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
3794 ? SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), 0)
3795 : SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Addr), 0);
3796
3797 // If it is LE for the normal/medium code model, the add tp operation will occur
3798 // during the pseudo-instruction expansion.
3799 if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
3800 return Offset;
3801
3802 if (UseGOT) {
3803 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3804 MachineFunction &MF = DAG.getMachineFunction();
3805 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3806 PtrInfo: MachinePointerInfo::getGOT(MF),
3807 f: MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3808 MachineMemOperand::MOInvariant,
3809 MemTy: LLT(Ty.getSimpleVT()), base_alignment: Align(Ty.getFixedSizeInBits() / 8));
3810 DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Offset.getNode()), NewMemRefs: {MemOp});
3811 }
3812
3813 // Add the thread pointer.
3814 return DAG.getNode(Opcode: ISD::ADD, DL, VT: Ty, N1: Offset,
3815 N2: DAG.getRegister(Reg: LoongArch::R2, VT: GRLenVT));
3816}
3817
3818SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
3819 SelectionDAG &DAG,
3820 unsigned Opc,
3821 bool Large) const {
3822 SDLoc DL(N);
3823 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
3824 IntegerType *CallTy = Type::getIntNTy(C&: *DAG.getContext(), N: Ty.getSizeInBits());
3825
3826 // This is not actually used, but is necessary for successfully matching the
3827 // PseudoLA_*_LARGE nodes.
3828 SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty);
3829
3830 // Use a PC-relative addressing mode to access the dynamic GOT address.
3831 SDValue Addr = DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: 0);
3832 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), 0)
3833 : SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Addr), 0);
3834
3835 // Prepare argument list to generate call.
3836 ArgListTy Args;
3837 Args.emplace_back(args&: Load, args&: CallTy);
3838
3839 // Setup call to __tls_get_addr.
3840 TargetLowering::CallLoweringInfo CLI(DAG);
3841 CLI.setDebugLoc(DL)
3842 .setChain(DAG.getEntryNode())
3843 .setLibCallee(CC: CallingConv::C, ResultType: CallTy,
3844 Target: DAG.getExternalSymbol(Sym: "__tls_get_addr", VT: Ty),
3845 ArgsList: std::move(Args));
3846
3847 return LowerCallTo(CLI).first;
3848}
3849
3850SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
3851 SelectionDAG &DAG, unsigned Opc,
3852 bool Large) const {
3853 SDLoc DL(N);
3854 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
3855 const GlobalValue *GV = N->getGlobal();
3856
3857 // This is not actually used, but is necessary for successfully matching the
3858 // PseudoLA_*_LARGE nodes.
3859 SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty);
3860
3861 // Use a PC-relative addressing mode to access the global dynamic GOT address.
3862 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
3863 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: 0);
3864 return Large ? SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), 0)
3865 : SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Addr), 0);
3866}
3867
3868SDValue
3869LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
3870 SelectionDAG &DAG) const {
3871 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3872 CallingConv::GHC)
3873 report_fatal_error(reason: "In GHC calling convention TLS is not supported");
3874
3875 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
3876 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
3877
3878 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Val&: Op);
3879 assert(N->getOffset() == 0 && "unexpected offset in global node");
3880
3881 if (DAG.getTarget().useEmulatedTLS())
3882 reportFatalUsageError(reason: "the emulated TLS is prohibited");
3883
3884 bool IsDesc = DAG.getTarget().useTLSDESC();
3885
3886 switch (getTargetMachine().getTLSModel(GV: N->getGlobal())) {
3887 case TLSModel::GeneralDynamic:
3888 // In this model, application code calls the dynamic linker function
3889 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
3890 // runtime.
3891 if (!IsDesc)
3892 return getDynamicTLSAddr(N, DAG,
3893 Opc: Large ? LoongArch::PseudoLA_TLS_GD_LARGE
3894 : LoongArch::PseudoLA_TLS_GD,
3895 Large);
3896 break;
3897 case TLSModel::LocalDynamic:
3898 // Same as GeneralDynamic, except for assembly modifiers and relocation
3899 // records.
3900 if (!IsDesc)
3901 return getDynamicTLSAddr(N, DAG,
3902 Opc: Large ? LoongArch::PseudoLA_TLS_LD_LARGE
3903 : LoongArch::PseudoLA_TLS_LD,
3904 Large);
3905 break;
3906 case TLSModel::InitialExec:
3907 // This model uses the GOT to resolve TLS offsets.
3908 return getStaticTLSAddr(N, DAG,
3909 Opc: Large ? LoongArch::PseudoLA_TLS_IE_LARGE
3910 : LoongArch::PseudoLA_TLS_IE,
3911 /*UseGOT=*/true, Large);
3912 case TLSModel::LocalExec:
3913 // This model is used when linking statically, as the TLS offsets are resolved
3914 // during program linking.
3915 //
3916 // This node doesn't need an extra argument for the large code model.
3917 return getStaticTLSAddr(N, DAG, Opc: LoongArch::PseudoLA_TLS_LE,
3918 /*UseGOT=*/false, Large);
3919 }
3920
3921 return getTLSDescAddr(N, DAG,
3922 Opc: Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
3923 : LoongArch::PseudoLA_TLS_DESC,
3924 Large);
3925}
3926
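// Verify that operand 'ImmOp' of the intrinsic fits in an N-bit immediate
// (unsigned by default, signed when IsSigned is set). On failure this emits a
// diagnostic and returns an UNDEF of the result type; on success it returns an
// empty SDValue so the caller falls through to normal selection. For example,
// checkIntrinsicImmArg<5>(Op, 2, DAG) accepts immediates in [0, 31].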
3927template <unsigned N>
3928static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
3929 SelectionDAG &DAG, bool IsSigned = false) {
3930 auto *CImm = cast<ConstantSDNode>(Val: Op->getOperand(Num: ImmOp));
3931 // Check the ImmArg.
3932 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3933 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3934 DAG.getContext()->emitError(ErrorStr: Op->getOperationName(G: 0) +
3935 ": argument out of range.");
3936 return DAG.getNode(Opcode: ISD::UNDEF, DL: SDLoc(Op), VT: Op.getValueType());
3937 }
3938 return SDValue();
3939}
3940
3941SDValue
3942LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
3943 SelectionDAG &DAG) const {
3944 switch (Op.getConstantOperandVal(i: 0)) {
3945 default:
3946 return SDValue(); // Don't custom lower most intrinsics.
3947 case Intrinsic::thread_pointer: {
3948 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
3949 return DAG.getRegister(Reg: LoongArch::R2, VT: PtrVT);
3950 }
3951 case Intrinsic::loongarch_lsx_vpickve2gr_d:
3952 case Intrinsic::loongarch_lsx_vpickve2gr_du:
3953 case Intrinsic::loongarch_lsx_vreplvei_d:
3954 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
3955 return checkIntrinsicImmArg<1>(Op, ImmOp: 2, DAG);
3956 case Intrinsic::loongarch_lsx_vreplvei_w:
3957 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
3958 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
3959 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
3960 case Intrinsic::loongarch_lasx_xvpickve_d:
3961 case Intrinsic::loongarch_lasx_xvpickve_d_f:
3962 return checkIntrinsicImmArg<2>(Op, ImmOp: 2, DAG);
3963 case Intrinsic::loongarch_lasx_xvinsve0_d:
3964 return checkIntrinsicImmArg<2>(Op, ImmOp: 3, DAG);
3965 case Intrinsic::loongarch_lsx_vsat_b:
3966 case Intrinsic::loongarch_lsx_vsat_bu:
3967 case Intrinsic::loongarch_lsx_vrotri_b:
3968 case Intrinsic::loongarch_lsx_vsllwil_h_b:
3969 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
3970 case Intrinsic::loongarch_lsx_vsrlri_b:
3971 case Intrinsic::loongarch_lsx_vsrari_b:
3972 case Intrinsic::loongarch_lsx_vreplvei_h:
3973 case Intrinsic::loongarch_lasx_xvsat_b:
3974 case Intrinsic::loongarch_lasx_xvsat_bu:
3975 case Intrinsic::loongarch_lasx_xvrotri_b:
3976 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
3977 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
3978 case Intrinsic::loongarch_lasx_xvsrlri_b:
3979 case Intrinsic::loongarch_lasx_xvsrari_b:
3980 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
3981 case Intrinsic::loongarch_lasx_xvpickve_w:
3982 case Intrinsic::loongarch_lasx_xvpickve_w_f:
3983 return checkIntrinsicImmArg<3>(Op, ImmOp: 2, DAG);
3984 case Intrinsic::loongarch_lasx_xvinsve0_w:
3985 return checkIntrinsicImmArg<3>(Op, ImmOp: 3, DAG);
3986 case Intrinsic::loongarch_lsx_vsat_h:
3987 case Intrinsic::loongarch_lsx_vsat_hu:
3988 case Intrinsic::loongarch_lsx_vrotri_h:
3989 case Intrinsic::loongarch_lsx_vsllwil_w_h:
3990 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
3991 case Intrinsic::loongarch_lsx_vsrlri_h:
3992 case Intrinsic::loongarch_lsx_vsrari_h:
3993 case Intrinsic::loongarch_lsx_vreplvei_b:
3994 case Intrinsic::loongarch_lasx_xvsat_h:
3995 case Intrinsic::loongarch_lasx_xvsat_hu:
3996 case Intrinsic::loongarch_lasx_xvrotri_h:
3997 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
3998 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
3999 case Intrinsic::loongarch_lasx_xvsrlri_h:
4000 case Intrinsic::loongarch_lasx_xvsrari_h:
4001 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
4002 return checkIntrinsicImmArg<4>(Op, ImmOp: 2, DAG);
4003 case Intrinsic::loongarch_lsx_vsrlni_b_h:
4004 case Intrinsic::loongarch_lsx_vsrani_b_h:
4005 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
4006 case Intrinsic::loongarch_lsx_vsrarni_b_h:
4007 case Intrinsic::loongarch_lsx_vssrlni_b_h:
4008 case Intrinsic::loongarch_lsx_vssrani_b_h:
4009 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
4010 case Intrinsic::loongarch_lsx_vssrani_bu_h:
4011 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
4012 case Intrinsic::loongarch_lsx_vssrarni_b_h:
4013 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
4014 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
4015 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
4016 case Intrinsic::loongarch_lasx_xvsrani_b_h:
4017 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
4018 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
4019 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
4020 case Intrinsic::loongarch_lasx_xvssrani_b_h:
4021 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
4022 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
4023 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
4024 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
4025 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
4026 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
4027 return checkIntrinsicImmArg<4>(Op, ImmOp: 3, DAG);
4028 case Intrinsic::loongarch_lsx_vsat_w:
4029 case Intrinsic::loongarch_lsx_vsat_wu:
4030 case Intrinsic::loongarch_lsx_vrotri_w:
4031 case Intrinsic::loongarch_lsx_vsllwil_d_w:
4032 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
4033 case Intrinsic::loongarch_lsx_vsrlri_w:
4034 case Intrinsic::loongarch_lsx_vsrari_w:
4035 case Intrinsic::loongarch_lsx_vslei_bu:
4036 case Intrinsic::loongarch_lsx_vslei_hu:
4037 case Intrinsic::loongarch_lsx_vslei_wu:
4038 case Intrinsic::loongarch_lsx_vslei_du:
4039 case Intrinsic::loongarch_lsx_vslti_bu:
4040 case Intrinsic::loongarch_lsx_vslti_hu:
4041 case Intrinsic::loongarch_lsx_vslti_wu:
4042 case Intrinsic::loongarch_lsx_vslti_du:
4043 case Intrinsic::loongarch_lsx_vbsll_v:
4044 case Intrinsic::loongarch_lsx_vbsrl_v:
4045 case Intrinsic::loongarch_lasx_xvsat_w:
4046 case Intrinsic::loongarch_lasx_xvsat_wu:
4047 case Intrinsic::loongarch_lasx_xvrotri_w:
4048 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
4049 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
4050 case Intrinsic::loongarch_lasx_xvsrlri_w:
4051 case Intrinsic::loongarch_lasx_xvsrari_w:
4052 case Intrinsic::loongarch_lasx_xvslei_bu:
4053 case Intrinsic::loongarch_lasx_xvslei_hu:
4054 case Intrinsic::loongarch_lasx_xvslei_wu:
4055 case Intrinsic::loongarch_lasx_xvslei_du:
4056 case Intrinsic::loongarch_lasx_xvslti_bu:
4057 case Intrinsic::loongarch_lasx_xvslti_hu:
4058 case Intrinsic::loongarch_lasx_xvslti_wu:
4059 case Intrinsic::loongarch_lasx_xvslti_du:
4060 case Intrinsic::loongarch_lasx_xvbsll_v:
4061 case Intrinsic::loongarch_lasx_xvbsrl_v:
4062 return checkIntrinsicImmArg<5>(Op, ImmOp: 2, DAG);
4063 case Intrinsic::loongarch_lsx_vseqi_b:
4064 case Intrinsic::loongarch_lsx_vseqi_h:
4065 case Intrinsic::loongarch_lsx_vseqi_w:
4066 case Intrinsic::loongarch_lsx_vseqi_d:
4067 case Intrinsic::loongarch_lsx_vslei_b:
4068 case Intrinsic::loongarch_lsx_vslei_h:
4069 case Intrinsic::loongarch_lsx_vslei_w:
4070 case Intrinsic::loongarch_lsx_vslei_d:
4071 case Intrinsic::loongarch_lsx_vslti_b:
4072 case Intrinsic::loongarch_lsx_vslti_h:
4073 case Intrinsic::loongarch_lsx_vslti_w:
4074 case Intrinsic::loongarch_lsx_vslti_d:
4075 case Intrinsic::loongarch_lasx_xvseqi_b:
4076 case Intrinsic::loongarch_lasx_xvseqi_h:
4077 case Intrinsic::loongarch_lasx_xvseqi_w:
4078 case Intrinsic::loongarch_lasx_xvseqi_d:
4079 case Intrinsic::loongarch_lasx_xvslei_b:
4080 case Intrinsic::loongarch_lasx_xvslei_h:
4081 case Intrinsic::loongarch_lasx_xvslei_w:
4082 case Intrinsic::loongarch_lasx_xvslei_d:
4083 case Intrinsic::loongarch_lasx_xvslti_b:
4084 case Intrinsic::loongarch_lasx_xvslti_h:
4085 case Intrinsic::loongarch_lasx_xvslti_w:
4086 case Intrinsic::loongarch_lasx_xvslti_d:
4087 return checkIntrinsicImmArg<5>(Op, ImmOp: 2, DAG, /*IsSigned=*/true);
4088 case Intrinsic::loongarch_lsx_vsrlni_h_w:
4089 case Intrinsic::loongarch_lsx_vsrani_h_w:
4090 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
4091 case Intrinsic::loongarch_lsx_vsrarni_h_w:
4092 case Intrinsic::loongarch_lsx_vssrlni_h_w:
4093 case Intrinsic::loongarch_lsx_vssrani_h_w:
4094 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
4095 case Intrinsic::loongarch_lsx_vssrani_hu_w:
4096 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
4097 case Intrinsic::loongarch_lsx_vssrarni_h_w:
4098 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
4099 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
4100 case Intrinsic::loongarch_lsx_vfrstpi_b:
4101 case Intrinsic::loongarch_lsx_vfrstpi_h:
4102 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
4103 case Intrinsic::loongarch_lasx_xvsrani_h_w:
4104 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
4105 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
4106 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
4107 case Intrinsic::loongarch_lasx_xvssrani_h_w:
4108 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
4109 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
4110 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
4111 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
4112 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
4113 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
4114 case Intrinsic::loongarch_lasx_xvfrstpi_b:
4115 case Intrinsic::loongarch_lasx_xvfrstpi_h:
4116 return checkIntrinsicImmArg<5>(Op, ImmOp: 3, DAG);
4117 case Intrinsic::loongarch_lsx_vsat_d:
4118 case Intrinsic::loongarch_lsx_vsat_du:
4119 case Intrinsic::loongarch_lsx_vrotri_d:
4120 case Intrinsic::loongarch_lsx_vsrlri_d:
4121 case Intrinsic::loongarch_lsx_vsrari_d:
4122 case Intrinsic::loongarch_lasx_xvsat_d:
4123 case Intrinsic::loongarch_lasx_xvsat_du:
4124 case Intrinsic::loongarch_lasx_xvrotri_d:
4125 case Intrinsic::loongarch_lasx_xvsrlri_d:
4126 case Intrinsic::loongarch_lasx_xvsrari_d:
4127 return checkIntrinsicImmArg<6>(Op, ImmOp: 2, DAG);
4128 case Intrinsic::loongarch_lsx_vsrlni_w_d:
4129 case Intrinsic::loongarch_lsx_vsrani_w_d:
4130 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
4131 case Intrinsic::loongarch_lsx_vsrarni_w_d:
4132 case Intrinsic::loongarch_lsx_vssrlni_w_d:
4133 case Intrinsic::loongarch_lsx_vssrani_w_d:
4134 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
4135 case Intrinsic::loongarch_lsx_vssrani_wu_d:
4136 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
4137 case Intrinsic::loongarch_lsx_vssrarni_w_d:
4138 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
4139 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
4140 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
4141 case Intrinsic::loongarch_lasx_xvsrani_w_d:
4142 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
4143 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
4144 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
4145 case Intrinsic::loongarch_lasx_xvssrani_w_d:
4146 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
4147 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
4148 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
4149 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
4150 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
4151 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
4152 return checkIntrinsicImmArg<6>(Op, ImmOp: 3, DAG);
4153 case Intrinsic::loongarch_lsx_vsrlni_d_q:
4154 case Intrinsic::loongarch_lsx_vsrani_d_q:
4155 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
4156 case Intrinsic::loongarch_lsx_vsrarni_d_q:
4157 case Intrinsic::loongarch_lsx_vssrlni_d_q:
4158 case Intrinsic::loongarch_lsx_vssrani_d_q:
4159 case Intrinsic::loongarch_lsx_vssrlni_du_q:
4160 case Intrinsic::loongarch_lsx_vssrani_du_q:
4161 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
4162 case Intrinsic::loongarch_lsx_vssrarni_d_q:
4163 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
4164 case Intrinsic::loongarch_lsx_vssrarni_du_q:
4165 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
4166 case Intrinsic::loongarch_lasx_xvsrani_d_q:
4167 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
4168 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
4169 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
4170 case Intrinsic::loongarch_lasx_xvssrani_d_q:
4171 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
4172 case Intrinsic::loongarch_lasx_xvssrani_du_q:
4173 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
4174 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
4175 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
4176 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
4177 return checkIntrinsicImmArg<7>(Op, ImmOp: 3, DAG);
4178 case Intrinsic::loongarch_lsx_vnori_b:
4179 case Intrinsic::loongarch_lsx_vshuf4i_b:
4180 case Intrinsic::loongarch_lsx_vshuf4i_h:
4181 case Intrinsic::loongarch_lsx_vshuf4i_w:
4182 case Intrinsic::loongarch_lasx_xvnori_b:
4183 case Intrinsic::loongarch_lasx_xvshuf4i_b:
4184 case Intrinsic::loongarch_lasx_xvshuf4i_h:
4185 case Intrinsic::loongarch_lasx_xvshuf4i_w:
4186 case Intrinsic::loongarch_lasx_xvpermi_d:
4187 return checkIntrinsicImmArg<8>(Op, ImmOp: 2, DAG);
4188 case Intrinsic::loongarch_lsx_vshuf4i_d:
4189 case Intrinsic::loongarch_lsx_vpermi_w:
4190 case Intrinsic::loongarch_lsx_vbitseli_b:
4191 case Intrinsic::loongarch_lsx_vextrins_b:
4192 case Intrinsic::loongarch_lsx_vextrins_h:
4193 case Intrinsic::loongarch_lsx_vextrins_w:
4194 case Intrinsic::loongarch_lsx_vextrins_d:
4195 case Intrinsic::loongarch_lasx_xvshuf4i_d:
4196 case Intrinsic::loongarch_lasx_xvpermi_w:
4197 case Intrinsic::loongarch_lasx_xvpermi_q:
4198 case Intrinsic::loongarch_lasx_xvbitseli_b:
4199 case Intrinsic::loongarch_lasx_xvextrins_b:
4200 case Intrinsic::loongarch_lasx_xvextrins_h:
4201 case Intrinsic::loongarch_lasx_xvextrins_w:
4202 case Intrinsic::loongarch_lasx_xvextrins_d:
4203 return checkIntrinsicImmArg<8>(Op, ImmOp: 3, DAG);
4204 case Intrinsic::loongarch_lsx_vrepli_b:
4205 case Intrinsic::loongarch_lsx_vrepli_h:
4206 case Intrinsic::loongarch_lsx_vrepli_w:
4207 case Intrinsic::loongarch_lsx_vrepli_d:
4208 case Intrinsic::loongarch_lasx_xvrepli_b:
4209 case Intrinsic::loongarch_lasx_xvrepli_h:
4210 case Intrinsic::loongarch_lasx_xvrepli_w:
4211 case Intrinsic::loongarch_lasx_xvrepli_d:
4212 return checkIntrinsicImmArg<10>(Op, ImmOp: 1, DAG, /*IsSigned=*/true);
4213 case Intrinsic::loongarch_lsx_vldi:
4214 case Intrinsic::loongarch_lasx_xvldi:
4215 return checkIntrinsicImmArg<13>(Op, ImmOp: 1, DAG, /*IsSigned=*/true);
4216 }
4217}
4218
// Helper function that emits an error message for intrinsics with a chain and
// returns the merge values of an UNDEF and the chain.
4221static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
4222 StringRef ErrorMsg,
4223 SelectionDAG &DAG) {
4224 DAG.getContext()->emitError(ErrorStr: Op->getOperationName(G: 0) + ": " + ErrorMsg + ".");
4225 return DAG.getMergeValues(Ops: {DAG.getUNDEF(VT: Op.getValueType()), Op.getOperand(i: 0)},
4226 dl: SDLoc(Op));
4227}
4228
4229SDValue
4230LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4231 SelectionDAG &DAG) const {
4232 SDLoc DL(Op);
4233 MVT GRLenVT = Subtarget.getGRLenVT();
4234 EVT VT = Op.getValueType();
4235 SDValue Chain = Op.getOperand(i: 0);
4236 const StringRef ErrorMsgOOR = "argument out of range";
4237 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4238 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4239
4240 switch (Op.getConstantOperandVal(i: 1)) {
4241 default:
4242 return Op;
4243 case Intrinsic::loongarch_crc_w_b_w:
4244 case Intrinsic::loongarch_crc_w_h_w:
4245 case Intrinsic::loongarch_crc_w_w_w:
4246 case Intrinsic::loongarch_crc_w_d_w:
4247 case Intrinsic::loongarch_crcc_w_b_w:
4248 case Intrinsic::loongarch_crcc_w_h_w:
4249 case Intrinsic::loongarch_crcc_w_w_w:
4250 case Intrinsic::loongarch_crcc_w_d_w:
4251 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG);
4252 case Intrinsic::loongarch_csrrd_w:
4253 case Intrinsic::loongarch_csrrd_d: {
4254 unsigned Imm = Op.getConstantOperandVal(i: 2);
4255 return !isUInt<14>(x: Imm)
4256 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
4257 : DAG.getNode(Opcode: LoongArchISD::CSRRD, DL, ResultTys: {GRLenVT, MVT::Other},
4258 Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
4259 }
4260 case Intrinsic::loongarch_csrwr_w:
4261 case Intrinsic::loongarch_csrwr_d: {
4262 unsigned Imm = Op.getConstantOperandVal(i: 3);
4263 return !isUInt<14>(x: Imm)
4264 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
4265 : DAG.getNode(Opcode: LoongArchISD::CSRWR, DL, ResultTys: {GRLenVT, MVT::Other},
4266 Ops: {Chain, Op.getOperand(i: 2),
4267 DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
4268 }
4269 case Intrinsic::loongarch_csrxchg_w:
4270 case Intrinsic::loongarch_csrxchg_d: {
4271 unsigned Imm = Op.getConstantOperandVal(i: 4);
4272 return !isUInt<14>(x: Imm)
4273 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
4274 : DAG.getNode(Opcode: LoongArchISD::CSRXCHG, DL, ResultTys: {GRLenVT, MVT::Other},
4275 Ops: {Chain, Op.getOperand(i: 2), Op.getOperand(i: 3),
4276 DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
4277 }
4278 case Intrinsic::loongarch_iocsrrd_d: {
4279 return DAG.getNode(
4280 Opcode: LoongArchISD::IOCSRRD_D, DL, ResultTys: {GRLenVT, MVT::Other},
4281 Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op.getOperand(i: 2))});
4282 }
4283#define IOCSRRD_CASE(NAME, NODE) \
4284 case Intrinsic::loongarch_##NAME: { \
4285 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
4286 {Chain, Op.getOperand(2)}); \
4287 }
4288 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4289 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4290 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4291#undef IOCSRRD_CASE
4292 case Intrinsic::loongarch_cpucfg: {
4293 return DAG.getNode(Opcode: LoongArchISD::CPUCFG, DL, ResultTys: {GRLenVT, MVT::Other},
4294 Ops: {Chain, Op.getOperand(i: 2)});
4295 }
4296 case Intrinsic::loongarch_lddir_d: {
4297 unsigned Imm = Op.getConstantOperandVal(i: 3);
4298 return !isUInt<8>(x: Imm)
4299 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
4300 : Op;
4301 }
4302 case Intrinsic::loongarch_movfcsr2gr: {
4303 if (!Subtarget.hasBasicF())
4304 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgReqF, DAG);
4305 unsigned Imm = Op.getConstantOperandVal(i: 2);
4306 return !isUInt<2>(x: Imm)
4307 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
4308 : DAG.getNode(Opcode: LoongArchISD::MOVFCSR2GR, DL, ResultTys: {VT, MVT::Other},
4309 Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
4310 }
4311 case Intrinsic::loongarch_lsx_vld:
4312 case Intrinsic::loongarch_lsx_vldrepl_b:
4313 case Intrinsic::loongarch_lasx_xvld:
4314 case Intrinsic::loongarch_lasx_xvldrepl_b:
4315 return !isInt<12>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue())
4316 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
4317 : SDValue();
4318 case Intrinsic::loongarch_lsx_vldrepl_h:
4319 case Intrinsic::loongarch_lasx_xvldrepl_h:
4320 return !isShiftedInt<11, 1>(
4321 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue())
4322 ? emitIntrinsicWithChainErrorMessage(
4323 Op, ErrorMsg: "argument out of range or not a multiple of 2", DAG)
4324 : SDValue();
4325 case Intrinsic::loongarch_lsx_vldrepl_w:
4326 case Intrinsic::loongarch_lasx_xvldrepl_w:
4327 return !isShiftedInt<10, 2>(
4328 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue())
4329 ? emitIntrinsicWithChainErrorMessage(
4330 Op, ErrorMsg: "argument out of range or not a multiple of 4", DAG)
4331 : SDValue();
4332 case Intrinsic::loongarch_lsx_vldrepl_d:
4333 case Intrinsic::loongarch_lasx_xvldrepl_d:
4334 return !isShiftedInt<9, 3>(
4335 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue())
4336 ? emitIntrinsicWithChainErrorMessage(
4337 Op, ErrorMsg: "argument out of range or not a multiple of 8", DAG)
4338 : SDValue();
4339 }
4340}
4341
// Helper function that emits an error message for intrinsics with a void
// return value and returns the chain.
4344static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
4345 SelectionDAG &DAG) {
4346
4347 DAG.getContext()->emitError(ErrorStr: Op->getOperationName(G: 0) + ": " + ErrorMsg + ".");
4348 return Op.getOperand(i: 0);
4349}
4350
4351SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
4352 SelectionDAG &DAG) const {
4353 SDLoc DL(Op);
4354 MVT GRLenVT = Subtarget.getGRLenVT();
4355 SDValue Chain = Op.getOperand(i: 0);
4356 uint64_t IntrinsicEnum = Op.getConstantOperandVal(i: 1);
4357 SDValue Op2 = Op.getOperand(i: 2);
4358 const StringRef ErrorMsgOOR = "argument out of range";
4359 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4360 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
4361 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4362
4363 switch (IntrinsicEnum) {
4364 default:
4365 // TODO: Add more Intrinsics.
4366 return SDValue();
4367 case Intrinsic::loongarch_cacop_d:
4368 case Intrinsic::loongarch_cacop_w: {
4369 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
4370 return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG);
4371 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
4372 return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA32, DAG);
4373 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
4374 unsigned Imm1 = Op2->getAsZExtVal();
4375 int Imm2 = cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue();
4376 if (!isUInt<5>(x: Imm1) || !isInt<12>(x: Imm2))
4377 return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG);
4378 return Op;
4379 }
4380 case Intrinsic::loongarch_dbar: {
4381 unsigned Imm = Op2->getAsZExtVal();
4382 return !isUInt<15>(x: Imm)
4383 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
4384 : DAG.getNode(Opcode: LoongArchISD::DBAR, DL, VT: MVT::Other, N1: Chain,
4385 N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT));
4386 }
4387 case Intrinsic::loongarch_ibar: {
4388 unsigned Imm = Op2->getAsZExtVal();
4389 return !isUInt<15>(x: Imm)
4390 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
4391 : DAG.getNode(Opcode: LoongArchISD::IBAR, DL, VT: MVT::Other, N1: Chain,
4392 N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT));
4393 }
4394 case Intrinsic::loongarch_break: {
4395 unsigned Imm = Op2->getAsZExtVal();
4396 return !isUInt<15>(x: Imm)
4397 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
4398 : DAG.getNode(Opcode: LoongArchISD::BREAK, DL, VT: MVT::Other, N1: Chain,
4399 N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT));
4400 }
4401 case Intrinsic::loongarch_movgr2fcsr: {
4402 if (!Subtarget.hasBasicF())
4403 return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqF, DAG);
4404 unsigned Imm = Op2->getAsZExtVal();
4405 return !isUInt<2>(x: Imm)
4406 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
4407 : DAG.getNode(Opcode: LoongArchISD::MOVGR2FCSR, DL, VT: MVT::Other, N1: Chain,
4408 N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT),
4409 N3: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: GRLenVT,
4410 Operand: Op.getOperand(i: 3)));
4411 }
4412 case Intrinsic::loongarch_syscall: {
4413 unsigned Imm = Op2->getAsZExtVal();
4414 return !isUInt<15>(x: Imm)
4415 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
4416 : DAG.getNode(Opcode: LoongArchISD::SYSCALL, DL, VT: MVT::Other, N1: Chain,
4417 N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT));
4418 }
4419#define IOCSRWR_CASE(NAME, NODE) \
4420 case Intrinsic::loongarch_##NAME: { \
4421 SDValue Op3 = Op.getOperand(3); \
4422 return Subtarget.is64Bit() \
4423 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
4424 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4425 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
4426 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
4427 Op3); \
4428 }
4429 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
4430 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
4431 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
4432#undef IOCSRWR_CASE
4433 case Intrinsic::loongarch_iocsrwr_d: {
4434 return !Subtarget.is64Bit()
4435 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG)
4436 : DAG.getNode(Opcode: LoongArchISD::IOCSRWR_D, DL, VT: MVT::Other, N1: Chain,
4437 N2: Op2,
4438 N3: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64,
4439 Operand: Op.getOperand(i: 3)));
4440 }
4441#define ASRT_LE_GT_CASE(NAME) \
4442 case Intrinsic::loongarch_##NAME: { \
4443 return !Subtarget.is64Bit() \
4444 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
4445 : Op; \
4446 }
4447 ASRT_LE_GT_CASE(asrtle_d)
4448 ASRT_LE_GT_CASE(asrtgt_d)
4449#undef ASRT_LE_GT_CASE
4450 case Intrinsic::loongarch_ldpte_d: {
4451 unsigned Imm = Op.getConstantOperandVal(i: 3);
4452 return !Subtarget.is64Bit()
4453 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG)
4454 : !isUInt<8>(x: Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
4455 : Op;
4456 }
4457 case Intrinsic::loongarch_lsx_vst:
4458 case Intrinsic::loongarch_lasx_xvst:
4459 return !isInt<12>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue())
4460 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
4461 : SDValue();
4462 case Intrinsic::loongarch_lasx_xvstelm_b:
4463 return (!isInt<8>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
4464 !isUInt<5>(x: Op.getConstantOperandVal(i: 5)))
4465 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
4466 : SDValue();
4467 case Intrinsic::loongarch_lsx_vstelm_b:
4468 return (!isInt<8>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
4469 !isUInt<4>(x: Op.getConstantOperandVal(i: 5)))
4470 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
4471 : SDValue();
4472 case Intrinsic::loongarch_lasx_xvstelm_h:
4473 return (!isShiftedInt<8, 1>(
4474 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
4475 !isUInt<4>(x: Op.getConstantOperandVal(i: 5)))
4476 ? emitIntrinsicErrorMessage(
4477 Op, ErrorMsg: "argument out of range or not a multiple of 2", DAG)
4478 : SDValue();
4479 case Intrinsic::loongarch_lsx_vstelm_h:
4480 return (!isShiftedInt<8, 1>(
4481 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
4482 !isUInt<3>(x: Op.getConstantOperandVal(i: 5)))
4483 ? emitIntrinsicErrorMessage(
4484 Op, ErrorMsg: "argument out of range or not a multiple of 2", DAG)
4485 : SDValue();
4486 case Intrinsic::loongarch_lasx_xvstelm_w:
4487 return (!isShiftedInt<8, 2>(
4488 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
4489 !isUInt<3>(x: Op.getConstantOperandVal(i: 5)))
4490 ? emitIntrinsicErrorMessage(
4491 Op, ErrorMsg: "argument out of range or not a multiple of 4", DAG)
4492 : SDValue();
4493 case Intrinsic::loongarch_lsx_vstelm_w:
4494 return (!isShiftedInt<8, 2>(
4495 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
4496 !isUInt<2>(x: Op.getConstantOperandVal(i: 5)))
4497 ? emitIntrinsicErrorMessage(
4498 Op, ErrorMsg: "argument out of range or not a multiple of 4", DAG)
4499 : SDValue();
4500 case Intrinsic::loongarch_lasx_xvstelm_d:
4501 return (!isShiftedInt<8, 3>(
4502 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
4503 !isUInt<2>(x: Op.getConstantOperandVal(i: 5)))
4504 ? emitIntrinsicErrorMessage(
4505 Op, ErrorMsg: "argument out of range or not a multiple of 8", DAG)
4506 : SDValue();
4507 case Intrinsic::loongarch_lsx_vstelm_d:
4508 return (!isShiftedInt<8, 3>(
4509 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
4510 !isUInt<1>(x: Op.getConstantOperandVal(i: 5)))
4511 ? emitIntrinsicErrorMessage(
4512 Op, ErrorMsg: "argument out of range or not a multiple of 8", DAG)
4513 : SDValue();
4514 }
4515}
4516
4517SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
4518 SelectionDAG &DAG) const {
4519 SDLoc DL(Op);
4520 SDValue Lo = Op.getOperand(i: 0);
4521 SDValue Hi = Op.getOperand(i: 1);
4522 SDValue Shamt = Op.getOperand(i: 2);
4523 EVT VT = Lo.getValueType();
4524
  // if Shamt-GRLen < 0: // Shamt < GRLen
  //   Lo = Lo << Shamt
  //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
  // else:
  //   Lo = 0
  //   Hi = Lo << (Shamt-GRLen)
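  //
  // For example (an illustrative case, not tied to a particular caller), with
  // GRLen=32 (LA32) and Shamt=5:
  //   Lo = Lo << 5
  //   Hi = (Hi << 5) | ((Lo >>u 1) >>u (31 ^ 5)) = (Hi << 5) | (Lo >>u 27)
  // and with Shamt=36 (>= GRLen):
  //   Lo = 0
  //   Hi = Lo << 4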
4531
4532 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
4533 SDValue One = DAG.getConstant(Val: 1, DL, VT);
4534 SDValue MinusGRLen =
4535 DAG.getSignedConstant(Val: -(int)Subtarget.getGRLen(), DL, VT);
4536 SDValue GRLenMinus1 = DAG.getConstant(Val: Subtarget.getGRLen() - 1, DL, VT);
4537 SDValue ShamtMinusGRLen = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shamt, N2: MinusGRLen);
4538 SDValue GRLenMinus1Shamt = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Shamt, N2: GRLenMinus1);
4539
4540 SDValue LoTrue = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Lo, N2: Shamt);
4541 SDValue ShiftRight1Lo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo, N2: One);
4542 SDValue ShiftRightLo =
4543 DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: ShiftRight1Lo, N2: GRLenMinus1Shamt);
4544 SDValue ShiftLeftHi = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi, N2: Shamt);
4545 SDValue HiTrue = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShiftLeftHi, N2: ShiftRightLo);
4546 SDValue HiFalse = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Lo, N2: ShamtMinusGRLen);
4547
4548 SDValue CC = DAG.getSetCC(DL, VT, LHS: ShamtMinusGRLen, RHS: Zero, Cond: ISD::SETLT);
4549
4550 Lo = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: LoTrue, N3: Zero);
4551 Hi = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: HiTrue, N3: HiFalse);
4552
4553 SDValue Parts[2] = {Lo, Hi};
4554 return DAG.getMergeValues(Ops: Parts, dl: DL);
4555}
4556
4557SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
4558 SelectionDAG &DAG,
4559 bool IsSRA) const {
4560 SDLoc DL(Op);
4561 SDValue Lo = Op.getOperand(i: 0);
4562 SDValue Hi = Op.getOperand(i: 1);
4563 SDValue Shamt = Op.getOperand(i: 2);
4564 EVT VT = Lo.getValueType();
4565
  // SRA expansion:
  //   if Shamt-GRLen < 0: // Shamt < GRLen
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (GRLen-1 ^ Shamt))
  //     Hi = Hi >>s Shamt
  //   else:
  //     Lo = Hi >>s (Shamt-GRLen);
  //     Hi = Hi >>s (GRLen-1)
  //
  // SRL expansion:
  //   if Shamt-GRLen < 0: // Shamt < GRLen
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (GRLen-1 ^ Shamt))
  //     Hi = Hi >>u Shamt
  //   else:
  //     Lo = Hi >>u (Shamt-GRLen);
  //     Hi = 0;
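  //
  // For example (SRL), with GRLen=32 (LA32) and Shamt=5:
  //   Lo = (Lo >>u 5) | ((Hi << 1) << 26) = (Lo >>u 5) | (Hi << 27)
  //   Hi = Hi >>u 5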
4581
4582 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
4583
4584 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
4585 SDValue One = DAG.getConstant(Val: 1, DL, VT);
4586 SDValue MinusGRLen =
4587 DAG.getSignedConstant(Val: -(int)Subtarget.getGRLen(), DL, VT);
4588 SDValue GRLenMinus1 = DAG.getConstant(Val: Subtarget.getGRLen() - 1, DL, VT);
4589 SDValue ShamtMinusGRLen = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shamt, N2: MinusGRLen);
4590 SDValue GRLenMinus1Shamt = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Shamt, N2: GRLenMinus1);
4591
4592 SDValue ShiftRightLo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo, N2: Shamt);
4593 SDValue ShiftLeftHi1 = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi, N2: One);
4594 SDValue ShiftLeftHi =
4595 DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: ShiftLeftHi1, N2: GRLenMinus1Shamt);
4596 SDValue LoTrue = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShiftRightLo, N2: ShiftLeftHi);
4597 SDValue HiTrue = DAG.getNode(Opcode: ShiftRightOp, DL, VT, N1: Hi, N2: Shamt);
4598 SDValue LoFalse = DAG.getNode(Opcode: ShiftRightOp, DL, VT, N1: Hi, N2: ShamtMinusGRLen);
4599 SDValue HiFalse =
4600 IsSRA ? DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: Hi, N2: GRLenMinus1) : Zero;
4601
4602 SDValue CC = DAG.getSetCC(DL, VT, LHS: ShamtMinusGRLen, RHS: Zero, Cond: ISD::SETLT);
4603
4604 Lo = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: LoTrue, N3: LoFalse);
4605 Hi = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: HiTrue, N3: HiFalse);
4606
4607 SDValue Parts[2] = {Lo, Hi};
4608 return DAG.getMergeValues(Ops: Parts, dl: DL);
4609}
4610
4611// Returns the opcode of the target-specific SDNode that implements the 32-bit
4612// form of the given Opcode.
4613static unsigned getLoongArchWOpcode(unsigned Opcode) {
4614 switch (Opcode) {
4615 default:
4616 llvm_unreachable("Unexpected opcode");
4617 case ISD::SDIV:
4618 return LoongArchISD::DIV_W;
4619 case ISD::UDIV:
4620 return LoongArchISD::DIV_WU;
4621 case ISD::SREM:
4622 return LoongArchISD::MOD_W;
4623 case ISD::UREM:
4624 return LoongArchISD::MOD_WU;
4625 case ISD::SHL:
4626 return LoongArchISD::SLL_W;
4627 case ISD::SRA:
4628 return LoongArchISD::SRA_W;
4629 case ISD::SRL:
4630 return LoongArchISD::SRL_W;
4631 case ISD::ROTL:
4632 case ISD::ROTR:
4633 return LoongArchISD::ROTR_W;
4634 case ISD::CTTZ:
4635 return LoongArchISD::CTZ_W;
4636 case ISD::CTLZ:
4637 return LoongArchISD::CLZ_W;
4638 }
4639}
4640
// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
// otherwise be promoted to i64, making it difficult to select the
// SLL_W/.../*W instruction later on, because the fact that the operation was
// originally of type i8/i16/i32 is lost.
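//
// For example, on LA64 an i32 arithmetic shift right
//   (i32 (sra X, Y))
// is legalized here as
//   (i32 (trunc (LoongArchISD::SRA_W (any_ext X), (any_ext Y))))
// from which SRA.W can be selected.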
4646static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
4647 unsigned ExtOpc = ISD::ANY_EXTEND) {
4648 SDLoc DL(N);
4649 unsigned WOpcode = getLoongArchWOpcode(Opcode: N->getOpcode());
4650 SDValue NewOp0, NewRes;
4651
4652 switch (NumOp) {
4653 default:
4654 llvm_unreachable("Unexpected NumOp");
4655 case 1: {
4656 NewOp0 = DAG.getNode(Opcode: ExtOpc, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0));
4657 NewRes = DAG.getNode(Opcode: WOpcode, DL, VT: MVT::i64, Operand: NewOp0);
4658 break;
4659 }
4660 case 2: {
4661 NewOp0 = DAG.getNode(Opcode: ExtOpc, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0));
4662 SDValue NewOp1 = DAG.getNode(Opcode: ExtOpc, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1));
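    // There is no 32-bit rotate-left node; getLoongArchWOpcode maps ISD::ROTL
    // to ROTR_W, so convert the left-rotate amount into the equivalent
    // right-rotate amount (32 - amount).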
4663 if (N->getOpcode() == ISD::ROTL) {
4664 SDValue TmpOp = DAG.getConstant(Val: 32, DL, VT: MVT::i64);
4665 NewOp1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: MVT::i64, N1: TmpOp, N2: NewOp1);
4666 }
4667 NewRes = DAG.getNode(Opcode: WOpcode, DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1);
4668 break;
4669 }
  // TODO: Handle more NumOp.
4671 }
4672
4673 // ReplaceNodeResults requires we maintain the same type for the return
4674 // value.
4675 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: NewRes);
4676}
4677
// Converts the given 32-bit operation to an i64 operation with sign-extension
// semantics, in order to reduce the number of sign-extension instructions.
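//
// For example, (i32 (add X, Y)) becomes
//   (i32 (trunc (sext_inreg (add (any_ext X), (any_ext Y)), i32)))
// which can typically be selected as a single ADD.W on LA64.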
4680static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
4681 SDLoc DL(N);
4682 SDValue NewOp0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0));
4683 SDValue NewOp1 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1));
4684 SDValue NewWOp = DAG.getNode(Opcode: N->getOpcode(), DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1);
4685 SDValue NewRes = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: MVT::i64, N1: NewWOp,
4686 N2: DAG.getValueType(MVT::i32));
4687 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: NewRes);
4688}
4689
// Helper function that emits an error message for intrinsics with or without
// a chain, and returns an UNDEF and (when WithChain is set) the chain as the
// results.
4692static void emitErrorAndReplaceIntrinsicResults(
4693 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
4694 StringRef ErrorMsg, bool WithChain = true) {
4695 DAG.getContext()->emitError(ErrorStr: N->getOperationName(G: 0) + ": " + ErrorMsg + ".");
4696 Results.push_back(Elt: DAG.getUNDEF(VT: N->getValueType(ResNo: 0)));
4697 if (!WithChain)
4698 return;
4699 Results.push_back(Elt: N->getOperand(Num: 0));
4700}
4701
4702template <unsigned N>
4703static void
4704replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
4705 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
4706 unsigned ResOp) {
4707 const StringRef ErrorMsgOOR = "argument out of range";
4708 unsigned Imm = Node->getConstantOperandVal(Num: 2);
4709 if (!isUInt<N>(Imm)) {
4710 emitErrorAndReplaceIntrinsicResults(N: Node, Results, DAG, ErrorMsg: ErrorMsgOOR,
4711 /*WithChain=*/false);
4712 return;
4713 }
4714 SDLoc DL(Node);
4715 SDValue Vec = Node->getOperand(Num: 1);
4716
4717 SDValue PickElt =
4718 DAG.getNode(Opcode: ResOp, DL, VT: Subtarget.getGRLenVT(), N1: Vec,
4719 N2: DAG.getConstant(Val: Imm, DL, VT: Subtarget.getGRLenVT()),
4720 N3: DAG.getValueType(Vec.getValueType().getVectorElementType()));
4721 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: Node->getValueType(ResNo: 0),
4722 Operand: PickElt.getValue(R: 0)));
4723}
4724
4725static void replaceVecCondBranchResults(SDNode *N,
4726 SmallVectorImpl<SDValue> &Results,
4727 SelectionDAG &DAG,
4728 const LoongArchSubtarget &Subtarget,
4729 unsigned ResOp) {
4730 SDLoc DL(N);
4731 SDValue Vec = N->getOperand(Num: 1);
4732
4733 SDValue CB = DAG.getNode(Opcode: ResOp, DL, VT: Subtarget.getGRLenVT(), Operand: Vec);
4734 Results.push_back(
4735 Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: CB.getValue(R: 0)));
4736}
4737
4738static void
4739replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
4740 SelectionDAG &DAG,
4741 const LoongArchSubtarget &Subtarget) {
4742 switch (N->getConstantOperandVal(Num: 0)) {
4743 default:
4744 llvm_unreachable("Unexpected Intrinsic.");
4745 case Intrinsic::loongarch_lsx_vpickve2gr_b:
4746 replaceVPICKVE2GRResults<4>(Node: N, Results, DAG, Subtarget,
4747 ResOp: LoongArchISD::VPICK_SEXT_ELT);
4748 break;
4749 case Intrinsic::loongarch_lsx_vpickve2gr_h:
4750 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
4751 replaceVPICKVE2GRResults<3>(Node: N, Results, DAG, Subtarget,
4752 ResOp: LoongArchISD::VPICK_SEXT_ELT);
4753 break;
4754 case Intrinsic::loongarch_lsx_vpickve2gr_w:
4755 replaceVPICKVE2GRResults<2>(Node: N, Results, DAG, Subtarget,
4756 ResOp: LoongArchISD::VPICK_SEXT_ELT);
4757 break;
4758 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
4759 replaceVPICKVE2GRResults<4>(Node: N, Results, DAG, Subtarget,
4760 ResOp: LoongArchISD::VPICK_ZEXT_ELT);
4761 break;
4762 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
4763 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
4764 replaceVPICKVE2GRResults<3>(Node: N, Results, DAG, Subtarget,
4765 ResOp: LoongArchISD::VPICK_ZEXT_ELT);
4766 break;
4767 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
4768 replaceVPICKVE2GRResults<2>(Node: N, Results, DAG, Subtarget,
4769 ResOp: LoongArchISD::VPICK_ZEXT_ELT);
4770 break;
4771 case Intrinsic::loongarch_lsx_bz_b:
4772 case Intrinsic::loongarch_lsx_bz_h:
4773 case Intrinsic::loongarch_lsx_bz_w:
4774 case Intrinsic::loongarch_lsx_bz_d:
4775 case Intrinsic::loongarch_lasx_xbz_b:
4776 case Intrinsic::loongarch_lasx_xbz_h:
4777 case Intrinsic::loongarch_lasx_xbz_w:
4778 case Intrinsic::loongarch_lasx_xbz_d:
4779 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4780 ResOp: LoongArchISD::VALL_ZERO);
4781 break;
4782 case Intrinsic::loongarch_lsx_bz_v:
4783 case Intrinsic::loongarch_lasx_xbz_v:
4784 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4785 ResOp: LoongArchISD::VANY_ZERO);
4786 break;
4787 case Intrinsic::loongarch_lsx_bnz_b:
4788 case Intrinsic::loongarch_lsx_bnz_h:
4789 case Intrinsic::loongarch_lsx_bnz_w:
4790 case Intrinsic::loongarch_lsx_bnz_d:
4791 case Intrinsic::loongarch_lasx_xbnz_b:
4792 case Intrinsic::loongarch_lasx_xbnz_h:
4793 case Intrinsic::loongarch_lasx_xbnz_w:
4794 case Intrinsic::loongarch_lasx_xbnz_d:
4795 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4796 ResOp: LoongArchISD::VALL_NONZERO);
4797 break;
4798 case Intrinsic::loongarch_lsx_bnz_v:
4799 case Intrinsic::loongarch_lasx_xbnz_v:
4800 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4801 ResOp: LoongArchISD::VANY_NONZERO);
4802 break;
4803 }
4804}
4805
4806static void replaceCMP_XCHG_128Results(SDNode *N,
4807 SmallVectorImpl<SDValue> &Results,
4808 SelectionDAG &DAG) {
4809 assert(N->getValueType(0) == MVT::i128 &&
4810 "AtomicCmpSwap on types less than 128 should be legal");
4811 MachineMemOperand *MemOp = cast<MemSDNode>(Val: N)->getMemOperand();
4812
4813 unsigned Opcode;
4814 switch (MemOp->getMergedOrdering()) {
4815 case AtomicOrdering::Acquire:
4816 case AtomicOrdering::AcquireRelease:
4817 case AtomicOrdering::SequentiallyConsistent:
4818 Opcode = LoongArch::PseudoCmpXchg128Acquire;
4819 break;
4820 case AtomicOrdering::Monotonic:
4821 case AtomicOrdering::Release:
4822 Opcode = LoongArch::PseudoCmpXchg128;
4823 break;
4824 default:
4825 llvm_unreachable("Unexpected ordering!");
4826 }
4827
4828 SDLoc DL(N);
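  // Split the i128 compare and exchange values into lo/hi i64 halves for the
  // pseudo, which takes {addr, cmpval.lo, cmpval.hi, newval.lo, newval.hi,
  // chain}; results 0/1 (lo/hi of the old value) and 3 (chain) are used below.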
4829 auto CmpVal = DAG.SplitScalar(N: N->getOperand(Num: 2), DL, LoVT: MVT::i64, HiVT: MVT::i64);
4830 auto NewVal = DAG.SplitScalar(N: N->getOperand(Num: 3), DL, LoVT: MVT::i64, HiVT: MVT::i64);
4831 SDValue Ops[] = {N->getOperand(Num: 1), CmpVal.first, CmpVal.second,
4832 NewVal.first, NewVal.second, N->getOperand(Num: 0)};
4833
4834 SDNode *CmpSwap = DAG.getMachineNode(
4835 Opcode, dl: SDLoc(N), VTs: DAG.getVTList(VT1: MVT::i64, VT2: MVT::i64, VT3: MVT::i64, VT4: MVT::Other),
4836 Ops);
4837 DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: CmpSwap), NewMemRefs: {MemOp});
4838 Results.push_back(Elt: DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: MVT::i128,
4839 N1: SDValue(CmpSwap, 0), N2: SDValue(CmpSwap, 1)));
4840 Results.push_back(Elt: SDValue(CmpSwap, 3));
4841}
4842
4843void LoongArchTargetLowering::ReplaceNodeResults(
4844 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
4845 SDLoc DL(N);
4846 EVT VT = N->getValueType(ResNo: 0);
4847 switch (N->getOpcode()) {
4848 default:
4849 llvm_unreachable("Don't know how to legalize this operation");
4850 case ISD::ADD:
4851 case ISD::SUB:
4852 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4853 "Unexpected custom legalisation");
4854 Results.push_back(Elt: customLegalizeToWOpWithSExt(N, DAG));
4855 break;
4856 case ISD::SDIV:
4857 case ISD::UDIV:
4858 case ISD::SREM:
4859 case ISD::UREM:
4860 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4861 "Unexpected custom legalisation");
4862 Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 2,
4863 ExtOpc: Subtarget.hasDiv32() && VT == MVT::i32
4864 ? ISD::ANY_EXTEND
4865 : ISD::SIGN_EXTEND));
4866 break;
4867 case ISD::SHL:
4868 case ISD::SRA:
4869 case ISD::SRL:
4870 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4871 "Unexpected custom legalisation");
4872 if (N->getOperand(Num: 1).getOpcode() != ISD::Constant) {
4873 Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 2));
4874 break;
4875 }
4876 break;
4877 case ISD::ROTL:
4878 case ISD::ROTR:
4879 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4880 "Unexpected custom legalisation");
4881 Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 2));
4882 break;
4883 case ISD::FP_TO_SINT: {
4884 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4885 "Unexpected custom legalisation");
4886 SDValue Src = N->getOperand(Num: 0);
4887 EVT FVT = EVT::getFloatingPointVT(BitWidth: N->getValueSizeInBits(ResNo: 0));
4888 if (getTypeAction(Context&: *DAG.getContext(), VT: Src.getValueType()) !=
4889 TargetLowering::TypeSoftenFloat) {
4890 if (!isTypeLegal(VT: Src.getValueType()))
4891 return;
4892 if (Src.getValueType() == MVT::f16)
4893 Src = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: MVT::f32, Operand: Src);
4894 SDValue Dst = DAG.getNode(Opcode: LoongArchISD::FTINT, DL, VT: FVT, Operand: Src);
4895 Results.push_back(Elt: DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Dst));
4896 return;
4897 }
4898 // If the FP type needs to be softened, emit a library call using the 'si'
4899 // version. If we left it to default legalization we'd end up with 'di'.
4900 RTLIB::Libcall LC;
4901 LC = RTLIB::getFPTOSINT(OpVT: Src.getValueType(), RetVT: VT);
4902 MakeLibCallOptions CallOptions;
4903 EVT OpVT = Src.getValueType();
4904 CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT: VT);
4905 SDValue Chain = SDValue();
4906 SDValue Result;
4907 std::tie(args&: Result, args&: Chain) =
4908 makeLibCall(DAG, LC, RetVT: VT, Ops: Src, CallOptions, dl: DL, Chain);
4909 Results.push_back(Elt: Result);
4910 break;
4911 }
4912 case ISD::BITCAST: {
4913 SDValue Src = N->getOperand(Num: 0);
4914 EVT SrcVT = Src.getValueType();
4915 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
4916 Subtarget.hasBasicF()) {
4917 SDValue Dst =
4918 DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Src);
4919 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Dst));
4920 } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
4921 SDValue NewReg = DAG.getNode(Opcode: LoongArchISD::SPLIT_PAIR_F64, DL,
4922 VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32), N: Src);
4923 SDValue RetReg = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: MVT::i64,
4924 N1: NewReg.getValue(R: 0), N2: NewReg.getValue(R: 1));
4925 Results.push_back(Elt: RetReg);
4926 }
4927 break;
4928 }
4929 case ISD::FP_TO_UINT: {
4930 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4931 "Unexpected custom legalisation");
4932 auto &TLI = DAG.getTargetLoweringInfo();
4933 SDValue Tmp1, Tmp2;
4934 TLI.expandFP_TO_UINT(N, Result&: Tmp1, Chain&: Tmp2, DAG);
4935 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Tmp1));
4936 break;
4937 }
4938 case ISD::BSWAP: {
4939 SDValue Src = N->getOperand(Num: 0);
4940 assert((VT == MVT::i16 || VT == MVT::i32) &&
4941 "Unexpected custom legalization");
4942 MVT GRLenVT = Subtarget.getGRLenVT();
4943 SDValue NewSrc = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: GRLenVT, Operand: Src);
4944 SDValue Tmp;
4945 switch (VT.getSizeInBits()) {
4946 default:
4947 llvm_unreachable("Unexpected operand width");
4948 case 16:
4949 Tmp = DAG.getNode(Opcode: LoongArchISD::REVB_2H, DL, VT: GRLenVT, Operand: NewSrc);
4950 break;
4951 case 32:
      // Only LA64 will get here, due to the size mismatch between VT and
      // GRLenVT; LA32 lowering is defined directly in LoongArchInstrInfo.
4954 Tmp = DAG.getNode(Opcode: LoongArchISD::REVB_2W, DL, VT: GRLenVT, Operand: NewSrc);
4955 break;
4956 }
4957 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Tmp));
4958 break;
4959 }
4960 case ISD::BITREVERSE: {
4961 SDValue Src = N->getOperand(Num: 0);
4962 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
4963 "Unexpected custom legalization");
4964 MVT GRLenVT = Subtarget.getGRLenVT();
4965 SDValue NewSrc = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: GRLenVT, Operand: Src);
4966 SDValue Tmp;
4967 switch (VT.getSizeInBits()) {
4968 default:
4969 llvm_unreachable("Unexpected operand width");
4970 case 8:
4971 Tmp = DAG.getNode(Opcode: LoongArchISD::BITREV_4B, DL, VT: GRLenVT, Operand: NewSrc);
4972 break;
4973 case 32:
4974 Tmp = DAG.getNode(Opcode: LoongArchISD::BITREV_W, DL, VT: GRLenVT, Operand: NewSrc);
4975 break;
4976 }
4977 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Tmp));
4978 break;
4979 }
4980 case ISD::CTLZ:
4981 case ISD::CTTZ: {
4982 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4983 "Unexpected custom legalisation");
4984 Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 1));
4985 break;
4986 }
4987 case ISD::INTRINSIC_W_CHAIN: {
4988 SDValue Chain = N->getOperand(Num: 0);
4989 SDValue Op2 = N->getOperand(Num: 2);
4990 MVT GRLenVT = Subtarget.getGRLenVT();
4991 const StringRef ErrorMsgOOR = "argument out of range";
4992 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4993 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4994
4995 switch (N->getConstantOperandVal(Num: 1)) {
4996 default:
4997 llvm_unreachable("Unexpected Intrinsic.");
4998 case Intrinsic::loongarch_movfcsr2gr: {
4999 if (!Subtarget.hasBasicF()) {
5000 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgReqF);
5001 return;
5002 }
5003 unsigned Imm = Op2->getAsZExtVal();
5004 if (!isUInt<2>(x: Imm)) {
5005 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR);
5006 return;
5007 }
5008 SDValue MOVFCSR2GRResults = DAG.getNode(
5009 Opcode: LoongArchISD::MOVFCSR2GR, DL: SDLoc(N), ResultTys: {MVT::i64, MVT::Other},
5010 Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
5011 Results.push_back(
5012 Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: MOVFCSR2GRResults.getValue(R: 0)));
5013 Results.push_back(Elt: MOVFCSR2GRResults.getValue(R: 1));
5014 break;
5015 }
5016#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
5017 case Intrinsic::loongarch_##NAME: { \
5018 SDValue NODE = DAG.getNode( \
5019 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5020 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
5021 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
5022 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
5023 Results.push_back(NODE.getValue(1)); \
5024 break; \
5025 }
5026 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
5027 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
5028 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
5029 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
5030 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
5031 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
5032#undef CRC_CASE_EXT_BINARYOP
5033
5034#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
5035 case Intrinsic::loongarch_##NAME: { \
5036 SDValue NODE = DAG.getNode( \
5037 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5038 {Chain, Op2, \
5039 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
5040 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
5041 Results.push_back(NODE.getValue(1)); \
5042 break; \
5043 }
5044 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
5045 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
5046#undef CRC_CASE_EXT_UNARYOP
5047#define CSR_CASE(ID) \
5048 case Intrinsic::loongarch_##ID: { \
5049 if (!Subtarget.is64Bit()) \
5050 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
5051 break; \
5052 }
5053 CSR_CASE(csrrd_d);
5054 CSR_CASE(csrwr_d);
5055 CSR_CASE(csrxchg_d);
5056 CSR_CASE(iocsrrd_d);
5057#undef CSR_CASE
5058 case Intrinsic::loongarch_csrrd_w: {
5059 unsigned Imm = Op2->getAsZExtVal();
5060 if (!isUInt<14>(x: Imm)) {
5061 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR);
5062 return;
5063 }
5064 SDValue CSRRDResults =
5065 DAG.getNode(Opcode: LoongArchISD::CSRRD, DL, ResultTys: {GRLenVT, MVT::Other},
5066 Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
5067 Results.push_back(
5068 Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRRDResults.getValue(R: 0)));
5069 Results.push_back(Elt: CSRRDResults.getValue(R: 1));
5070 break;
5071 }
5072 case Intrinsic::loongarch_csrwr_w: {
5073 unsigned Imm = N->getConstantOperandVal(Num: 3);
5074 if (!isUInt<14>(x: Imm)) {
5075 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR);
5076 return;
5077 }
5078 SDValue CSRWRResults =
5079 DAG.getNode(Opcode: LoongArchISD::CSRWR, DL, ResultTys: {GRLenVT, MVT::Other},
5080 Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op2),
5081 DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
5082 Results.push_back(
5083 Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRWRResults.getValue(R: 0)));
5084 Results.push_back(Elt: CSRWRResults.getValue(R: 1));
5085 break;
5086 }
5087 case Intrinsic::loongarch_csrxchg_w: {
5088 unsigned Imm = N->getConstantOperandVal(Num: 4);
5089 if (!isUInt<14>(x: Imm)) {
5090 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR);
5091 return;
5092 }
5093 SDValue CSRXCHGResults = DAG.getNode(
5094 Opcode: LoongArchISD::CSRXCHG, DL, ResultTys: {GRLenVT, MVT::Other},
5095 Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op2),
5096 DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 3)),
5097 DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
5098 Results.push_back(
5099 Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRXCHGResults.getValue(R: 0)));
5100 Results.push_back(Elt: CSRXCHGResults.getValue(R: 1));
5101 break;
5102 }
5103#define IOCSRRD_CASE(NAME, NODE) \
5104 case Intrinsic::loongarch_##NAME: { \
5105 SDValue IOCSRRDResults = \
5106 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5107 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
5108 Results.push_back( \
5109 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
5110 Results.push_back(IOCSRRDResults.getValue(1)); \
5111 break; \
5112 }
5113 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
5114 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
5115 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
5116#undef IOCSRRD_CASE
5117 case Intrinsic::loongarch_cpucfg: {
5118 SDValue CPUCFGResults =
5119 DAG.getNode(Opcode: LoongArchISD::CPUCFG, DL, ResultTys: {GRLenVT, MVT::Other},
5120 Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op2)});
5121 Results.push_back(
5122 Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CPUCFGResults.getValue(R: 0)));
5123 Results.push_back(Elt: CPUCFGResults.getValue(R: 1));
5124 break;
5125 }
5126 case Intrinsic::loongarch_lddir_d: {
5127 if (!Subtarget.is64Bit()) {
5128 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgReqLA64);
5129 return;
5130 }
5131 break;
5132 }
5133 }
5134 break;
5135 }
5136 case ISD::READ_REGISTER: {
5137 if (Subtarget.is64Bit())
5138 DAG.getContext()->emitError(
5139 ErrorStr: "On LA64, only 64-bit registers can be read.");
5140 else
5141 DAG.getContext()->emitError(
5142 ErrorStr: "On LA32, only 32-bit registers can be read.");
5143 Results.push_back(Elt: DAG.getUNDEF(VT));
5144 Results.push_back(Elt: N->getOperand(Num: 0));
5145 break;
5146 }
5147 case ISD::INTRINSIC_WO_CHAIN: {
5148 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
5149 break;
5150 }
5151 case ISD::LROUND: {
5152 SDValue Op0 = N->getOperand(Num: 0);
5153 EVT OpVT = Op0.getValueType();
5154 RTLIB::Libcall LC =
5155 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
5156 MakeLibCallOptions CallOptions;
5157 CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT: MVT::i64);
5158 SDValue Result = makeLibCall(DAG, LC, RetVT: MVT::i64, Ops: Op0, CallOptions, dl: DL).first;
5159 Result = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Result);
5160 Results.push_back(Elt: Result);
5161 break;
5162 }
5163 case ISD::ATOMIC_CMP_SWAP: {
5164 replaceCMP_XCHG_128Results(N, Results, DAG);
5165 break;
5166 }
5167 case ISD::TRUNCATE: {
5168 MVT VT = N->getSimpleValueType(ResNo: 0);
5169 if (getTypeAction(Context&: *DAG.getContext(), VT) != TypeWidenVector)
5170 return;
5171
5172 MVT WidenVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT).getSimpleVT();
5173 SDValue In = N->getOperand(Num: 0);
5174 EVT InVT = In.getValueType();
5175 EVT InEltVT = InVT.getVectorElementType();
5176 EVT EltVT = VT.getVectorElementType();
5177 unsigned MinElts = VT.getVectorNumElements();
5178 unsigned WidenNumElts = WidenVT.getVectorNumElements();
5179 unsigned InBits = InVT.getSizeInBits();
5180
5181 if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
5182 if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
5183 int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
5184 SmallVector<int, 16> TruncMask(WidenNumElts, -1);
5185 for (unsigned I = 0; I < MinElts; ++I)
5186 TruncMask[I] = Scale * I;
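        // For example, truncating v2i64 to v2i32 (widened to v4i32): Scale is
        // 2 and TruncMask is {0, 2, -1, -1}, so the shuffle below selects the
        // low 32-bit half of each 64-bit element.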
5187
5188 unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
5189 MVT SVT = In.getSimpleValueType().getScalarType();
5190 MVT VT = MVT::getVectorVT(VT: SVT, NumElements: WidenNumElts);
5191 SDValue WidenIn =
5192 DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT, N1: DAG.getUNDEF(VT), N2: In,
5193 N3: DAG.getVectorIdxConstant(Val: 0, DL));
5194 assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
5195 "Illegal vector type in truncation");
5196 WidenIn = DAG.getBitcast(VT: WidenVT, V: WidenIn);
5197 Results.push_back(
5198 Elt: DAG.getVectorShuffle(VT: WidenVT, dl: DL, N1: WidenIn, N2: WidenIn, Mask: TruncMask));
5199 return;
5200 }
5201 }
5202
5203 break;
5204 }
5205 }
5206}
5207
5208/// Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
5209static SDValue combineAndNotIntoVANDN(SDNode *N, const SDLoc &DL,
5210 SelectionDAG &DAG) {
5211 assert(N->getOpcode() == ISD::AND && "Unexpected opcode combine into ANDN");
5212
5213 MVT VT = N->getSimpleValueType(ResNo: 0);
5214 if (!VT.is128BitVector() && !VT.is256BitVector())
5215 return SDValue();
5216
5217 SDValue X, Y;
5218 SDValue N0 = N->getOperand(Num: 0);
5219 SDValue N1 = N->getOperand(Num: 1);
5220
5221 if (SDValue Not = isNOT(V: N0, DAG)) {
5222 X = Not;
5223 Y = N1;
5224 } else if (SDValue Not = isNOT(V: N1, DAG)) {
5225 X = Not;
5226 Y = N0;
5227 } else
5228 return SDValue();
5229
5230 X = DAG.getBitcast(VT, V: X);
5231 Y = DAG.getBitcast(VT, V: Y);
5232 return DAG.getNode(Opcode: LoongArchISD::VANDN, DL, VT, N1: X, N2: Y);
5233}
5234
5235static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
5236 TargetLowering::DAGCombinerInfo &DCI,
5237 const LoongArchSubtarget &Subtarget) {
5238 if (DCI.isBeforeLegalizeOps())
5239 return SDValue();
5240
5241 SDValue FirstOperand = N->getOperand(Num: 0);
5242 SDValue SecondOperand = N->getOperand(Num: 1);
5243 unsigned FirstOperandOpc = FirstOperand.getOpcode();
5244 EVT ValTy = N->getValueType(ResNo: 0);
5245 SDLoc DL(N);
5246 uint64_t lsb, msb;
5247 unsigned SMIdx, SMLen;
5248 ConstantSDNode *CN;
5249 SDValue NewOperand;
5250 MVT GRLenVT = Subtarget.getGRLenVT();
5251
5252 if (SDValue R = combineAndNotIntoVANDN(N, DL, DAG))
5253 return R;
5254
5255 // BSTRPICK requires the 32S feature.
5256 if (!Subtarget.has32S())
5257 return SDValue();
5258
5259 // Op's second operand must be a shifted mask.
5260 if (!(CN = dyn_cast<ConstantSDNode>(Val&: SecondOperand)) ||
5261 !isShiftedMask_64(Value: CN->getZExtValue(), MaskIdx&: SMIdx, MaskLen&: SMLen))
5262 return SDValue();
5263
5264 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
5265 // Pattern match BSTRPICK.
5266 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
5267 // => BSTRPICK $dst, $src, msb, lsb
5268 // where msb = lsb + len - 1
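    //
    // For example, (and (srl $src, 4), 0xff) becomes
    //   BSTRPICK $dst, $src, 11, 4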
5269
5270 // The second operand of the shift must be an immediate.
5271 if (!(CN = dyn_cast<ConstantSDNode>(Val: FirstOperand.getOperand(i: 1))))
5272 return SDValue();
5273
5274 lsb = CN->getZExtValue();
5275
5276 // Return if the shifted mask does not start at bit 0 or the sum of its
5277 // length and lsb exceeds the word's size.
5278 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
5279 return SDValue();
5280
5281 NewOperand = FirstOperand.getOperand(i: 0);
5282 } else {
5283 // Pattern match BSTRPICK.
    // $dst = and $src, (2**len - 1), if len > 12
5285 // => BSTRPICK $dst, $src, msb, lsb
5286 // where lsb = 0 and msb = len - 1
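    //
    // For example, (and $src, 0xffff) becomes BSTRPICK $dst, $src, 15, 0.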
5287
5288 // If the mask is <= 0xfff, andi can be used instead.
5289 if (CN->getZExtValue() <= 0xfff)
5290 return SDValue();
5291
    // Return if the mask extends beyond the width of the value type.
5293 if (SMIdx + SMLen > ValTy.getSizeInBits())
5294 return SDValue();
5295
5296 if (SMIdx > 0) {
      // Omit if the constant has more than 2 uses. This is a conservative
      // decision: whether it is a win depends on the HW microarchitecture.
      // However, it should always be better for 1 and 2 uses.
5300 if (CN->use_size() > 2)
5301 return SDValue();
5302 // Return if the constant can be composed by a single LU12I.W.
5303 if ((CN->getZExtValue() & 0xfff) == 0)
5304 return SDValue();
      // Return if the constant can be composed by a single ADDI with
      // the zero register.
5307 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
5308 return SDValue();
5309 }
5310
5311 lsb = SMIdx;
5312 NewOperand = FirstOperand;
5313 }
5314
5315 msb = lsb + SMLen - 1;
5316 SDValue NR0 = DAG.getNode(Opcode: LoongArchISD::BSTRPICK, DL, VT: ValTy, N1: NewOperand,
5317 N2: DAG.getConstant(Val: msb, DL, VT: GRLenVT),
5318 N3: DAG.getConstant(Val: lsb, DL, VT: GRLenVT));
5319 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
5320 return NR0;
5321 // Try to optimize to
5322 // bstrpick $Rd, $Rs, msb, lsb
5323 // slli $Rd, $Rd, lsb
5324 return DAG.getNode(Opcode: ISD::SHL, DL, VT: ValTy, N1: NR0,
5325 N2: DAG.getConstant(Val: lsb, DL, VT: GRLenVT));
5326}
5327
5328static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
5329 TargetLowering::DAGCombinerInfo &DCI,
5330 const LoongArchSubtarget &Subtarget) {
5331 // BSTRPICK requires the 32S feature.
5332 if (!Subtarget.has32S())
5333 return SDValue();
5334
5335 if (DCI.isBeforeLegalizeOps())
5336 return SDValue();
5337
5338 // $dst = srl (and $src, Mask), Shamt
5339 // =>
5340 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
5341 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
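  // For example, (srl (and $src, 0xff0), 6) becomes BSTRPICK $dst, $src, 11, 6.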
5342 //
5343
5344 SDValue FirstOperand = N->getOperand(Num: 0);
5345 ConstantSDNode *CN;
5346 EVT ValTy = N->getValueType(ResNo: 0);
5347 SDLoc DL(N);
5348 MVT GRLenVT = Subtarget.getGRLenVT();
5349 unsigned MaskIdx, MaskLen;
5350 uint64_t Shamt;
5351
5352 // The first operand must be an AND and the second operand of the AND must be
5353 // a shifted mask.
5354 if (FirstOperand.getOpcode() != ISD::AND ||
5355 !(CN = dyn_cast<ConstantSDNode>(Val: FirstOperand.getOperand(i: 1))) ||
5356 !isShiftedMask_64(Value: CN->getZExtValue(), MaskIdx, MaskLen))
5357 return SDValue();
5358
5359 // The second operand (shift amount) must be an immediate.
5360 if (!(CN = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1))))
5361 return SDValue();
5362
5363 Shamt = CN->getZExtValue();
5364 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
5365 return DAG.getNode(Opcode: LoongArchISD::BSTRPICK, DL, VT: ValTy,
5366 N1: FirstOperand->getOperand(Num: 0),
5367 N2: DAG.getConstant(Val: MaskIdx + MaskLen - 1, DL, VT: GRLenVT),
5368 N3: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT));
5369
5370 return SDValue();
5371}
5372
5373// Helper to peek through bitops/trunc/setcc to determine size of source vector.
5374// Allows BITCASTCombine to determine what size vector generated a <X x i1>.
5375static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
5376 unsigned Depth) {
5377 // Limit recursion.
5378 if (Depth >= SelectionDAG::MaxRecursionDepth)
5379 return false;
5380 switch (Src.getOpcode()) {
5381 case ISD::SETCC:
5382 case ISD::TRUNCATE:
5383 return Src.getOperand(i: 0).getValueSizeInBits() == Size;
5384 case ISD::FREEZE:
5385 return checkBitcastSrcVectorSize(Src: Src.getOperand(i: 0), Size, Depth: Depth + 1);
5386 case ISD::AND:
5387 case ISD::XOR:
5388 case ISD::OR:
5389 return checkBitcastSrcVectorSize(Src: Src.getOperand(i: 0), Size, Depth: Depth + 1) &&
5390 checkBitcastSrcVectorSize(Src: Src.getOperand(i: 1), Size, Depth: Depth + 1);
5391 case ISD::SELECT:
5392 case ISD::VSELECT:
5393 return Src.getOperand(i: 0).getScalarValueSizeInBits() == 1 &&
5394 checkBitcastSrcVectorSize(Src: Src.getOperand(i: 1), Size, Depth: Depth + 1) &&
5395 checkBitcastSrcVectorSize(Src: Src.getOperand(i: 2), Size, Depth: Depth + 1);
5396 case ISD::BUILD_VECTOR:
5397 return ISD::isBuildVectorAllZeros(N: Src.getNode()) ||
5398 ISD::isBuildVectorAllOnes(N: Src.getNode());
5399 }
5400 return false;
5401}
5402
5403// Helper to push sign extension of vXi1 SETCC result through bitops.
5404static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT,
5405 SDValue Src, const SDLoc &DL) {
5406 switch (Src.getOpcode()) {
5407 case ISD::SETCC:
5408 case ISD::FREEZE:
5409 case ISD::TRUNCATE:
5410 case ISD::BUILD_VECTOR:
5411 return DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: SExtVT, Operand: Src);
5412 case ISD::AND:
5413 case ISD::XOR:
5414 case ISD::OR:
5415 return DAG.getNode(
5416 Opcode: Src.getOpcode(), DL, VT: SExtVT,
5417 N1: signExtendBitcastSrcVector(DAG, SExtVT, Src: Src.getOperand(i: 0), DL),
5418 N2: signExtendBitcastSrcVector(DAG, SExtVT, Src: Src.getOperand(i: 1), DL));
5419 case ISD::SELECT:
5420 case ISD::VSELECT:
5421 return DAG.getSelect(
5422 DL, VT: SExtVT, Cond: Src.getOperand(i: 0),
5423 LHS: signExtendBitcastSrcVector(DAG, SExtVT, Src: Src.getOperand(i: 1), DL),
5424 RHS: signExtendBitcastSrcVector(DAG, SExtVT, Src: Src.getOperand(i: 2), DL));
5425 }
5426 llvm_unreachable("Unexpected node type for vXi1 sign extension");
5427}
5428
5429static SDValue
5430performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG,
5431 TargetLowering::DAGCombinerInfo &DCI,
5432 const LoongArchSubtarget &Subtarget) {
5433 SDLoc DL(N);
5434 EVT VT = N->getValueType(ResNo: 0);
5435 SDValue Src = N->getOperand(Num: 0);
5436 EVT SrcVT = Src.getValueType();
5437
5438 if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
5439 return SDValue();
5440
5441 bool UseLASX;
5442 unsigned Opc = ISD::DELETED_NODE;
5443 EVT CmpVT = Src.getOperand(i: 0).getValueType();
5444 EVT EltVT = CmpVT.getVectorElementType();
5445
5446 if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
5447 UseLASX = false;
5448 else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
5449 CmpVT.getSizeInBits() == 256)
5450 UseLASX = true;
5451 else
5452 return SDValue();
5453
5454 SDValue SrcN1 = Src.getOperand(i: 1);
5455 switch (cast<CondCodeSDNode>(Val: Src.getOperand(i: 2))->get()) {
5456 default:
5457 break;
5458 case ISD::SETEQ:
5459 // x == 0 => not (vmsknez.b x)
5460 if (ISD::isBuildVectorAllZeros(N: SrcN1.getNode()) && EltVT == MVT::i8)
5461 Opc = UseLASX ? LoongArchISD::XVMSKEQZ : LoongArchISD::VMSKEQZ;
5462 break;
5463 case ISD::SETGT:
5464 // x > -1 => vmskgez.b x
5465 if (ISD::isBuildVectorAllOnes(N: SrcN1.getNode()) && EltVT == MVT::i8)
5466 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
5467 break;
5468 case ISD::SETGE:
5469 // x >= 0 => vmskgez.b x
5470 if (ISD::isBuildVectorAllZeros(N: SrcN1.getNode()) && EltVT == MVT::i8)
5471 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
5472 break;
5473 case ISD::SETLT:
5474 // x < 0 => vmskltz.{b,h,w,d} x
5475 if (ISD::isBuildVectorAllZeros(N: SrcN1.getNode()) &&
5476 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
5477 EltVT == MVT::i64))
5478 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5479 break;
5480 case ISD::SETLE:
5481 // x <= -1 => vmskltz.{b,h,w,d} x
5482 if (ISD::isBuildVectorAllOnes(N: SrcN1.getNode()) &&
5483 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
5484 EltVT == MVT::i64))
5485 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5486 break;
5487 case ISD::SETNE:
5488 // x != 0 => vmsknez.b x
5489 if (ISD::isBuildVectorAllZeros(N: SrcN1.getNode()) && EltVT == MVT::i8)
5490 Opc = UseLASX ? LoongArchISD::XVMSKNEZ : LoongArchISD::VMSKNEZ;
5491 break;
5492 }
5493
5494 if (Opc == ISD::DELETED_NODE)
5495 return SDValue();
5496
5497 SDValue V = DAG.getNode(Opcode: Opc, DL, VT: Subtarget.getGRLenVT(), Operand: Src.getOperand(i: 0));
5498 EVT T = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: SrcVT.getVectorNumElements());
5499 V = DAG.getZExtOrTrunc(Op: V, DL, VT: T);
5500 return DAG.getBitcast(VT, V);
5501}
5502
5503static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
5504 TargetLowering::DAGCombinerInfo &DCI,
5505 const LoongArchSubtarget &Subtarget) {
5506 SDLoc DL(N);
5507 EVT VT = N->getValueType(ResNo: 0);
5508 SDValue Src = N->getOperand(Num: 0);
5509 EVT SrcVT = Src.getValueType();
5510 MVT GRLenVT = Subtarget.getGRLenVT();
5511
5512 if (!DCI.isBeforeLegalizeOps())
5513 return SDValue();
5514
5515 if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
5516 return SDValue();
5517
5518 // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
5519 SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
5520 if (Res)
5521 return Res;
5522
5523 // Generate vXi1 using [X]VMSKLTZ
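  // For example, (i16 (bitcast (v16i1 X))) is lowered by sign-extending X to
  // v16i8 and using VMSKLTZ to collect the per-element sign bits into the low
  // 16 bits of a GRLen-wide value, which is then truncated back to i16.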
5524 MVT SExtVT;
5525 unsigned Opc;
5526 bool UseLASX = false;
5527 bool PropagateSExt = false;
5528
5529 if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
5530 EVT CmpVT = Src.getOperand(i: 0).getValueType();
5531 if (CmpVT.getSizeInBits() > 256)
5532 return SDValue();
5533 }
5534
5535 switch (SrcVT.getSimpleVT().SimpleTy) {
5536 default:
5537 return SDValue();
5538 case MVT::v2i1:
5539 SExtVT = MVT::v2i64;
5540 break;
5541 case MVT::v4i1:
5542 SExtVT = MVT::v4i32;
5543 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, Size: 256, Depth: 0)) {
5544 SExtVT = MVT::v4i64;
5545 UseLASX = true;
5546 PropagateSExt = true;
5547 }
5548 break;
5549 case MVT::v8i1:
5550 SExtVT = MVT::v8i16;
5551 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, Size: 256, Depth: 0)) {
5552 SExtVT = MVT::v8i32;
5553 UseLASX = true;
5554 PropagateSExt = true;
5555 }
5556 break;
5557 case MVT::v16i1:
5558 SExtVT = MVT::v16i8;
5559 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, Size: 256, Depth: 0)) {
5560 SExtVT = MVT::v16i16;
5561 UseLASX = true;
5562 PropagateSExt = true;
5563 }
5564 break;
5565 case MVT::v32i1:
5566 SExtVT = MVT::v32i8;
5567 UseLASX = true;
5568 break;
5569 }
5570 Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
5571 : DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: SExtVT, Operand: Src);
5572
5573 SDValue V;
5574 if (!Subtarget.has32S() || !Subtarget.hasExtLASX()) {
5575 if (Src.getSimpleValueType() == MVT::v32i8) {
5576 SDValue Lo, Hi;
5577 std::tie(args&: Lo, args&: Hi) = DAG.SplitVector(N: Src, DL);
5578 Lo = DAG.getNode(Opcode: LoongArchISD::VMSKLTZ, DL, VT: GRLenVT, Operand: Lo);
5579 Hi = DAG.getNode(Opcode: LoongArchISD::VMSKLTZ, DL, VT: GRLenVT, Operand: Hi);
5580 Hi = DAG.getNode(Opcode: ISD::SHL, DL, VT: GRLenVT, N1: Hi,
5581 N2: DAG.getShiftAmountConstant(Val: 16, VT: GRLenVT, DL));
5582 V = DAG.getNode(Opcode: ISD::OR, DL, VT: GRLenVT, N1: Lo, N2: Hi);
5583 } else if (UseLASX) {
5584 return SDValue();
5585 }
5586 }
5587
5588 if (!V) {
5589 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5590 V = DAG.getNode(Opcode: Opc, DL, VT: GRLenVT, Operand: Src);
5591 }
5592
5593 EVT T = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: SrcVT.getVectorNumElements());
5594 V = DAG.getZExtOrTrunc(Op: V, DL, VT: T);
5595 return DAG.getBitcast(VT, V);
5596}
5597
5598static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
5599 TargetLowering::DAGCombinerInfo &DCI,
5600 const LoongArchSubtarget &Subtarget) {
5601 MVT GRLenVT = Subtarget.getGRLenVT();
5602 EVT ValTy = N->getValueType(ResNo: 0);
5603 SDValue N0 = N->getOperand(Num: 0), N1 = N->getOperand(Num: 1);
5604 ConstantSDNode *CN0, *CN1;
5605 SDLoc DL(N);
5606 unsigned ValBits = ValTy.getSizeInBits();
5607 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
5608 unsigned Shamt;
5609 bool SwapAndRetried = false;
5610
5611 // BSTRINS requires the 32S feature.
5612 if (!Subtarget.has32S())
5613 return SDValue();
5614
5615 if (DCI.isBeforeLegalizeOps())
5616 return SDValue();
5617
5618 if (ValBits != 32 && ValBits != 64)
5619 return SDValue();
5620
5621Retry:
5622 // 1st pattern to match BSTRINS:
5623 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
5624 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
5625 // =>
5626 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
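  // As a concrete illustration of this rewrite (the constants below are
  // example values chosen here, not taken from any particular input): with
  // size = 8 and lsb = 8 on a 64-bit value,
  //   R = or (and X, 0xffffffffffff00ff), (and (shl Y, 8), 0xff00)
  //   => R = BSTRINS X, Y, 15, 8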
5627 if (N0.getOpcode() == ISD::AND &&
5628 (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) &&
5629 isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
5630 N1.getOpcode() == ISD::AND && N1.getOperand(i: 0).getOpcode() == ISD::SHL &&
5631 (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
5632 isShiftedMask_64(Value: CN1->getZExtValue(), MaskIdx&: MaskIdx1, MaskLen&: MaskLen1) &&
5633 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
5634 (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) &&
5635 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5636 (MaskIdx0 + MaskLen0 <= ValBits)) {
5637 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
5638 return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0),
5639 N2: N1.getOperand(i: 0).getOperand(i: 0),
5640 N3: DAG.getConstant(Val: (MaskIdx0 + MaskLen0 - 1), DL, VT: GRLenVT),
5641 N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT));
5642 }
5643
5644 // 2nd pattern to match BSTRINS:
5645 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
5646 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
5647 // =>
5648 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
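  // For example (illustrative values chosen here): with size = 8 and lsb = 8
  // on a 64-bit value,
  //   R = or (and X, 0xffffffffffff00ff), (shl (and Y, 0xff), 8)
  //   => R = BSTRINS X, Y, 15, 8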
5649 if (N0.getOpcode() == ISD::AND &&
5650 (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) &&
5651 isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
5652 N1.getOpcode() == ISD::SHL && N1.getOperand(i: 0).getOpcode() == ISD::AND &&
5653 (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
5654 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5655 (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) &&
5656 isShiftedMask_64(Value: CN1->getZExtValue(), MaskIdx&: MaskIdx1, MaskLen&: MaskLen1) &&
5657 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
5658 (MaskIdx0 + MaskLen0 <= ValBits)) {
5659 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
5660 return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0),
5661 N2: N1.getOperand(i: 0).getOperand(i: 0),
5662 N3: DAG.getConstant(Val: (MaskIdx0 + MaskLen0 - 1), DL, VT: GRLenVT),
5663 N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT));
5664 }
5665
5666 // 3rd pattern to match BSTRINS:
5667 // R = or (and X, mask0), (and Y, mask1)
5668 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
5669 // =>
5670 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
5671 // where msb = lsb + size - 1
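  // For example (illustrative values chosen here): with size = 8 and lsb = 8,
  //   R = or (and X, 0xffffffffffff00ff), (and Y, 0xff00)
  //   => R = BSTRINS X, (shr (and Y, 0xff00), 8), 15, 8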
5672 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5673 (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) &&
5674 isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
5675 (MaskIdx0 + MaskLen0 <= 64) &&
5676 (CN1 = dyn_cast<ConstantSDNode>(Val: N1->getOperand(Num: 1))) &&
5677 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5678 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
5679 return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0),
5680 N2: DAG.getNode(Opcode: ISD::SRL, DL, VT: N1->getValueType(ResNo: 0), N1,
5681 N2: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT)),
5682 N3: DAG.getConstant(Val: ValBits == 32
5683 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5684 : (MaskIdx0 + MaskLen0 - 1),
5685 DL, VT: GRLenVT),
5686 N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT));
5687 }
5688
5689 // 4th pattern to match BSTRINS:
5690 // R = or (and X, mask), (shl Y, shamt)
5691 // where mask = (2**shamt - 1)
5692 // =>
5693 // R = BSTRINS X, Y, ValBits - 1, shamt
5694 // where ValBits = 32 or 64
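  // For example (illustrative values chosen here): with shamt = 16 and
  // ValBits = 64,
  //   R = or (and X, 0xffff), (shl Y, 16)
  //   => R = BSTRINS X, Y, 63, 16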
5695 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
5696 (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) &&
5697 isShiftedMask_64(Value: CN0->getZExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
5698 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
5699 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
5700 (MaskIdx0 + MaskLen0 <= ValBits)) {
5701 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
5702 return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0),
5703 N2: N1.getOperand(i: 0),
5704 N3: DAG.getConstant(Val: (ValBits - 1), DL, VT: GRLenVT),
5705 N4: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT));
5706 }
5707
5708 // 5th pattern to match BSTRINS:
5709 // R = or (and X, mask), const
5710 // where ~mask = (2**size - 1) << lsb, mask & const = 0
5711 // =>
5712 // R = BSTRINS X, (const >> lsb), msb, lsb
5713 // where msb = lsb + size - 1
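  // For example (illustrative values chosen here): with size = 8 and lsb = 8,
  //   R = or (and X, 0xffffffffffff00ff), 0x1200
  //   => R = BSTRINS X, 0x12, 15, 8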
5714 if (N0.getOpcode() == ISD::AND &&
5715 (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) &&
5716 isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
5717 (CN1 = dyn_cast<ConstantSDNode>(Val&: N1)) &&
5718 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5719 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
5720 return DAG.getNode(
5721 Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0),
5722 N2: DAG.getSignedConstant(Val: CN1->getSExtValue() >> MaskIdx0, DL, VT: ValTy),
5723 N3: DAG.getConstant(Val: ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5724 : (MaskIdx0 + MaskLen0 - 1),
5725 DL, VT: GRLenVT),
5726 N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT));
5727 }
5728
5729 // 6th pattern.
5730 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
5731 // by the incoming bits are known to be zero.
5732 // =>
5733 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
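  // A concrete instance (illustrative values chosen here): with mask = 0xff
  // (MaskLen = 8) and shamt = 4,
  //   a = b | ((c & 0xff) << 4), where bits 4..11 of b are known to be zero
  //   => a = BSTRINS b, c, 11, 4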
5734 //
5735 // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
5736 // pattern is more common than the 1st. So we put the 1st before the 6th in
5737 // order to match as many nodes as possible.
5738 ConstantSDNode *CNMask, *CNShamt;
5739 unsigned MaskIdx, MaskLen;
5740 if (N1.getOpcode() == ISD::SHL && N1.getOperand(i: 0).getOpcode() == ISD::AND &&
5741 (CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) &&
5742 isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5743 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
5744 CNShamt->getZExtValue() + MaskLen <= ValBits) {
5745 Shamt = CNShamt->getZExtValue();
5746 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
5747 if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) {
5748 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
5749 return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0,
5750 N2: N1.getOperand(i: 0).getOperand(i: 0),
5751 N3: DAG.getConstant(Val: Shamt + MaskLen - 1, DL, VT: GRLenVT),
5752 N4: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT));
5753 }
5754 }
5755
5756 // 7th pattern.
5757 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
5758 // overwritten by the incoming bits are known to be zero.
5759 // =>
5760 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
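  // A concrete instance (illustrative values chosen here): with
  // shifted_mask = 0xff0 (MaskIdx = 4, MaskLen = 8) and shamt = 4,
  //   a = b | ((c << 4) & 0xff0), where bits 4..11 of b are known to be zero
  //   => a = BSTRINS b, c, 11, 4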
5761 //
5762 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
5763 // before the 7th in order to match as many nodes as possible.
5764 if (N1.getOpcode() == ISD::AND &&
5765 (CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
5766 isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5767 N1.getOperand(i: 0).getOpcode() == ISD::SHL &&
5768 (CNShamt = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) &&
5769 CNShamt->getZExtValue() == MaskIdx) {
5770 APInt ShMask(ValBits, CNMask->getZExtValue());
5771 if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) {
5772 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
5773 return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0,
5774 N2: N1.getOperand(i: 0).getOperand(i: 0),
5775 N3: DAG.getConstant(Val: MaskIdx + MaskLen - 1, DL, VT: GRLenVT),
5776 N4: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT));
5777 }
5778 }
5779
5780 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
5781 if (!SwapAndRetried) {
5782 std::swap(a&: N0, b&: N1);
5783 SwapAndRetried = true;
5784 goto Retry;
5785 }
5786
5787 SwapAndRetried = false;
5788Retry2:
5789 // 8th pattern.
5790 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
5791 // the incoming bits are known to be zero.
5792 // =>
5793 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
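  // A concrete instance (illustrative values chosen here): with
  // shifted_mask = 0xff0 (MaskIdx = 4, MaskLen = 8),
  //   a = b | (c & 0xff0), where bits 4..11 of b are known to be zero
  //   => a = BSTRINS b, (c >> 4), 11, 4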
5794 //
5795 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
5796 // we put it here in order to match as many nodes as possible or generate
5797 // fewer instructions.
5798 if (N1.getOpcode() == ISD::AND &&
5799 (CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
5800 isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen)) {
5801 APInt ShMask(ValBits, CNMask->getZExtValue());
5802 if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) {
5803 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
5804 return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0,
5805 N2: DAG.getNode(Opcode: ISD::SRL, DL, VT: N1->getValueType(ResNo: 0),
5806 N1: N1->getOperand(Num: 0),
5807 N2: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT)),
5808 N3: DAG.getConstant(Val: MaskIdx + MaskLen - 1, DL, VT: GRLenVT),
5809 N4: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT));
5810 }
5811 }
5812 // Swap N0/N1 and retry.
5813 if (!SwapAndRetried) {
5814 std::swap(a&: N0, b&: N1);
5815 SwapAndRetried = true;
5816 goto Retry2;
5817 }
5818
5819 return SDValue();
5820}
5821
5822static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
5823 ExtType = ISD::NON_EXTLOAD;
5824
5825 switch (V.getNode()->getOpcode()) {
5826 case ISD::LOAD: {
5827 LoadSDNode *LoadNode = cast<LoadSDNode>(Val: V.getNode());
5828 if ((LoadNode->getMemoryVT() == MVT::i8) ||
5829 (LoadNode->getMemoryVT() == MVT::i16)) {
5830 ExtType = LoadNode->getExtensionType();
5831 return true;
5832 }
5833 return false;
5834 }
5835 case ISD::AssertSext: {
5836 VTSDNode *TypeNode = cast<VTSDNode>(Val: V.getNode()->getOperand(Num: 1));
5837 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5838 ExtType = ISD::SEXTLOAD;
5839 return true;
5840 }
5841 return false;
5842 }
5843 case ISD::AssertZext: {
5844 VTSDNode *TypeNode = cast<VTSDNode>(Val: V.getNode()->getOperand(Num: 1));
5845 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5846 ExtType = ISD::ZEXTLOAD;
5847 return true;
5848 }
5849 return false;
5850 }
5851 default:
5852 return false;
5853 }
5854
5855 return false;
5856}
5857
5858// Eliminate redundant truncation and zero-extension nodes.
5859// * Case 1:
5860// +------------+ +------------+ +------------+
5861// | Input1 | | Input2 | | CC |
5862// +------------+ +------------+ +------------+
5863// | | |
5864// V V +----+
5865// +------------+ +------------+ |
5866// | TRUNCATE | | TRUNCATE | |
5867// +------------+ +------------+ |
5868// | | |
5869// V V |
5870// +------------+ +------------+ |
5871// | ZERO_EXT | | ZERO_EXT | |
5872// +------------+ +------------+ |
5873// | | |
5874// | +-------------+ |
5875// V V | |
5876// +----------------+ | |
5877// | AND | | |
5878// +----------------+ | |
5879// | | |
5880// +---------------+ | |
5881// | | |
5882// V V V
5883// +-------------+
5884// | CMP |
5885// +-------------+
5886// * Case 2:
5887// +------------+ +------------+ +-------------+ +------------+ +------------+
5888// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
5889// +------------+ +------------+ +-------------+ +------------+ +------------+
5890// | | | | |
5891// V | | | |
5892// +------------+ | | | |
5893// | XOR |<---------------------+ | |
5894// +------------+ | | |
5895// | | | |
5896// V V +---------------+ |
5897// +------------+ +------------+ | |
5898// | TRUNCATE | | TRUNCATE | | +-------------------------+
5899// +------------+ +------------+ | |
5900// | | | |
5901// V V | |
5902// +------------+ +------------+ | |
5903// | ZERO_EXT | | ZERO_EXT | | |
5904// +------------+ +------------+ | |
5905// | | | |
5906// V V | |
5907// +----------------+ | |
5908// | AND | | |
5909// +----------------+ | |
5910// | | |
5911// +---------------+ | |
5912// | | |
5913// V V V
5914// +-------------+
5915// | CMP |
5916// +-------------+
5917static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
5918 TargetLowering::DAGCombinerInfo &DCI,
5919 const LoongArchSubtarget &Subtarget) {
5920 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get();
5921
5922 SDNode *AndNode = N->getOperand(Num: 0).getNode();
5923 if (AndNode->getOpcode() != ISD::AND)
5924 return SDValue();
5925
5926 SDValue AndInputValue2 = AndNode->getOperand(Num: 1);
5927 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
5928 return SDValue();
5929
5930 SDValue CmpInputValue = N->getOperand(Num: 1);
5931 SDValue AndInputValue1 = AndNode->getOperand(Num: 0);
5932 if (AndInputValue1.getOpcode() == ISD::XOR) {
5933 if (CC != ISD::SETEQ && CC != ISD::SETNE)
5934 return SDValue();
5935 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val: AndInputValue1.getOperand(i: 1));
5936 if (!CN || !CN->isAllOnes())
5937 return SDValue();
5938 CN = dyn_cast<ConstantSDNode>(Val&: CmpInputValue);
5939 if (!CN || !CN->isZero())
5940 return SDValue();
5941 AndInputValue1 = AndInputValue1.getOperand(i: 0);
5942 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
5943 return SDValue();
5944 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
5945 if (AndInputValue2 != CmpInputValue)
5946 return SDValue();
5947 } else {
5948 return SDValue();
5949 }
5950
5951 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(Num: 0);
5952 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
5953 return SDValue();
5954
5955 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(Num: 0);
5956 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
5957 return SDValue();
5958
5959 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(Num: 0);
5960 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(Num: 0);
5961 ISD::LoadExtType ExtType1;
5962 ISD::LoadExtType ExtType2;
5963
5964 if (!checkValueWidth(V: TruncInputValue1, ExtType&: ExtType1) ||
5965 !checkValueWidth(V: TruncInputValue2, ExtType&: ExtType2))
5966 return SDValue();
5967
5968 if (TruncInputValue1->getValueType(ResNo: 0) != TruncInputValue2->getValueType(ResNo: 0) ||
5969 AndNode->getValueType(ResNo: 0) != TruncInputValue1->getValueType(ResNo: 0))
5970 return SDValue();
5971
5972 if ((ExtType2 != ISD::ZEXTLOAD) &&
5973 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
5974 return SDValue();
5975
5976 // These truncation and zero-extension nodes are unnecessary; remove them.
5977 SDValue NewAnd = DAG.getNode(Opcode: ISD::AND, DL: SDLoc(N), VT: AndNode->getValueType(ResNo: 0),
5978 N1: TruncInputValue1, N2: TruncInputValue2);
5979 SDValue NewSetCC =
5980 DAG.getSetCC(DL: SDLoc(N), VT: N->getValueType(ResNo: 0), LHS: NewAnd, RHS: TruncInputValue2, Cond: CC);
5981 DAG.ReplaceAllUsesWith(From: N, To: NewSetCC.getNode());
5982 return SDValue(N, 0);
5983}
5984
5985// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
5986static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
5987 TargetLowering::DAGCombinerInfo &DCI,
5988 const LoongArchSubtarget &Subtarget) {
5989 if (DCI.isBeforeLegalizeOps())
5990 return SDValue();
5991
5992 SDValue Src = N->getOperand(Num: 0);
5993 if (Src.getOpcode() != LoongArchISD::REVB_2W)
5994 return SDValue();
5995
5996 return DAG.getNode(Opcode: LoongArchISD::BITREV_4B, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
5997 Operand: Src.getOperand(i: 0));
5998}
5999
6000// Perform common combines for BR_CC and SELECT_CC conditions.
6001static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
6002 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
6003 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val&: CC)->get();
6004
6005 // Since an arithmetic right shift always preserves the sign bit,
6006 // the shift can be omitted.
6007 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
6008 // setge (sra X, N), 0 -> setge X, 0
6009 if (isNullConstant(V: RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
6010 LHS.getOpcode() == ISD::SRA) {
6011 LHS = LHS.getOperand(i: 0);
6012 return true;
6013 }
6014
6015 if (!ISD::isIntEqualitySetCC(Code: CCVal))
6016 return false;
6017
6018 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
6019 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
6020 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(V: RHS) &&
6021 LHS.getOperand(i: 0).getValueType() == Subtarget.getGRLenVT()) {
6022 // If we're looking for eq 0 instead of ne 0, we need to invert the
6023 // condition.
6024 bool Invert = CCVal == ISD::SETEQ;
6025 CCVal = cast<CondCodeSDNode>(Val: LHS.getOperand(i: 2))->get();
6026 if (Invert)
6027 CCVal = ISD::getSetCCInverse(Operation: CCVal, Type: LHS.getValueType());
6028
6029 RHS = LHS.getOperand(i: 1);
6030 LHS = LHS.getOperand(i: 0);
6031 translateSetCCForBranch(DL, LHS, RHS, CC&: CCVal, DAG);
6032
6033 CC = DAG.getCondCode(Cond: CCVal);
6034 return true;
6035 }
6036
6037 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, GRLen-1-C), 0, ge/lt)
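  // A worked instance (illustrative values chosen here, assuming GRLen = 64
  // and C = 3):
  //   ((srl (and X, 8), 3), 0, ne) -> ((shl X, 60), 0, lt)
  // Shifting bit 3 of X into the sign position lets the comparison test it
  // directly against zero.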
6038 if (isNullConstant(V: RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
6039 LHS.getOperand(i: 1).getOpcode() == ISD::Constant) {
6040 SDValue LHS0 = LHS.getOperand(i: 0);
6041 if (LHS0.getOpcode() == ISD::AND &&
6042 LHS0.getOperand(i: 1).getOpcode() == ISD::Constant) {
6043 uint64_t Mask = LHS0.getConstantOperandVal(i: 1);
6044 uint64_t ShAmt = LHS.getConstantOperandVal(i: 1);
6045 if (isPowerOf2_64(Value: Mask) && Log2_64(Value: Mask) == ShAmt) {
6046 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
6047 CC = DAG.getCondCode(Cond: CCVal);
6048
6049 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
6050 LHS = LHS0.getOperand(i: 0);
6051 if (ShAmt != 0)
6052 LHS =
6053 DAG.getNode(Opcode: ISD::SHL, DL, VT: LHS.getValueType(), N1: LHS0.getOperand(i: 0),
6054 N2: DAG.getConstant(Val: ShAmt, DL, VT: LHS.getValueType()));
6055 return true;
6056 }
6057 }
6058 }
6059
6060 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
6061 // This can occur when legalizing some floating point comparisons.
6062 APInt Mask = APInt::getBitsSetFrom(numBits: LHS.getValueSizeInBits(), loBit: 1);
6063 if (isOneConstant(V: RHS) && DAG.MaskedValueIsZero(Op: LHS, Mask)) {
6064 CCVal = ISD::getSetCCInverse(Operation: CCVal, Type: LHS.getValueType());
6065 CC = DAG.getCondCode(Cond: CCVal);
6066 RHS = DAG.getConstant(Val: 0, DL, VT: LHS.getValueType());
6067 return true;
6068 }
6069
6070 return false;
6071}
6072
6073static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG,
6074 TargetLowering::DAGCombinerInfo &DCI,
6075 const LoongArchSubtarget &Subtarget) {
6076 SDValue LHS = N->getOperand(Num: 1);
6077 SDValue RHS = N->getOperand(Num: 2);
6078 SDValue CC = N->getOperand(Num: 3);
6079 SDLoc DL(N);
6080
6081 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
6082 return DAG.getNode(Opcode: LoongArchISD::BR_CC, DL, VT: N->getValueType(ResNo: 0),
6083 N1: N->getOperand(Num: 0), N2: LHS, N3: RHS, N4: CC, N5: N->getOperand(Num: 4));
6084
6085 return SDValue();
6086}
6087
6088static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
6089 TargetLowering::DAGCombinerInfo &DCI,
6090 const LoongArchSubtarget &Subtarget) {
6091 // Transform SELECT_CC: fold trivial and constant selects, then simplify the condition.
6092 SDValue LHS = N->getOperand(Num: 0);
6093 SDValue RHS = N->getOperand(Num: 1);
6094 SDValue CC = N->getOperand(Num: 2);
6095 ISD::CondCode CCVal = cast<CondCodeSDNode>(Val&: CC)->get();
6096 SDValue TrueV = N->getOperand(Num: 3);
6097 SDValue FalseV = N->getOperand(Num: 4);
6098 SDLoc DL(N);
6099 EVT VT = N->getValueType(ResNo: 0);
6100
6101 // If the True and False values are the same, we don't need a select_cc.
6102 if (TrueV == FalseV)
6103 return TrueV;
6104
6105 // (select (x < 0), y, z) -> x >> (GRLEN - 1) & (y - z) + z
6106 // (select (x >= 0), y, z) -> x >> (GRLEN - 1) & (z - y) + y
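  // A worked instance (illustrative values chosen here, assuming GRLen = 64):
  //   (select (x < 0), 3, 7) -> ((x >> 63) & (3 - 7)) + 7
  // When x < 0 the arithmetic shift yields all ones, so the result is
  // -4 + 7 = 3; otherwise the shift yields 0 and the result is 7.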
6107 if (isa<ConstantSDNode>(Val: TrueV) && isa<ConstantSDNode>(Val: FalseV) &&
6108 isNullConstant(V: RHS) &&
6109 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
6110 if (CCVal == ISD::CondCode::SETGE)
6111 std::swap(a&: TrueV, b&: FalseV);
6112
6113 int64_t TrueSImm = cast<ConstantSDNode>(Val&: TrueV)->getSExtValue();
6114 int64_t FalseSImm = cast<ConstantSDNode>(Val&: FalseV)->getSExtValue();
6115 // Only handle simm12; constants outside this range would have to be
6116 // materialized into a register anyway.
6117 if (isInt<12>(x: TrueSImm) && isInt<12>(x: FalseSImm) &&
6118 isInt<12>(x: TrueSImm - FalseSImm)) {
6119 SDValue SRA =
6120 DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: LHS,
6121 N2: DAG.getConstant(Val: Subtarget.getGRLen() - 1, DL, VT));
6122 SDValue AND =
6123 DAG.getNode(Opcode: ISD::AND, DL, VT, N1: SRA,
6124 N2: DAG.getSignedConstant(Val: TrueSImm - FalseSImm, DL, VT));
6125 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: AND, N2: FalseV);
6126 }
6127
6128 if (CCVal == ISD::CondCode::SETGE)
6129 std::swap(a&: TrueV, b&: FalseV);
6130 }
6131
6132 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
6133 return DAG.getNode(Opcode: LoongArchISD::SELECT_CC, DL, VT: N->getValueType(ResNo: 0),
6134 Ops: {LHS, RHS, CC, TrueV, FalseV});
6135
6136 return SDValue();
6137}
6138
6139template <unsigned N>
6140static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
6141 SelectionDAG &DAG,
6142 const LoongArchSubtarget &Subtarget,
6143 bool IsSigned = false) {
6144 SDLoc DL(Node);
6145 auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: ImmOp));
6146 // Check the ImmArg.
6147 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
6148 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
6149 DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) +
6150 ": argument out of range.");
6151 return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: Subtarget.getGRLenVT());
6152 }
6153 return DAG.getConstant(Val: CImm->getZExtValue(), DL, VT: Subtarget.getGRLenVT());
6154}
6155
6156template <unsigned N>
6157static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
6158 SelectionDAG &DAG, bool IsSigned = false) {
6159 SDLoc DL(Node);
6160 EVT ResTy = Node->getValueType(ResNo: 0);
6161 auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: ImmOp));
6162
6163 // Check the ImmArg.
6164 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
6165 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
6166 DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) +
6167 ": argument out of range.");
6168 return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy);
6169 }
6170 return DAG.getConstant(
6171 Val: APInt(ResTy.getScalarType().getSizeInBits(),
6172 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
6173 DL, VT: ResTy);
6174}
6175
6176static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
6177 SDLoc DL(Node);
6178 EVT ResTy = Node->getValueType(ResNo: 0);
6179 SDValue Vec = Node->getOperand(Num: 2);
6180 SDValue Mask = DAG.getConstant(Val: Vec.getScalarValueSizeInBits() - 1, DL, VT: ResTy);
6181 return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Vec, N2: Mask);
6182}
6183
6184static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
6185 SDLoc DL(Node);
6186 EVT ResTy = Node->getValueType(ResNo: 0);
6187 SDValue One = DAG.getConstant(Val: 1, DL, VT: ResTy);
6188 SDValue Bit =
6189 DAG.getNode(Opcode: ISD::SHL, DL, VT: ResTy, N1: One, N2: truncateVecElts(Node, DAG));
6190
6191 return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Node->getOperand(Num: 1),
6192 N2: DAG.getNOT(DL, Val: Bit, VT: ResTy));
6193}
6194
6195template <unsigned N>
6196static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
6197 SDLoc DL(Node);
6198 EVT ResTy = Node->getValueType(ResNo: 0);
6199 auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2));
6200 // Check the unsigned ImmArg.
6201 if (!isUInt<N>(CImm->getZExtValue())) {
6202 DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) +
6203 ": argument out of range.");
6204 return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy);
6205 }
6206
6207 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6208 SDValue Mask = DAG.getConstant(Val: ~BitImm, DL, VT: ResTy);
6209
6210 return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Node->getOperand(Num: 1), N2: Mask);
6211}
6212
6213template <unsigned N>
6214static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
6215 SDLoc DL(Node);
6216 EVT ResTy = Node->getValueType(ResNo: 0);
6217 auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2));
6218 // Check the unsigned ImmArg.
6219 if (!isUInt<N>(CImm->getZExtValue())) {
6220 DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) +
6221 ": argument out of range.");
6222 return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy);
6223 }
6224
6225 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6226 SDValue BitImm = DAG.getConstant(Val: Imm, DL, VT: ResTy);
6227 return DAG.getNode(Opcode: ISD::OR, DL, VT: ResTy, N1: Node->getOperand(Num: 1), N2: BitImm);
6228}
6229
6230template <unsigned N>
6231static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
6232 SDLoc DL(Node);
6233 EVT ResTy = Node->getValueType(ResNo: 0);
6234 auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2));
6235 // Check the unsigned ImmArg.
6236 if (!isUInt<N>(CImm->getZExtValue())) {
6237 DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) +
6238 ": argument out of range.");
6239 return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy);
6240 }
6241
6242 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6243 SDValue BitImm = DAG.getConstant(Val: Imm, DL, VT: ResTy);
6244 return DAG.getNode(Opcode: ISD::XOR, DL, VT: ResTy, N1: Node->getOperand(Num: 1), N2: BitImm);
6245}
6246
6247template <unsigned W>
6248static SDValue lowerVectorPickVE2GR(SDNode *N, SelectionDAG &DAG,
6249 unsigned ResOp) {
6250 unsigned Imm = N->getConstantOperandVal(Num: 2);
6251 if (!isUInt<W>(Imm)) {
6252 const StringRef ErrorMsg = "argument out of range";
6253 DAG.getContext()->emitError(ErrorStr: N->getOperationName(G: 0) + ": " + ErrorMsg + ".");
6254 return DAG.getUNDEF(VT: N->getValueType(ResNo: 0));
6255 }
6256 SDLoc DL(N);
6257 SDValue Vec = N->getOperand(Num: 1);
6258 SDValue Idx = DAG.getConstant(Val: Imm, DL, VT: MVT::i32);
6259 SDValue EltVT = DAG.getValueType(Vec.getValueType().getVectorElementType());
6260 return DAG.getNode(Opcode: ResOp, DL, VT: N->getValueType(ResNo: 0), N1: Vec, N2: Idx, N3: EltVT);
6261}
6262
6263static SDValue
6264performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
6265 TargetLowering::DAGCombinerInfo &DCI,
6266 const LoongArchSubtarget &Subtarget) {
6267 SDLoc DL(N);
6268 switch (N->getConstantOperandVal(Num: 0)) {
6269 default:
6270 break;
6271 case Intrinsic::loongarch_lsx_vadd_b:
6272 case Intrinsic::loongarch_lsx_vadd_h:
6273 case Intrinsic::loongarch_lsx_vadd_w:
6274 case Intrinsic::loongarch_lsx_vadd_d:
6275 case Intrinsic::loongarch_lasx_xvadd_b:
6276 case Intrinsic::loongarch_lasx_xvadd_h:
6277 case Intrinsic::loongarch_lasx_xvadd_w:
6278 case Intrinsic::loongarch_lasx_xvadd_d:
6279 return DAG.getNode(Opcode: ISD::ADD, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6280 N2: N->getOperand(Num: 2));
6281 case Intrinsic::loongarch_lsx_vaddi_bu:
6282 case Intrinsic::loongarch_lsx_vaddi_hu:
6283 case Intrinsic::loongarch_lsx_vaddi_wu:
6284 case Intrinsic::loongarch_lsx_vaddi_du:
6285 case Intrinsic::loongarch_lasx_xvaddi_bu:
6286 case Intrinsic::loongarch_lasx_xvaddi_hu:
6287 case Intrinsic::loongarch_lasx_xvaddi_wu:
6288 case Intrinsic::loongarch_lasx_xvaddi_du:
6289 return DAG.getNode(Opcode: ISD::ADD, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6290 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
6291 case Intrinsic::loongarch_lsx_vsub_b:
6292 case Intrinsic::loongarch_lsx_vsub_h:
6293 case Intrinsic::loongarch_lsx_vsub_w:
6294 case Intrinsic::loongarch_lsx_vsub_d:
6295 case Intrinsic::loongarch_lasx_xvsub_b:
6296 case Intrinsic::loongarch_lasx_xvsub_h:
6297 case Intrinsic::loongarch_lasx_xvsub_w:
6298 case Intrinsic::loongarch_lasx_xvsub_d:
6299 return DAG.getNode(Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6300 N2: N->getOperand(Num: 2));
6301 case Intrinsic::loongarch_lsx_vsubi_bu:
6302 case Intrinsic::loongarch_lsx_vsubi_hu:
6303 case Intrinsic::loongarch_lsx_vsubi_wu:
6304 case Intrinsic::loongarch_lsx_vsubi_du:
6305 case Intrinsic::loongarch_lasx_xvsubi_bu:
6306 case Intrinsic::loongarch_lasx_xvsubi_hu:
6307 case Intrinsic::loongarch_lasx_xvsubi_wu:
6308 case Intrinsic::loongarch_lasx_xvsubi_du:
6309 return DAG.getNode(Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6310 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
6311 case Intrinsic::loongarch_lsx_vneg_b:
6312 case Intrinsic::loongarch_lsx_vneg_h:
6313 case Intrinsic::loongarch_lsx_vneg_w:
6314 case Intrinsic::loongarch_lsx_vneg_d:
6315 case Intrinsic::loongarch_lasx_xvneg_b:
6316 case Intrinsic::loongarch_lasx_xvneg_h:
6317 case Intrinsic::loongarch_lasx_xvneg_w:
6318 case Intrinsic::loongarch_lasx_xvneg_d:
6319 return DAG.getNode(
6320 Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: 0),
6321 N1: DAG.getConstant(
6322 Val: APInt(N->getValueType(ResNo: 0).getScalarType().getSizeInBits(), 0,
6323 /*isSigned=*/true),
6324 DL: SDLoc(N), VT: N->getValueType(ResNo: 0)),
6325 N2: N->getOperand(Num: 1));
6326 case Intrinsic::loongarch_lsx_vmax_b:
6327 case Intrinsic::loongarch_lsx_vmax_h:
6328 case Intrinsic::loongarch_lsx_vmax_w:
6329 case Intrinsic::loongarch_lsx_vmax_d:
6330 case Intrinsic::loongarch_lasx_xvmax_b:
6331 case Intrinsic::loongarch_lasx_xvmax_h:
6332 case Intrinsic::loongarch_lasx_xvmax_w:
6333 case Intrinsic::loongarch_lasx_xvmax_d:
6334 return DAG.getNode(Opcode: ISD::SMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6335 N2: N->getOperand(Num: 2));
6336 case Intrinsic::loongarch_lsx_vmax_bu:
6337 case Intrinsic::loongarch_lsx_vmax_hu:
6338 case Intrinsic::loongarch_lsx_vmax_wu:
6339 case Intrinsic::loongarch_lsx_vmax_du:
6340 case Intrinsic::loongarch_lasx_xvmax_bu:
6341 case Intrinsic::loongarch_lasx_xvmax_hu:
6342 case Intrinsic::loongarch_lasx_xvmax_wu:
6343 case Intrinsic::loongarch_lasx_xvmax_du:
6344 return DAG.getNode(Opcode: ISD::UMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6345 N2: N->getOperand(Num: 2));
6346 case Intrinsic::loongarch_lsx_vmaxi_b:
6347 case Intrinsic::loongarch_lsx_vmaxi_h:
6348 case Intrinsic::loongarch_lsx_vmaxi_w:
6349 case Intrinsic::loongarch_lsx_vmaxi_d:
6350 case Intrinsic::loongarch_lasx_xvmaxi_b:
6351 case Intrinsic::loongarch_lasx_xvmaxi_h:
6352 case Intrinsic::loongarch_lasx_xvmaxi_w:
6353 case Intrinsic::loongarch_lasx_xvmaxi_d:
6354 return DAG.getNode(Opcode: ISD::SMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6355 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG, /*IsSigned=*/true));
6356 case Intrinsic::loongarch_lsx_vmaxi_bu:
6357 case Intrinsic::loongarch_lsx_vmaxi_hu:
6358 case Intrinsic::loongarch_lsx_vmaxi_wu:
6359 case Intrinsic::loongarch_lsx_vmaxi_du:
6360 case Intrinsic::loongarch_lasx_xvmaxi_bu:
6361 case Intrinsic::loongarch_lasx_xvmaxi_hu:
6362 case Intrinsic::loongarch_lasx_xvmaxi_wu:
6363 case Intrinsic::loongarch_lasx_xvmaxi_du:
6364 return DAG.getNode(Opcode: ISD::UMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6365 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
6366 case Intrinsic::loongarch_lsx_vmin_b:
6367 case Intrinsic::loongarch_lsx_vmin_h:
6368 case Intrinsic::loongarch_lsx_vmin_w:
6369 case Intrinsic::loongarch_lsx_vmin_d:
6370 case Intrinsic::loongarch_lasx_xvmin_b:
6371 case Intrinsic::loongarch_lasx_xvmin_h:
6372 case Intrinsic::loongarch_lasx_xvmin_w:
6373 case Intrinsic::loongarch_lasx_xvmin_d:
6374 return DAG.getNode(Opcode: ISD::SMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6375 N2: N->getOperand(Num: 2));
6376 case Intrinsic::loongarch_lsx_vmin_bu:
6377 case Intrinsic::loongarch_lsx_vmin_hu:
6378 case Intrinsic::loongarch_lsx_vmin_wu:
6379 case Intrinsic::loongarch_lsx_vmin_du:
6380 case Intrinsic::loongarch_lasx_xvmin_bu:
6381 case Intrinsic::loongarch_lasx_xvmin_hu:
6382 case Intrinsic::loongarch_lasx_xvmin_wu:
6383 case Intrinsic::loongarch_lasx_xvmin_du:
6384 return DAG.getNode(Opcode: ISD::UMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6385 N2: N->getOperand(Num: 2));
6386 case Intrinsic::loongarch_lsx_vmini_b:
6387 case Intrinsic::loongarch_lsx_vmini_h:
6388 case Intrinsic::loongarch_lsx_vmini_w:
6389 case Intrinsic::loongarch_lsx_vmini_d:
6390 case Intrinsic::loongarch_lasx_xvmini_b:
6391 case Intrinsic::loongarch_lasx_xvmini_h:
6392 case Intrinsic::loongarch_lasx_xvmini_w:
6393 case Intrinsic::loongarch_lasx_xvmini_d:
6394 return DAG.getNode(Opcode: ISD::SMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6395 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG, /*IsSigned=*/true));
6396 case Intrinsic::loongarch_lsx_vmini_bu:
6397 case Intrinsic::loongarch_lsx_vmini_hu:
6398 case Intrinsic::loongarch_lsx_vmini_wu:
6399 case Intrinsic::loongarch_lsx_vmini_du:
6400 case Intrinsic::loongarch_lasx_xvmini_bu:
6401 case Intrinsic::loongarch_lasx_xvmini_hu:
6402 case Intrinsic::loongarch_lasx_xvmini_wu:
6403 case Intrinsic::loongarch_lasx_xvmini_du:
6404 return DAG.getNode(Opcode: ISD::UMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6405 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
6406 case Intrinsic::loongarch_lsx_vmul_b:
6407 case Intrinsic::loongarch_lsx_vmul_h:
6408 case Intrinsic::loongarch_lsx_vmul_w:
6409 case Intrinsic::loongarch_lsx_vmul_d:
6410 case Intrinsic::loongarch_lasx_xvmul_b:
6411 case Intrinsic::loongarch_lasx_xvmul_h:
6412 case Intrinsic::loongarch_lasx_xvmul_w:
6413 case Intrinsic::loongarch_lasx_xvmul_d:
6414 return DAG.getNode(Opcode: ISD::MUL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6415 N2: N->getOperand(Num: 2));
6416 case Intrinsic::loongarch_lsx_vmadd_b:
6417 case Intrinsic::loongarch_lsx_vmadd_h:
6418 case Intrinsic::loongarch_lsx_vmadd_w:
6419 case Intrinsic::loongarch_lsx_vmadd_d:
6420 case Intrinsic::loongarch_lasx_xvmadd_b:
6421 case Intrinsic::loongarch_lasx_xvmadd_h:
6422 case Intrinsic::loongarch_lasx_xvmadd_w:
6423 case Intrinsic::loongarch_lasx_xvmadd_d: {
6424 EVT ResTy = N->getValueType(ResNo: 0);
6425 return DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 1),
6426 N2: DAG.getNode(Opcode: ISD::MUL, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 2),
6427 N2: N->getOperand(Num: 3)));
6428 }
6429 case Intrinsic::loongarch_lsx_vmsub_b:
6430 case Intrinsic::loongarch_lsx_vmsub_h:
6431 case Intrinsic::loongarch_lsx_vmsub_w:
6432 case Intrinsic::loongarch_lsx_vmsub_d:
6433 case Intrinsic::loongarch_lasx_xvmsub_b:
6434 case Intrinsic::loongarch_lasx_xvmsub_h:
6435 case Intrinsic::loongarch_lasx_xvmsub_w:
6436 case Intrinsic::loongarch_lasx_xvmsub_d: {
6437 EVT ResTy = N->getValueType(ResNo: 0);
6438 return DAG.getNode(Opcode: ISD::SUB, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 1),
6439 N2: DAG.getNode(Opcode: ISD::MUL, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 2),
6440 N2: N->getOperand(Num: 3)));
6441 }
6442 case Intrinsic::loongarch_lsx_vdiv_b:
6443 case Intrinsic::loongarch_lsx_vdiv_h:
6444 case Intrinsic::loongarch_lsx_vdiv_w:
6445 case Intrinsic::loongarch_lsx_vdiv_d:
6446 case Intrinsic::loongarch_lasx_xvdiv_b:
6447 case Intrinsic::loongarch_lasx_xvdiv_h:
6448 case Intrinsic::loongarch_lasx_xvdiv_w:
6449 case Intrinsic::loongarch_lasx_xvdiv_d:
6450 return DAG.getNode(Opcode: ISD::SDIV, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6451 N2: N->getOperand(Num: 2));
6452 case Intrinsic::loongarch_lsx_vdiv_bu:
6453 case Intrinsic::loongarch_lsx_vdiv_hu:
6454 case Intrinsic::loongarch_lsx_vdiv_wu:
6455 case Intrinsic::loongarch_lsx_vdiv_du:
6456 case Intrinsic::loongarch_lasx_xvdiv_bu:
6457 case Intrinsic::loongarch_lasx_xvdiv_hu:
6458 case Intrinsic::loongarch_lasx_xvdiv_wu:
6459 case Intrinsic::loongarch_lasx_xvdiv_du:
6460 return DAG.getNode(Opcode: ISD::UDIV, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6461 N2: N->getOperand(Num: 2));
6462 case Intrinsic::loongarch_lsx_vmod_b:
6463 case Intrinsic::loongarch_lsx_vmod_h:
6464 case Intrinsic::loongarch_lsx_vmod_w:
6465 case Intrinsic::loongarch_lsx_vmod_d:
6466 case Intrinsic::loongarch_lasx_xvmod_b:
6467 case Intrinsic::loongarch_lasx_xvmod_h:
6468 case Intrinsic::loongarch_lasx_xvmod_w:
6469 case Intrinsic::loongarch_lasx_xvmod_d:
6470 return DAG.getNode(Opcode: ISD::SREM, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6471 N2: N->getOperand(Num: 2));
6472 case Intrinsic::loongarch_lsx_vmod_bu:
6473 case Intrinsic::loongarch_lsx_vmod_hu:
6474 case Intrinsic::loongarch_lsx_vmod_wu:
6475 case Intrinsic::loongarch_lsx_vmod_du:
6476 case Intrinsic::loongarch_lasx_xvmod_bu:
6477 case Intrinsic::loongarch_lasx_xvmod_hu:
6478 case Intrinsic::loongarch_lasx_xvmod_wu:
6479 case Intrinsic::loongarch_lasx_xvmod_du:
6480 return DAG.getNode(Opcode: ISD::UREM, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6481 N2: N->getOperand(Num: 2));
6482 case Intrinsic::loongarch_lsx_vand_v:
6483 case Intrinsic::loongarch_lasx_xvand_v:
6484 return DAG.getNode(Opcode: ISD::AND, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6485 N2: N->getOperand(Num: 2));
6486 case Intrinsic::loongarch_lsx_vor_v:
6487 case Intrinsic::loongarch_lasx_xvor_v:
6488 return DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6489 N2: N->getOperand(Num: 2));
6490 case Intrinsic::loongarch_lsx_vxor_v:
6491 case Intrinsic::loongarch_lasx_xvxor_v:
6492 return DAG.getNode(Opcode: ISD::XOR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6493 N2: N->getOperand(Num: 2));
6494 case Intrinsic::loongarch_lsx_vnor_v:
6495 case Intrinsic::loongarch_lasx_xvnor_v: {
6496 SDValue Res = DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6497 N2: N->getOperand(Num: 2));
6498 return DAG.getNOT(DL, Val: Res, VT: Res->getValueType(ResNo: 0));
6499 }
6500 case Intrinsic::loongarch_lsx_vandi_b:
6501 case Intrinsic::loongarch_lasx_xvandi_b:
6502 return DAG.getNode(Opcode: ISD::AND, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6503 N2: lowerVectorSplatImm<8>(Node: N, ImmOp: 2, DAG));
6504 case Intrinsic::loongarch_lsx_vori_b:
6505 case Intrinsic::loongarch_lasx_xvori_b:
6506 return DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6507 N2: lowerVectorSplatImm<8>(Node: N, ImmOp: 2, DAG));
6508 case Intrinsic::loongarch_lsx_vxori_b:
6509 case Intrinsic::loongarch_lasx_xvxori_b:
6510 return DAG.getNode(Opcode: ISD::XOR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6511 N2: lowerVectorSplatImm<8>(Node: N, ImmOp: 2, DAG));
6512 case Intrinsic::loongarch_lsx_vsll_b:
6513 case Intrinsic::loongarch_lsx_vsll_h:
6514 case Intrinsic::loongarch_lsx_vsll_w:
6515 case Intrinsic::loongarch_lsx_vsll_d:
6516 case Intrinsic::loongarch_lasx_xvsll_b:
6517 case Intrinsic::loongarch_lasx_xvsll_h:
6518 case Intrinsic::loongarch_lasx_xvsll_w:
6519 case Intrinsic::loongarch_lasx_xvsll_d:
6520 return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6521 N2: truncateVecElts(Node: N, DAG));
6522 case Intrinsic::loongarch_lsx_vslli_b:
6523 case Intrinsic::loongarch_lasx_xvslli_b:
6524 return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6525 N2: lowerVectorSplatImm<3>(Node: N, ImmOp: 2, DAG));
6526 case Intrinsic::loongarch_lsx_vslli_h:
6527 case Intrinsic::loongarch_lasx_xvslli_h:
6528 return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6529 N2: lowerVectorSplatImm<4>(Node: N, ImmOp: 2, DAG));
6530 case Intrinsic::loongarch_lsx_vslli_w:
6531 case Intrinsic::loongarch_lasx_xvslli_w:
6532 return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6533 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
6534 case Intrinsic::loongarch_lsx_vslli_d:
6535 case Intrinsic::loongarch_lasx_xvslli_d:
6536 return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6537 N2: lowerVectorSplatImm<6>(Node: N, ImmOp: 2, DAG));
6538 case Intrinsic::loongarch_lsx_vsrl_b:
6539 case Intrinsic::loongarch_lsx_vsrl_h:
6540 case Intrinsic::loongarch_lsx_vsrl_w:
6541 case Intrinsic::loongarch_lsx_vsrl_d:
6542 case Intrinsic::loongarch_lasx_xvsrl_b:
6543 case Intrinsic::loongarch_lasx_xvsrl_h:
6544 case Intrinsic::loongarch_lasx_xvsrl_w:
6545 case Intrinsic::loongarch_lasx_xvsrl_d:
6546 return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6547 N2: truncateVecElts(Node: N, DAG));
6548 case Intrinsic::loongarch_lsx_vsrli_b:
6549 case Intrinsic::loongarch_lasx_xvsrli_b:
6550 return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6551 N2: lowerVectorSplatImm<3>(Node: N, ImmOp: 2, DAG));
6552 case Intrinsic::loongarch_lsx_vsrli_h:
6553 case Intrinsic::loongarch_lasx_xvsrli_h:
6554 return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6555 N2: lowerVectorSplatImm<4>(Node: N, ImmOp: 2, DAG));
6556 case Intrinsic::loongarch_lsx_vsrli_w:
6557 case Intrinsic::loongarch_lasx_xvsrli_w:
6558 return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6559 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
6560 case Intrinsic::loongarch_lsx_vsrli_d:
6561 case Intrinsic::loongarch_lasx_xvsrli_d:
6562 return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6563 N2: lowerVectorSplatImm<6>(Node: N, ImmOp: 2, DAG));
6564 case Intrinsic::loongarch_lsx_vsra_b:
6565 case Intrinsic::loongarch_lsx_vsra_h:
6566 case Intrinsic::loongarch_lsx_vsra_w:
6567 case Intrinsic::loongarch_lsx_vsra_d:
6568 case Intrinsic::loongarch_lasx_xvsra_b:
6569 case Intrinsic::loongarch_lasx_xvsra_h:
6570 case Intrinsic::loongarch_lasx_xvsra_w:
6571 case Intrinsic::loongarch_lasx_xvsra_d:
6572 return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6573 N2: truncateVecElts(Node: N, DAG));
6574 case Intrinsic::loongarch_lsx_vsrai_b:
6575 case Intrinsic::loongarch_lasx_xvsrai_b:
6576 return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6577 N2: lowerVectorSplatImm<3>(Node: N, ImmOp: 2, DAG));
6578 case Intrinsic::loongarch_lsx_vsrai_h:
6579 case Intrinsic::loongarch_lasx_xvsrai_h:
6580 return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6581 N2: lowerVectorSplatImm<4>(Node: N, ImmOp: 2, DAG));
6582 case Intrinsic::loongarch_lsx_vsrai_w:
6583 case Intrinsic::loongarch_lasx_xvsrai_w:
6584 return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6585 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
6586 case Intrinsic::loongarch_lsx_vsrai_d:
6587 case Intrinsic::loongarch_lasx_xvsrai_d:
6588 return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6589 N2: lowerVectorSplatImm<6>(Node: N, ImmOp: 2, DAG));
6590 case Intrinsic::loongarch_lsx_vclz_b:
6591 case Intrinsic::loongarch_lsx_vclz_h:
6592 case Intrinsic::loongarch_lsx_vclz_w:
6593 case Intrinsic::loongarch_lsx_vclz_d:
6594 case Intrinsic::loongarch_lasx_xvclz_b:
6595 case Intrinsic::loongarch_lasx_xvclz_h:
6596 case Intrinsic::loongarch_lasx_xvclz_w:
6597 case Intrinsic::loongarch_lasx_xvclz_d:
6598 return DAG.getNode(Opcode: ISD::CTLZ, DL, VT: N->getValueType(ResNo: 0), Operand: N->getOperand(Num: 1));
6599 case Intrinsic::loongarch_lsx_vpcnt_b:
6600 case Intrinsic::loongarch_lsx_vpcnt_h:
6601 case Intrinsic::loongarch_lsx_vpcnt_w:
6602 case Intrinsic::loongarch_lsx_vpcnt_d:
6603 case Intrinsic::loongarch_lasx_xvpcnt_b:
6604 case Intrinsic::loongarch_lasx_xvpcnt_h:
6605 case Intrinsic::loongarch_lasx_xvpcnt_w:
6606 case Intrinsic::loongarch_lasx_xvpcnt_d:
6607 return DAG.getNode(Opcode: ISD::CTPOP, DL, VT: N->getValueType(ResNo: 0), Operand: N->getOperand(Num: 1));
6608 case Intrinsic::loongarch_lsx_vbitclr_b:
6609 case Intrinsic::loongarch_lsx_vbitclr_h:
6610 case Intrinsic::loongarch_lsx_vbitclr_w:
6611 case Intrinsic::loongarch_lsx_vbitclr_d:
6612 case Intrinsic::loongarch_lasx_xvbitclr_b:
6613 case Intrinsic::loongarch_lasx_xvbitclr_h:
6614 case Intrinsic::loongarch_lasx_xvbitclr_w:
6615 case Intrinsic::loongarch_lasx_xvbitclr_d:
6616 return lowerVectorBitClear(Node: N, DAG);
6617 case Intrinsic::loongarch_lsx_vbitclri_b:
6618 case Intrinsic::loongarch_lasx_xvbitclri_b:
6619 return lowerVectorBitClearImm<3>(Node: N, DAG);
6620 case Intrinsic::loongarch_lsx_vbitclri_h:
6621 case Intrinsic::loongarch_lasx_xvbitclri_h:
6622 return lowerVectorBitClearImm<4>(Node: N, DAG);
6623 case Intrinsic::loongarch_lsx_vbitclri_w:
6624 case Intrinsic::loongarch_lasx_xvbitclri_w:
6625 return lowerVectorBitClearImm<5>(Node: N, DAG);
6626 case Intrinsic::loongarch_lsx_vbitclri_d:
6627 case Intrinsic::loongarch_lasx_xvbitclri_d:
6628 return lowerVectorBitClearImm<6>(Node: N, DAG);
6629 case Intrinsic::loongarch_lsx_vbitset_b:
6630 case Intrinsic::loongarch_lsx_vbitset_h:
6631 case Intrinsic::loongarch_lsx_vbitset_w:
6632 case Intrinsic::loongarch_lsx_vbitset_d:
6633 case Intrinsic::loongarch_lasx_xvbitset_b:
6634 case Intrinsic::loongarch_lasx_xvbitset_h:
6635 case Intrinsic::loongarch_lasx_xvbitset_w:
6636 case Intrinsic::loongarch_lasx_xvbitset_d: {
6637 EVT VecTy = N->getValueType(ResNo: 0);
6638 SDValue One = DAG.getConstant(Val: 1, DL, VT: VecTy);
6639 return DAG.getNode(
6640 Opcode: ISD::OR, DL, VT: VecTy, N1: N->getOperand(Num: 1),
6641 N2: DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: One, N2: truncateVecElts(Node: N, DAG)));
6642 }
6643 case Intrinsic::loongarch_lsx_vbitseti_b:
6644 case Intrinsic::loongarch_lasx_xvbitseti_b:
6645 return lowerVectorBitSetImm<3>(Node: N, DAG);
6646 case Intrinsic::loongarch_lsx_vbitseti_h:
6647 case Intrinsic::loongarch_lasx_xvbitseti_h:
6648 return lowerVectorBitSetImm<4>(Node: N, DAG);
6649 case Intrinsic::loongarch_lsx_vbitseti_w:
6650 case Intrinsic::loongarch_lasx_xvbitseti_w:
6651 return lowerVectorBitSetImm<5>(Node: N, DAG);
6652 case Intrinsic::loongarch_lsx_vbitseti_d:
6653 case Intrinsic::loongarch_lasx_xvbitseti_d:
6654 return lowerVectorBitSetImm<6>(Node: N, DAG);
6655 case Intrinsic::loongarch_lsx_vbitrev_b:
6656 case Intrinsic::loongarch_lsx_vbitrev_h:
6657 case Intrinsic::loongarch_lsx_vbitrev_w:
6658 case Intrinsic::loongarch_lsx_vbitrev_d:
6659 case Intrinsic::loongarch_lasx_xvbitrev_b:
6660 case Intrinsic::loongarch_lasx_xvbitrev_h:
6661 case Intrinsic::loongarch_lasx_xvbitrev_w:
6662 case Intrinsic::loongarch_lasx_xvbitrev_d: {
6663 EVT VecTy = N->getValueType(ResNo: 0);
6664 SDValue One = DAG.getConstant(Val: 1, DL, VT: VecTy);
6665 return DAG.getNode(
6666 Opcode: ISD::XOR, DL, VT: VecTy, N1: N->getOperand(Num: 1),
6667 N2: DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: One, N2: truncateVecElts(Node: N, DAG)));
6668 }
6669 case Intrinsic::loongarch_lsx_vbitrevi_b:
6670 case Intrinsic::loongarch_lasx_xvbitrevi_b:
6671 return lowerVectorBitRevImm<3>(Node: N, DAG);
6672 case Intrinsic::loongarch_lsx_vbitrevi_h:
6673 case Intrinsic::loongarch_lasx_xvbitrevi_h:
6674 return lowerVectorBitRevImm<4>(Node: N, DAG);
6675 case Intrinsic::loongarch_lsx_vbitrevi_w:
6676 case Intrinsic::loongarch_lasx_xvbitrevi_w:
6677 return lowerVectorBitRevImm<5>(Node: N, DAG);
6678 case Intrinsic::loongarch_lsx_vbitrevi_d:
6679 case Intrinsic::loongarch_lasx_xvbitrevi_d:
6680 return lowerVectorBitRevImm<6>(Node: N, DAG);
6681 case Intrinsic::loongarch_lsx_vfadd_s:
6682 case Intrinsic::loongarch_lsx_vfadd_d:
6683 case Intrinsic::loongarch_lasx_xvfadd_s:
6684 case Intrinsic::loongarch_lasx_xvfadd_d:
6685 return DAG.getNode(Opcode: ISD::FADD, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6686 N2: N->getOperand(Num: 2));
6687 case Intrinsic::loongarch_lsx_vfsub_s:
6688 case Intrinsic::loongarch_lsx_vfsub_d:
6689 case Intrinsic::loongarch_lasx_xvfsub_s:
6690 case Intrinsic::loongarch_lasx_xvfsub_d:
6691 return DAG.getNode(Opcode: ISD::FSUB, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6692 N2: N->getOperand(Num: 2));
6693 case Intrinsic::loongarch_lsx_vfmul_s:
6694 case Intrinsic::loongarch_lsx_vfmul_d:
6695 case Intrinsic::loongarch_lasx_xvfmul_s:
6696 case Intrinsic::loongarch_lasx_xvfmul_d:
6697 return DAG.getNode(Opcode: ISD::FMUL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6698 N2: N->getOperand(Num: 2));
6699 case Intrinsic::loongarch_lsx_vfdiv_s:
6700 case Intrinsic::loongarch_lsx_vfdiv_d:
6701 case Intrinsic::loongarch_lasx_xvfdiv_s:
6702 case Intrinsic::loongarch_lasx_xvfdiv_d:
6703 return DAG.getNode(Opcode: ISD::FDIV, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6704 N2: N->getOperand(Num: 2));
6705 case Intrinsic::loongarch_lsx_vfmadd_s:
6706 case Intrinsic::loongarch_lsx_vfmadd_d:
6707 case Intrinsic::loongarch_lasx_xvfmadd_s:
6708 case Intrinsic::loongarch_lasx_xvfmadd_d:
6709 return DAG.getNode(Opcode: ISD::FMA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
6710 N2: N->getOperand(Num: 2), N3: N->getOperand(Num: 3));
6711 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
6712 return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
6713 N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2),
6714 N3: legalizeIntrinsicImmArg<4>(Node: N, ImmOp: 3, DAG, Subtarget));
6715 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
6716 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
6717 return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
6718 N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2),
6719 N3: legalizeIntrinsicImmArg<3>(Node: N, ImmOp: 3, DAG, Subtarget));
6720 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
6721 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
6722 return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
6723 N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2),
6724 N3: legalizeIntrinsicImmArg<2>(Node: N, ImmOp: 3, DAG, Subtarget));
6725 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
6726 return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
6727 N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2),
6728 N3: legalizeIntrinsicImmArg<1>(Node: N, ImmOp: 3, DAG, Subtarget));
6729 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
6730 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
6731 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
6732 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
6733 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
6734 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
6735 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
6736 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
6737 return DAG.getNode(Opcode: LoongArchISD::VREPLGR2VR, DL, VT: N->getValueType(ResNo: 0),
6738 Operand: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getGRLenVT(),
6739 Operand: N->getOperand(Num: 1)));
6740 case Intrinsic::loongarch_lsx_vreplve_b:
6741 case Intrinsic::loongarch_lsx_vreplve_h:
6742 case Intrinsic::loongarch_lsx_vreplve_w:
6743 case Intrinsic::loongarch_lsx_vreplve_d:
6744 case Intrinsic::loongarch_lasx_xvreplve_b:
6745 case Intrinsic::loongarch_lasx_xvreplve_h:
6746 case Intrinsic::loongarch_lasx_xvreplve_w:
6747 case Intrinsic::loongarch_lasx_xvreplve_d:
6748 return DAG.getNode(Opcode: LoongArchISD::VREPLVE, DL, VT: N->getValueType(ResNo: 0),
6749 N1: N->getOperand(Num: 1),
6750 N2: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getGRLenVT(),
6751 Operand: N->getOperand(Num: 2)));
6752 case Intrinsic::loongarch_lsx_vpickve2gr_b:
6753 if (!Subtarget.is64Bit())
6754 return lowerVectorPickVE2GR<4>(N, DAG, ResOp: LoongArchISD::VPICK_SEXT_ELT);
6755 break;
6756 case Intrinsic::loongarch_lsx_vpickve2gr_h:
6757 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
6758 if (!Subtarget.is64Bit())
6759 return lowerVectorPickVE2GR<3>(N, DAG, ResOp: LoongArchISD::VPICK_SEXT_ELT);
6760 break;
6761 case Intrinsic::loongarch_lsx_vpickve2gr_w:
6762 if (!Subtarget.is64Bit())
6763 return lowerVectorPickVE2GR<2>(N, DAG, ResOp: LoongArchISD::VPICK_SEXT_ELT);
6764 break;
6765 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
6766 if (!Subtarget.is64Bit())
6767 return lowerVectorPickVE2GR<4>(N, DAG, ResOp: LoongArchISD::VPICK_ZEXT_ELT);
6768 break;
6769 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
6770 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
6771 if (!Subtarget.is64Bit())
6772 return lowerVectorPickVE2GR<3>(N, DAG, ResOp: LoongArchISD::VPICK_ZEXT_ELT);
6773 break;
6774 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
6775 if (!Subtarget.is64Bit())
6776 return lowerVectorPickVE2GR<2>(N, DAG, ResOp: LoongArchISD::VPICK_ZEXT_ELT);
6777 break;
6778 case Intrinsic::loongarch_lsx_bz_b:
6779 case Intrinsic::loongarch_lsx_bz_h:
6780 case Intrinsic::loongarch_lsx_bz_w:
6781 case Intrinsic::loongarch_lsx_bz_d:
6782 case Intrinsic::loongarch_lasx_xbz_b:
6783 case Intrinsic::loongarch_lasx_xbz_h:
6784 case Intrinsic::loongarch_lasx_xbz_w:
6785 case Intrinsic::loongarch_lasx_xbz_d:
6786 if (!Subtarget.is64Bit())
6787 return DAG.getNode(Opcode: LoongArchISD::VALL_ZERO, DL, VT: N->getValueType(ResNo: 0),
6788 Operand: N->getOperand(Num: 1));
6789 break;
6790 case Intrinsic::loongarch_lsx_bz_v:
6791 case Intrinsic::loongarch_lasx_xbz_v:
6792 if (!Subtarget.is64Bit())
6793 return DAG.getNode(Opcode: LoongArchISD::VANY_ZERO, DL, VT: N->getValueType(ResNo: 0),
6794 Operand: N->getOperand(Num: 1));
6795 break;
6796 case Intrinsic::loongarch_lsx_bnz_b:
6797 case Intrinsic::loongarch_lsx_bnz_h:
6798 case Intrinsic::loongarch_lsx_bnz_w:
6799 case Intrinsic::loongarch_lsx_bnz_d:
6800 case Intrinsic::loongarch_lasx_xbnz_b:
6801 case Intrinsic::loongarch_lasx_xbnz_h:
6802 case Intrinsic::loongarch_lasx_xbnz_w:
6803 case Intrinsic::loongarch_lasx_xbnz_d:
6804 if (!Subtarget.is64Bit())
6805 return DAG.getNode(Opcode: LoongArchISD::VALL_NONZERO, DL, VT: N->getValueType(ResNo: 0),
6806 Operand: N->getOperand(Num: 1));
6807 break;
6808 case Intrinsic::loongarch_lsx_bnz_v:
6809 case Intrinsic::loongarch_lasx_xbnz_v:
6810 if (!Subtarget.is64Bit())
6811 return DAG.getNode(Opcode: LoongArchISD::VANY_NONZERO, DL, VT: N->getValueType(ResNo: 0),
6812 Operand: N->getOperand(Num: 1));
6813 break;
6814 case Intrinsic::loongarch_lasx_concat_128_s:
6815 case Intrinsic::loongarch_lasx_concat_128_d:
6816 case Intrinsic::loongarch_lasx_concat_128:
6817 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: N->getValueType(ResNo: 0),
6818 N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2));
6819 }
6820 return SDValue();
6821}
6822
6823static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG,
6824 TargetLowering::DAGCombinerInfo &DCI,
6825 const LoongArchSubtarget &Subtarget) {
6826 // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 then the
6827 // conversion is unnecessary and can be replaced with the
6828 // MOVFR2GR_S_LA64 operand.
6829 SDValue Op0 = N->getOperand(Num: 0);
6830 if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64)
6831 return Op0.getOperand(i: 0);
6832 return SDValue();
6833}
6834
6835static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG,
6836 TargetLowering::DAGCombinerInfo &DCI,
6837 const LoongArchSubtarget &Subtarget) {
6838 // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
6839 // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
6840 // operand.
6841 SDValue Op0 = N->getOperand(Num: 0);
6842 if (Op0->getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) {
6843 assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) &&
6844 "Unexpected value type!");
6845 return Op0.getOperand(i: 0);
6846 }
6847 return SDValue();
6848}
6849
6850static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG,
6851 TargetLowering::DAGCombinerInfo &DCI,
6852 const LoongArchSubtarget &Subtarget) {
6853 MVT VT = N->getSimpleValueType(ResNo: 0);
6854 unsigned NumBits = VT.getScalarSizeInBits();
6855
6856 // Simplify the inputs.
6857 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6858 APInt DemandedMask(APInt::getAllOnes(numBits: NumBits));
6859 if (TLI.SimplifyDemandedBits(Op: SDValue(N, 0), DemandedBits: DemandedMask, DCI))
6860 return SDValue(N, 0);
6861
6862 return SDValue();
6863}
6864
6865static SDValue
6866performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG,
6867 TargetLowering::DAGCombinerInfo &DCI,
6868 const LoongArchSubtarget &Subtarget) {
6869 SDValue Op0 = N->getOperand(Num: 0);
6870 SDLoc DL(N);
6871
6872 // If the input to SplitPairF64 is just BuildPairF64 then the operation is
6873 // redundant. Instead, use BuildPairF64's operands directly.
6874 if (Op0->getOpcode() == LoongArchISD::BUILD_PAIR_F64)
6875 return DCI.CombineTo(N, Res0: Op0.getOperand(i: 0), Res1: Op0.getOperand(i: 1));
6876
6877 if (Op0->isUndef()) {
6878 SDValue Lo = DAG.getUNDEF(VT: MVT::i32);
6879 SDValue Hi = DAG.getUNDEF(VT: MVT::i32);
6880 return DCI.CombineTo(N, Res0: Lo, Res1: Hi);
6881 }
6882
6883 // It's cheaper to materialise two 32-bit integers than to load a double
6884 // from the constant pool and transfer it to integer registers through the
6885 // stack.
6886 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val&: Op0)) {
6887 APInt V = C->getValueAPF().bitcastToAPInt();
6888 SDValue Lo = DAG.getConstant(Val: V.trunc(width: 32), DL, VT: MVT::i32);
6889 SDValue Hi = DAG.getConstant(Val: V.lshr(shiftAmt: 32).trunc(width: 32), DL, VT: MVT::i32);
6890 return DCI.CombineTo(N, Res0: Lo, Res1: Hi);
6891 }
6892
6893 return SDValue();
6894}
6895
6896/// Do target-specific dag combines on LoongArchISD::VANDN nodes.
6897static SDValue performVANDNCombine(SDNode *N, SelectionDAG &DAG,
6898 TargetLowering::DAGCombinerInfo &DCI,
6899 const LoongArchSubtarget &Subtarget) {
6900 SDValue N0 = N->getOperand(Num: 0);
6901 SDValue N1 = N->getOperand(Num: 1);
6902 MVT VT = N->getSimpleValueType(ResNo: 0);
6903 SDLoc DL(N);
6904
6905 // VANDN(undef, x) -> 0
6906 // VANDN(x, undef) -> 0
6907 if (N0.isUndef() || N1.isUndef())
6908 return DAG.getConstant(Val: 0, DL, VT);
6909
6910 // VANDN(0, x) -> x
6911 if (ISD::isBuildVectorAllZeros(N: N0.getNode()))
6912 return N1;
6913
6914 // VANDN(x, 0) -> 0
6915 if (ISD::isBuildVectorAllZeros(N: N1.getNode()))
6916 return DAG.getConstant(Val: 0, DL, VT);
6917
6918 // VANDN(x, -1) -> NOT(x) -> XOR(x, -1)
6919 if (ISD::isBuildVectorAllOnes(N: N1.getNode()))
6920 return DAG.getNOT(DL, Val: N0, VT);
6921
6922 // Turn VANDN back to AND if input is inverted.
6923 if (SDValue Not = isNOT(V: N0, DAG))
6924 return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: DAG.getBitcast(VT, V: Not), N2: N1);
6925
6926 // Folds for better commutativity:
6927 if (N1->hasOneUse()) {
6928 // VANDN(x, NOT(y)) -> AND(NOT(x), NOT(y)) -> NOT(OR(x, y)).
6929 if (SDValue Not = isNOT(V: N1, DAG))
6930 return DAG.getNOT(
6931 DL, Val: DAG.getNode(Opcode: ISD::OR, DL, VT, N1: N0, N2: DAG.getBitcast(VT, V: Not)), VT);
6932
6933 // VANDN(x, SplatVector(Imm)) -> AND(NOT(x), NOT(SplatVector(~Imm)))
6934 //                            -> NOT(OR(x, SplatVector(~Imm)))
6935 // The combine is only performed for v16i8/v32i8, where the NOT(OR())
6936 // form can be selected as a single `vnori.b` (or its LASX counterpart).
6937 if (!DCI.isBeforeLegalizeOps() && (VT == MVT::v16i8 || VT == MVT::v32i8) &&
6938 N1.getOpcode() == ISD::BUILD_VECTOR) {
6939 if (SDValue SplatValue =
6940 cast<BuildVectorSDNode>(Val: N1.getNode())->getSplatValue()) {
6941 if (!N1->isOnlyUserOf(N: SplatValue.getNode()))
6942 return SDValue();
6943
6944 if (auto *C = dyn_cast<ConstantSDNode>(Val&: SplatValue)) {
6945 uint8_t NCVal = static_cast<uint8_t>(~(C->getSExtValue()));
6946 SDValue Not =
6947 DAG.getSplat(VT, DL, Op: DAG.getTargetConstant(Val: NCVal, DL, VT: MVT::i8));
6948 return DAG.getNOT(
6949 DL, Val: DAG.getNode(Opcode: ISD::OR, DL, VT, N1: N0, N2: DAG.getBitcast(VT, V: Not)),
6950 VT);
6951 }
6952 }
6953 }
6954 }
6955
6956 return SDValue();
6957}
6958
6959static SDValue performSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
6960 TargetLowering::DAGCombinerInfo &DCI,
6961 const LoongArchSubtarget &Subtarget) {
6962 SDLoc DL(N);
6963 EVT VT = N->getValueType(ResNo: 0);
6964
6965 if (VT != MVT::f32 && VT != MVT::f64)
6966 return SDValue();
6967 if (VT == MVT::f32 && !Subtarget.hasBasicF())
6968 return SDValue();
6969 if (VT == MVT::f64 && !Subtarget.hasBasicD())
6970 return SDValue();
6971
6972 // Only optimize when the source and destination types have the same width.
6973 if (VT.getSizeInBits() != N->getOperand(Num: 0).getValueSizeInBits())
6974 return SDValue();
6975
6976 SDValue Src = N->getOperand(Num: 0);
6977 // If the result of an integer load is only used by an integer-to-float
6978 // conversion, use a floating-point load instead. This eliminates the
6979 // GPR-to-FPR move (movgr2fr) instruction.
6980 if (ISD::isNormalLoad(N: Src.getNode()) && Src.hasOneUse() &&
6981 // Do not change the width of a volatile load. This condition check is
6982 // inspired by AArch64.
6983 !cast<LoadSDNode>(Val&: Src)->isVolatile()) {
6984 LoadSDNode *LN0 = cast<LoadSDNode>(Val&: Src);
6985 SDValue Load = DAG.getLoad(VT, dl: DL, Chain: LN0->getChain(), Ptr: LN0->getBasePtr(),
6986 PtrInfo: LN0->getPointerInfo(), Alignment: LN0->getAlign(),
6987 MMOFlags: LN0->getMemOperand()->getFlags());
6988
6989 // Make sure successors of the original load stay after it by updating them
6990 // to use the new Chain.
6991 DAG.ReplaceAllUsesOfValueWith(From: SDValue(LN0, 1), To: Load.getValue(R: 1));
6992 return DAG.getNode(Opcode: LoongArchISD::SITOF, DL: SDLoc(N), VT, Operand: Load);
6993 }
6994
6995 return SDValue();
6996}
6997
6998SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
6999 DAGCombinerInfo &DCI) const {
7000 SelectionDAG &DAG = DCI.DAG;
7001 switch (N->getOpcode()) {
7002 default:
7003 break;
7004 case ISD::AND:
7005 return performANDCombine(N, DAG, DCI, Subtarget);
7006 case ISD::OR:
7007 return performORCombine(N, DAG, DCI, Subtarget);
7008 case ISD::SETCC:
7009 return performSETCCCombine(N, DAG, DCI, Subtarget);
7010 case ISD::SRL:
7011 return performSRLCombine(N, DAG, DCI, Subtarget);
7012 case ISD::BITCAST:
7013 return performBITCASTCombine(N, DAG, DCI, Subtarget);
7014 case ISD::SINT_TO_FP:
7015 return performSINT_TO_FPCombine(N, DAG, DCI, Subtarget);
7016 case LoongArchISD::BITREV_W:
7017 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
7018 case LoongArchISD::BR_CC:
7019 return performBR_CCCombine(N, DAG, DCI, Subtarget);
7020 case LoongArchISD::SELECT_CC:
7021 return performSELECT_CCCombine(N, DAG, DCI, Subtarget);
7022 case ISD::INTRINSIC_WO_CHAIN:
7023 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
7024 case LoongArchISD::MOVGR2FR_W_LA64:
7025 return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
7026 case LoongArchISD::MOVFR2GR_S_LA64:
7027 return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
7028 case LoongArchISD::VMSKLTZ:
7029 case LoongArchISD::XVMSKLTZ:
7030 return performVMSKLTZCombine(N, DAG, DCI, Subtarget);
7031 case LoongArchISD::SPLIT_PAIR_F64:
7032 return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
7033 case LoongArchISD::VANDN:
7034 return performVANDNCombine(N, DAG, DCI, Subtarget);
7035 }
7036 return SDValue();
7037}
7038
7039static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
7040 MachineBasicBlock *MBB) {
7041 if (!ZeroDivCheck)
7042 return MBB;
7043
7044 // Build instructions:
7045 // MBB:
7046 // div(or mod) $dst, $dividend, $divisor
7047 // bne $divisor, $zero, SinkMBB
7048 // BreakMBB:
7049 // break 7 // BRK_DIVZERO
7050 // SinkMBB:
7051 // fallthrough
7052 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
7053 MachineFunction::iterator It = ++MBB->getIterator();
7054 MachineFunction *MF = MBB->getParent();
7055 auto BreakMBB = MF->CreateMachineBasicBlock(BB: LLVM_BB);
7056 auto SinkMBB = MF->CreateMachineBasicBlock(BB: LLVM_BB);
7057 MF->insert(MBBI: It, MBB: BreakMBB);
7058 MF->insert(MBBI: It, MBB: SinkMBB);
7059
7060 // Transfer the remainder of MBB and its successor edges to SinkMBB.
7061 SinkMBB->splice(Where: SinkMBB->end(), Other: MBB, From: std::next(x: MI.getIterator()), To: MBB->end());
7062 SinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB);
7063
7064 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
7065 DebugLoc DL = MI.getDebugLoc();
7066 MachineOperand &Divisor = MI.getOperand(i: 2);
7067 Register DivisorReg = Divisor.getReg();
7068
7069 // MBB:
7070 BuildMI(BB: MBB, MIMD: DL, MCID: TII.get(Opcode: LoongArch::BNE))
7071 .addReg(RegNo: DivisorReg, Flags: getKillRegState(B: Divisor.isKill()))
7072 .addReg(RegNo: LoongArch::R0)
7073 .addMBB(MBB: SinkMBB);
7074 MBB->addSuccessor(Succ: BreakMBB);
7075 MBB->addSuccessor(Succ: SinkMBB);
7076
7077 // BreakMBB:
7078 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
7079 // definition of BRK_DIVZERO.
7080 BuildMI(BB: BreakMBB, MIMD: DL, MCID: TII.get(Opcode: LoongArch::BREAK)).addImm(Val: 7 /*BRK_DIVZERO*/);
7081 BreakMBB->addSuccessor(Succ: SinkMBB);
7082
7083 // Clear Divisor's kill flag.
7084 Divisor.setIsKill(false);
7085
7086 return SinkMBB;
7087}
7088
7089static MachineBasicBlock *
7090emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
7091 const LoongArchSubtarget &Subtarget) {
7092 unsigned CondOpc;
7093 switch (MI.getOpcode()) {
7094 default:
7095 llvm_unreachable("Unexpected opcode");
7096 case LoongArch::PseudoVBZ:
7097 CondOpc = LoongArch::VSETEQZ_V;
7098 break;
7099 case LoongArch::PseudoVBZ_B:
7100 CondOpc = LoongArch::VSETANYEQZ_B;
7101 break;
7102 case LoongArch::PseudoVBZ_H:
7103 CondOpc = LoongArch::VSETANYEQZ_H;
7104 break;
7105 case LoongArch::PseudoVBZ_W:
7106 CondOpc = LoongArch::VSETANYEQZ_W;
7107 break;
7108 case LoongArch::PseudoVBZ_D:
7109 CondOpc = LoongArch::VSETANYEQZ_D;
7110 break;
7111 case LoongArch::PseudoVBNZ:
7112 CondOpc = LoongArch::VSETNEZ_V;
7113 break;
7114 case LoongArch::PseudoVBNZ_B:
7115 CondOpc = LoongArch::VSETALLNEZ_B;
7116 break;
7117 case LoongArch::PseudoVBNZ_H:
7118 CondOpc = LoongArch::VSETALLNEZ_H;
7119 break;
7120 case LoongArch::PseudoVBNZ_W:
7121 CondOpc = LoongArch::VSETALLNEZ_W;
7122 break;
7123 case LoongArch::PseudoVBNZ_D:
7124 CondOpc = LoongArch::VSETALLNEZ_D;
7125 break;
7126 case LoongArch::PseudoXVBZ:
7127 CondOpc = LoongArch::XVSETEQZ_V;
7128 break;
7129 case LoongArch::PseudoXVBZ_B:
7130 CondOpc = LoongArch::XVSETANYEQZ_B;
7131 break;
7132 case LoongArch::PseudoXVBZ_H:
7133 CondOpc = LoongArch::XVSETANYEQZ_H;
7134 break;
7135 case LoongArch::PseudoXVBZ_W:
7136 CondOpc = LoongArch::XVSETANYEQZ_W;
7137 break;
7138 case LoongArch::PseudoXVBZ_D:
7139 CondOpc = LoongArch::XVSETANYEQZ_D;
7140 break;
7141 case LoongArch::PseudoXVBNZ:
7142 CondOpc = LoongArch::XVSETNEZ_V;
7143 break;
7144 case LoongArch::PseudoXVBNZ_B:
7145 CondOpc = LoongArch::XVSETALLNEZ_B;
7146 break;
7147 case LoongArch::PseudoXVBNZ_H:
7148 CondOpc = LoongArch::XVSETALLNEZ_H;
7149 break;
7150 case LoongArch::PseudoXVBNZ_W:
7151 CondOpc = LoongArch::XVSETALLNEZ_W;
7152 break;
7153 case LoongArch::PseudoXVBNZ_D:
7154 CondOpc = LoongArch::XVSETALLNEZ_D;
7155 break;
7156 }
7157
7158 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7159 const BasicBlock *LLVM_BB = BB->getBasicBlock();
7160 DebugLoc DL = MI.getDebugLoc();
7161 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
7162 MachineFunction::iterator It = ++BB->getIterator();
7163
7164 MachineFunction *F = BB->getParent();
7165 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
7166 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
7167 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
7168
7169 F->insert(MBBI: It, MBB: FalseBB);
7170 F->insert(MBBI: It, MBB: TrueBB);
7171 F->insert(MBBI: It, MBB: SinkBB);
7172
7173 // Transfer the remainder of BB and its successor edges to SinkBB.
7174 SinkBB->splice(Where: SinkBB->end(), Other: BB, From: std::next(x: MI.getIterator()), To: BB->end());
7175 SinkBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
7176
7177 // Insert the real instruction into BB.
7178 Register FCC = MRI.createVirtualRegister(RegClass: &LoongArch::CFRRegClass);
7179 BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: CondOpc), DestReg: FCC).addReg(RegNo: MI.getOperand(i: 1).getReg());
7180
7181 // Insert branch.
7182 BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::BCNEZ)).addReg(RegNo: FCC).addMBB(MBB: TrueBB);
7183 BB->addSuccessor(Succ: FalseBB);
7184 BB->addSuccessor(Succ: TrueBB);
7185
7186 // FalseBB.
7187 Register RD1 = MRI.createVirtualRegister(RegClass: &LoongArch::GPRRegClass);
7188 BuildMI(BB: FalseBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::ADDI_W), DestReg: RD1)
7189 .addReg(RegNo: LoongArch::R0)
7190 .addImm(Val: 0);
7191 BuildMI(BB: FalseBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::PseudoBR)).addMBB(MBB: SinkBB);
7192 FalseBB->addSuccessor(Succ: SinkBB);
7193
7194 // TrueBB.
7195 Register RD2 = MRI.createVirtualRegister(RegClass: &LoongArch::GPRRegClass);
7196 BuildMI(BB: TrueBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::ADDI_W), DestReg: RD2)
7197 .addReg(RegNo: LoongArch::R0)
7198 .addImm(Val: 1);
7199 TrueBB->addSuccessor(Succ: SinkBB);
7200
7201 // SinkBB: merge the results.
7202 BuildMI(BB&: *SinkBB, I: SinkBB->begin(), MIMD: DL, MCID: TII->get(Opcode: LoongArch::PHI),
7203 DestReg: MI.getOperand(i: 0).getReg())
7204 .addReg(RegNo: RD1)
7205 .addMBB(MBB: FalseBB)
7206 .addReg(RegNo: RD2)
7207 .addMBB(MBB: TrueBB);
7208
7209 // The pseudo instruction is gone now.
7210 MI.eraseFromParent();
7211 return SinkBB;
7212}
7213
7214static MachineBasicBlock *
7215emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
7216 const LoongArchSubtarget &Subtarget) {
7217 unsigned InsOp;
7218 unsigned BroadcastOp;
7219 unsigned HalfSize;
7220 switch (MI.getOpcode()) {
7221 default:
7222 llvm_unreachable("Unexpected opcode");
7223 case LoongArch::PseudoXVINSGR2VR_B:
7224 HalfSize = 16;
7225 BroadcastOp = LoongArch::XVREPLGR2VR_B;
7226 InsOp = LoongArch::XVEXTRINS_B;
7227 break;
7228 case LoongArch::PseudoXVINSGR2VR_H:
7229 HalfSize = 8;
7230 BroadcastOp = LoongArch::XVREPLGR2VR_H;
7231 InsOp = LoongArch::XVEXTRINS_H;
7232 break;
7233 }
7234 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7235 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
7236 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
7237 DebugLoc DL = MI.getDebugLoc();
7238 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
7239 // XDst = vector_insert XSrc, Elt, Idx
7240 Register XDst = MI.getOperand(i: 0).getReg();
7241 Register XSrc = MI.getOperand(i: 1).getReg();
7242 Register Elt = MI.getOperand(i: 2).getReg();
7243 unsigned Idx = MI.getOperand(i: 3).getImm();
7244
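  // If the 256-bit source is an IMPLICIT_DEF and the index lies in the low
  // half, insert the element directly into the low 128-bit subregister with
  // vinsgr2vr and wrap the result back up with SUBREG_TO_REG. Otherwise,
  // broadcast the element into a 256-bit register, arrange the 128-bit lanes
  // with xvpermi.q and write the element into the destination with xvextrins.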
7245 if (XSrc.isVirtual() && MRI.getVRegDef(Reg: XSrc)->isImplicitDef() &&
7246 Idx < HalfSize) {
7247 Register ScratchSubReg1 = MRI.createVirtualRegister(RegClass: SubRC);
7248 Register ScratchSubReg2 = MRI.createVirtualRegister(RegClass: SubRC);
7249
7250 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::COPY), DestReg: ScratchSubReg1)
7251 .addReg(RegNo: XSrc, Flags: {}, SubReg: LoongArch::sub_128);
7252 BuildMI(BB&: *BB, I&: MI, MIMD: DL,
7253 MCID: TII->get(Opcode: HalfSize == 8 ? LoongArch::VINSGR2VR_H
7254 : LoongArch::VINSGR2VR_B),
7255 DestReg: ScratchSubReg2)
7256 .addReg(RegNo: ScratchSubReg1)
7257 .addReg(RegNo: Elt)
7258 .addImm(Val: Idx);
7259
7260 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::SUBREG_TO_REG), DestReg: XDst)
7261 .addImm(Val: 0)
7262 .addReg(RegNo: ScratchSubReg2)
7263 .addImm(Val: LoongArch::sub_128);
7264 } else {
7265 Register ScratchReg1 = MRI.createVirtualRegister(RegClass: RC);
7266 Register ScratchReg2 = MRI.createVirtualRegister(RegClass: RC);
7267
7268 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: BroadcastOp), DestReg: ScratchReg1).addReg(RegNo: Elt);
7269
7270 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::XVPERMI_Q), DestReg: ScratchReg2)
7271 .addReg(RegNo: ScratchReg1)
7272 .addReg(RegNo: XSrc)
7273 .addImm(Val: Idx >= HalfSize ? 48 : 18);
7274
7275 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: InsOp), DestReg: XDst)
7276 .addReg(RegNo: XSrc)
7277 .addReg(RegNo: ScratchReg2)
7278 .addImm(Val: (Idx >= HalfSize ? Idx - HalfSize : Idx) * 17);
7279 }
7280
7281 MI.eraseFromParent();
7282 return BB;
7283}
7284
7285static MachineBasicBlock *emitPseudoCTPOP(MachineInstr &MI,
7286 MachineBasicBlock *BB,
7287 const LoongArchSubtarget &Subtarget) {
7288 assert(Subtarget.hasExtLSX());
7289 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7290 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
7291 DebugLoc DL = MI.getDebugLoc();
7292 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
7293 Register Dst = MI.getOperand(i: 0).getReg();
7294 Register Src = MI.getOperand(i: 1).getReg();
7295 Register ScratchReg1 = MRI.createVirtualRegister(RegClass: RC);
7296 Register ScratchReg2 = MRI.createVirtualRegister(RegClass: RC);
7297 Register ScratchReg3 = MRI.createVirtualRegister(RegClass: RC);
7298
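  // Expand the scalar CTPOP via LSX: zero a vector register (vldi 0), insert
  // the GPR operand into element 0, run a vector population count on it, and
  // move element 0 of the result back into the destination GPR.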
7299 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::VLDI), DestReg: ScratchReg1).addImm(Val: 0);
7300 BuildMI(BB&: *BB, I&: MI, MIMD: DL,
7301 MCID: TII->get(Opcode: Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
7302 : LoongArch::VINSGR2VR_W),
7303 DestReg: ScratchReg2)
7304 .addReg(RegNo: ScratchReg1)
7305 .addReg(RegNo: Src)
7306 .addImm(Val: 0);
7307 BuildMI(
7308 BB&: *BB, I&: MI, MIMD: DL,
7309 MCID: TII->get(Opcode: Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
7310 DestReg: ScratchReg3)
7311 .addReg(RegNo: ScratchReg2);
7312 BuildMI(BB&: *BB, I&: MI, MIMD: DL,
7313 MCID: TII->get(Opcode: Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
7314 : LoongArch::VPICKVE2GR_W),
7315 DestReg: Dst)
7316 .addReg(RegNo: ScratchReg3)
7317 .addImm(Val: 0);
7318
7319 MI.eraseFromParent();
7320 return BB;
7321}
7322
7323static MachineBasicBlock *
7324emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB,
7325 const LoongArchSubtarget &Subtarget) {
7326 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7327 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
7328 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
7329 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
7330 Register Dst = MI.getOperand(i: 0).getReg();
7331 Register Src = MI.getOperand(i: 1).getReg();
7332 DebugLoc DL = MI.getDebugLoc();
7333 unsigned EleBits = 8;
7334 unsigned NotOpc = 0;
7335 unsigned MskOpc;
7336
7337 switch (MI.getOpcode()) {
7338 default:
7339 llvm_unreachable("Unexpected opcode");
7340 case LoongArch::PseudoVMSKLTZ_B:
7341 MskOpc = LoongArch::VMSKLTZ_B;
7342 break;
7343 case LoongArch::PseudoVMSKLTZ_H:
7344 MskOpc = LoongArch::VMSKLTZ_H;
7345 EleBits = 16;
7346 break;
7347 case LoongArch::PseudoVMSKLTZ_W:
7348 MskOpc = LoongArch::VMSKLTZ_W;
7349 EleBits = 32;
7350 break;
7351 case LoongArch::PseudoVMSKLTZ_D:
7352 MskOpc = LoongArch::VMSKLTZ_D;
7353 EleBits = 64;
7354 break;
7355 case LoongArch::PseudoVMSKGEZ_B:
7356 MskOpc = LoongArch::VMSKGEZ_B;
7357 break;
7358 case LoongArch::PseudoVMSKEQZ_B:
7359 MskOpc = LoongArch::VMSKNZ_B;
7360 NotOpc = LoongArch::VNOR_V;
7361 break;
7362 case LoongArch::PseudoVMSKNEZ_B:
7363 MskOpc = LoongArch::VMSKNZ_B;
7364 break;
7365 case LoongArch::PseudoXVMSKLTZ_B:
7366 MskOpc = LoongArch::XVMSKLTZ_B;
7367 RC = &LoongArch::LASX256RegClass;
7368 break;
7369 case LoongArch::PseudoXVMSKLTZ_H:
7370 MskOpc = LoongArch::XVMSKLTZ_H;
7371 RC = &LoongArch::LASX256RegClass;
7372 EleBits = 16;
7373 break;
7374 case LoongArch::PseudoXVMSKLTZ_W:
7375 MskOpc = LoongArch::XVMSKLTZ_W;
7376 RC = &LoongArch::LASX256RegClass;
7377 EleBits = 32;
7378 break;
7379 case LoongArch::PseudoXVMSKLTZ_D:
7380 MskOpc = LoongArch::XVMSKLTZ_D;
7381 RC = &LoongArch::LASX256RegClass;
7382 EleBits = 64;
7383 break;
7384 case LoongArch::PseudoXVMSKGEZ_B:
7385 MskOpc = LoongArch::XVMSKGEZ_B;
7386 RC = &LoongArch::LASX256RegClass;
7387 break;
7388 case LoongArch::PseudoXVMSKEQZ_B:
7389 MskOpc = LoongArch::XVMSKNZ_B;
7390 NotOpc = LoongArch::XVNOR_V;
7391 RC = &LoongArch::LASX256RegClass;
7392 break;
7393 case LoongArch::PseudoXVMSKNEZ_B:
7394 MskOpc = LoongArch::XVMSKNZ_B;
7395 RC = &LoongArch::LASX256RegClass;
7396 break;
7397 }
7398
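  // Compute the mask. The EQZ variants have no dedicated mask instruction and
  // are emitted as NOT([x]vmsknz.b), i.e. [x]vnor.v with both operands equal.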
7399 Register Msk = MRI.createVirtualRegister(RegClass: RC);
7400 if (NotOpc) {
7401 Register Tmp = MRI.createVirtualRegister(RegClass: RC);
7402 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: MskOpc), DestReg: Tmp).addReg(RegNo: Src);
7403 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: NotOpc), DestReg: Msk)
7404 .addReg(RegNo: Tmp, Flags: RegState::Kill)
7405 .addReg(RegNo: Tmp, Flags: RegState::Kill);
7406 } else {
7407 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: MskOpc), DestReg: Msk).addReg(RegNo: Src);
7408 }
7409
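  // Move the mask into the destination GPR. A 256-bit mask register is read
  // as two 32-bit words (one per 128-bit lane) that are merged with BSTRINS;
  // a 128-bit mask only needs a single halfword extraction.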
7410 if (TRI->getRegSizeInBits(RC: *RC) > 128) {
7411 Register Lo = MRI.createVirtualRegister(RegClass: &LoongArch::GPRRegClass);
7412 Register Hi = MRI.createVirtualRegister(RegClass: &LoongArch::GPRRegClass);
7413 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::XVPICKVE2GR_WU), DestReg: Lo)
7414 .addReg(RegNo: Msk)
7415 .addImm(Val: 0);
7416 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::XVPICKVE2GR_WU), DestReg: Hi)
7417 .addReg(RegNo: Msk, Flags: RegState::Kill)
7418 .addImm(Val: 4);
7419 BuildMI(BB&: *BB, I&: MI, MIMD: DL,
7420 MCID: TII->get(Opcode: Subtarget.is64Bit() ? LoongArch::BSTRINS_D
7421 : LoongArch::BSTRINS_W),
7422 DestReg: Dst)
7423 .addReg(RegNo: Lo, Flags: RegState::Kill)
7424 .addReg(RegNo: Hi, Flags: RegState::Kill)
7425 .addImm(Val: 256 / EleBits - 1)
7426 .addImm(Val: 128 / EleBits);
7427 } else {
7428 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::VPICKVE2GR_HU), DestReg: Dst)
7429 .addReg(RegNo: Msk, Flags: RegState::Kill)
7430 .addImm(Val: 0);
7431 }
7432
7433 MI.eraseFromParent();
7434 return BB;
7435}
7436
7437static MachineBasicBlock *
7438emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB,
7439 const LoongArchSubtarget &Subtarget) {
7440 assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
7441 "Unexpected instruction");
7442
7443 MachineFunction &MF = *BB->getParent();
7444 DebugLoc DL = MI.getDebugLoc();
7445 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
7446 Register LoReg = MI.getOperand(i: 0).getReg();
7447 Register HiReg = MI.getOperand(i: 1).getReg();
7448 Register SrcReg = MI.getOperand(i: 2).getReg();
7449
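  // movfr2gr.s copies the low 32 bits of the f64 register into LoReg and
  // movfrh2gr.s copies the high 32 bits into HiReg.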
7450 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: LoongArch::MOVFR2GR_S_64), DestReg: LoReg).addReg(RegNo: SrcReg);
7451 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: LoongArch::MOVFRH2GR_S), DestReg: HiReg)
7452 .addReg(RegNo: SrcReg, Flags: getKillRegState(B: MI.getOperand(i: 2).isKill()));
7453 MI.eraseFromParent(); // The pseudo instruction is gone now.
7454 return BB;
7455}
7456
7457static MachineBasicBlock *
7458emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB,
7459 const LoongArchSubtarget &Subtarget) {
7460 assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
7461 "Unexpected instruction");
7462
7463 MachineFunction &MF = *BB->getParent();
7464 DebugLoc DL = MI.getDebugLoc();
7465 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
7466 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
7467 Register TmpReg = MRI.createVirtualRegister(RegClass: &LoongArch::FPR64RegClass);
7468 Register DstReg = MI.getOperand(i: 0).getReg();
7469 Register LoReg = MI.getOperand(i: 1).getReg();
7470 Register HiReg = MI.getOperand(i: 2).getReg();
7471
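  // movgr2fr.w writes LoReg into the low 32 bits of a temporary FPR64 and
  // movgr2frh.w then writes HiReg into its high 32 bits, forming the f64.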
7472 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: LoongArch::MOVGR2FR_W_64), DestReg: TmpReg)
7473 .addReg(RegNo: LoReg, Flags: getKillRegState(B: MI.getOperand(i: 1).isKill()));
7474 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: LoongArch::MOVGR2FRH_W), DestReg: DstReg)
7475 .addReg(RegNo: TmpReg, Flags: RegState::Kill)
7476 .addReg(RegNo: HiReg, Flags: getKillRegState(B: MI.getOperand(i: 2).isKill()));
7477 MI.eraseFromParent(); // The pseudo instruction is gone now.
7478 return BB;
7479}
7480
7481static bool isSelectPseudo(MachineInstr &MI) {
7482 switch (MI.getOpcode()) {
7483 default:
7484 return false;
7485 case LoongArch::Select_GPR_Using_CC_GPR:
7486 return true;
7487 }
7488}
7489
7490static MachineBasicBlock *
7491emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB,
7492 const LoongArchSubtarget &Subtarget) {
7493 // To "insert" Select_* instructions, we actually have to insert the triangle
7494 // control-flow pattern. The incoming instructions know the destination vreg
7495 // to set, the condition code register to branch on, the true/false values to
7496 // select between, and the condcode to use to select the appropriate branch.
7497 //
7498 // We produce the following control flow:
7499 // HeadMBB
7500 // | \
7501 // | IfFalseMBB
7502 // | /
7503 // TailMBB
7504 //
7505 // When we find a sequence of selects we attempt to optimize their emission
7506 // by sharing the control flow. Currently we only handle cases where we have
7507 // multiple selects with the exact same condition (same LHS, RHS and CC).
7508 // The selects may be interleaved with other instructions if the other
7509 // instructions meet some requirements we deem safe:
7510 // - They are not pseudo instructions.
7511 // - They are debug instructions. Otherwise,
7512 // - They do not have side-effects, do not access memory and their inputs do
7513 // not depend on the results of the select pseudo-instructions.
7514 // The TrueV/FalseV operands of the selects cannot depend on the result of
7515 // previous selects in the sequence.
7516 // These conditions could be further relaxed. See the X86 target for a
7517 // related approach and more information.
7518
7519 Register LHS = MI.getOperand(i: 1).getReg();
7520 Register RHS;
7521 if (MI.getOperand(i: 2).isReg())
7522 RHS = MI.getOperand(i: 2).getReg();
7523 auto CC = static_cast<unsigned>(MI.getOperand(i: 3).getImm());
7524
7525 SmallVector<MachineInstr *, 4> SelectDebugValues;
7526 SmallSet<Register, 4> SelectDests;
7527 SelectDests.insert(V: MI.getOperand(i: 0).getReg());
7528
7529 MachineInstr *LastSelectPseudo = &MI;
7530 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
7531 SequenceMBBI != E; ++SequenceMBBI) {
7532 if (SequenceMBBI->isDebugInstr())
7533 continue;
7534 if (isSelectPseudo(MI&: *SequenceMBBI)) {
7535 if (SequenceMBBI->getOperand(i: 1).getReg() != LHS ||
7536 !SequenceMBBI->getOperand(i: 2).isReg() ||
7537 SequenceMBBI->getOperand(i: 2).getReg() != RHS ||
7538 SequenceMBBI->getOperand(i: 3).getImm() != CC ||
7539 SelectDests.count(V: SequenceMBBI->getOperand(i: 4).getReg()) ||
7540 SelectDests.count(V: SequenceMBBI->getOperand(i: 5).getReg()))
7541 break;
7542 LastSelectPseudo = &*SequenceMBBI;
7543 SequenceMBBI->collectDebugValues(DbgValues&: SelectDebugValues);
7544 SelectDests.insert(V: SequenceMBBI->getOperand(i: 0).getReg());
7545 continue;
7546 }
7547 if (SequenceMBBI->hasUnmodeledSideEffects() ||
7548 SequenceMBBI->mayLoadOrStore() ||
7549 SequenceMBBI->usesCustomInsertionHook())
7550 break;
7551 if (llvm::any_of(Range: SequenceMBBI->operands(), P: [&](MachineOperand &MO) {
7552 return MO.isReg() && MO.isUse() && SelectDests.count(V: MO.getReg());
7553 }))
7554 break;
7555 }
7556
7557 const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
7558 const BasicBlock *LLVM_BB = BB->getBasicBlock();
7559 DebugLoc DL = MI.getDebugLoc();
7560 MachineFunction::iterator I = ++BB->getIterator();
7561
7562 MachineBasicBlock *HeadMBB = BB;
7563 MachineFunction *F = BB->getParent();
7564 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
7565 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
7566
7567 F->insert(MBBI: I, MBB: IfFalseMBB);
7568 F->insert(MBBI: I, MBB: TailMBB);
7569
7570 // Set the call frame size on entry to the new basic blocks.
7571 unsigned CallFrameSize = TII.getCallFrameSizeAt(MI&: *LastSelectPseudo);
7572 IfFalseMBB->setCallFrameSize(CallFrameSize);
7573 TailMBB->setCallFrameSize(CallFrameSize);
7574
7575 // Transfer debug instructions associated with the selects to TailMBB.
7576 for (MachineInstr *DebugInstr : SelectDebugValues) {
7577 TailMBB->push_back(MI: DebugInstr->removeFromParent());
7578 }
7579
7580 // Move all instructions after the sequence to TailMBB.
7581 TailMBB->splice(Where: TailMBB->end(), Other: HeadMBB,
7582 From: std::next(x: LastSelectPseudo->getIterator()), To: HeadMBB->end());
7583 // Update machine-CFG edges by transferring all successors of the current
7584 // block to the new block which will contain the Phi nodes for the selects.
7585 TailMBB->transferSuccessorsAndUpdatePHIs(FromMBB: HeadMBB);
7586 // Set the successors for HeadMBB.
7587 HeadMBB->addSuccessor(Succ: IfFalseMBB);
7588 HeadMBB->addSuccessor(Succ: TailMBB);
7589
7590 // Insert appropriate branch.
7591 if (MI.getOperand(i: 2).isImm())
7592 BuildMI(BB: HeadMBB, MIMD: DL, MCID: TII.get(Opcode: CC))
7593 .addReg(RegNo: LHS)
7594 .addImm(Val: MI.getOperand(i: 2).getImm())
7595 .addMBB(MBB: TailMBB);
7596 else
7597 BuildMI(BB: HeadMBB, MIMD: DL, MCID: TII.get(Opcode: CC)).addReg(RegNo: LHS).addReg(RegNo: RHS).addMBB(MBB: TailMBB);
7598
7599 // IfFalseMBB just falls through to TailMBB.
7600 IfFalseMBB->addSuccessor(Succ: TailMBB);
7601
7602 // Create PHIs for all of the select pseudo-instructions.
7603 auto SelectMBBI = MI.getIterator();
7604 auto SelectEnd = std::next(x: LastSelectPseudo->getIterator());
7605 auto InsertionPoint = TailMBB->begin();
7606 while (SelectMBBI != SelectEnd) {
7607 auto Next = std::next(x: SelectMBBI);
7608 if (isSelectPseudo(MI&: *SelectMBBI)) {
7609 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
7610 BuildMI(BB&: *TailMBB, I: InsertionPoint, MIMD: SelectMBBI->getDebugLoc(),
7611 MCID: TII.get(Opcode: LoongArch::PHI), DestReg: SelectMBBI->getOperand(i: 0).getReg())
7612 .addReg(RegNo: SelectMBBI->getOperand(i: 4).getReg())
7613 .addMBB(MBB: HeadMBB)
7614 .addReg(RegNo: SelectMBBI->getOperand(i: 5).getReg())
7615 .addMBB(MBB: IfFalseMBB);
7616 SelectMBBI->eraseFromParent();
7617 }
7618 SelectMBBI = Next;
7619 }
7620
7621 F->getProperties().resetNoPHIs();
7622 return TailMBB;
7623}
7624
7625MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
7626 MachineInstr &MI, MachineBasicBlock *BB) const {
7627 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7628 DebugLoc DL = MI.getDebugLoc();
7629
7630 switch (MI.getOpcode()) {
7631 default:
7632 llvm_unreachable("Unexpected instr type to insert");
7633 case LoongArch::DIV_W:
7634 case LoongArch::DIV_WU:
7635 case LoongArch::MOD_W:
7636 case LoongArch::MOD_WU:
7637 case LoongArch::DIV_D:
7638 case LoongArch::DIV_DU:
7639 case LoongArch::MOD_D:
7640 case LoongArch::MOD_DU:
7641 return insertDivByZeroTrap(MI, MBB: BB);
7642 break;
7643 case LoongArch::WRFCSR: {
7644 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::MOVGR2FCSR),
7645 DestReg: LoongArch::FCSR0 + MI.getOperand(i: 0).getImm())
7646 .addReg(RegNo: MI.getOperand(i: 1).getReg());
7647 MI.eraseFromParent();
7648 return BB;
7649 }
7650 case LoongArch::RDFCSR: {
7651 MachineInstr *ReadFCSR =
7652 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::MOVFCSR2GR),
7653 DestReg: MI.getOperand(i: 0).getReg())
7654 .addReg(RegNo: LoongArch::FCSR0 + MI.getOperand(i: 1).getImm());
7655 ReadFCSR->getOperand(i: 1).setIsUndef();
7656 MI.eraseFromParent();
7657 return BB;
7658 }
7659 case LoongArch::Select_GPR_Using_CC_GPR:
7660 return emitSelectPseudo(MI, BB, Subtarget);
7661 case LoongArch::BuildPairF64Pseudo:
7662 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
7663 case LoongArch::SplitPairF64Pseudo:
7664 return emitSplitPairF64Pseudo(MI, BB, Subtarget);
7665 case LoongArch::PseudoVBZ:
7666 case LoongArch::PseudoVBZ_B:
7667 case LoongArch::PseudoVBZ_H:
7668 case LoongArch::PseudoVBZ_W:
7669 case LoongArch::PseudoVBZ_D:
7670 case LoongArch::PseudoVBNZ:
7671 case LoongArch::PseudoVBNZ_B:
7672 case LoongArch::PseudoVBNZ_H:
7673 case LoongArch::PseudoVBNZ_W:
7674 case LoongArch::PseudoVBNZ_D:
7675 case LoongArch::PseudoXVBZ:
7676 case LoongArch::PseudoXVBZ_B:
7677 case LoongArch::PseudoXVBZ_H:
7678 case LoongArch::PseudoXVBZ_W:
7679 case LoongArch::PseudoXVBZ_D:
7680 case LoongArch::PseudoXVBNZ:
7681 case LoongArch::PseudoXVBNZ_B:
7682 case LoongArch::PseudoXVBNZ_H:
7683 case LoongArch::PseudoXVBNZ_W:
7684 case LoongArch::PseudoXVBNZ_D:
7685 return emitVecCondBranchPseudo(MI, BB, Subtarget);
7686 case LoongArch::PseudoXVINSGR2VR_B:
7687 case LoongArch::PseudoXVINSGR2VR_H:
7688 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
7689 case LoongArch::PseudoCTPOP:
7690 return emitPseudoCTPOP(MI, BB, Subtarget);
7691 case LoongArch::PseudoVMSKLTZ_B:
7692 case LoongArch::PseudoVMSKLTZ_H:
7693 case LoongArch::PseudoVMSKLTZ_W:
7694 case LoongArch::PseudoVMSKLTZ_D:
7695 case LoongArch::PseudoVMSKGEZ_B:
7696 case LoongArch::PseudoVMSKEQZ_B:
7697 case LoongArch::PseudoVMSKNEZ_B:
7698 case LoongArch::PseudoXVMSKLTZ_B:
7699 case LoongArch::PseudoXVMSKLTZ_H:
7700 case LoongArch::PseudoXVMSKLTZ_W:
7701 case LoongArch::PseudoXVMSKLTZ_D:
7702 case LoongArch::PseudoXVMSKGEZ_B:
7703 case LoongArch::PseudoXVMSKEQZ_B:
7704 case LoongArch::PseudoXVMSKNEZ_B:
7705 return emitPseudoVMSKCOND(MI, BB, Subtarget);
7706 case TargetOpcode::STATEPOINT:
7707 // STATEPOINT is a pseudo instruction which has no implicit defs/uses,
7708 // while the bl call instruction (which the statepoint is ultimately
7709 // lowered to) has an implicit def. This def is early-clobber as it is
7710 // written at the moment of the call, before any use is read.
7711 // Add this implicit dead def here as a workaround.
7712 MI.addOperand(MF&: *MI.getMF(),
7713 Op: MachineOperand::CreateReg(
7714 Reg: LoongArch::R1, /*isDef*/ true,
7715 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
7716 /*isUndef*/ false, /*isEarlyClobber*/ true));
7717 if (!Subtarget.is64Bit())
7718 report_fatal_error(reason: "STATEPOINT is only supported on 64-bit targets");
7719 return emitPatchPoint(MI, MBB: BB);
7720 }
7721}
7722
7723bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
7724 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
7725 unsigned *Fast) const {
7726 if (!Subtarget.hasUAL())
7727 return false;
7728
7729 // TODO: set reasonable speed number.
7730 if (Fast)
7731 *Fast = 1;
7732 return true;
7733}
7734
7735//===----------------------------------------------------------------------===//
7736// Calling Convention Implementation
7737//===----------------------------------------------------------------------===//
7738
7739 // Eight general-purpose registers a0-a7 are used for passing integer
7740 // arguments, with a0-a1 reused for return values. Generally, the GPRs are
7741 // used to pass fixed-point arguments, and also floating-point arguments
7742 // when no FPR is available or when targeting a soft-float ABI.
7743const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
7744 LoongArch::R7, LoongArch::R8, LoongArch::R9,
7745 LoongArch::R10, LoongArch::R11};
7746
7747// PreserveNone calling convention:
7748// Arguments may be passed in any general-purpose registers except:
7749// - R1 : return address register
7750// - R22 : frame pointer
7751// - R31 : base pointer
7752//
7753// All general-purpose registers are treated as caller-saved,
7754// except R1 (RA) and R22 (FP).
7755//
7756// Non-volatile registers are allocated first so that a function
7757// can call normal functions without having to spill and reload
7758// argument registers.
7759const MCPhysReg PreserveNoneArgGPRs[] = {
7760 LoongArch::R23, LoongArch::R24, LoongArch::R25, LoongArch::R26,
7761 LoongArch::R27, LoongArch::R28, LoongArch::R29, LoongArch::R30,
7762 LoongArch::R4, LoongArch::R5, LoongArch::R6, LoongArch::R7,
7763 LoongArch::R8, LoongArch::R9, LoongArch::R10, LoongArch::R11,
7764 LoongArch::R12, LoongArch::R13, LoongArch::R14, LoongArch::R15,
7765 LoongArch::R16, LoongArch::R17, LoongArch::R18, LoongArch::R19,
7766 LoongArch::R20};
7767
7768 // Eight floating-point registers fa0-fa7 are used for passing
7769 // floating-point arguments, with fa0-fa1 also used to return values.
7770const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
7771 LoongArch::F3, LoongArch::F4, LoongArch::F5,
7772 LoongArch::F6, LoongArch::F7};
7773// FPR32 and FPR64 alias each other.
7774const MCPhysReg ArgFPR64s[] = {
7775 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
7776 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
7777
7778const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
7779 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
7780 LoongArch::VR6, LoongArch::VR7};
7781
7782const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
7783 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
7784 LoongArch::XR6, LoongArch::XR7};
7785
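// Allocate the next argument GPR, using the preserve_none register order for
// non-variadic preserve_none calls and the standard a0-a7 order otherwise.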
7786static Register allocateArgGPR(CCState &State) {
7787 switch (State.getCallingConv()) {
7788 case CallingConv::PreserveNone:
7789 if (!State.isVarArg())
7790 return State.AllocateReg(Regs: PreserveNoneArgGPRs);
7791 [[fallthrough]];
7792 default:
7793 return State.AllocateReg(Regs: ArgGPRs);
7794 }
7795}
7796
7797// Pass a 2*GRLen argument that has been split into two GRLen values through
7798// registers or the stack as necessary.
7799static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
7800 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
7801 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
7802 ISD::ArgFlagsTy ArgFlags2) {
7803 unsigned GRLenInBytes = GRLen / 8;
7804 if (Register Reg = allocateArgGPR(State)) {
7805 // At least one half can be passed via register.
7806 State.addLoc(V: CCValAssign::getReg(ValNo: VA1.getValNo(), ValVT: VA1.getValVT(), Reg,
7807 LocVT: VA1.getLocVT(), HTP: CCValAssign::Full));
7808 } else {
7809 // Both halves must be passed on the stack, with proper alignment.
7810 Align StackAlign =
7811 std::max(a: Align(GRLenInBytes), b: ArgFlags1.getNonZeroOrigAlign());
7812 State.addLoc(
7813 V: CCValAssign::getMem(ValNo: VA1.getValNo(), ValVT: VA1.getValVT(),
7814 Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: StackAlign),
7815 LocVT: VA1.getLocVT(), HTP: CCValAssign::Full));
7816 State.addLoc(V: CCValAssign::getMem(
7817 ValNo: ValNo2, ValVT: ValVT2, Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: Align(GRLenInBytes)),
7818 LocVT: LocVT2, HTP: CCValAssign::Full));
7819 return false;
7820 }
7821 if (Register Reg = allocateArgGPR(State)) {
7822 // The second half can also be passed via register.
7823 State.addLoc(
7824 V: CCValAssign::getReg(ValNo: ValNo2, ValVT: ValVT2, Reg, LocVT: LocVT2, HTP: CCValAssign::Full));
7825 } else {
7826 // The second half is passed via the stack, without additional alignment.
7827 State.addLoc(V: CCValAssign::getMem(
7828 ValNo: ValNo2, ValVT: ValVT2, Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: Align(GRLenInBytes)),
7829 LocVT: LocVT2, HTP: CCValAssign::Full));
7830 }
7831 return false;
7832}
7833
7834// Implements the LoongArch calling convention. Returns true upon failure.
7835static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
7836 unsigned ValNo, MVT ValVT,
7837 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
7838 CCState &State, bool IsRet, Type *OrigTy) {
7839 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
7840 assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
7841 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
7842 MVT LocVT = ValVT;
7843
7844 // Any return value split into more than two values can't be returned
7845 // directly.
7846 if (IsRet && ValNo > 1)
7847 return true;
7848
7849 // FP values use GPRs for soft-float ABIs, variadic arguments, or when no FPR is available.
7850 bool UseGPRForFloat = true;
7851
7852 switch (ABI) {
7853 default:
7854 llvm_unreachable("Unexpected ABI");
7855 break;
7856 case LoongArchABI::ABI_ILP32F:
7857 case LoongArchABI::ABI_LP64F:
7858 case LoongArchABI::ABI_ILP32D:
7859 case LoongArchABI::ABI_LP64D:
7860 UseGPRForFloat = ArgFlags.isVarArg();
7861 break;
7862 case LoongArchABI::ABI_ILP32S:
7863 case LoongArchABI::ABI_LP64S:
7864 break;
7865 }
7866
7867 // If this is a variadic argument, the LoongArch calling convention requires
7868 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
7869 // byte alignment. An aligned register should be used regardless of whether
7870 // the original argument was split during legalisation or not. The argument
7871 // will not be passed by registers if the original type is larger than
7872 // 2*GRLen, so the register alignment rule does not apply.
7873 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
7874 if (ArgFlags.isVarArg() &&
7875 ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
7876 DL.getTypeAllocSize(Ty: OrigTy) == TwoGRLenInBytes) {
7877 unsigned RegIdx = State.getFirstUnallocated(Regs: ArgGPRs);
7878 // Skip 'odd' register if necessary.
7879 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
7880 State.AllocateReg(Regs: ArgGPRs);
7881 }
7882
7883 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
7884 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
7885 State.getPendingArgFlags();
7886
7887 assert(PendingLocs.size() == PendingArgFlags.size() &&
7888 "PendingLocs and PendingArgFlags out of sync");
7889
7890 // FPR32 and FPR64 alias each other.
7891 if (State.getFirstUnallocated(Regs: ArgFPR32s) == std::size(ArgFPR32s))
7892 UseGPRForFloat = true;
7893
7894 if (UseGPRForFloat && ValVT == MVT::f32) {
7895 LocVT = GRLenVT;
7896 LocInfo = CCValAssign::BCvt;
7897 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
7898 LocVT = MVT::i64;
7899 LocInfo = CCValAssign::BCvt;
7900 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
7901 // Handle passing f64 on LA32D with a soft float ABI or when floating point
7902 // registers are exhausted.
7903 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
7904 // Depending on available argument GPRs, f64 may be passed in a pair of
7905 // GPRs, split between a GPR and the stack, or passed completely on the
7906 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
7907 // cases.
7908 MCRegister Reg = allocateArgGPR(State);
7909 if (!Reg) {
7910 int64_t StackOffset = State.AllocateStack(Size: 8, Alignment: Align(8));
7911 State.addLoc(
7912 V: CCValAssign::getMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo));
7913 return false;
7914 }
7915 LocVT = MVT::i32;
7916 State.addLoc(V: CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, HTP: LocInfo));
7917 MCRegister HiReg = allocateArgGPR(State);
7918 if (HiReg) {
7919 State.addLoc(
7920 V: CCValAssign::getCustomReg(ValNo, ValVT, Reg: HiReg, LocVT, HTP: LocInfo));
7921 } else {
7922 int64_t StackOffset = State.AllocateStack(Size: 4, Alignment: Align(4));
7923 State.addLoc(
7924 V: CCValAssign::getCustomMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo));
7925 }
7926 return false;
7927 }
7928
7929 // Split arguments might be passed indirectly, so keep track of the pending
7930 // values.
7931 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
7932 LocVT = GRLenVT;
7933 LocInfo = CCValAssign::Indirect;
7934 PendingLocs.push_back(
7935 Elt: CCValAssign::getPending(ValNo, ValVT, LocVT, HTP: LocInfo));
7936 PendingArgFlags.push_back(Elt: ArgFlags);
7937 if (!ArgFlags.isSplitEnd()) {
7938 return false;
7939 }
7940 }
7941
7942 // If the split argument only had two elements, it should be passed directly
7943 // in registers or on the stack.
7944 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
7945 PendingLocs.size() <= 2) {
7946 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
7947 // Apply the normal calling convention rules to the first half of the
7948 // split argument.
7949 CCValAssign VA = PendingLocs[0];
7950 ISD::ArgFlagsTy AF = PendingArgFlags[0];
7951 PendingLocs.clear();
7952 PendingArgFlags.clear();
7953 return CC_LoongArchAssign2GRLen(GRLen, State, VA1: VA, ArgFlags1: AF, ValNo2: ValNo, ValVT2: ValVT, LocVT2: LocVT,
7954 ArgFlags2: ArgFlags);
7955 }
7956
7957 // Allocate to a register if possible, or else a stack slot.
7958 Register Reg;
7959 unsigned StoreSizeBytes = GRLen / 8;
7960 Align StackAlign = Align(GRLen / 8);
7961
7962 if (ValVT == MVT::f32 && !UseGPRForFloat) {
7963 Reg = State.AllocateReg(Regs: ArgFPR32s);
7964 } else if (ValVT == MVT::f64 && !UseGPRForFloat) {
7965 Reg = State.AllocateReg(Regs: ArgFPR64s);
7966 } else if (ValVT.is128BitVector()) {
7967 Reg = State.AllocateReg(Regs: ArgVRs);
7968 UseGPRForFloat = false;
7969 StoreSizeBytes = 16;
7970 StackAlign = Align(16);
7971 } else if (ValVT.is256BitVector()) {
7972 Reg = State.AllocateReg(Regs: ArgXRs);
7973 UseGPRForFloat = false;
7974 StoreSizeBytes = 32;
7975 StackAlign = Align(32);
7976 } else {
7977 Reg = allocateArgGPR(State);
7978 }
7979
7980 unsigned StackOffset =
7981 Reg ? 0 : State.AllocateStack(Size: StoreSizeBytes, Alignment: StackAlign);
7982
7983 // If we reach this point and PendingLocs is non-empty, we must be at the
7984 // end of a split argument that must be passed indirectly.
7985 if (!PendingLocs.empty()) {
7986 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
7987 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
7988 for (auto &It : PendingLocs) {
7989 if (Reg)
7990 It.convertToReg(Reg);
7991 else
7992 It.convertToMem(Offset: StackOffset);
7993 State.addLoc(V: It);
7994 }
7995 PendingLocs.clear();
7996 PendingArgFlags.clear();
7997 return false;
7998 }
7999 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
8000 "Expected an GRLenVT at this stage");
8001
8002 if (Reg) {
8003 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, HTP: LocInfo));
8004 return false;
8005 }
8006
8007 // When a floating-point value is passed on the stack, no bit-cast is needed.
8008 if (ValVT.isFloatingPoint()) {
8009 LocVT = ValVT;
8010 LocInfo = CCValAssign::Full;
8011 }
8012
8013 State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo));
8014 return false;
8015}
8016
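// Apply the calling-convention assignment function Fn to every incoming value
// (formal arguments, or return values when IsRet is true), recording the
// resulting locations in CCInfo. A type the convention cannot handle is a
// fatal error.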
8017void LoongArchTargetLowering::analyzeInputArgs(
8018 MachineFunction &MF, CCState &CCInfo,
8019 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
8020 LoongArchCCAssignFn Fn) const {
8021 FunctionType *FType = MF.getFunction().getFunctionType();
8022 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
8023 MVT ArgVT = Ins[i].VT;
8024 Type *ArgTy = nullptr;
8025 if (IsRet)
8026 ArgTy = FType->getReturnType();
8027 else if (Ins[i].isOrigArg())
8028 ArgTy = FType->getParamType(i: Ins[i].getOrigArgIndex());
8029 LoongArchABI::ABI ABI =
8030 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8031 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
8032 CCInfo, IsRet, ArgTy)) {
8033 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
8034 << '\n');
8035 llvm_unreachable("");
8036 }
8037 }
8038}
8039
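// Apply the calling-convention assignment function Fn to every outgoing value
// (call arguments or return values), passing the original IR type from the
// call lowering info when it is available.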
8040void LoongArchTargetLowering::analyzeOutputArgs(
8041 MachineFunction &MF, CCState &CCInfo,
8042 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
8043 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
8044 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8045 MVT ArgVT = Outs[i].VT;
8046 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
8047 LoongArchABI::ABI ABI =
8048 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8049 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
8050 CCInfo, IsRet, OrigTy)) {
8051 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
8052 << "\n");
8053 llvm_unreachable("");
8054 }
8055 }
8056}
8057
8058// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
8059// values.
8060static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
8061 const CCValAssign &VA, const SDLoc &DL) {
8062 switch (VA.getLocInfo()) {
8063 default:
8064 llvm_unreachable("Unexpected CCValAssign::LocInfo");
8065 case CCValAssign::Full:
8066 case CCValAssign::Indirect:
8067 break;
8068 case CCValAssign::BCvt:
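    // On LA64, an f32 passed in a 64-bit GPR is moved with MOVGR2FR_W_LA64;
    // all other bit-converted values use a plain bitcast.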
8069 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
8070 Val = DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64, DL, VT: MVT::f32, Operand: Val);
8071 else
8072 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: VA.getValVT(), Operand: Val);
8073 break;
8074 }
8075 return Val;
8076}
8077
8078static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
8079 const CCValAssign &VA, const SDLoc &DL,
8080 const ISD::InputArg &In,
8081 const LoongArchTargetLowering &TLI) {
8082 MachineFunction &MF = DAG.getMachineFunction();
8083 MachineRegisterInfo &RegInfo = MF.getRegInfo();
8084 EVT LocVT = VA.getLocVT();
8085 SDValue Val;
8086 const TargetRegisterClass *RC = TLI.getRegClassFor(VT: LocVT.getSimpleVT());
8087 Register VReg = RegInfo.createVirtualRegister(RegClass: RC);
8088 RegInfo.addLiveIn(Reg: VA.getLocReg(), vreg: VReg);
8089 Val = DAG.getCopyFromReg(Chain, dl: DL, Reg: VReg, VT: LocVT);
8090
8091 // If input is sign extended from 32 bits, note it for the OptW pass.
8092 if (In.isOrigArg()) {
8093 Argument *OrigArg = MF.getFunction().getArg(i: In.getOrigArgIndex());
8094 if (OrigArg->getType()->isIntegerTy()) {
8095 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
8096 // An input zero extended from i31 can also be considered sign extended.
8097 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
8098 (BitWidth < 32 && In.Flags.isZExt())) {
8099 LoongArchMachineFunctionInfo *LAFI =
8100 MF.getInfo<LoongArchMachineFunctionInfo>();
8101 LAFI->addSExt32Register(Reg: VReg);
8102 }
8103 }
8104 }
8105
8106 return convertLocVTToValVT(DAG, Val, VA, DL);
8107}
8108
8109// The caller is responsible for loading the full value if the argument is
8110// passed with CCValAssign::Indirect.
8111static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
8112 const CCValAssign &VA, const SDLoc &DL) {
8113 MachineFunction &MF = DAG.getMachineFunction();
8114 MachineFrameInfo &MFI = MF.getFrameInfo();
8115 EVT ValVT = VA.getValVT();
8116 int FI = MFI.CreateFixedObject(Size: ValVT.getStoreSize(), SPOffset: VA.getLocMemOffset(),
8117 /*IsImmutable=*/true);
8118 SDValue FIN = DAG.getFrameIndex(
8119 FI, VT: MVT::getIntegerVT(BitWidth: DAG.getDataLayout().getPointerSizeInBits(AS: 0)));
8120
8121 ISD::LoadExtType ExtType;
8122 switch (VA.getLocInfo()) {
8123 default:
8124 llvm_unreachable("Unexpected CCValAssign::LocInfo");
8125 case CCValAssign::Full:
8126 case CCValAssign::Indirect:
8127 case CCValAssign::BCvt:
8128 ExtType = ISD::NON_EXTLOAD;
8129 break;
8130 }
8131 return DAG.getExtLoad(
8132 ExtType, dl: DL, VT: VA.getLocVT(), Chain, Ptr: FIN,
8133 PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI), MemVT: ValVT);
8134}
8135
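// Reassemble an f64 that was split for the LA32 soft-float (or FPR-exhausted)
// path: the low half always arrives in a GPR, while the high half may be in a
// second GPR or on the stack.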
8136static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain,
8137 const CCValAssign &VA,
8138 const CCValAssign &HiVA,
8139 const SDLoc &DL) {
8140 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
8141 "Unexpected VA");
8142 MachineFunction &MF = DAG.getMachineFunction();
8143 MachineFrameInfo &MFI = MF.getFrameInfo();
8144 MachineRegisterInfo &RegInfo = MF.getRegInfo();
8145
8146 assert(VA.isRegLoc() && "Expected register VA assignment");
8147
8148 Register LoVReg = RegInfo.createVirtualRegister(RegClass: &LoongArch::GPRRegClass);
8149 RegInfo.addLiveIn(Reg: VA.getLocReg(), vreg: LoVReg);
8150 SDValue Lo = DAG.getCopyFromReg(Chain, dl: DL, Reg: LoVReg, VT: MVT::i32);
8151 SDValue Hi;
8152 if (HiVA.isMemLoc()) {
8153 // Second half of f64 is passed on the stack.
8154 int FI = MFI.CreateFixedObject(Size: 4, SPOffset: HiVA.getLocMemOffset(),
8155 /*IsImmutable=*/true);
8156 SDValue FIN = DAG.getFrameIndex(FI, VT: MVT::i32);
8157 Hi = DAG.getLoad(VT: MVT::i32, dl: DL, Chain, Ptr: FIN,
8158 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI));
8159 } else {
8160 // Second half of f64 is passed in another GPR.
8161 Register HiVReg = RegInfo.createVirtualRegister(RegClass: &LoongArch::GPRRegClass);
8162 RegInfo.addLiveIn(Reg: HiVA.getLocReg(), vreg: HiVReg);
8163 Hi = DAG.getCopyFromReg(Chain, dl: DL, Reg: HiVReg, VT: MVT::i32);
8164 }
8165 return DAG.getNode(Opcode: LoongArchISD::BUILD_PAIR_F64, DL, VT: MVT::f64, N1: Lo, N2: Hi);
8166}
8167
8168static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
8169 const CCValAssign &VA, const SDLoc &DL) {
8170 EVT LocVT = VA.getLocVT();
8171
8172 switch (VA.getLocInfo()) {
8173 default:
8174 llvm_unreachable("Unexpected CCValAssign::LocInfo");
8175 case CCValAssign::Full:
8176 break;
8177 case CCValAssign::BCvt:
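    // On LA64, an f32 headed for a 64-bit GPR is moved with MOVFR2GR_S_LA64;
    // all other bit-converted values use a plain bitcast.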
8178 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
8179 Val = DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Val);
8180 else
8181 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: LocVT, Operand: Val);
8182 break;
8183 }
8184 return Val;
8185}
8186
8187static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
8188 CCValAssign::LocInfo LocInfo,
8189 ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
8190 CCState &State) {
8191 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
8192 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
8193 // s0 s1 s2 s3 s4 s5 s6 s7 s8
8194 static const MCPhysReg GPRList[] = {
8195 LoongArch::R23, LoongArch::R24, LoongArch::R25,
8196 LoongArch::R26, LoongArch::R27, LoongArch::R28,
8197 LoongArch::R29, LoongArch::R30, LoongArch::R31};
8198 if (MCRegister Reg = State.AllocateReg(Regs: GPRList)) {
8199 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, HTP: LocInfo));
8200 return false;
8201 }
8202 }
8203
8204 if (LocVT == MVT::f32) {
8205 // Pass in STG registers: F1, F2, F3, F4
8206 // fs0,fs1,fs2,fs3
8207 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
8208 LoongArch::F26, LoongArch::F27};
8209 if (MCRegister Reg = State.AllocateReg(Regs: FPR32List)) {
8210 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, HTP: LocInfo));
8211 return false;
8212 }
8213 }
8214
8215 if (LocVT == MVT::f64) {
8216 // Pass in STG registers: D1, D2, D3, D4
8217 // fs4,fs5,fs6,fs7
8218 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
8219 LoongArch::F30_64, LoongArch::F31_64};
8220 if (MCRegister Reg = State.AllocateReg(Regs: FPR64List)) {
8221 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, HTP: LocInfo));
8222 return false;
8223 }
8224 }
8225
8226 report_fatal_error(reason: "No registers left in GHC calling convention");
8227 return true;
8228}
8229
8230// Transform physical registers into virtual registers.
8231SDValue LoongArchTargetLowering::LowerFormalArguments(
8232 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
8233 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
8234 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
8235
8236 MachineFunction &MF = DAG.getMachineFunction();
8237 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
8238
8239 switch (CallConv) {
8240 default:
8241 llvm_unreachable("Unsupported calling convention");
8242 case CallingConv::C:
8243 case CallingConv::Fast:
8244 case CallingConv::PreserveNone:
8245 case CallingConv::PreserveMost:
8246 break;
8247 case CallingConv::GHC:
8248 if (!MF.getSubtarget().hasFeature(Feature: LoongArch::FeatureBasicF) ||
8249 !MF.getSubtarget().hasFeature(Feature: LoongArch::FeatureBasicD))
8250 report_fatal_error(
8251 reason: "GHC calling convention requires the F and D extensions");
8252 }
8253
8254 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
8255 MVT GRLenVT = Subtarget.getGRLenVT();
8256 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
  // Used with varargs to accumulate store chains.
8258 std::vector<SDValue> OutChains;
8259
8260 // Assign locations to all of the incoming arguments.
8261 SmallVector<CCValAssign> ArgLocs;
8262 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
8263
8264 if (CallConv == CallingConv::GHC)
8265 CCInfo.AnalyzeFormalArguments(Ins, Fn: CC_LoongArch_GHC);
8266 else
8267 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, Fn: CC_LoongArch);
8268
8269 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
8270 CCValAssign &VA = ArgLocs[i];
8271 SDValue ArgValue;
8272 // Passing f64 on LA32D with a soft float ABI must be handled as a special
8273 // case.
8274 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8275 assert(VA.needsCustom());
8276 ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, HiVA: ArgLocs[++i], DL);
8277 } else if (VA.isRegLoc())
8278 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, In: Ins[InsIdx], TLI: *this);
8279 else
8280 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
8281 if (VA.getLocInfo() == CCValAssign::Indirect) {
8282 // If the original argument was split and passed by reference, we need to
8283 // load all parts of it here (using the same address).
8284 InVals.push_back(Elt: DAG.getLoad(VT: VA.getValVT(), dl: DL, Chain, Ptr: ArgValue,
8285 PtrInfo: MachinePointerInfo()));
8286 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
8287 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
8288 assert(ArgPartOffset == 0);
8289 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
8290 CCValAssign &PartVA = ArgLocs[i + 1];
8291 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
8292 SDValue Offset = DAG.getIntPtrConstant(Val: PartOffset, DL);
8293 SDValue Address = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: ArgValue, N2: Offset);
8294 InVals.push_back(Elt: DAG.getLoad(VT: PartVA.getValVT(), dl: DL, Chain, Ptr: Address,
8295 PtrInfo: MachinePointerInfo()));
8296 ++i;
8297 ++InsIdx;
8298 }
8299 continue;
8300 }
8301 InVals.push_back(Elt: ArgValue);
8302 if (Ins[InsIdx].Flags.isByVal())
8303 LoongArchFI->addIncomingByValArgs(Val: ArgValue);
8304 }
8305
8306 if (IsVarArg) {
8307 ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
8308 unsigned Idx = CCInfo.getFirstUnallocated(Regs: ArgRegs);
8309 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
8310 MachineFrameInfo &MFI = MF.getFrameInfo();
8311 MachineRegisterInfo &RegInfo = MF.getRegInfo();
8312
8313 // Offset of the first variable argument from stack pointer, and size of
8314 // the vararg save area. For now, the varargs save area is either zero or
8315 // large enough to hold a0-a7.
8316 int VaArgOffset, VarArgsSaveSize;
8317
8318 // If all registers are allocated, then all varargs must be passed on the
8319 // stack and we don't need to save any argregs.
8320 if (ArgRegs.size() == Idx) {
8321 VaArgOffset = CCInfo.getStackSize();
8322 VarArgsSaveSize = 0;
8323 } else {
8324 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
8325 VaArgOffset = -VarArgsSaveSize;
8326 }
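    // For example, on LA64 with three named GPR arguments (a0-a2 used), the
    // five remaining registers a3-a7 are saved, so VarArgsSaveSize starts at
    // 5 * 8 = 40 bytes and the odd-register padding below grows it to 48.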
8327
8328 // Record the frame index of the first variable argument
8329 // which is a value necessary to VASTART.
8330 int FI = MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset, IsImmutable: true);
8331 LoongArchFI->setVarArgsFrameIndex(FI);
8332
    // If saving an odd number of registers, create an extra stack slot to
    // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
    // offsets to even-numbered registers remain 2*GRLen-aligned.
8336 if (Idx % 2) {
8337 MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset - (int)GRLenInBytes,
8338 IsImmutable: true);
8339 VarArgsSaveSize += GRLenInBytes;
8340 }
8341
8342 // Copy the integer registers that may have been used for passing varargs
8343 // to the vararg save area.
8344 for (unsigned I = Idx; I < ArgRegs.size();
8345 ++I, VaArgOffset += GRLenInBytes) {
8346 const Register Reg = RegInfo.createVirtualRegister(RegClass: RC);
8347 RegInfo.addLiveIn(Reg: ArgRegs[I], vreg: Reg);
8348 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl: DL, Reg, VT: GRLenVT);
8349 FI = MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset, IsImmutable: true);
8350 SDValue PtrOff = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout()));
8351 SDValue Store = DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: PtrOff,
8352 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI));
8353 cast<StoreSDNode>(Val: Store.getNode())
8354 ->getMemOperand()
8355 ->setValue((Value *)nullptr);
8356 OutChains.push_back(x: Store);
8357 }
8358 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
8359 }
8360
8361 LoongArchFI->setArgumentStackSize(CCInfo.getStackSize());
8362
8363 // All stores are grouped in one node to allow the matching between
8364 // the size of Ins and InVals. This only happens for vararg functions.
8365 if (!OutChains.empty()) {
8366 OutChains.push_back(x: Chain);
8367 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
8368 }
8369
8370 return Chain;
8371}
8372
8373bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
8374 return CI->isTailCall();
8375}
8376
// Check whether the return value is used only as a return value; otherwise we
// can't perform a tail call.
8379bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
8380 SDValue &Chain) const {
8381 if (N->getNumValues() != 1)
8382 return false;
8383 if (!N->hasNUsesOfValue(NUses: 1, Value: 0))
8384 return false;
8385
8386 SDNode *Copy = *N->user_begin();
8387 if (Copy->getOpcode() != ISD::CopyToReg)
8388 return false;
8389
8390 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
8391 // isn't safe to perform a tail call.
8392 if (Copy->getGluedNode())
8393 return false;
8394
8395 // The copy must be used by a LoongArchISD::RET, and nothing else.
8396 bool HasRet = false;
8397 for (SDNode *Node : Copy->users()) {
8398 if (Node->getOpcode() != LoongArchISD::RET)
8399 return false;
8400 HasRet = true;
8401 }
8402
8403 if (!HasRet)
8404 return false;
8405
8406 Chain = Copy->getOperand(Num: 0);
8407 return true;
8408}
8409
8410// Check whether the call is eligible for tail call optimization.
8411bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
8412 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
8413 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
8414
8415 auto CalleeCC = CLI.CallConv;
8416 auto &Outs = CLI.Outs;
8417 auto &Caller = MF.getFunction();
8418 auto CallerCC = Caller.getCallingConv();
8419 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
8420
8421 // If the stack arguments for this call do not fit into our own save area then
8422 // the call cannot be made tail.
8423 if (CCInfo.getStackSize() > LoongArchFI->getArgumentStackSize())
8424 return false;
8425
8426 // Do not tail call opt if any parameters need to be passed indirectly.
8427 for (auto &VA : ArgLocs)
8428 if (VA.getLocInfo() == CCValAssign::Indirect)
8429 return false;
8430
8431 // Do not tail call opt if either caller or callee uses struct return
8432 // semantics.
8433 auto IsCallerStructRet = Caller.hasStructRetAttr();
8434 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
8435 if (IsCallerStructRet != IsCalleeStructRet)
8436 return false;
8437
8438 // Do not tail call opt if caller's and callee's byval arguments do not match.
8439 for (unsigned i = 0, j = 0; i < Outs.size(); i++) {
8440 if (!Outs[i].Flags.isByVal())
8441 continue;
8442 if (j++ >= LoongArchFI->getIncomingByValArgsSize())
8443 return false;
8444 if (LoongArchFI->getIncomingByValArgs(Idx: i).getValueType() != Outs[i].ArgVT)
8445 return false;
8446 }
8447
8448 // The callee has to preserve all registers the caller needs to preserve.
8449 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
8450 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
8451 if (CalleeCC != CallerCC) {
8452 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
8453 if (!TRI->regmaskSubsetEqual(mask0: CallerPreserved, mask1: CalleePreserved))
8454 return false;
8455 }
8456
8457 // If the callee takes no arguments then go on to check the results of the
8458 // call.
8459 const MachineRegisterInfo &MRI = MF.getRegInfo();
8460 const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
8461 if (!parametersInCSRMatch(MRI, CallerPreservedMask: CallerPreserved, ArgLocs, OutVals))
8462 return false;
8463
8464 return true;
8465}
8466
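// Return the data layout's preferred alignment for the IR type that
// corresponds to VT.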
8467static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
8468 return DAG.getDataLayout().getPrefTypeAlign(
8469 Ty: VT.getTypeForEVT(Context&: *DAG.getContext()));
8470}
8471
8472// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
8473// and output parameter nodes.
8474SDValue
8475LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
8476 SmallVectorImpl<SDValue> &InVals) const {
8477 SelectionDAG &DAG = CLI.DAG;
8478 SDLoc &DL = CLI.DL;
8479 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
8480 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
8481 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
8482 SDValue Chain = CLI.Chain;
8483 SDValue Callee = CLI.Callee;
8484 CallingConv::ID CallConv = CLI.CallConv;
8485 bool IsVarArg = CLI.IsVarArg;
8486 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
8487 MVT GRLenVT = Subtarget.getGRLenVT();
8488 bool &IsTailCall = CLI.IsTailCall;
8489
8490 MachineFunction &MF = DAG.getMachineFunction();
8491 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
8492
8493 // Analyze the operands of the call, assigning locations to each operand.
8494 SmallVector<CCValAssign> ArgLocs;
8495 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
8496
8497 if (CallConv == CallingConv::GHC)
8498 ArgCCInfo.AnalyzeCallOperands(Outs, Fn: CC_LoongArch_GHC);
8499 else
8500 analyzeOutputArgs(MF, CCInfo&: ArgCCInfo, Outs, /*IsRet=*/false, CLI: &CLI, Fn: CC_LoongArch);
8501
8502 // Check if it's really possible to do a tail call.
8503 if (IsTailCall)
8504 IsTailCall = isEligibleForTailCallOptimization(CCInfo&: ArgCCInfo, CLI, MF, ArgLocs);
8505
8506 if (IsTailCall)
8507 ++NumTailCalls;
8508 else if (CLI.CB && CLI.CB->isMustTailCall())
8509 report_fatal_error(reason: "failed to perform tail call elimination on a call "
8510 "site marked musttail");
8511
8512 // Get a count of how many bytes are to be pushed on the stack.
8513 unsigned NumBytes = ArgCCInfo.getStackSize();
8514
8515 // Create local copies for byval args.
8516 SmallVector<SDValue> ByValArgs;
8517 for (unsigned i = 0, j = 0, e = Outs.size(); i != e; ++i) {
8518 ISD::ArgFlagsTy Flags = Outs[i].Flags;
8519 if (!Flags.isByVal())
8520 continue;
8521
8522 SDValue Arg = OutVals[i];
8523 unsigned Size = Flags.getByValSize();
8524 Align Alignment = Flags.getNonZeroByValAlign();
8525 SDValue SizeNode = DAG.getConstant(Val: Size, DL, VT: GRLenVT);
8526 SDValue Dst;
8527
8528 if (IsTailCall) {
8529 SDValue CallerArg = LoongArchFI->getIncomingByValArgs(Idx: j++);
8530 if (isa<GlobalAddressSDNode>(Val: Arg) || isa<ExternalSymbolSDNode>(Val: Arg) ||
8531 isa<FrameIndexSDNode>(Val: Arg))
8532 Dst = CallerArg;
8533 } else {
8534 int FI =
8535 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/isSpillSlot: false);
8536 Dst = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout()));
8537 }
8538 if (Dst) {
8539 Chain =
8540 DAG.getMemcpy(Chain, dl: DL, Dst, Src: Arg, Size: SizeNode, Alignment,
8541 /*IsVolatile=*/isVol: false,
8542 /*AlwaysInline=*/false, /*CI=*/nullptr, OverrideTailCall: std::nullopt,
8543 DstPtrInfo: MachinePointerInfo(), SrcPtrInfo: MachinePointerInfo());
8544 ByValArgs.push_back(Elt: Dst);
8545 }
8546 }
8547
8548 if (!IsTailCall)
8549 Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: 0, DL: CLI.DL);
8550
8551 // During a tail call, stores to the argument area must happen after all of
8552 // the function's incoming arguments have been loaded because they may alias.
8553 // This is done by folding in a TokenFactor from LowerFormalArguments, but
8554 // there's no point in doing so repeatedly so this tracks whether that's
8555 // happened yet.
8556 bool AfterFormalArgLoads = false;
8557
8558 // Copy argument values to their designated locations.
8559 SmallVector<std::pair<Register, SDValue>> RegsToPass;
8560 SmallVector<SDValue> MemOpChains;
8561 SDValue StackPtr;
8562 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
8563 ++i, ++OutIdx) {
8564 CCValAssign &VA = ArgLocs[i];
8565 SDValue ArgValue = OutVals[OutIdx];
8566 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
8567
8568 // Handle passing f64 on LA32D with a soft float ABI as a special case.
8569 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8570 assert(VA.isRegLoc() && "Expected register VA assignment");
8571 assert(VA.needsCustom());
8572 SDValue SplitF64 =
8573 DAG.getNode(Opcode: LoongArchISD::SPLIT_PAIR_F64, DL,
8574 VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32), N: ArgValue);
8575 SDValue Lo = SplitF64.getValue(R: 0);
8576 SDValue Hi = SplitF64.getValue(R: 1);
8577
8578 Register RegLo = VA.getLocReg();
8579 RegsToPass.push_back(Elt: std::make_pair(x&: RegLo, y&: Lo));
8580
8581 // Get the CCValAssign for the Hi part.
8582 CCValAssign &HiVA = ArgLocs[++i];
8583
8584 if (HiVA.isMemLoc()) {
8585 // Second half of f64 is passed on the stack.
8586 if (!StackPtr.getNode())
8587 StackPtr = DAG.getCopyFromReg(Chain, dl: DL, Reg: LoongArch::R3, VT: PtrVT);
8588 SDValue Address =
8589 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr,
8590 N2: DAG.getIntPtrConstant(Val: HiVA.getLocMemOffset(), DL));
8591 // Emit the store.
8592 MemOpChains.push_back(Elt: DAG.getStore(
8593 Chain, dl: DL, Val: Hi, Ptr: Address,
8594 PtrInfo: MachinePointerInfo::getStack(MF, Offset: HiVA.getLocMemOffset())));
8595 } else {
8596 // Second half of f64 is passed in another GPR.
8597 Register RegHigh = HiVA.getLocReg();
8598 RegsToPass.push_back(Elt: std::make_pair(x&: RegHigh, y&: Hi));
8599 }
8600 continue;
8601 }
8602
8603 // Promote the value if needed.
8604 // For now, only handle fully promoted and indirect arguments.
8605 if (VA.getLocInfo() == CCValAssign::Indirect) {
8606 // Store the argument in a stack slot and pass its address.
8607 Align StackAlign =
8608 std::max(a: getPrefTypeAlign(VT: Outs[OutIdx].ArgVT, DAG),
8609 b: getPrefTypeAlign(VT: ArgValue.getValueType(), DAG));
8610 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
8611 // If the original argument was split and passed by reference, we need to
8612 // store the required parts of it here (and pass just one address).
8613 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
8614 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
8615 assert(ArgPartOffset == 0);
      // Calculate the total size to store. We don't know this up front, so
      // walk the remaining parts of the split argument and collect the info.
8619 SmallVector<std::pair<SDValue, SDValue>> Parts;
8620 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
8621 SDValue PartValue = OutVals[OutIdx + 1];
8622 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
8623 SDValue Offset = DAG.getIntPtrConstant(Val: PartOffset, DL);
8624 EVT PartVT = PartValue.getValueType();
8625
8626 StoredSize += PartVT.getStoreSize();
8627 StackAlign = std::max(a: StackAlign, b: getPrefTypeAlign(VT: PartVT, DAG));
8628 Parts.push_back(Elt: std::make_pair(x&: PartValue, y&: Offset));
8629 ++i;
8630 ++OutIdx;
8631 }
8632 SDValue SpillSlot = DAG.CreateStackTemporary(Bytes: StoredSize, Alignment: StackAlign);
8633 int FI = cast<FrameIndexSDNode>(Val&: SpillSlot)->getIndex();
8634 MemOpChains.push_back(
8635 Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: SpillSlot,
8636 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)));
8637 for (const auto &Part : Parts) {
8638 SDValue PartValue = Part.first;
8639 SDValue PartOffset = Part.second;
8640 SDValue Address =
8641 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: SpillSlot, N2: PartOffset);
8642 MemOpChains.push_back(
8643 Elt: DAG.getStore(Chain, dl: DL, Val: PartValue, Ptr: Address,
8644 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)));
8645 }
8646 ArgValue = SpillSlot;
8647 } else {
8648 ArgValue = convertValVTToLocVT(DAG, Val: ArgValue, VA, DL);
8649 }
8650
8651 // Use local copy if it is a byval arg.
8652 if (Flags.isByVal()) {
8653 if (!IsTailCall || (isa<GlobalAddressSDNode>(Val: ArgValue) ||
8654 isa<ExternalSymbolSDNode>(Val: ArgValue) ||
8655 isa<FrameIndexSDNode>(Val: ArgValue)))
8656 ArgValue = ByValArgs[j++];
8657 }
8658
8659 if (VA.isRegLoc()) {
8660 // Queue up the argument copies and emit them at the end.
8661 RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: ArgValue));
8662 } else {
8663 assert(VA.isMemLoc() && "Argument not register or memory");
8664 SDValue DstAddr;
8665 MachinePointerInfo DstInfo;
8666 int32_t Offset = VA.getLocMemOffset();
8667
8668 // Work out the address of the stack slot.
8669 if (!StackPtr.getNode())
8670 StackPtr = DAG.getCopyFromReg(Chain, dl: DL, Reg: LoongArch::R3, VT: PtrVT);
8671
8672 if (IsTailCall) {
8673 unsigned OpSize = divideCeil(Numerator: VA.getValVT().getSizeInBits(), Denominator: 8);
8674 int FI = MF.getFrameInfo().CreateFixedObject(Size: OpSize, SPOffset: Offset, IsImmutable: true);
8675 DstAddr = DAG.getFrameIndex(FI, VT: PtrVT);
8676 DstInfo = MachinePointerInfo::getFixedStack(MF, FI);
8677 if (!AfterFormalArgLoads) {
8678 Chain = DAG.getStackArgumentTokenFactor(Chain);
8679 AfterFormalArgLoads = true;
8680 }
8681 } else {
8682 SDValue PtrOff = DAG.getIntPtrConstant(Val: Offset, DL);
8683 DstAddr = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr, N2: PtrOff);
8684 DstInfo = MachinePointerInfo::getStack(MF, Offset);
8685 }
8686
8687 // Emit the store.
8688 MemOpChains.push_back(
8689 Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: DstAddr, PtrInfo: DstInfo));
8690 }
8691 }
8692
8693 // Join the stores, which are independent of one another.
8694 if (!MemOpChains.empty())
8695 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: MemOpChains);
8696
8697 SDValue Glue;
8698
8699 // Build a sequence of copy-to-reg nodes, chained and glued together.
8700 for (auto &Reg : RegsToPass) {
8701 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: Reg.first, N: Reg.second, Glue);
8702 Glue = Chain.getValue(R: 1);
8703 }
8704
  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
  // split it, and the direct call can then be matched by PseudoCALL_SMALL.
8708 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Val&: Callee)) {
8709 const GlobalValue *GV = S->getGlobal();
8710 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
8711 ? LoongArchII::MO_CALL
8712 : LoongArchII::MO_CALL_PLT;
8713 Callee = DAG.getTargetGlobalAddress(GV: S->getGlobal(), DL, VT: PtrVT, offset: 0, TargetFlags: OpFlags);
8714 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Val&: Callee)) {
8715 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV: nullptr)
8716 ? LoongArchII::MO_CALL
8717 : LoongArchII::MO_CALL_PLT;
8718 Callee = DAG.getTargetExternalSymbol(Sym: S->getSymbol(), VT: PtrVT, TargetFlags: OpFlags);
8719 }
8720
8721 // The first call operand is the chain and the second is the target address.
8722 SmallVector<SDValue> Ops;
8723 Ops.push_back(Elt: Chain);
8724 Ops.push_back(Elt: Callee);
8725
8726 // Add argument registers to the end of the list so that they are
8727 // known live into the call.
8728 for (auto &Reg : RegsToPass)
8729 Ops.push_back(Elt: DAG.getRegister(Reg: Reg.first, VT: Reg.second.getValueType()));
8730
8731 if (!IsTailCall) {
8732 // Add a register mask operand representing the call-preserved registers.
8733 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
8734 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
8735 assert(Mask && "Missing call preserved mask for calling convention");
8736 Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask));
8737 }
8738
8739 // Glue the call to the argument copies, if any.
8740 if (Glue.getNode())
8741 Ops.push_back(Elt: Glue);
8742
8743 // Emit the call.
8744 SDVTList NodeTys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue);
8745 unsigned Op;
8746 switch (DAG.getTarget().getCodeModel()) {
8747 default:
8748 report_fatal_error(reason: "Unsupported code model");
8749 case CodeModel::Small:
8750 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
8751 break;
8752 case CodeModel::Medium:
8753 Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
8754 break;
8755 case CodeModel::Large:
8756 assert(Subtarget.is64Bit() && "Large code model requires LA64");
8757 Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
8758 break;
8759 }
8760
8761 if (IsTailCall) {
8762 MF.getFrameInfo().setHasTailCall();
8763 SDValue Ret = DAG.getNode(Opcode: Op, DL, VTList: NodeTys, Ops);
8764 DAG.addNoMergeSiteInfo(Node: Ret.getNode(), NoMerge: CLI.NoMerge);
8765 return Ret;
8766 }
8767
8768 Chain = DAG.getNode(Opcode: Op, DL, VTList: NodeTys, Ops);
8769 DAG.addNoMergeSiteInfo(Node: Chain.getNode(), NoMerge: CLI.NoMerge);
8770 Glue = Chain.getValue(R: 1);
8771
8772 // Mark the end of the call, which is glued to the call itself.
8773 Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytes, Size2: 0, Glue, DL);
8774 Glue = Chain.getValue(R: 1);
8775
8776 // Assign locations to each value returned by this call.
8777 SmallVector<CCValAssign> RVLocs;
8778 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
8779 analyzeInputArgs(MF, CCInfo&: RetCCInfo, Ins, /*IsRet=*/true, Fn: CC_LoongArch);
8780
8781 // Copy all of the result registers out of their specified physreg.
8782 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
8783 auto &VA = RVLocs[i];
8784 // Copy the value out.
8785 SDValue RetValue =
8786 DAG.getCopyFromReg(Chain, dl: DL, Reg: VA.getLocReg(), VT: VA.getLocVT(), Glue);
8787 // Glue the RetValue to the end of the call sequence.
8788 Chain = RetValue.getValue(R: 1);
8789 Glue = RetValue.getValue(R: 2);
8790
8791 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8792 assert(VA.needsCustom());
8793 SDValue RetValue2 = DAG.getCopyFromReg(Chain, dl: DL, Reg: RVLocs[++i].getLocReg(),
8794 VT: MVT::i32, Glue);
8795 Chain = RetValue2.getValue(R: 1);
8796 Glue = RetValue2.getValue(R: 2);
8797 RetValue = DAG.getNode(Opcode: LoongArchISD::BUILD_PAIR_F64, DL, VT: MVT::f64,
8798 N1: RetValue, N2: RetValue2);
8799 } else
8800 RetValue = convertLocVTToValVT(DAG, Val: RetValue, VA, DL);
8801
8802 InVals.push_back(Elt: RetValue);
8803 }
8804
8805 return Chain;
8806}
8807
8808bool LoongArchTargetLowering::CanLowerReturn(
8809 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
8810 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
8811 const Type *RetTy) const {
8812 SmallVector<CCValAssign> RVLocs;
8813 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
8814
8815 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8816 LoongArchABI::ABI ABI =
8817 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8818 if (CC_LoongArch(DL: MF.getDataLayout(), ABI, ValNo: i, ValVT: Outs[i].VT, LocInfo: CCValAssign::Full,
8819 ArgFlags: Outs[i].Flags, State&: CCInfo, /*IsRet=*/true, OrigTy: nullptr))
8820 return false;
8821 }
8822 return true;
8823}
8824
8825SDValue LoongArchTargetLowering::LowerReturn(
8826 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
8827 const SmallVectorImpl<ISD::OutputArg> &Outs,
8828 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
8829 SelectionDAG &DAG) const {
8830 // Stores the assignment of the return value to a location.
8831 SmallVector<CCValAssign> RVLocs;
8832
8833 // Info about the registers and stack slot.
8834 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
8835 *DAG.getContext());
8836
8837 analyzeOutputArgs(MF&: DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
8838 CLI: nullptr, Fn: CC_LoongArch);
8839 if (CallConv == CallingConv::GHC && !RVLocs.empty())
8840 report_fatal_error(reason: "GHC functions return void only");
8841 SDValue Glue;
8842 SmallVector<SDValue, 4> RetOps(1, Chain);
8843
8844 // Copy the result values into the output registers.
8845 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
8846 SDValue Val = OutVals[OutIdx];
8847 CCValAssign &VA = RVLocs[i];
8848 assert(VA.isRegLoc() && "Can only return in registers!");
8849
8850 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8851 // Handle returning f64 on LA32D with a soft float ABI.
8852 assert(VA.isRegLoc() && "Expected return via registers");
8853 assert(VA.needsCustom());
8854 SDValue SplitF64 = DAG.getNode(Opcode: LoongArchISD::SPLIT_PAIR_F64, DL,
8855 VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32), N: Val);
8856 SDValue Lo = SplitF64.getValue(R: 0);
8857 SDValue Hi = SplitF64.getValue(R: 1);
8858 Register RegLo = VA.getLocReg();
8859 Register RegHi = RVLocs[++i].getLocReg();
8860
8861 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: RegLo, N: Lo, Glue);
8862 Glue = Chain.getValue(R: 1);
8863 RetOps.push_back(Elt: DAG.getRegister(Reg: RegLo, VT: MVT::i32));
8864 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: RegHi, N: Hi, Glue);
8865 Glue = Chain.getValue(R: 1);
8866 RetOps.push_back(Elt: DAG.getRegister(Reg: RegHi, VT: MVT::i32));
8867 } else {
8868 // Handle a 'normal' return.
8869 Val = convertValVTToLocVT(DAG, Val, VA, DL);
8870 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: VA.getLocReg(), N: Val, Glue);
8871
8872 // Guarantee that all emitted copies are stuck together.
8873 Glue = Chain.getValue(R: 1);
8874 RetOps.push_back(Elt: DAG.getRegister(Reg: VA.getLocReg(), VT: VA.getLocVT()));
8875 }
8876 }
8877
8878 RetOps[0] = Chain; // Update chain.
8879
8880 // Add the glue node if we have it.
8881 if (Glue.getNode())
8882 RetOps.push_back(Elt: Glue);
8883
8884 return DAG.getNode(Opcode: LoongArchISD::RET, DL, VT: MVT::Other, Ops: RetOps);
8885}
8886
8887// Check if a constant splat can be generated using [x]vldi, where imm[12] == 1.
8888// Note: The following prefixes are excluded:
8889// imm[11:8] == 4'b0000, 4'b0100, 4'b1000
8890// as they can be represented using [x]vrepli.[whb]
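// For example, a 32-bit splat value of 0x00005600 matches the 4'b0001 case
// below and is encoded as RequiredImm = (0b10001 << 8) | 0x56.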
8891std::pair<bool, uint64_t> LoongArchTargetLowering::isImmVLDILegalForMode1(
8892 const APInt &SplatValue, const unsigned SplatBitSize) const {
8893 uint64_t RequiredImm = 0;
8894 uint64_t V = SplatValue.getZExtValue();
8895 if (SplatBitSize == 16 && !(V & 0x00FF)) {
8896 // 4'b0101
8897 RequiredImm = (0b10101 << 8) | (V >> 8);
8898 return {true, RequiredImm};
8899 } else if (SplatBitSize == 32) {
8900 // 4'b0001
8901 if (!(V & 0xFFFF00FF)) {
8902 RequiredImm = (0b10001 << 8) | (V >> 8);
8903 return {true, RequiredImm};
8904 }
8905 // 4'b0010
8906 if (!(V & 0xFF00FFFF)) {
8907 RequiredImm = (0b10010 << 8) | (V >> 16);
8908 return {true, RequiredImm};
8909 }
8910 // 4'b0011
8911 if (!(V & 0x00FFFFFF)) {
8912 RequiredImm = (0b10011 << 8) | (V >> 24);
8913 return {true, RequiredImm};
8914 }
8915 // 4'b0110
8916 if ((V & 0xFFFF00FF) == 0xFF) {
8917 RequiredImm = (0b10110 << 8) | (V >> 8);
8918 return {true, RequiredImm};
8919 }
8920 // 4'b0111
8921 if ((V & 0xFF00FFFF) == 0xFFFF) {
8922 RequiredImm = (0b10111 << 8) | (V >> 16);
8923 return {true, RequiredImm};
8924 }
8925 // 4'b1010
8926 if ((V & 0x7E07FFFF) == 0x3E000000 || (V & 0x7E07FFFF) == 0x40000000) {
8927 RequiredImm =
8928 (0b11010 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
8929 return {true, RequiredImm};
8930 }
8931 } else if (SplatBitSize == 64) {
8932 // 4'b1011
8933 if ((V & 0xFFFFFFFF7E07FFFFULL) == 0x3E000000ULL ||
8934 (V & 0xFFFFFFFF7E07FFFFULL) == 0x40000000ULL) {
8935 RequiredImm =
8936 (0b11011 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
8937 return {true, RequiredImm};
8938 }
8939 // 4'b1100
8940 if ((V & 0x7FC0FFFFFFFFFFFFULL) == 0x4000000000000000ULL ||
8941 (V & 0x7FC0FFFFFFFFFFFFULL) == 0x3FC0000000000000ULL) {
8942 RequiredImm =
8943 (0b11100 << 8) | (((V >> 56) & 0xC0) ^ 0x40) | ((V >> 48) & 0x3F);
8944 return {true, RequiredImm};
8945 }
8946 // 4'b1001
8947 auto sameBitsPreByte = [](uint64_t x) -> std::pair<bool, uint8_t> {
8948 uint8_t res = 0;
8949 for (int i = 0; i < 8; ++i) {
8950 uint8_t byte = x & 0xFF;
8951 if (byte == 0 || byte == 0xFF)
8952 res |= ((byte & 1) << i);
8953 else
8954 return {false, 0};
8955 x >>= 8;
8956 }
8957 return {true, res};
8958 };
8959 auto [IsSame, Suffix] = sameBitsPreByte(V);
8960 if (IsSame) {
8961 RequiredImm = (0b11001 << 8) | Suffix;
8962 return {true, RequiredImm};
8963 }
8964 }
8965 return {false, RequiredImm};
8966}
8967
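// Check whether an FP immediate can be materialized with a single [x]vldi.
// The accepted bit patterns mirror the 4'b1010 (f32) and 4'b1100 (f64) cases
// handled in isImmVLDILegalForMode1 above.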
8968bool LoongArchTargetLowering::isFPImmVLDILegal(const APFloat &Imm,
8969 EVT VT) const {
8970 if (!Subtarget.hasExtLSX())
8971 return false;
8972
8973 if (VT == MVT::f32) {
8974 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
8975 return (masked == 0x3e000000 || masked == 0x40000000);
8976 }
8977
8978 if (VT == MVT::f64) {
8979 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
8980 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
8981 }
8982
8983 return false;
8984}
8985
8986bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
8987 bool ForCodeSize) const {
8988 // TODO: Maybe need more checks here after vector extension is supported.
8989 if (VT == MVT::f32 && !Subtarget.hasBasicF())
8990 return false;
8991 if (VT == MVT::f64 && !Subtarget.hasBasicD())
8992 return false;
8993 return (Imm.isZero() || Imm.isExactlyValue(V: 1.0) || isFPImmVLDILegal(Imm, VT));
8994}
8995
8996bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const {
8997 return true;
8998}
8999
9000bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const {
9001 return true;
9002}
9003
9004bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
9005 const Instruction *I) const {
9006 if (!Subtarget.is64Bit())
9007 return isa<LoadInst>(Val: I) || isa<StoreInst>(Val: I);
9008
9009 if (isa<LoadInst>(Val: I))
9010 return true;
9011
  // On LA64, atomic store operations with an integer bit width of 32 or 64 do
  // not require fences because we can use amswap_db.[w/d].
9014 Type *Ty = I->getOperand(i: 0)->getType();
9015 if (isa<StoreInst>(Val: I) && Ty->isIntegerTy()) {
9016 unsigned Size = Ty->getIntegerBitWidth();
9017 return (Size == 8 || Size == 16);
9018 }
9019
9020 return false;
9021}
9022
9023EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
9024 LLVMContext &Context,
9025 EVT VT) const {
9026 if (!VT.isVector())
9027 return getPointerTy(DL);
9028 return VT.changeVectorElementTypeToInteger();
9029}
9030
9031bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
9032 EVT VT = Y.getValueType();
9033
9034 if (VT.isVector())
9035 return Subtarget.hasExtLSX() && VT.isInteger();
9036
9037 return VT.isScalarInteger() && !isa<ConstantSDNode>(Val: Y);
9038}
9039
9040void LoongArchTargetLowering::getTgtMemIntrinsic(
9041 SmallVectorImpl<IntrinsicInfo> &Infos, const CallBase &I,
9042 MachineFunction &MF, unsigned Intrinsic) const {
9043 switch (Intrinsic) {
9044 default:
9045 return;
9046 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
9047 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
9048 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
9049 case Intrinsic::loongarch_masked_atomicrmw_nand_i32: {
9050 IntrinsicInfo Info;
9051 Info.opc = ISD::INTRINSIC_W_CHAIN;
9052 Info.memVT = MVT::i32;
9053 Info.ptrVal = I.getArgOperand(i: 0);
9054 Info.offset = 0;
9055 Info.align = Align(4);
9056 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
9057 MachineMemOperand::MOVolatile;
9058 Infos.push_back(Elt: Info);
9059 return;
9060 // TODO: Add more Intrinsics later.
9061 }
9062 }
9063}
9064
// When -mlamcas is enabled, MinCmpXchgSizeInBits is set to 8, so atomicrmw
// and/or/xor operations with operands narrower than 32 bits cannot be expanded
// to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent a regression,
// we implement the expansion manually here.
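// For example, `atomicrmw or ptr %p, i8 %v` becomes a 32-bit atomicrmw on the
// containing aligned word, with the i8 operand zero-extended and shifted into
// its byte lane; for `and`, the other lanes are preserved by OR-ing the
// inverted mask into the shifted operand.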
9069void LoongArchTargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
9070 AtomicRMWInst::BinOp Op = AI->getOperation();
9071
9072 assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
9073 Op == AtomicRMWInst::And) &&
9074 "Unable to expand");
9075 unsigned MinWordSize = 4;
9076
9077 IRBuilder<> Builder(AI);
9078 LLVMContext &Ctx = Builder.getContext();
9079 const DataLayout &DL = AI->getDataLayout();
9080 Type *ValueType = AI->getType();
9081 Type *WordType = Type::getIntNTy(C&: Ctx, N: MinWordSize * 8);
9082
9083 Value *Addr = AI->getPointerOperand();
9084 PointerType *PtrTy = cast<PointerType>(Val: Addr->getType());
9085 IntegerType *IntTy = DL.getIndexType(C&: Ctx, AddressSpace: PtrTy->getAddressSpace());
9086
9087 Value *AlignedAddr = Builder.CreateIntrinsic(
9088 ID: Intrinsic::ptrmask, Types: {PtrTy, IntTy},
9089 Args: {Addr, ConstantInt::get(Ty: IntTy, V: ~(uint64_t)(MinWordSize - 1))}, FMFSource: nullptr,
9090 Name: "AlignedAddr");
9091
9092 Value *AddrInt = Builder.CreatePtrToInt(V: Addr, DestTy: IntTy);
9093 Value *PtrLSB = Builder.CreateAnd(LHS: AddrInt, RHS: MinWordSize - 1, Name: "PtrLSB");
9094 Value *ShiftAmt = Builder.CreateShl(LHS: PtrLSB, RHS: 3);
9095 ShiftAmt = Builder.CreateTrunc(V: ShiftAmt, DestTy: WordType, Name: "ShiftAmt");
9096 Value *Mask = Builder.CreateShl(
9097 LHS: ConstantInt::get(Ty: WordType,
9098 V: (1 << (DL.getTypeStoreSize(Ty: ValueType) * 8)) - 1),
9099 RHS: ShiftAmt, Name: "Mask");
9100 Value *Inv_Mask = Builder.CreateNot(V: Mask, Name: "Inv_Mask");
9101 Value *ValOperand_Shifted =
9102 Builder.CreateShl(LHS: Builder.CreateZExt(V: AI->getValOperand(), DestTy: WordType),
9103 RHS: ShiftAmt, Name: "ValOperand_Shifted");
9104 Value *NewOperand;
9105 if (Op == AtomicRMWInst::And)
9106 NewOperand = Builder.CreateOr(LHS: ValOperand_Shifted, RHS: Inv_Mask, Name: "AndOperand");
9107 else
9108 NewOperand = ValOperand_Shifted;
9109
9110 AtomicRMWInst *NewAI =
9111 Builder.CreateAtomicRMW(Op, Ptr: AlignedAddr, Val: NewOperand, Align: Align(MinWordSize),
9112 Ordering: AI->getOrdering(), SSID: AI->getSyncScopeID());
9113
9114 Value *Shift = Builder.CreateLShr(LHS: NewAI, RHS: ShiftAmt, Name: "shifted");
9115 Value *Trunc = Builder.CreateTrunc(V: Shift, DestTy: ValueType, Name: "extracted");
9116 Value *FinalOldResult = Builder.CreateBitCast(V: Trunc, DestTy: ValueType);
9117 AI->replaceAllUsesWith(V: FinalOldResult);
9118 AI->eraseFromParent();
9119}
9120
9121TargetLowering::AtomicExpansionKind
9122LoongArchTargetLowering::shouldExpandAtomicRMWInIR(
9123 const AtomicRMWInst *AI) const {
9124 // TODO: Add more AtomicRMWInst that needs to be extended.
9125
  // Since a floating-point operation requires a non-trivial set of data
  // operations, use CmpXChg to expand.
9128 if (AI->isFloatingPointOperation() ||
9129 AI->getOperation() == AtomicRMWInst::UIncWrap ||
9130 AI->getOperation() == AtomicRMWInst::UDecWrap ||
9131 AI->getOperation() == AtomicRMWInst::USubCond ||
9132 AI->getOperation() == AtomicRMWInst::USubSat)
9133 return AtomicExpansionKind::CmpXChg;
9134
9135 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
9136 (AI->getOperation() == AtomicRMWInst::Xchg ||
9137 AI->getOperation() == AtomicRMWInst::Add ||
9138 AI->getOperation() == AtomicRMWInst::Sub)) {
9139 return AtomicExpansionKind::None;
9140 }
9141
9142 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
9143 if (Subtarget.hasLAMCAS()) {
9144 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
9145 AI->getOperation() == AtomicRMWInst::Or ||
9146 AI->getOperation() == AtomicRMWInst::Xor))
9147 return AtomicExpansionKind::CustomExpand;
9148 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
9149 return AtomicExpansionKind::CmpXChg;
9150 }
9151
9152 if (Size == 8 || Size == 16)
9153 return AtomicExpansionKind::MaskedIntrinsic;
9154 return AtomicExpansionKind::None;
9155}
9156
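// Map an AtomicRMW binary operation to the GRLen-specific masked atomic
// intrinsic used when expanding sub-word (i8/i16) atomics.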
9157static Intrinsic::ID
9158getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
9159 AtomicRMWInst::BinOp BinOp) {
9160 if (GRLen == 64) {
9161 switch (BinOp) {
9162 default:
9163 llvm_unreachable("Unexpected AtomicRMW BinOp");
9164 case AtomicRMWInst::Xchg:
9165 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
9166 case AtomicRMWInst::Add:
9167 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
9168 case AtomicRMWInst::Sub:
9169 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
9170 case AtomicRMWInst::Nand:
9171 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
9172 case AtomicRMWInst::UMax:
9173 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
9174 case AtomicRMWInst::UMin:
9175 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
9176 case AtomicRMWInst::Max:
9177 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
9178 case AtomicRMWInst::Min:
9179 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
9180 // TODO: support other AtomicRMWInst.
9181 }
9182 }
9183
9184 if (GRLen == 32) {
9185 switch (BinOp) {
9186 default:
9187 llvm_unreachable("Unexpected AtomicRMW BinOp");
9188 case AtomicRMWInst::Xchg:
9189 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
9190 case AtomicRMWInst::Add:
9191 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
9192 case AtomicRMWInst::Sub:
9193 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
9194 case AtomicRMWInst::Nand:
9195 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
9196 case AtomicRMWInst::UMax:
9197 return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
9198 case AtomicRMWInst::UMin:
9199 return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
9200 case AtomicRMWInst::Max:
9201 return Intrinsic::loongarch_masked_atomicrmw_max_i32;
9202 case AtomicRMWInst::Min:
9203 return Intrinsic::loongarch_masked_atomicrmw_min_i32;
9204 // TODO: support other AtomicRMWInst.
9205 }
9206 }
9207
9208 llvm_unreachable("Unexpected GRLen\n");
9209}
9210
9211TargetLowering::AtomicExpansionKind
9212LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
9213 const AtomicCmpXchgInst *CI) const {
9214
9215 if (Subtarget.hasLAMCAS())
9216 return AtomicExpansionKind::None;
9217
9218 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
9219 if (Size == 8 || Size == 16)
9220 return AtomicExpansionKind::MaskedIntrinsic;
9221 return AtomicExpansionKind::None;
9222}
9223
9224Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
9225 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
9226 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
9227 unsigned GRLen = Subtarget.getGRLen();
9228 AtomicOrdering FailOrd = CI->getFailureOrdering();
9229 Value *FailureOrdering =
9230 Builder.getIntN(N: Subtarget.getGRLen(), C: static_cast<uint64_t>(FailOrd));
9231 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
9232 if (GRLen == 64) {
9233 CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
9234 CmpVal = Builder.CreateSExt(V: CmpVal, DestTy: Builder.getInt64Ty());
9235 NewVal = Builder.CreateSExt(V: NewVal, DestTy: Builder.getInt64Ty());
9236 Mask = Builder.CreateSExt(V: Mask, DestTy: Builder.getInt64Ty());
9237 }
9238 Type *Tys[] = {AlignedAddr->getType()};
9239 Value *Result = Builder.CreateIntrinsic(
9240 ID: CmpXchgIntrID, Types: Tys, Args: {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
9241 if (GRLen == 64)
9242 Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty());
9243 return Result;
9244}
9245
9246Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
9247 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
9248 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
9249 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
9250 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
9251 // mask, as this produces better code than the LL/SC loop emitted by
9252 // int_loongarch_masked_atomicrmw_xchg.
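  // For example, `atomicrmw xchg ptr %p, i8 0` becomes an `atomicrmw and`
  // with the inverted lane mask, and an xchg of -1 becomes an `atomicrmw or`
  // with the lane mask.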
9253 if (AI->getOperation() == AtomicRMWInst::Xchg &&
9254 isa<ConstantInt>(Val: AI->getValOperand())) {
9255 ConstantInt *CVal = cast<ConstantInt>(Val: AI->getValOperand());
9256 if (CVal->isZero())
9257 return Builder.CreateAtomicRMW(Op: AtomicRMWInst::And, Ptr: AlignedAddr,
9258 Val: Builder.CreateNot(V: Mask, Name: "Inv_Mask"),
9259 Align: AI->getAlign(), Ordering: Ord);
9260 if (CVal->isMinusOne())
9261 return Builder.CreateAtomicRMW(Op: AtomicRMWInst::Or, Ptr: AlignedAddr, Val: Mask,
9262 Align: AI->getAlign(), Ordering: Ord);
9263 }
9264
9265 unsigned GRLen = Subtarget.getGRLen();
9266 Value *Ordering =
9267 Builder.getIntN(N: GRLen, C: static_cast<uint64_t>(AI->getOrdering()));
9268 Type *Tys[] = {AlignedAddr->getType()};
9269 Function *LlwOpScwLoop = Intrinsic::getOrInsertDeclaration(
9270 M: AI->getModule(),
9271 id: getIntrinsicForMaskedAtomicRMWBinOp(GRLen, BinOp: AI->getOperation()), Tys);
9272
9273 if (GRLen == 64) {
9274 Incr = Builder.CreateSExt(V: Incr, DestTy: Builder.getInt64Ty());
9275 Mask = Builder.CreateSExt(V: Mask, DestTy: Builder.getInt64Ty());
9276 ShiftAmt = Builder.CreateSExt(V: ShiftAmt, DestTy: Builder.getInt64Ty());
9277 }
9278
9279 Value *Result;
9280
9281 // Must pass the shift amount needed to sign extend the loaded value prior
9282 // to performing a signed comparison for min/max. ShiftAmt is the number of
9283 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
9284 // is the number of bits to left+right shift the value in order to
9285 // sign-extend.
9286 if (AI->getOperation() == AtomicRMWInst::Min ||
9287 AI->getOperation() == AtomicRMWInst::Max) {
9288 const DataLayout &DL = AI->getDataLayout();
9289 unsigned ValWidth =
9290 DL.getTypeStoreSizeInBits(Ty: AI->getValOperand()->getType());
9291 Value *SextShamt =
9292 Builder.CreateSub(LHS: Builder.getIntN(N: GRLen, C: GRLen - ValWidth), RHS: ShiftAmt);
9293 Result = Builder.CreateCall(Callee: LlwOpScwLoop,
9294 Args: {AlignedAddr, Incr, Mask, SextShamt, Ordering});
9295 } else {
9296 Result =
9297 Builder.CreateCall(Callee: LlwOpScwLoop, Args: {AlignedAddr, Incr, Mask, Ordering});
9298 }
9299
9300 if (GRLen == 64)
9301 Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty());
9302 return Result;
9303}
9304
9305bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
9306 const MachineFunction &MF, EVT VT) const {
9307 VT = VT.getScalarType();
9308
9309 if (!VT.isSimple())
9310 return false;
9311
9312 switch (VT.getSimpleVT().SimpleTy) {
9313 case MVT::f32:
9314 case MVT::f64:
9315 return true;
9316 default:
9317 break;
9318 }
9319
9320 return false;
9321}
9322
9323Register LoongArchTargetLowering::getExceptionPointerRegister(
9324 const Constant *PersonalityFn) const {
9325 return LoongArch::R4;
9326}
9327
9328Register LoongArchTargetLowering::getExceptionSelectorRegister(
9329 const Constant *PersonalityFn) const {
9330 return LoongArch::R5;
9331}
9332
9333//===----------------------------------------------------------------------===//
9334// Target Optimization Hooks
9335//===----------------------------------------------------------------------===//
9336
9337static int getEstimateRefinementSteps(EVT VT,
9338 const LoongArchSubtarget &Subtarget) {
  // The FRECIPE estimate instructions have a relative accuracy of 2^-14.
  // IEEE single precision has 23 significand bits and double precision has 52.
9341 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
9342 return RefinementSteps;
9343}
9344
9345SDValue LoongArchTargetLowering::getSqrtEstimate(SDValue Operand,
9346 SelectionDAG &DAG, int Enabled,
9347 int &RefinementSteps,
9348 bool &UseOneConstNR,
9349 bool Reciprocal) const {
9350 if (Subtarget.hasFrecipe()) {
9351 SDLoc DL(Operand);
9352 EVT VT = Operand.getValueType();
9353
9354 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
9355 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
9356 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
9357 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
9358 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
9359
9360 if (RefinementSteps == ReciprocalEstimate::Unspecified)
9361 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9362
9363 SDValue Estimate = DAG.getNode(Opcode: LoongArchISD::FRSQRTE, DL, VT, Operand);
9364 if (Reciprocal)
9365 Estimate = DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: Operand, N2: Estimate);
9366
9367 return Estimate;
9368 }
9369 }
9370
9371 return SDValue();
9372}
9373
9374SDValue LoongArchTargetLowering::getRecipEstimate(SDValue Operand,
9375 SelectionDAG &DAG,
9376 int Enabled,
9377 int &RefinementSteps) const {
9378 if (Subtarget.hasFrecipe()) {
9379 SDLoc DL(Operand);
9380 EVT VT = Operand.getValueType();
9381
9382 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
9383 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
9384 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
9385 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
9386 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
9387
9388 if (RefinementSteps == ReciprocalEstimate::Unspecified)
9389 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9390
9391 return DAG.getNode(Opcode: LoongArchISD::FRECIPE, DL, VT, Operand);
9392 }
9393 }
9394
9395 return SDValue();
9396}
9397
9398//===----------------------------------------------------------------------===//
9399// LoongArch Inline Assembly Support
9400//===----------------------------------------------------------------------===//
9401
9402LoongArchTargetLowering::ConstraintType
9403LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
9404 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
9405 //
9406 // 'f': A floating-point register (if available).
9407 // 'k': A memory operand whose address is formed by a base register and
9408 // (optionally scaled) index register.
9409 // 'l': A signed 16-bit constant.
9410 // 'm': A memory operand whose address is formed by a base register and
9411 // offset that is suitable for use in instructions with the same
9412 // addressing mode as st.w and ld.w.
9413 // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg
9414 // instruction)
9415 // 'I': A signed 12-bit constant (for arithmetic instructions).
9416 // 'J': Integer zero.
9417 // 'K': An unsigned 12-bit constant (for logic instructions).
9418 // "ZB": An address that is held in a general-purpose register. The offset is
9419 // zero.
9420 // "ZC": A memory operand whose address is formed by a base register and
9421 // offset that is suitable for use in instructions with the same
9422 // addressing mode as ll.w and sc.w.
9423 if (Constraint.size() == 1) {
9424 switch (Constraint[0]) {
9425 default:
9426 break;
9427 case 'f':
9428 case 'q':
9429 return C_RegisterClass;
9430 case 'l':
9431 case 'I':
9432 case 'J':
9433 case 'K':
9434 return C_Immediate;
9435 case 'k':
9436 return C_Memory;
9437 }
9438 }
9439
9440 if (Constraint == "ZC" || Constraint == "ZB")
9441 return C_Memory;
9442
9443 // 'm' is handled here.
9444 return TargetLowering::getConstraintType(Constraint);
9445}
9446
9447InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
9448 StringRef ConstraintCode) const {
9449 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
9450 .Case(S: "k", Value: InlineAsm::ConstraintCode::k)
9451 .Case(S: "ZB", Value: InlineAsm::ConstraintCode::ZB)
9452 .Case(S: "ZC", Value: InlineAsm::ConstraintCode::ZC)
9453 .Default(Value: TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
9454}
9455
9456std::pair<unsigned, const TargetRegisterClass *>
9457LoongArchTargetLowering::getRegForInlineAsmConstraint(
9458 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
9459 // First, see if this is a constraint that directly corresponds to a LoongArch
9460 // register class.
9461 if (Constraint.size() == 1) {
9462 switch (Constraint[0]) {
9463 case 'r':
9464 // TODO: Support fixed vectors up to GRLen?
9465 if (VT.isVector())
9466 break;
9467 return std::make_pair(x: 0U, y: &LoongArch::GPRRegClass);
9468 case 'q':
9469 return std::make_pair(x: 0U, y: &LoongArch::GPRNoR0R1RegClass);
9470 case 'f':
9471 if (Subtarget.hasBasicF() && VT == MVT::f32)
9472 return std::make_pair(x: 0U, y: &LoongArch::FPR32RegClass);
9473 if (Subtarget.hasBasicD() && VT == MVT::f64)
9474 return std::make_pair(x: 0U, y: &LoongArch::FPR64RegClass);
9475 if (Subtarget.hasExtLSX() &&
9476 TRI->isTypeLegalForClass(RC: LoongArch::LSX128RegClass, T: VT))
9477 return std::make_pair(x: 0U, y: &LoongArch::LSX128RegClass);
9478 if (Subtarget.hasExtLASX() &&
9479 TRI->isTypeLegalForClass(RC: LoongArch::LASX256RegClass, T: VT))
9480 return std::make_pair(x: 0U, y: &LoongArch::LASX256RegClass);
9481 break;
9482 default:
9483 break;
9484 }
9485 }
9486
9487 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
9488 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
9489 // constraints while the official register name is prefixed with a '$'. So we
9490 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
9491 // before it being parsed. And TargetLowering::getRegForInlineAsmConstraint is
9492 // case insensitive, so no need to convert the constraint to upper case here.
9493 //
9494 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
9495 // decode the usage of register name aliases into their official names. And
9496 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
9497 // official register names.
9498 if (Constraint.starts_with(Prefix: "{$r") || Constraint.starts_with(Prefix: "{$f") ||
9499 Constraint.starts_with(Prefix: "{$vr") || Constraint.starts_with(Prefix: "{$xr")) {
9500 bool IsFP = Constraint[2] == 'f';
9501 std::pair<StringRef, StringRef> Temp = Constraint.split(Separator: '$');
9502 std::pair<unsigned, const TargetRegisterClass *> R;
9503 R = TargetLowering::getRegForInlineAsmConstraint(
9504 TRI, Constraint: join_items(Separator: "", Items&: Temp.first, Items&: Temp.second), VT);
9505 // Match those names to the widest floating point register type available.
9506 if (IsFP) {
9507 unsigned RegNo = R.first;
9508 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
9509 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
9510 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
9511 return std::make_pair(x&: DReg, y: &LoongArch::FPR64RegClass);
9512 }
9513 }
9514 }
9515 return R;
9516 }
9517
9518 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
9519}
9520
9521void LoongArchTargetLowering::LowerAsmOperandForConstraint(
9522 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
9523 SelectionDAG &DAG) const {
9524 // Currently only support length 1 constraints.
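  // For illustration, something like
  //   asm("addi.w %0, %1, %2" : "=r"(res) : "r"(a), "I"(imm))
  // requires `imm` to be a signed 12-bit constant, validated below.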
9525 if (Constraint.size() == 1) {
9526 switch (Constraint[0]) {
9527 case 'l':
9528 // Validate & create a 16-bit signed immediate operand.
9529 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) {
9530 uint64_t CVal = C->getSExtValue();
9531 if (isInt<16>(x: CVal))
9532 Ops.push_back(x: DAG.getSignedTargetConstant(Val: CVal, DL: SDLoc(Op),
9533 VT: Subtarget.getGRLenVT()));
9534 }
9535 return;
9536 case 'I':
9537 // Validate & create a 12-bit signed immediate operand.
9538 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) {
9539 uint64_t CVal = C->getSExtValue();
9540 if (isInt<12>(x: CVal))
9541 Ops.push_back(x: DAG.getSignedTargetConstant(Val: CVal, DL: SDLoc(Op),
9542 VT: Subtarget.getGRLenVT()));
9543 }
9544 return;
9545 case 'J':
9546 // Validate & create an integer zero operand.
9547 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op))
9548 if (C->getZExtValue() == 0)
9549 Ops.push_back(
9550 x: DAG.getTargetConstant(Val: 0, DL: SDLoc(Op), VT: Subtarget.getGRLenVT()));
9551 return;
9552 case 'K':
9553 // Validate & create a 12-bit unsigned immediate operand.
9554 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) {
9555 uint64_t CVal = C->getZExtValue();
9556 if (isUInt<12>(x: CVal))
9557 Ops.push_back(
9558 x: DAG.getTargetConstant(Val: CVal, DL: SDLoc(Op), VT: Subtarget.getGRLenVT()));
9559 }
9560 return;
9561 default:
9562 break;
9563 }
9564 }
9565 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
9566}
9567
9568#define GET_REGISTER_MATCHER
9569#include "LoongArchGenAsmMatcher.inc"
9570
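// Resolve a register name for llvm.read_register / llvm.write_register. Only
// registers reserved by the target may be accessed this way; requesting any
// other register is a fatal error.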
9571Register
9572LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
9573 const MachineFunction &MF) const {
9574 std::pair<StringRef, StringRef> Name = StringRef(RegName).split(Separator: '$');
9575 std::string NewRegName = Name.second.str();
9576 Register Reg = MatchRegisterAltName(Name: NewRegName);
9577 if (!Reg)
9578 Reg = MatchRegisterName(Name: NewRegName);
9579 if (!Reg)
9580 return Reg;
9581 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
9582 if (!ReservedRegs.test(Idx: Reg))
9583 report_fatal_error(reason: Twine("Trying to obtain non-reserved register \"" +
9584 StringRef(RegName) + "\"."));
9585 return Reg;
9586}
9587
9588bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
9589 EVT VT, SDValue C) const {
9590 // TODO: Support vectors.
9591 if (!VT.isScalarInteger())
9592 return false;
9593
9594 // Omit the optimization if the data size exceeds GRLen.
9595 if (VT.getSizeInBits() > Subtarget.getGRLen())
9596 return false;
9597
9598 if (auto *ConstNode = dyn_cast<ConstantSDNode>(Val: C.getNode())) {
9599 const APInt &Imm = ConstNode->getAPIntValue();
9600 // Break MUL into (SLLI + ADD/SUB) or ALSL.
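    // For example, x * 9 == (x << 3) + x maps to a single ALSL, and
    // x * 7 == (x << 3) - x maps to SLLI + SUB.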
9601 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
9602 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
9603 return true;
9604 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
9605 if (ConstNode->hasOneUse() &&
9606 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
9607 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
9608 return true;
    // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)), in which the
    // immediate has two set bits. Or break (MUL x, imm) into
    // (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate equals
    // (1 << s0) - (1 << s1).
9613 if (ConstNode->hasOneUse() && !(Imm.sge(RHS: -2048) && Imm.sle(RHS: 4095))) {
9614 unsigned Shifts = Imm.countr_zero();
9615 // Reject immediates which can be composed via a single LUI.
9616 if (Shifts >= 12)
9617 return false;
      // Reject multiplications that can be optimized to
      // (SLLI (ALSL x, x, 1/2/3/4), s).
9620 APInt ImmPop = Imm.ashr(ShiftAmt: Shifts);
9621 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
9622 return false;
      // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
      // since it needs one more instruction than the other 3 cases.
9625 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
9626 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
9627 (ImmSmall - Imm).isPowerOf2())
9628 return true;
9629 }
9630 }
9631
9632 return false;
9633}

bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                     const AddrMode &AM,
                                                     Type *Ty, unsigned AS,
                                                     Instruction *I) const {
  // LoongArch has four basic addressing modes:
  //  1. reg
  //  2. reg + 12-bit signed offset
  //  3. reg + 14-bit signed offset left-shifted by 2
  //  4. reg1 + reg2
  // TODO: Add more checks once the vector extensions are supported.
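  // As an illustration, these map to forms like ld.w rd, rj, si12 (mode 2),
  // ldptr.w rd, rj, si14 (mode 3, offset scaled by 4) and ldx.w rd, rj, rk
  // (mode 4).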

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // Require a 12-bit signed offset, or a 14-bit signed offset left-shifted by
  // 2 when the UAL feature is available.
  if (!isInt<12>(AM.BaseOffs) &&
      !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
    return false;

  switch (AM.Scale) {
  case 0:
    // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    // "r+r+i" is not allowed.
    if (AM.HasBaseReg && AM.BaseOffs)
      return false;
    // Otherwise we have "r+r" or "r+i".
    break;
  case 2:
    // "2*r+r" or "2*r+i" is not allowed.
    if (AM.HasBaseReg || AM.BaseOffs)
      return false;
    // Allow "2*r" as "r+r".
    break;
  default:
    return false;
  }

  return true;
}

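// Compare-with-immediate uses slti/sltui, whose immediate is a 12-bit signed
// field.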
bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

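// Likewise, addi.w/addi.d take a 12-bit signed immediate.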
bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Zexts are free if they can be combined with a load.
  // Don't advertise i32->i64 zextload as being free for LA64. It interacts
  // poorly with type legalization of compares preferring sext.
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  return TargetLowering::isZExtFree(Val, VT2);
}

bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,
                                                    EVT DstVT) const {
  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}

bool LoongArchTargetLowering::signExtendConstant(const ConstantInt *CI) const {
  return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
}

bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
  // TODO: Support vectors.
  if (Y.getValueType().isVector())
    return false;

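  // LoongArch has andn, so (and x, (not y)) is a single instruction when y is
  // not a constant; for a constant y the inverted constant can simply be
  // folded instead.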
  return !isa<ConstantSDNode>(Y);
}

ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
  // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
  return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
}

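// On LA64 the calling convention keeps 32-bit integers sign-extended in
// 64-bit registers, so i32 libcall arguments and results want a sext.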
bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall(
    Type *Ty, bool IsSigned) const {
  if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
    return true;

  return IsSigned;
}

bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
  // Return false to suppress unnecessary extensions when a libcall argument or
  // return value is a float narrower than GRLen on a soft FP ABI.
  if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
                                  Type.getSizeInBits() < Subtarget.getGRLen()))
    return false;
  return true;
}

// memcpy and other memory intrinsics typically try to use wider loads/stores
// when the source/destination is aligned and the copy size is large enough.
// We therefore want to align objects that are passed to such intrinsics.
bool LoongArchTargetLowering::shouldAlignPointerArgs(CallInst *CI,
                                                     unsigned &MinSize,
                                                     Align &PrefAlign) const {
  if (!isa<MemIntrinsic>(CI))
    return false;

  if (Subtarget.is64Bit()) {
    MinSize = 8;
    PrefAlign = Align(8);
  } else {
    MinSize = 4;
    PrefAlign = Align(4);
  }

  return true;
}

TargetLoweringBase::LegalizeTypeAction
LoongArchTargetLowering::getPreferredVectorAction(MVT VT) const {
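  // Prefer widening over splitting or promoting for illegal fixed-width
  // vectors (e.g. v2i32 is widened to v4i32), except for single-element and
  // i1 vectors.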
  if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
      VT.getVectorElementType() != MVT::i1)
    return TypeWidenVector;

  return TargetLoweringBase::getPreferredVectorAction(VT);
}

bool LoongArchTargetLowering::splitValueIntoRegisterParts(
    SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
    unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
  bool IsABIRegCopy = CC.has_value();
  EVT ValueVT = Val.getValueType();

  if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
      PartVT == MVT::f32) {
    // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
    // NaN, and cast to f32.
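    // e.g. an f16 with bit pattern 0xABCD is passed as the f32 with bit
    // pattern 0xFFFFABCD (a NaN).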
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
    Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
    Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
                      DAG.getConstant(0xFFFF0000, DL, MVT::i32));
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
    Parts[0] = Val;
    return true;
  }

  return false;
}

SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
    SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
    MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
  bool IsABIRegCopy = CC.has_value();

  if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
      PartVT == MVT::f32) {
    SDValue Val = Parts[0];

    // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
    Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
    Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
    return Val;
  }

  return SDValue();
}

MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
                                                           CallingConv::ID CC,
                                                           EVT VT) const {
  // Use f32 to pass f16.
  if (VT == MVT::f16 && Subtarget.hasBasicF())
    return MVT::f32;

  return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
}

unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
    LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
  // Use f32 to pass f16.
  if (VT == MVT::f16 && Subtarget.hasBasicF())
    return 1;

  return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}

bool LoongArchTargetLowering::SimplifyDemandedBitsForTargetNode(
    SDValue Op, const APInt &OriginalDemandedBits,
    const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
    unsigned Depth) const {
  EVT VT = Op.getValueType();
  unsigned BitWidth = OriginalDemandedBits.getBitWidth();
  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default:
    break;
  case LoongArchISD::VMSKLTZ:
  case LoongArchISD::XVMSKLTZ: {
    SDValue Src = Op.getOperand(0);
    MVT SrcVT = Src.getSimpleValueType();
    unsigned SrcBits = SrcVT.getScalarSizeInBits();
    unsigned NumElts = SrcVT.getVectorNumElements();

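    // [X]VMSKLTZ packs the sign bit of each source element into the low
    // NumElts bits of the result; all higher result bits are zero.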
    // If we don't need the sign bits at all, just return zero.
    if (OriginalDemandedBits.countr_zero() >= NumElts)
      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));

    // Only demand the vector elements of the sign bits we need.
    APInt KnownUndef, KnownZero;
    APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
    if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
                                   TLO, Depth + 1))
      return true;

    Known.Zero = KnownZero.zext(BitWidth);
    Known.Zero.setHighBits(BitWidth - NumElts);

    // [X]VMSKLTZ only uses the MSB from each vector element.
    KnownBits KnownSrc;
    APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
                             Depth + 1))
      return true;

    if (KnownSrc.One[SrcBits - 1])
      Known.One.setLowBits(NumElts);
    else if (KnownSrc.Zero[SrcBits - 1])
      Known.Zero.setLowBits(NumElts);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
            Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
      return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
    return false;
  }
  }

  return TargetLowering::SimplifyDemandedBitsForTargetNode(
      Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
}

bool LoongArchTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
  unsigned Opc = VecOp.getOpcode();

  // Assume target opcodes can't be scalarized.
  // TODO - do we have any exceptions?
  if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
    return false;

  // If the vector op is not supported, try to convert to scalar.
  EVT VecVT = VecOp.getValueType();
  if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
    return true;

  // If the vector op is supported, but the scalar op is not, the transform may
  // not be worthwhile.
  EVT ScalarVT = VecVT.getScalarType();
  return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
}

bool LoongArchTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                                      unsigned Index) const {
  if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
    return false;

  // Extracting a 128-bit subvector from index 0 of a 256-bit vector is free.
  return Index == 0;
}

bool LoongArchTargetLowering::isExtractVecEltCheap(EVT VT,
                                                   unsigned Index) const {
  EVT EltVT = VT.getScalarType();

  // Extracting a scalar FP value from index 0 of a vector is free.
  return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
}
