1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "LoongArchISelLowering.h"
15#include "LoongArch.h"
16#include "LoongArchMachineFunctionInfo.h"
17#include "LoongArchRegisterInfo.h"
18#include "LoongArchSelectionDAGInfo.h"
19#include "LoongArchSubtarget.h"
20#include "MCTargetDesc/LoongArchBaseInfo.h"
21#include "MCTargetDesc/LoongArchMCTargetDesc.h"
22#include "MCTargetDesc/LoongArchMatInt.h"
23#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/Statistic.h"
25#include "llvm/ADT/StringExtras.h"
26#include "llvm/CodeGen/ISDOpcodes.h"
27#include "llvm/CodeGen/MachineInstrBuilder.h"
28#include "llvm/CodeGen/RuntimeLibcallUtil.h"
29#include "llvm/CodeGen/SelectionDAGNodes.h"
30#include "llvm/IR/IRBuilder.h"
31#include "llvm/IR/IntrinsicInst.h"
32#include "llvm/IR/IntrinsicsLoongArch.h"
33#include "llvm/Support/CodeGen.h"
34#include "llvm/Support/Debug.h"
35#include "llvm/Support/ErrorHandling.h"
36#include "llvm/Support/KnownBits.h"
37#include "llvm/Support/MathExtras.h"
38#include <llvm/Analysis/VectorUtils.h>
39
40using namespace llvm;
41
42#define DEBUG_TYPE "loongarch-isel-lowering"
43
STATISTIC(NumTailCalls, "Number of tail calls");

// Instruction-count budgets for materializing a floating-point immediate via
// integer moves instead of a constant-pool load. Each enumerator's numeric
// value is the budget itself (0 disables materialization entirely).
enum MaterializeFPImm {
  NoMaterializeFPImm = 0,
  MaterializeFPImm2Ins = 2,
  MaterializeFPImm3Ins = 3,
  MaterializeFPImm4Ins = 4,
  MaterializeFPImm5Ins = 5,
  MaterializeFPImm6Ins = 6
};

// Hidden knob selecting the maximum number of instructions (integer
// materialization sequence plus the GPR->FPR move) allowed before falling
// back to a constant-pool load; consumed by lowerConstantFP().
static cl::opt<MaterializeFPImm> MaterializeFPImmInsNum(
    "loongarch-materialize-float-imm", cl::Hidden,
    cl::desc("Maximum number of instructions used (including code sequence "
             "to generate the value and moving the value to FPR) when "
             "materializing floating-point immediates (default = 3)"),
    cl::init(Val: MaterializeFPImm3Ins),
    cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"),
               clEnumValN(MaterializeFPImm2Ins, "2",
                          "Materialize FP immediate within 2 instructions"),
               clEnumValN(MaterializeFPImm3Ins, "3",
                          "Materialize FP immediate within 3 instructions"),
               clEnumValN(MaterializeFPImm4Ins, "4",
                          "Materialize FP immediate within 4 instructions"),
               clEnumValN(MaterializeFPImm5Ins, "5",
                          "Materialize FP immediate within 5 instructions"),
               clEnumValN(MaterializeFPImm6Ins, "6",
                          "Materialize FP immediate within 6 instructions "
                          "(behaves same as 5 on loongarch64)")));

// When enabled, integer division is lowered with an explicit
// divide-by-zero trap check.
static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
                                  cl::desc("Trap on integer division by zero."),
                                  cl::init(Val: false));
77
// Constructor: registers the legal register classes, configures per-operation
// legalization actions and DAG combines, and sets target-wide lowering
// parameters, all keyed on the subtarget's feature set (F/D FP, 32S, LSX,
// LASX, LAMCAS, SCQ) and register width (GRLen).
LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
                                                 const LoongArchSubtarget &STI)
    : TargetLowering(TM, STI), Subtarget(STI) {

  MVT GRLenVT = Subtarget.getGRLenVT();

  // Set up the register classes.

  addRegisterClass(VT: GRLenVT, RC: &LoongArch::GPRRegClass);
  if (Subtarget.hasBasicF())
    addRegisterClass(VT: MVT::f32, RC: &LoongArch::FPR32RegClass);
  if (Subtarget.hasBasicD())
    addRegisterClass(VT: MVT::f64, RC: &LoongArch::FPR64RegClass);

  // Vector types carried in the 128-bit LSX and 256-bit LASX register files.
  static const MVT::SimpleValueType LSXVTs[] = {
      MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
  static const MVT::SimpleValueType LASXVTs[] = {
      MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};

  if (Subtarget.hasExtLSX())
    for (MVT VT : LSXVTs)
      addRegisterClass(VT, RC: &LoongArch::LSX128RegClass);

  if (Subtarget.hasExtLASX())
    for (MVT VT : LASXVTs)
      addRegisterClass(VT, RC: &LoongArch::LASX256RegClass);

  // Set operations for LA32 and LA64.

  setLoadExtAction(ExtTypes: {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, ValVT: GRLenVT,
                   MemVT: MVT::i1, Action: Promote);

  setOperationAction(Op: ISD::SHL_PARTS, VT: GRLenVT, Action: Custom);
  setOperationAction(Op: ISD::SRA_PARTS, VT: GRLenVT, Action: Custom);
  setOperationAction(Op: ISD::SRL_PARTS, VT: GRLenVT, Action: Custom);
  setOperationAction(Op: ISD::FP_TO_SINT, VT: GRLenVT, Action: Custom);
  setOperationAction(Op: ISD::ROTL, VT: GRLenVT, Action: Expand);
  setOperationAction(Op: ISD::CTPOP, VT: GRLenVT, Action: Expand);

  // All code-model-dependent addresses get custom lowering.
  setOperationAction(Ops: {ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
                      ISD::JumpTable, ISD::GlobalTLSAddress},
                     VT: GRLenVT, Action: Custom);

  setOperationAction(Op: ISD::EH_DWARF_CFA, VT: GRLenVT, Action: Custom);

  setOperationAction(Op: ISD::DYNAMIC_STACKALLOC, VT: GRLenVT, Action: Expand);
  setOperationAction(Ops: {ISD::STACKSAVE, ISD::STACKRESTORE}, VT: MVT::Other, Action: Expand);
  setOperationAction(Op: ISD::VASTART, VT: MVT::Other, Action: Custom);
  setOperationAction(Ops: {ISD::VAARG, ISD::VACOPY, ISD::VAEND}, VT: MVT::Other, Action: Expand);

  setOperationAction(Op: ISD::DEBUGTRAP, VT: MVT::Other, Action: Legal);
  setOperationAction(Op: ISD::TRAP, VT: MVT::Other, Action: Legal);

  setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::Other, Action: Custom);
  setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::Other, Action: Custom);
  setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::Other, Action: Custom);

  setOperationAction(Op: ISD::PREFETCH, VT: MVT::Other, Action: Custom);

  // BITREV/REVB requires the 32S feature.
  if (STI.has32S()) {
    // Expand bitreverse.i16 with native-width bitrev and shift for now, before
    // we get to know which of sll and revb.2h is faster.
    setOperationAction(Op: ISD::BITREVERSE, VT: MVT::i8, Action: Custom);
    setOperationAction(Op: ISD::BITREVERSE, VT: GRLenVT, Action: Legal);

    // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
    // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
    // and i32 could still be byte-swapped relatively cheaply.
    setOperationAction(Op: ISD::BSWAP, VT: MVT::i16, Action: Custom);
  } else {
    // Without 32S, the bit-manipulation instructions are unavailable, so
    // expand the corresponding generic nodes.
    setOperationAction(Op: ISD::BSWAP, VT: GRLenVT, Action: Expand);
    setOperationAction(Op: ISD::CTTZ, VT: GRLenVT, Action: Expand);
    setOperationAction(Op: ISD::CTLZ, VT: GRLenVT, Action: Expand);
    setOperationAction(Op: ISD::ROTR, VT: GRLenVT, Action: Expand);
    setOperationAction(Op: ISD::SELECT, VT: GRLenVT, Action: Custom);
    setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::i8, Action: Expand);
    setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::i16, Action: Expand);
  }

  setOperationAction(Op: ISD::BR_JT, VT: MVT::Other, Action: Expand);
  setOperationAction(Op: ISD::BR_CC, VT: GRLenVT, Action: Expand);
  setOperationAction(Op: ISD::BRCOND, VT: MVT::Other, Action: Custom);
  setOperationAction(Op: ISD::SELECT_CC, VT: GRLenVT, Action: Expand);
  setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::i1, Action: Expand);
  setOperationAction(Ops: {ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT: GRLenVT, Action: Expand);

  setOperationAction(Op: ISD::FP_TO_UINT, VT: GRLenVT, Action: Custom);
  setOperationAction(Op: ISD::UINT_TO_FP, VT: GRLenVT, Action: Expand);

  // Set operations for LA64 only.

  if (Subtarget.is64Bit()) {
    // i32 is not a legal type on LA64; these nodes are custom-lowered onto
    // the native 32-bit-operand ("*.w") instructions.
    setOperationAction(Op: ISD::ADD, VT: MVT::i32, Action: Custom);
    setOperationAction(Op: ISD::SUB, VT: MVT::i32, Action: Custom);
    setOperationAction(Op: ISD::SHL, VT: MVT::i32, Action: Custom);
    setOperationAction(Op: ISD::SRA, VT: MVT::i32, Action: Custom);
    setOperationAction(Op: ISD::SRL, VT: MVT::i32, Action: Custom);
    setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::i32, Action: Custom);
    setOperationAction(Op: ISD::BITCAST, VT: MVT::i32, Action: Custom);
    setOperationAction(Op: ISD::ROTR, VT: MVT::i32, Action: Custom);
    setOperationAction(Op: ISD::ROTL, VT: MVT::i32, Action: Custom);
    setOperationAction(Op: ISD::CTTZ, VT: MVT::i32, Action: Custom);
    setOperationAction(Op: ISD::CTLZ, VT: MVT::i32, Action: Custom);
    setOperationAction(Op: ISD::EH_DWARF_CFA, VT: MVT::i32, Action: Custom);
    setOperationAction(Op: ISD::READ_REGISTER, VT: MVT::i32, Action: Custom);
    setOperationAction(Op: ISD::WRITE_REGISTER, VT: MVT::i32, Action: Custom);
    setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::i32, Action: Custom);
    setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::i32, Action: Custom);
    setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::i32, Action: Custom);

    setOperationAction(Op: ISD::BITREVERSE, VT: MVT::i32, Action: Custom);
    setOperationAction(Op: ISD::BSWAP, VT: MVT::i32, Action: Custom);
    setOperationAction(Ops: {ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, VT: MVT::i32,
                       Action: Custom);
    setOperationAction(Op: ISD::LROUND, VT: MVT::i32, Action: Custom);
  }

  // Set operations for LA32 only.

  if (!Subtarget.is64Bit()) {
    // i64 is not a legal type on LA32; these operate on register pairs.
    setOperationAction(Op: ISD::READ_REGISTER, VT: MVT::i64, Action: Custom);
    setOperationAction(Op: ISD::WRITE_REGISTER, VT: MVT::i64, Action: Custom);
    setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::i64, Action: Custom);
    setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::i64, Action: Custom);
    setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::i64, Action: Custom);
    if (Subtarget.hasBasicD())
      setOperationAction(Op: ISD::BITCAST, VT: MVT::i64, Action: Custom);
  }

  setOperationAction(Op: ISD::ATOMIC_FENCE, VT: MVT::Other, Action: Custom);

  // FP condition codes with no direct hardware comparison; expanded into
  // supported comparisons (e.g. by swapping operands / inverting).
  static const ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
      ISD::SETGE,  ISD::SETNE,  ISD::SETGT};

  // Set operations for 'F' feature.

  if (Subtarget.hasBasicF()) {
    setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f32, MemVT: MVT::f16, Action: Expand);
    setTruncStoreAction(ValVT: MVT::f32, MemVT: MVT::f16, Action: Expand);
    setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f32, MemVT: MVT::bf16, Action: Expand);
    setTruncStoreAction(ValVT: MVT::f32, MemVT: MVT::bf16, Action: Expand);
    setCondCodeAction(CCs: FPCCToExpand, VT: MVT::f32, Action: Expand);

    setOperationAction(Op: ISD::ConstantFP, VT: MVT::f32, Action: Custom);
    setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f32, Action: Expand);
    setOperationAction(Op: ISD::BR_CC, VT: MVT::f32, Action: Expand);
    setOperationAction(Op: ISD::FMA, VT: MVT::f32, Action: Legal);
    setOperationAction(Op: ISD::FMINNUM_IEEE, VT: MVT::f32, Action: Legal);
    setOperationAction(Op: ISD::FMINNUM, VT: MVT::f32, Action: Legal);
    setOperationAction(Op: ISD::FMAXNUM_IEEE, VT: MVT::f32, Action: Legal);
    setOperationAction(Op: ISD::FMAXNUM, VT: MVT::f32, Action: Legal);
    setOperationAction(Op: ISD::FCANONICALIZE, VT: MVT::f32, Action: Legal);
    setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::f32, Action: Legal);
    setOperationAction(Op: ISD::STRICT_FSETCC, VT: MVT::f32, Action: Legal);
    setOperationAction(Op: ISD::IS_FPCLASS, VT: MVT::f32, Action: Legal);
    setOperationAction(Op: ISD::FSIN, VT: MVT::f32, Action: Expand);
    setOperationAction(Op: ISD::FCOS, VT: MVT::f32, Action: Expand);
    setOperationAction(Op: ISD::FSINCOS, VT: MVT::f32, Action: Expand);
    setOperationAction(Op: ISD::FPOW, VT: MVT::f32, Action: Expand);
    setOperationAction(Op: ISD::FREM, VT: MVT::f32, Action: LibCall);
    // Half/bfloat conversions go through libcalls under a soft-FP ABI.
    setOperationAction(Op: ISD::FP16_TO_FP, VT: MVT::f32,
                       Action: Subtarget.isSoftFPABI() ? LibCall : Custom);
    setOperationAction(Op: ISD::FP_TO_FP16, VT: MVT::f32,
                       Action: Subtarget.isSoftFPABI() ? LibCall : Custom);
    setOperationAction(Op: ISD::BF16_TO_FP, VT: MVT::f32, Action: Custom);
    setOperationAction(Op: ISD::FP_TO_BF16, VT: MVT::f32,
                       Action: Subtarget.isSoftFPABI() ? LibCall : Custom);

    if (Subtarget.is64Bit())
      setOperationAction(Op: ISD::FRINT, VT: MVT::f32, Action: Legal);

    if (!Subtarget.hasBasicD()) {
      setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i32, Action: Custom);
      if (Subtarget.is64Bit()) {
        setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i64, Action: Custom);
        setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i64, Action: Custom);
      }
    }
  }

  // Set operations for 'D' feature.

  if (Subtarget.hasBasicD()) {
    setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f64, MemVT: MVT::f16, Action: Expand);
    setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f64, MemVT: MVT::f32, Action: Expand);
    setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f64, MemVT: MVT::bf16, Action: Expand);
    setTruncStoreAction(ValVT: MVT::f64, MemVT: MVT::bf16, Action: Expand);
    setTruncStoreAction(ValVT: MVT::f64, MemVT: MVT::f16, Action: Expand);
    setTruncStoreAction(ValVT: MVT::f64, MemVT: MVT::f32, Action: Expand);
    setCondCodeAction(CCs: FPCCToExpand, VT: MVT::f64, Action: Expand);

    setOperationAction(Op: ISD::ConstantFP, VT: MVT::f64, Action: Custom);
    setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f64, Action: Expand);
    setOperationAction(Op: ISD::BR_CC, VT: MVT::f64, Action: Expand);
    setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::f64, Action: Legal);
    setOperationAction(Op: ISD::STRICT_FSETCC, VT: MVT::f64, Action: Legal);
    setOperationAction(Op: ISD::FMA, VT: MVT::f64, Action: Legal);
    setOperationAction(Op: ISD::FMINNUM_IEEE, VT: MVT::f64, Action: Legal);
    setOperationAction(Op: ISD::FMINNUM, VT: MVT::f64, Action: Legal);
    setOperationAction(Op: ISD::FMAXNUM_IEEE, VT: MVT::f64, Action: Legal);
    setOperationAction(Op: ISD::FCANONICALIZE, VT: MVT::f64, Action: Legal);
    setOperationAction(Op: ISD::FMAXNUM, VT: MVT::f64, Action: Legal);
    setOperationAction(Op: ISD::IS_FPCLASS, VT: MVT::f64, Action: Legal);
    setOperationAction(Op: ISD::FSIN, VT: MVT::f64, Action: Expand);
    setOperationAction(Op: ISD::FCOS, VT: MVT::f64, Action: Expand);
    setOperationAction(Op: ISD::FSINCOS, VT: MVT::f64, Action: Expand);
    setOperationAction(Op: ISD::FPOW, VT: MVT::f64, Action: Expand);
    setOperationAction(Op: ISD::FREM, VT: MVT::f64, Action: LibCall);
    setOperationAction(Op: ISD::FP16_TO_FP, VT: MVT::f64, Action: Expand);
    setOperationAction(Op: ISD::FP_TO_FP16, VT: MVT::f64,
                       Action: Subtarget.isSoftFPABI() ? LibCall : Custom);
    setOperationAction(Op: ISD::BF16_TO_FP, VT: MVT::f64, Action: Custom);
    setOperationAction(Op: ISD::FP_TO_BF16, VT: MVT::f64,
                       Action: Subtarget.isSoftFPABI() ? LibCall : Custom);

    if (Subtarget.is64Bit())
      setOperationAction(Op: ISD::FRINT, VT: MVT::f64, Action: Legal);
  }

  // Set operations for 'LSX' feature.

  if (Subtarget.hasExtLSX()) {
    for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
      // Expand all truncating stores and extending loads.
      for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(ValVT: VT, MemVT: InnerVT, Action: Expand);
        setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand);
        setLoadExtAction(ExtType: ISD::ZEXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand);
        setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand);
      }
      // By default everything must be expanded. Then we will selectively turn
      // on ones that can be effectively codegen'd.
      for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
        setOperationAction(Op, VT, Action: Expand);
    }

    // Actions common to all 128-bit LSX vector types.
    for (MVT VT : LSXVTs) {
      setOperationAction(Ops: {ISD::LOAD, ISD::STORE}, VT, Action: Legal);
      setOperationAction(Op: ISD::BITCAST, VT, Action: Legal);
      setOperationAction(Op: ISD::UNDEF, VT, Action: Legal);

      setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT, Action: Custom);
      setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT, Action: Legal);
      setOperationAction(Op: ISD::BUILD_VECTOR, VT, Action: Custom);

      setOperationAction(Op: ISD::SETCC, VT, Action: Legal);
      setOperationAction(Op: ISD::VSELECT, VT, Action: Legal);
      setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT, Action: Custom);
      setOperationAction(Op: ISD::EXTRACT_SUBVECTOR, VT, Action: Legal);
    }
    // Integer-only LSX actions.
    for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
      setOperationAction(Ops: {ISD::ADD, ISD::SUB}, VT, Action: Legal);
      setOperationAction(Ops: {ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
                         Action: Legal);
      setOperationAction(Ops: {ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
                         VT, Action: Legal);
      setOperationAction(Ops: {ISD::AND, ISD::OR, ISD::XOR}, VT, Action: Legal);
      setOperationAction(Ops: {ISD::SHL, ISD::SRA, ISD::SRL}, VT, Action: Legal);
      setOperationAction(Ops: {ISD::CTPOP, ISD::CTLZ}, VT, Action: Legal);
      setOperationAction(Ops: {ISD::MULHS, ISD::MULHU}, VT, Action: Legal);
      setCondCodeAction(
          CCs: {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
          Action: Expand);
      setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT, Action: Custom);
      setOperationAction(Op: ISD::ABS, VT, Action: Legal);
      setOperationAction(Op: ISD::ABDS, VT, Action: Legal);
      setOperationAction(Op: ISD::ABDU, VT, Action: Legal);
      setOperationAction(Op: ISD::SADDSAT, VT, Action: Legal);
      setOperationAction(Op: ISD::SSUBSAT, VT, Action: Legal);
      setOperationAction(Op: ISD::UADDSAT, VT, Action: Legal);
      setOperationAction(Op: ISD::USUBSAT, VT, Action: Legal);
      setOperationAction(Op: ISD::ROTL, VT, Action: Custom);
      setOperationAction(Op: ISD::ROTR, VT, Action: Custom);
    }
    for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
      setOperationAction(Op: ISD::BITREVERSE, VT, Action: Custom);
    for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
      setOperationAction(Op: ISD::BSWAP, VT, Action: Legal);
    for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
      setOperationAction(Ops: {ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Action: Legal);
      setOperationAction(Ops: {ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Action: Legal);
    }
    // Floating-point LSX actions.
    for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
      setOperationAction(Ops: {ISD::FADD, ISD::FSUB}, VT, Action: Legal);
      setOperationAction(Ops: {ISD::FMUL, ISD::FDIV}, VT, Action: Legal);
      setOperationAction(Op: ISD::FMA, VT, Action: Legal);
      setOperationAction(Op: ISD::FSQRT, VT, Action: Legal);
      setOperationAction(Op: ISD::FNEG, VT, Action: Legal);
      setCondCodeAction(CCs: {ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
                         ISD::SETUGE, ISD::SETUGT},
                        VT, Action: Expand);
      setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT, Action: Legal);
      setOperationAction(Op: ISD::FCEIL, VT, Action: Legal);
      setOperationAction(Op: ISD::FFLOOR, VT, Action: Legal);
      setOperationAction(Op: ISD::FTRUNC, VT, Action: Legal);
      setOperationAction(Op: ISD::FROUNDEVEN, VT, Action: Legal);
      setOperationAction(Op: ISD::FMINNUM, VT, Action: Legal);
      setOperationAction(Op: ISD::FMAXNUM, VT, Action: Legal);
    }
    // LSX also upgrades some scalar operations.
    setOperationAction(Op: ISD::CTPOP, VT: GRLenVT, Action: Legal);
    setOperationAction(Ops: ISD::FCEIL, VTs: {MVT::f32, MVT::f64}, Action: Legal);
    setOperationAction(Ops: ISD::FFLOOR, VTs: {MVT::f32, MVT::f64}, Action: Legal);
    setOperationAction(Ops: ISD::FTRUNC, VTs: {MVT::f32, MVT::f64}, Action: Legal);
    setOperationAction(Ops: ISD::FROUNDEVEN, VTs: {MVT::f32, MVT::f64}, Action: Legal);

    // Truncation and reductions are custom-lowered, including the sub-128-bit
    // types listed here.
    for (MVT VT :
         {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
          MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
      setOperationAction(Op: ISD::TRUNCATE, VT, Action: Custom);
      setOperationAction(Op: ISD::VECREDUCE_ADD, VT, Action: Custom);
      setOperationAction(Op: ISD::VECREDUCE_AND, VT, Action: Custom);
      setOperationAction(Op: ISD::VECREDUCE_OR, VT, Action: Custom);
      setOperationAction(Op: ISD::VECREDUCE_XOR, VT, Action: Custom);
      setOperationAction(Op: ISD::VECREDUCE_SMAX, VT, Action: Custom);
      setOperationAction(Op: ISD::VECREDUCE_SMIN, VT, Action: Custom);
      setOperationAction(Op: ISD::VECREDUCE_UMAX, VT, Action: Custom);
      setOperationAction(Op: ISD::VECREDUCE_UMIN, VT, Action: Custom);
    }
  }

  // Set operations for 'LASX' feature.

  if (Subtarget.hasExtLASX()) {
    for (MVT VT : LASXVTs) {
      setOperationAction(Ops: {ISD::LOAD, ISD::STORE}, VT, Action: Legal);
      setOperationAction(Op: ISD::BITCAST, VT, Action: Legal);
      setOperationAction(Op: ISD::UNDEF, VT, Action: Legal);

      setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT, Action: Custom);
      setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT, Action: Custom);
      setOperationAction(Op: ISD::BUILD_VECTOR, VT, Action: Custom);
      setOperationAction(Op: ISD::CONCAT_VECTORS, VT, Action: Custom);
      setOperationAction(Op: ISD::INSERT_SUBVECTOR, VT, Action: Legal);

      setOperationAction(Op: ISD::SETCC, VT, Action: Custom);
      setOperationAction(Op: ISD::VSELECT, VT, Action: Legal);
      setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT, Action: Custom);
    }
    // Integer-only LASX actions.
    for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
      setOperationAction(Ops: {ISD::ADD, ISD::SUB}, VT, Action: Legal);
      setOperationAction(Ops: {ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
                         Action: Legal);
      setOperationAction(Ops: {ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
                         VT, Action: Legal);
      setOperationAction(Ops: {ISD::AND, ISD::OR, ISD::XOR}, VT, Action: Legal);
      setOperationAction(Ops: {ISD::SHL, ISD::SRA, ISD::SRL}, VT, Action: Legal);
      setOperationAction(Ops: {ISD::CTPOP, ISD::CTLZ}, VT, Action: Legal);
      setOperationAction(Ops: {ISD::MULHS, ISD::MULHU}, VT, Action: Legal);
      setCondCodeAction(
          CCs: {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
          Action: Expand);
      setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT, Action: Custom);
      setOperationAction(Op: ISD::ABS, VT, Action: Legal);
      setOperationAction(Op: ISD::ABDS, VT, Action: Legal);
      setOperationAction(Op: ISD::ABDU, VT, Action: Legal);
      setOperationAction(Op: ISD::SADDSAT, VT, Action: Legal);
      setOperationAction(Op: ISD::SSUBSAT, VT, Action: Legal);
      setOperationAction(Op: ISD::UADDSAT, VT, Action: Legal);
      setOperationAction(Op: ISD::USUBSAT, VT, Action: Legal);
      setOperationAction(Op: ISD::VECREDUCE_ADD, VT, Action: Custom);
      setOperationAction(Op: ISD::ROTL, VT, Action: Custom);
      setOperationAction(Op: ISD::ROTR, VT, Action: Custom);
    }
    for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
      setOperationAction(Op: ISD::BITREVERSE, VT, Action: Custom);
    for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
      setOperationAction(Op: ISD::BSWAP, VT, Action: Legal);
    for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
      setOperationAction(Ops: {ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Action: Legal);
      setOperationAction(Ops: {ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Action: Legal);
    }
    // Floating-point LASX actions.
    for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
      setOperationAction(Ops: {ISD::FADD, ISD::FSUB}, VT, Action: Legal);
      setOperationAction(Ops: {ISD::FMUL, ISD::FDIV}, VT, Action: Legal);
      setOperationAction(Op: ISD::FMA, VT, Action: Legal);
      setOperationAction(Op: ISD::FSQRT, VT, Action: Legal);
      setOperationAction(Op: ISD::FNEG, VT, Action: Legal);
      setCondCodeAction(CCs: {ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
                         ISD::SETUGE, ISD::SETUGT},
                        VT, Action: Expand);
      setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT, Action: Legal);
      setOperationAction(Op: ISD::FCEIL, VT, Action: Legal);
      setOperationAction(Op: ISD::FFLOOR, VT, Action: Legal);
      setOperationAction(Op: ISD::FTRUNC, VT, Action: Legal);
      setOperationAction(Op: ISD::FROUNDEVEN, VT, Action: Legal);
      setOperationAction(Op: ISD::FMINNUM, VT, Action: Legal);
      setOperationAction(Op: ISD::FMAXNUM, VT, Action: Legal);
    }
  }

  // Set DAG combine for LA32 and LA64.
  if (Subtarget.hasBasicF()) {
    setTargetDAGCombine(ISD::SINT_TO_FP);
  }

  setTargetDAGCombine(ISD::AND);
  setTargetDAGCombine(ISD::OR);
  setTargetDAGCombine(ISD::SRL);
  setTargetDAGCombine(ISD::SETCC);

  // Set DAG combine for 'LSX' feature.

  if (Subtarget.hasExtLSX()) {
    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
    setTargetDAGCombine(ISD::BITCAST);
  }

  // Set DAG combine for 'LASX' feature.
  if (Subtarget.hasExtLASX()) {
    setTargetDAGCombine(ISD::ANY_EXTEND);
    setTargetDAGCombine(ISD::ZERO_EXTEND);
    setTargetDAGCombine(ISD::SIGN_EXTEND);
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(TRI: Subtarget.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(LoongArch::R3);

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());

  setMinCmpXchgSizeInBits(32);

  // Function alignments.
  setMinFunctionAlignment(Align(4));
  // Set preferred alignments.
  setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
  setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
  setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());

  // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
  if (Subtarget.hasLAMCAS())
    setMinCmpXchgSizeInBits(8);

  if (Subtarget.hasSCQ()) {
    setMaxAtomicSizeInBitsSupported(128);
    setOperationAction(Op: ISD::ATOMIC_CMP_SWAP, VT: MVT::i128, Action: Custom);
  }

  // Disable strict node mutation.
  IsStrictFPEnabled = true;
}
525
526bool LoongArchTargetLowering::isOffsetFoldingLegal(
527 const GlobalAddressSDNode *GA) const {
528 // In order to maximise the opportunity for common subexpression elimination,
529 // keep a separate ADD node for the global address offset instead of folding
530 // it in the global address node. Later peephole optimisations may choose to
531 // fold it back in when profitable.
532 return false;
533}
534
// Central dispatch for every node marked `Custom` in the constructor: routes
// \p Op to the matching lower* helper. Returning an empty SDValue signals
// that no custom lowering was produced for this node.
SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
                                                SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::ATOMIC_FENCE:
    return lowerATOMIC_FENCE(Op, DAG);
  case ISD::EH_DWARF_CFA:
    return lowerEH_DWARF_CFA(Op, DAG);
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN:
    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID:
    return lowerINTRINSIC_VOID(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  // SRA/SRL share one helper, distinguished by the IsSRA flag.
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, IsSRA: true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, IsSRA: false);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::FP_TO_SINT:
    return lowerFP_TO_SINT(Op, DAG);
  case ISD::BITCAST:
    return lowerBITCAST(Op, DAG);
  case ISD::UINT_TO_FP:
    return lowerUINT_TO_FP(Op, DAG);
  case ISD::SINT_TO_FP:
    return lowerSINT_TO_FP(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::WRITE_REGISTER:
    return lowerWRITE_REGISTER(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::BUILD_VECTOR:
    return lowerBUILD_VECTOR(Op, DAG);
  case ISD::CONCAT_VECTORS:
    return lowerCONCAT_VECTORS(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return lowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::BITREVERSE:
    return lowerBITREVERSE(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return lowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::PREFETCH:
    return lowerPREFETCH(Op, DAG);
  case ISD::SELECT:
    return lowerSELECT(Op, DAG);
  case ISD::BRCOND:
    return lowerBRCOND(Op, DAG);
  case ISD::FP_TO_FP16:
    return lowerFP_TO_FP16(Op, DAG);
  case ISD::FP16_TO_FP:
    return lowerFP16_TO_FP(Op, DAG);
  case ISD::FP_TO_BF16:
    return lowerFP_TO_BF16(Op, DAG);
  case ISD::BF16_TO_FP:
    return lowerBF16_TO_FP(Op, DAG);
  case ISD::VECREDUCE_ADD:
    return lowerVECREDUCE_ADD(Op, DAG);
  // Both rotate directions funnel into one helper.
  case ISD::ROTL:
  case ISD::ROTR:
    return lowerRotate(Op, DAG);
  // All remaining reductions share lowerVECREDUCE.
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_UMIN:
    return lowerVECREDUCE(Op, DAG);
  case ISD::ConstantFP:
    return lowerConstantFP(Op, DAG);
  case ISD::SETCC:
    return lowerSETCC(Op, DAG);
  }
  // Unhandled opcode: no custom lowering produced.
  return SDValue();
}
628
// Helper to attempt to return a cheaper, bit-inverted version of \p V.
// Returns the un-inverted operand X when \p V is recognisable as NOT(X)
// (possibly through bitcasts, extract_subvector, or a splat), or an empty
// SDValue when no such form is found. The recursion lets the NOT be peeled
// through wrapper nodes.
static SDValue isNOT(SDValue V, SelectionDAG &DAG) {
  // TODO: don't always ignore oneuse constraints.
  V = peekThroughBitcasts(V);
  EVT VT = V.getValueType();

  // Match not(xor X, -1) -> X.
  if (V.getOpcode() == ISD::XOR &&
      (ISD::isBuildVectorAllOnes(N: V.getOperand(i: 1).getNode()) ||
       isAllOnesConstant(V: V.getOperand(i: 1))))
    return V.getOperand(i: 0);

  // Match not(extract_subvector(not(X)) -> extract_subvector(X).
  // Only when extracting from offset 0 or the source is single-use, so the
  // rewrite cannot pessimise other users of the wide vector.
  if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      (isNullConstant(V: V.getOperand(i: 1)) || V.getOperand(i: 0).hasOneUse())) {
    if (SDValue Not = isNOT(V: V.getOperand(i: 0), DAG)) {
      Not = DAG.getBitcast(VT: V.getOperand(i: 0).getValueType(), V: Not);
      return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL: SDLoc(Not), VT, N1: Not,
                         N2: V.getOperand(i: 1));
    }
  }

  // Match not(SplatVector(not(X)) -> SplatVector(X).
  if (V.getOpcode() == ISD::BUILD_VECTOR) {
    if (SDValue SplatValue =
            cast<BuildVectorSDNode>(Val: V.getNode())->getSplatValue()) {
      // Bail out if the splat scalar has users other than this BUILD_VECTOR.
      if (!V->isOnlyUserOf(N: SplatValue.getNode()))
        return SDValue();

      if (SDValue Not = isNOT(V: SplatValue, DAG)) {
        Not = DAG.getBitcast(VT: V.getOperand(i: 0).getValueType(), V: Not);
        return DAG.getSplat(VT, DL: SDLoc(Not), Op: Not);
      }
    }
  }

  // Match not(or(not(X),not(Y))) -> and(X, Y).
  if (V.getOpcode() == ISD::OR && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
      V.getOperand(i: 0).hasOneUse() && V.getOperand(i: 1).hasOneUse()) {
    // TODO: Handle cases with single NOT operand -> VANDN
    if (SDValue Op1 = isNOT(V: V.getOperand(i: 1), DAG))
      if (SDValue Op0 = isNOT(V: V.getOperand(i: 0), DAG))
        return DAG.getNode(Opcode: ISD::AND, DL: SDLoc(V), VT, N1: DAG.getBitcast(VT, V: Op0),
                           N2: DAG.getBitcast(VT, V: Op1));
  }

  // TODO: Add more matching patterns. Such as,
  // not(concat_vectors(not(X), not(Y))) -> concat_vectors(X, Y).
  // not(slt(C, X)) -> slt(X - 1, C)

  return SDValue();
}
681
// Custom lowering for ISD::ConstantFP: materialize the immediate with an
// integer sequence plus a GPR->FPR move when that fits the user-selected
// instruction budget. Returning an empty SDValue falls back to the default
// lowering (a constant-pool load), or, for values VLDI can encode, to the
// cheaper pattern handled elsewhere.
SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op,
                                                 SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Val&: Op);
  const APFloat &FPVal = CFP->getValueAPF();
  SDLoc DL(CFP);

  // Only f32/f64 are registered Custom, and only with the matching feature.
  assert((VT == MVT::f32 && Subtarget.hasBasicF()) ||
         (VT == MVT::f64 && Subtarget.hasBasicD()));

  // If value is 0.0 or -0.0, just ignore it.
  if (FPVal.isZero())
    return SDValue();

  // If lsx enabled, use cheaper 'vldi' instruction if possible.
  if (isFPImmVLDILegal(Imm: FPVal, VT))
    return SDValue();

  // Construct as integer, and move to float register.
  APInt INTVal = FPVal.bitcastToAPInt();

  // If more than MaterializeFPImmInsNum instructions will be used to
  // generate the INTVal and move it to float register, fallback to
  // use floating point load from the constant pool.
  auto Seq = LoongArchMatInt::generateInstSeq(Val: INTVal.getSExtValue());
  // The extra cost beyond the integer sequence: two moves for a 64-bit value
  // split across GPR pairs on LA32, one move otherwise. Note +1.0 is always
  // materialized regardless of the budget.
  int InsNum = Seq.size() + ((VT == MVT::f64 && !Subtarget.is64Bit()) ? 2 : 1);
  if (InsNum > MaterializeFPImmInsNum && !FPVal.isExactlyValue(V: +1.0))
    return SDValue();

  switch (VT.getSimpleVT().SimpleTy) {
  default:
    llvm_unreachable("Unexpected floating point type!");
    break;
  case MVT::f32: {
    SDValue NewVal = DAG.getConstant(Val: INTVal, DL, VT: MVT::i32);
    // On LA64 the move reads a 64-bit GPR, so widen the constant first.
    if (Subtarget.is64Bit())
      NewVal = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: NewVal);
    return DAG.getNode(Opcode: Subtarget.is64Bit() ? LoongArchISD::MOVGR2FR_W_LA64
                                             : LoongArchISD::MOVGR2FR_W,
                       DL, VT, Operand: NewVal);
  }
  case MVT::f64: {
    if (Subtarget.is64Bit()) {
      SDValue NewVal = DAG.getConstant(Val: INTVal, DL, VT: MVT::i64);
      return DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_D, DL, VT, Operand: NewVal);
    }
    // LA32: split the 64-bit pattern into lo/hi halves moved separately.
    SDValue Lo = DAG.getConstant(Val: INTVal.trunc(width: 32), DL, VT: MVT::i32);
    SDValue Hi = DAG.getConstant(Val: INTVal.lshr(shiftAmt: 32).trunc(width: 32), DL, VT: MVT::i32);
    return DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_D_LO_HI, DL, VT, N1: Lo, N2: Hi);
  }
  }

  return SDValue();
}
736
737// Ensure SETCC result and operand have the same bit width; isel does not
738// support mismatched widths.
739SDValue LoongArchTargetLowering::lowerSETCC(SDValue Op,
740 SelectionDAG &DAG) const {
741 SDLoc DL(Op);
742 EVT ResultVT = Op.getValueType();
743 EVT OperandVT = Op.getOperand(i: 0).getValueType();
744
745 EVT SetCCResultVT =
746 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: OperandVT);
747
748 if (ResultVT == SetCCResultVT)
749 return Op;
750
751 assert(Op.getOperand(0).getValueType() == Op.getOperand(1).getValueType() &&
752 "SETCC operands must have the same type!");
753
754 SDValue SetCCNode =
755 DAG.getNode(Opcode: ISD::SETCC, DL, VT: SetCCResultVT, N1: Op.getOperand(i: 0),
756 N2: Op.getOperand(i: 1), N3: Op.getOperand(i: 2));
757
758 if (ResultVT.bitsGT(VT: SetCCResultVT))
759 SetCCNode = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: ResultVT, Operand: SetCCNode);
760 else if (ResultVT.bitsLT(VT: SetCCResultVT))
761 SetCCNode = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: ResultVT, Operand: SetCCNode);
762
763 return SetCCNode;
764}
765
// Lower vecreduce_add using vhaddw instructions.
// For Example:
// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
// can be lowered to:
// VHADDW_D_W vr0, vr0, vr0
// VHADDW_Q_D vr0, vr0, vr0
// VPICKVE2GR_D a0, vr0, 0
// ADDI_W a0, a0, 0
SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
                                                    SelectionDAG &DAG) const {

  SDLoc DL(Op);
  MVT OpVT = Op.getSimpleValueType();
  SDValue Val = Op.getOperand(i: 0);

  // Element count / width of the input, and the scalar result width.
  unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
  unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
  unsigned ResBits = OpVT.getScalarSizeInBits();

  unsigned LegalVecSize = 128;
  bool isLASX256Vector =
      Subtarget.hasExtLASX() && Val.getValueSizeInBits() == 256;

  // Ensure operand type legal or enable it legal.
  while (!isTypeLegal(VT: Val.getSimpleValueType())) {
    Val = DAG.WidenVector(N: Val, DL);
  }

  // NumEles is designed for iterations count, v4i32 for LSX
  // and v8i32 for LASX should have the same count.
  if (isLASX256Vector) {
    NumEles /= 2;
    LegalVecSize = 256;
  }

  // Each VHADDW step pairwise-adds adjacent elements, doubling the element
  // width and halving the element count until one (per-128-bit-lane) sum
  // remains.
  for (unsigned i = 1; i < NumEles; i *= 2, EleBits *= 2) {
    MVT IntTy = MVT::getIntegerVT(BitWidth: EleBits);
    MVT VecTy = MVT::getVectorVT(VT: IntTy, NumElements: LegalVecSize / EleBits);
    Val = DAG.getNode(Opcode: LoongArchISD::VHADDW, DL, VT: VecTy, N1: Val, N2: Val);
  }

  // For 256-bit vectors combine the two 128-bit lanes: XVPERMI with
  // immediate 2 presumably swaps the lanes (TODO confirm), then ADD folds
  // both partial sums together.
  if (isLASX256Vector) {
    SDValue Tmp = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: Val,
                              N2: DAG.getConstant(Val: 2, DL, VT: Subtarget.getGRLenVT()));
    Val = DAG.getNode(Opcode: ISD::ADD, DL, VT: MVT::v4i64, N1: Tmp, N2: Val);
  }

  // Reinterpret as a vector of the result element type and extract lane 0.
  Val = DAG.getBitcast(VT: MVT::getVectorVT(VT: OpVT, NumElements: LegalVecSize / ResBits), V: Val);
  return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: OpVT, N1: Val,
                     N2: DAG.getConstant(Val: 0, DL, VT: Subtarget.getGRLenVT()));
}
817
// Lower vecreduce_and/or/xor/[s/u]max/[s/u]min.
// For Example:
// call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
// can be lowered to:
// VBSRL_V vr1, vr0, 8
// VMAX_W vr0, vr1, vr0
// VBSRL_V vr1, vr0, 4
// VMAX_W vr0, vr1, vr0
// VPICKVE2GR_W a0, vr0, 0
// For 256 bit vector, it is illegal and will be spilt into
// two 128 bit vector by default then processed by this.
SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);

  MVT OpVT = Op.getSimpleValueType();
  SDValue Val = Op.getOperand(i: 0);

  unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
  unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();

  // Ensure operand type legal or enable it legal.
  while (!isTypeLegal(VT: Val.getSimpleValueType())) {
    Val = DAG.WidenVector(N: Val, DL);
  }

  // The scalar binary op corresponding to this reduction (e.g. SMAX for
  // VECREDUCE_SMAX).
  unsigned Opcode = ISD::getVecReduceBaseOpcode(VecReduceOpcode: Op.getOpcode());
  MVT VecTy = Val.getSimpleValueType();
  MVT GRLenVT = Subtarget.getGRLenVT();

  // Log-step reduction: shift the high half of the remaining elements down
  // by i*EleBits/16 bytes (half of i elements) and combine with Opcode,
  // until the result is in element 0.
  for (int i = NumEles; i > 1; i /= 2) {
    SDValue ShiftAmt = DAG.getConstant(Val: i * EleBits / 16, DL, VT: GRLenVT);
    SDValue Tmp = DAG.getNode(Opcode: LoongArchISD::VBSRL, DL, VT: VecTy, N1: Val, N2: ShiftAmt);
    Val = DAG.getNode(Opcode, DL, VT: VecTy, N1: Tmp, N2: Val);
  }

  return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: OpVT, N1: Val,
                     N2: DAG.getConstant(Val: 0, DL, VT: GRLenVT));
}
857
858SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
859 SelectionDAG &DAG) const {
860 unsigned IsData = Op.getConstantOperandVal(i: 4);
861
862 // We don't support non-data prefetch.
863 // Just preserve the chain.
864 if (!IsData)
865 return Op.getOperand(i: 0);
866
867 return Op;
868}
869
// Lower vector ISD::ROTL/ISD::ROTR. ROTL is rewritten as ROTR with a
// negated amount; a constant-splat amount is reduced modulo the element
// width.
SDValue LoongArchTargetLowering::lowerRotate(SDValue Op,
                                             SelectionDAG &DAG) const {
  MVT VT = Op.getSimpleValueType();
  assert(VT.isVector() && "Unexpected type");

  SDLoc DL(Op);
  SDValue R = Op.getOperand(i: 0);
  SDValue Amt = Op.getOperand(i: 1);
  unsigned Opcode = Op.getOpcode();
  unsigned EltSizeInBits = VT.getScalarSizeInBits();

  // Returns true (and sets CstSplatValue) if V is a BUILD_VECTOR splat of a
  // single constant.
  auto checkCstSplat = [](SDValue V, APInt &CstSplatValue) {
    if (V.getOpcode() != ISD::BUILD_VECTOR)
      return false;
    if (SDValue SplatValue =
            cast<BuildVectorSDNode>(Val: V.getNode())->getSplatValue()) {
      if (auto *C = dyn_cast<ConstantSDNode>(Val&: SplatValue)) {
        CstSplatValue = C->getAPIntValue();
        return true;
      }
    }
    return false;
  };

  // Check for constant splat rotation amount.
  APInt CstSplatValue;
  bool IsCstSplat = checkCstSplat(Amt, CstSplatValue);
  bool isROTL = Opcode == ISD::ROTL;

  // Check for splat rotate by zero.
  if (IsCstSplat && CstSplatValue.urem(RHS: EltSizeInBits) == 0)
    return R;

  // LoongArch targets always prefer ISD::ROTR:
  // rotl(x, amt) == rotr(x, -amt) (mod element width).
  if (isROTL) {
    SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
    return DAG.getNode(Opcode: ISD::ROTR, DL, VT, N1: R,
                       N2: DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Zero, N2: Amt));
  }

  // Rotate by an immediate.
  if (IsCstSplat) {
    // ISD::ROTR: Attempt to rotate by a positive immediate, reducing the
    // amount modulo the element width.
    SDValue Bits = DAG.getConstant(Val: EltSizeInBits, DL, VT);
    if (SDValue Urem =
            DAG.FoldConstantArithmetic(Opcode: ISD::UREM, DL, VT, Ops: {Amt, Bits}))
      return DAG.getNode(Opcode, DL, VT, N1: R, N2: Urem);
  }

  return Op;
}
921
// Return true if Val is equal to (setcc LHS, RHS, CC).
// Return false if Val is the inverse of (setcc LHS, RHS, CC).
// Otherwise, return std::nullopt.
static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
                                      ISD::CondCode CC, SDValue Val) {
  assert(Val->getOpcode() == ISD::SETCC);
  SDValue LHS2 = Val.getOperand(i: 0);
  SDValue RHS2 = Val.getOperand(i: 1);
  ISD::CondCode CC2 = cast<CondCodeSDNode>(Val: Val.getOperand(i: 2))->get();

  // Same operands in the same order: compare condition codes directly.
  if (LHS == LHS2 && RHS == RHS2) {
    if (CC == CC2)
      return true;
    if (CC == ISD::getSetCCInverse(Operation: CC2, Type: LHS2.getValueType()))
      return false;
  } else if (LHS == RHS2 && RHS == LHS2) {
    // Operands swapped: normalize Val's condition before comparing.
    CC2 = ISD::getSetCCSwappedOperands(Operation: CC2);
    if (CC == CC2)
      return true;
    if (CC == ISD::getSetCCInverse(Operation: CC2, Type: LHS2.getValueType()))
      return false;
  }

  return std::nullopt;
}
947
// Try to replace (select cond, truev, falsev) with cheap bitwise/arithmetic
// ops when one arm is 0 or -1, when the arms are bitwise complements, or
// when cond and both arms are setcc nodes over the same operands. Returns
// SDValue() if no pattern applies.
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
                                    const LoongArchSubtarget &Subtarget) {
  SDValue CondV = N->getOperand(Num: 0);
  SDValue TrueV = N->getOperand(Num: 1);
  SDValue FalseV = N->getOperand(Num: 2);
  MVT VT = N->getSimpleValueType(ResNo: 0);
  SDLoc DL(N);

  // (select c, -1, y) -> -c | y
  if (isAllOnesConstant(V: TrueV)) {
    SDValue Neg = DAG.getNegative(Val: CondV, DL, VT);
    return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Neg, N2: DAG.getFreeze(V: FalseV));
  }
  // (select c, y, -1) -> (c-1) | y
  if (isAllOnesConstant(V: FalseV)) {
    SDValue Neg =
        DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: CondV, N2: DAG.getAllOnesConstant(DL, VT));
    return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Neg, N2: DAG.getFreeze(V: TrueV));
  }

  // (select c, 0, y) -> (c-1) & y
  if (isNullConstant(V: TrueV)) {
    SDValue Neg =
        DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: CondV, N2: DAG.getAllOnesConstant(DL, VT));
    return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Neg, N2: DAG.getFreeze(V: FalseV));
  }
  // (select c, y, 0) -> -c & y
  if (isNullConstant(V: FalseV)) {
    SDValue Neg = DAG.getNegative(Val: CondV, DL, VT);
    return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Neg, N2: DAG.getFreeze(V: TrueV));
  }

  // select c, ~x, x --> xor -c, x
  if (isa<ConstantSDNode>(Val: TrueV) && isa<ConstantSDNode>(Val: FalseV)) {
    const APInt &TrueVal = TrueV->getAsAPIntVal();
    const APInt &FalseVal = FalseV->getAsAPIntVal();
    if (~TrueVal == FalseVal) {
      SDValue Neg = DAG.getNegative(Val: CondV, DL, VT);
      return DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Neg, N2: FalseV);
    }
  }

  // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
  // when both truev and falsev are also setcc.
  if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
      FalseV.getOpcode() == ISD::SETCC) {
    SDValue LHS = CondV.getOperand(i: 0);
    SDValue RHS = CondV.getOperand(i: 1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Val: CondV.getOperand(i: 2))->get();

    // (select x, x, y) -> x | y
    // (select !x, x, y) -> x & y
    if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, Val: TrueV)) {
      return DAG.getNode(Opcode: *MatchResult ? ISD::OR : ISD::AND, DL, VT, N1: TrueV,
                         N2: DAG.getFreeze(V: FalseV));
    }
    // (select x, y, x) -> x & y
    // (select !x, y, x) -> x | y
    if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, Val: FalseV)) {
      return DAG.getNode(Opcode: *MatchResult ? ISD::AND : ISD::OR, DL, VT,
                         N1: DAG.getFreeze(V: TrueV), N2: FalseV);
    }
  }

  return SDValue();
}
1014
// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
// being `0` or `-1`. In such cases we can replace `select` with `and`.
// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
// than `c0`?
static SDValue
foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
                                const LoongArchSubtarget &Subtarget) {
  // Find which operand of BO is the one-use select.
  unsigned SelOpNo = 0;
  SDValue Sel = BO->getOperand(Num: 0);
  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
    SelOpNo = 1;
    Sel = BO->getOperand(Num: 1);
  }

  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
    return SDValue();

  // Find which select arm is the constant (c0); the other arm is x.
  unsigned ConstSelOpNo = 1;
  unsigned OtherSelOpNo = 2;
  if (!isa<ConstantSDNode>(Val: Sel->getOperand(Num: ConstSelOpNo))) {
    ConstSelOpNo = 2;
    OtherSelOpNo = 1;
  }
  SDValue ConstSelOp = Sel->getOperand(Num: ConstSelOpNo);
  ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(Val&: ConstSelOp);
  if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
    return SDValue();

  // The other BO operand must be the constant c1.
  SDValue ConstBinOp = BO->getOperand(Num: SelOpNo ^ 1);
  ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(Val&: ConstBinOp);
  if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
    return SDValue();

  SDLoc DL(Sel);
  EVT VT = BO->getValueType(ResNo: 0);

  // Compute binOp(c0, c1), preserving the original operand order of BO.
  SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
  if (SelOpNo == 1)
    std::swap(a&: NewConstOps[0], b&: NewConstOps[1]);

  SDValue NewConstOp =
      DAG.FoldConstantArithmetic(Opcode: BO->getOpcode(), DL, VT, Ops: NewConstOps);
  if (!NewConstOp)
    return SDValue();

  // Only profitable when the folded constant is 0 or -1 (select becomes
  // simple bitwise ops downstream).
  const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
  if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
    return SDValue();

  // Build binOp(x, c1), again keeping BO's operand order.
  SDValue OtherSelOp = Sel->getOperand(Num: OtherSelOpNo);
  SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
  if (SelOpNo == 1)
    std::swap(a&: NewNonConstOps[0], b&: NewNonConstOps[1]);
  SDValue NewNonConstOp = DAG.getNode(Opcode: BO->getOpcode(), DL, VT, Ops: NewNonConstOps);

  // Reassemble the select with each arm replaced by its folded form.
  SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
  SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
  return DAG.getSelect(DL, VT, Cond: Sel.getOperand(i: 0), LHS: NewT, RHS: NewF);
}
1076
// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly by branches
// in the LoongArch ISA. May adjust compares to favor compare with 0 over
// compare with 1/-1.
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
                                    ISD::CondCode &CC, SelectionDAG &DAG) {
  // If this is a single bit test that can't be handled by ANDI, shift the
  // bit to be tested to the MSB and perform a signed compare with 0.
  if (isIntEqualitySetCC(Code: CC) && isNullConstant(V: RHS) &&
      LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
      isa<ConstantSDNode>(Val: LHS.getOperand(i: 1))) {
    uint64_t Mask = LHS.getConstantOperandVal(i: 1);
    // Only worthwhile when the mask doesn't fit the 12-bit ANDI immediate.
    if ((isPowerOf2_64(Value: Mask) || isMask_64(Value: Mask)) && !isInt<12>(x: Mask)) {
      unsigned ShAmt = 0;
      if (isPowerOf2_64(Value: Mask)) {
        // Single-bit mask: move the bit to the sign position. (x & bit) == 0
        // becomes a sign test: EQ -> "MSB clear" (SETGE 0), NE -> SETLT 0.
        CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
        ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Value: Mask);
      } else {
        // Contiguous low-bit mask: shift the masked bits to the top; the
        // equality compare against 0 is preserved.
        ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Value: Mask);
      }

      LHS = LHS.getOperand(i: 0);
      if (ShAmt != 0)
        LHS = DAG.getNode(Opcode: ISD::SHL, DL, VT: LHS.getValueType(), N1: LHS,
                          N2: DAG.getConstant(Val: ShAmt, DL, VT: LHS.getValueType()));
      return;
    }
  }

  if (auto *RHSC = dyn_cast<ConstantSDNode>(Val&: RHS)) {
    int64_t C = RHSC->getSExtValue();
    switch (CC) {
    default:
      break;
    case ISD::SETGT:
      // Convert X > -1 to X >= 0.
      if (C == -1) {
        RHS = DAG.getConstant(Val: 0, DL, VT: RHS.getValueType());
        CC = ISD::SETGE;
        return;
      }
      break;
    case ISD::SETLT:
      // Convert X < 1 to 0 >= X.
      if (C == 1) {
        RHS = LHS;
        LHS = DAG.getConstant(Val: 0, DL, VT: RHS.getValueType());
        CC = ISD::SETGE;
        return;
      }
      break;
    }
  }

  // Canonicalize GT/LE (signed and unsigned) to their swapped-operand forms.
  switch (CC) {
  default:
    break;
  case ISD::SETGT:
  case ISD::SETLE:
  case ISD::SETUGT:
  case ISD::SETULE:
    CC = ISD::getSetCCSwappedOperands(Operation: CC);
    std::swap(a&: LHS, b&: RHS);
    break;
  }
}
1143
// Lower ISD::SELECT. First tries cheap bitwise folds, then emits a
// LoongArchISD::SELECT_CC, merging an integer SETCC condition into it when
// possible.
SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(i: 0);
  SDValue TrueV = Op.getOperand(i: 1);
  SDValue FalseV = Op.getOperand(i: 2);
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  MVT GRLenVT = Subtarget.getGRLenVT();

  // Prefer bitwise/arithmetic folds over a conditional move.
  if (SDValue V = combineSelectToBinOp(N: Op.getNode(), DAG, Subtarget))
    return V;

  // If the sole user is a speculatable binop with a constant operand, try
  // folding the binop into the select's arms.
  if (Op.hasOneUse()) {
    unsigned UseOpc = Op->user_begin()->getOpcode();
    if (isBinOp(Opcode: UseOpc) && DAG.isSafeToSpeculativelyExecute(Opcode: UseOpc)) {
      SDNode *BinOp = *Op->user_begin();
      if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(BO: *Op->user_begin(),
                                                           DAG, Subtarget)) {
        DAG.ReplaceAllUsesWith(From: BinOp, To: &NewSel);
        // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
        // may return a constant node and cause crash in lowerSELECT.
        if (NewSel.getOpcode() == ISD::SELECT)
          return lowerSELECT(Op: NewSel, DAG);
        return NewSel;
      }
    }
  }

  // If the condition is not an integer SETCC which operates on GRLenVT, we need
  // to emit a LoongArchISD::SELECT_CC comparing the condition to zero. i.e.:
  // (select condv, truev, falsev)
  // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
  if (CondV.getOpcode() != ISD::SETCC ||
      CondV.getOperand(i: 0).getSimpleValueType() != GRLenVT) {
    SDValue Zero = DAG.getConstant(Val: 0, DL, VT: GRLenVT);
    SDValue SetNE = DAG.getCondCode(Cond: ISD::SETNE);

    SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};

    return DAG.getNode(Opcode: LoongArchISD::SELECT_CC, DL, VT, Ops);
  }

  // If the CondV is the output of a SETCC node which operates on GRLenVT
  // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
  // to take advantage of the integer compare+branch instructions. i.e.: (select
  // (setcc lhs, rhs, cc), truev, falsev)
  // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
  SDValue LHS = CondV.getOperand(i: 0);
  SDValue RHS = CondV.getOperand(i: 1);
  ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: CondV.getOperand(i: 2))->get();

  // Special case for a select of 2 constants that have a difference of 1.
  // Normally this is done by DAGCombine, but if the select is introduced by
  // type legalization or op legalization, we miss it. Restricting to SETLT
  // case for now because that is what signed saturating add/sub need.
  // FIXME: We don't need the condition to be SETLT or even a SETCC,
  // but we would probably want to swap the true/false values if the condition
  // is SETGE/SETLE to avoid an XORI.
  if (isa<ConstantSDNode>(Val: TrueV) && isa<ConstantSDNode>(Val: FalseV) &&
      CCVal == ISD::SETLT) {
    const APInt &TrueVal = TrueV->getAsAPIntVal();
    const APInt &FalseVal = FalseV->getAsAPIntVal();
    // cond ? c+1 : c  ->  c + cond;  cond ? c-1 : c  ->  c - cond.
    if (TrueVal - 1 == FalseVal)
      return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: CondV, N2: FalseV);
    if (TrueVal + 1 == FalseVal)
      return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: FalseV, N2: CondV);
  }

  translateSetCCForBranch(DL, LHS, RHS, CC&: CCVal, DAG);
  // 1 < x ? x : 1 -> 0 < x ? x : 1
  if (isOneConstant(V: LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
      RHS == TrueV && LHS == FalseV) {
    LHS = DAG.getConstant(Val: 0, DL, VT);
    // 0 <u x is the same as x != 0.
    if (CCVal == ISD::SETULT) {
      std::swap(a&: LHS, b&: RHS);
      CCVal = ISD::SETNE;
    }
  }

  // x <s -1 ? x : -1 -> x <s 0 ? x : -1
  if (isAllOnesConstant(V: RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
      RHS == FalseV) {
    RHS = DAG.getConstant(Val: 0, DL, VT);
  }

  SDValue TargetCC = DAG.getCondCode(Cond: CCVal);

  if (isa<ConstantSDNode>(Val: TrueV) && !isa<ConstantSDNode>(Val: FalseV)) {
    // (select (setcc lhs, rhs, CC), constant, falsev)
    // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
    std::swap(a&: TrueV, b&: FalseV);
    TargetCC = DAG.getCondCode(Cond: ISD::getSetCCInverse(Operation: CCVal, Type: LHS.getValueType()));
  }

  SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
  return DAG.getNode(Opcode: LoongArchISD::SELECT_CC, DL, VT, Ops);
}
1242
// Lower ISD::BRCOND to LoongArchISD::BR_CC (integer compare+branch) or
// LoongArchISD::BRCOND (FP condition), folding a SETCC condition into the
// branch when its operands are GRLenVT integers.
SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(i: 1);
  SDLoc DL(Op);
  MVT GRLenVT = Subtarget.getGRLenVT();

  if (CondV.getOpcode() == ISD::SETCC) {
    if (CondV.getOperand(i: 0).getValueType() == GRLenVT) {
      // Integer compare: merge the SETCC into the branch.
      SDValue LHS = CondV.getOperand(i: 0);
      SDValue RHS = CondV.getOperand(i: 1);
      ISD::CondCode CCVal = cast<CondCodeSDNode>(Val: CondV.getOperand(i: 2))->get();

      // Canonicalize to a branch-supported condition.
      translateSetCCForBranch(DL, LHS, RHS, CC&: CCVal, DAG);

      SDValue TargetCC = DAG.getCondCode(Cond: CCVal);
      return DAG.getNode(Opcode: LoongArchISD::BR_CC, DL, VT: Op.getValueType(),
                         N1: Op.getOperand(i: 0), N2: LHS, N3: RHS, N4: TargetCC,
                         N5: Op.getOperand(i: 2));
    } else if (CondV.getOperand(i: 0).getValueType().isFloatingPoint()) {
      // FP compare: branch directly on the condition value.
      return DAG.getNode(Opcode: LoongArchISD::BRCOND, DL, VT: Op.getValueType(),
                         N1: Op.getOperand(i: 0), N2: CondV, N3: Op.getOperand(i: 2));
    }
  }

  // Generic fallback: branch if the condition value is non-zero.
  return DAG.getNode(Opcode: LoongArchISD::BR_CC, DL, VT: Op.getValueType(),
                     N1: Op.getOperand(i: 0), N2: CondV, N3: DAG.getConstant(Val: 0, DL, VT: GRLenVT),
                     N4: DAG.getCondCode(Cond: ISD::SETNE), N5: Op.getOperand(i: 2));
}
1271
1272SDValue
1273LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
1274 SelectionDAG &DAG) const {
1275 SDLoc DL(Op);
1276 MVT OpVT = Op.getSimpleValueType();
1277
1278 SDValue Vector = DAG.getUNDEF(VT: OpVT);
1279 SDValue Val = Op.getOperand(i: 0);
1280 SDValue Idx = DAG.getConstant(Val: 0, DL, VT: Subtarget.getGRLenVT());
1281
1282 return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: OpVT, N1: Vector, N2: Val, N3: Idx);
1283}
1284
// Lower vector ISD::BITREVERSE by bit-reversing each i64 chunk in scalar
// registers (BITREV_8B for i8 vectors, full i64 BITREVERSE otherwise), then
// shuffling to restore element order for i16/i32 element types.
SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  EVT ResTy = Op->getValueType(ResNo: 0);
  SDValue Src = Op->getOperand(Num: 0);
  SDLoc DL(Op);

  // LoongArchISD::BITREV_8B is not supported on LA32.
  if (!Subtarget.is64Bit() && (ResTy == MVT::v16i8 || ResTy == MVT::v32i8))
    return SDValue();

  // View the vector as i64 elements so each chunk can be processed in a GPR.
  EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
  unsigned int OrigEltNum = ResTy.getVectorNumElements();
  unsigned int NewEltNum = NewVT.getVectorNumElements();

  SDValue NewSrc = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: NewVT, Operand: Src);

  // Extract each i64, reverse it, and collect the results.
  SmallVector<SDValue, 8> Ops;
  for (unsigned int i = 0; i < NewEltNum; i++) {
    SDValue Op = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: MVT::i64, N1: NewSrc,
                             N2: DAG.getConstant(Val: i, DL, VT: Subtarget.getGRLenVT()));
    // For i8 vectors, BITREV_8B reverses bits within each byte, so no
    // later reordering is needed; otherwise reverse the whole i64.
    unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
                         ? (unsigned)LoongArchISD::BITREV_8B
                         : (unsigned)ISD::BITREVERSE;
    Ops.push_back(Elt: DAG.getNode(Opcode: RevOp, DL, VT: MVT::i64, Operand: Op));
  }
  SDValue Res =
      DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ResTy, Operand: DAG.getBuildVector(VT: NewVT, DL, Ops));

  switch (ResTy.getSimpleVT().SimpleTy) {
  default:
    return SDValue();
  case MVT::v16i8:
  case MVT::v32i8:
    return Res;
  case MVT::v8i16:
  case MVT::v16i16:
  case MVT::v4i32:
  case MVT::v8i32: {
    // A full i64 bit-reverse also reversed the order of the narrower
    // elements inside each i64; shuffle them back into place.
    SmallVector<int, 32> Mask;
    for (unsigned int i = 0; i < NewEltNum; i++)
      for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
        Mask.push_back(Elt: j + (OrigEltNum / NewEltNum) * i);
    return DAG.getVectorShuffle(VT: ResTy, dl: DL, N1: Res, N2: DAG.getUNDEF(VT: ResTy), Mask);
  }
  }
}
1331
// Widen element type to get a new mask value (if possible).
// For example:
//  shufflevector <4 x i32> %a, <4 x i32> %b,
//                <4 x i32> <i32 6, i32 7, i32 2, i32 3>
// is equivalent to:
//  shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
// can be lowered to:
//  VPACKOD_D vr0, vr0, vr1
static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
                                SDValue V1, SDValue V2, SelectionDAG &DAG) {
  unsigned EltBits = VT.getScalarSizeInBits();

  // Doubling would exceed 64-bit elements, or i1 elements can't be widened.
  if (EltBits > 32 || EltBits == 1)
    return SDValue();

  SmallVector<int, 8> NewMask;
  if (widenShuffleMaskElts(M: Mask, NewMask)) {
    // Build the double-width vector type, keeping FP-ness of the elements.
    MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(BitWidth: EltBits * 2)
                                        : MVT::getIntegerVT(BitWidth: EltBits * 2);
    MVT NewVT = MVT::getVectorVT(VT: NewEltVT, NumElements: VT.getVectorNumElements() / 2);
    // Only profitable if the widened type is directly legal.
    if (DAG.getTargetLoweringInfo().isTypeLegal(VT: NewVT)) {
      SDValue NewV1 = DAG.getBitcast(VT: NewVT, V: V1);
      SDValue NewV2 = DAG.getBitcast(VT: NewVT, V: V2);
      return DAG.getBitcast(
          VT, V: DAG.getVectorShuffle(VT: NewVT, dl: DL, N1: NewV1, N2: NewV2, Mask: NewMask));
    }
  }

  return SDValue();
}
1362
/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
/// instruction.
// The function matches elements from one of the input vector shuffled to the
// left or right with zeroable elements 'shifted in'. It handles both the
// strictly bit-wise element shifts and the byte shift across an entire 128-bit
// lane.
// Mostly copied from X86.
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
                               unsigned ScalarSizeInBits, ArrayRef<int> Mask,
                               int MaskOffset, const APInt &Zeroable) {
  int Size = Mask.size();
  unsigned SizeInBits = Size * ScalarSizeInBits;

  // The positions the shift would vacate must all be zeroable.
  auto CheckZeros = [&](int Shift, int Scale, bool Left) {
    for (int i = 0; i < Size; i += Scale)
      for (int j = 0; j < Shift; ++j)
        if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
          return false;

    return true;
  };

  // True if Mask[Pos, Pos+Size) is undef or counts up from Low by Step.
  auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
                                        int Step = 1) {
    for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
      if (!(Mask[i] == -1 || Mask[i] == Low))
        return false;
    return true;
  };

  // If the surviving elements line up, pick the opcode/type and return the
  // shift amount; -1 if this Shift/Scale/direction doesn't match.
  auto MatchShift = [&](int Shift, int Scale, bool Left) {
    for (int i = 0; i != Size; i += Scale) {
      unsigned Pos = Left ? i + Shift : i;
      unsigned Low = Left ? i : i + Shift;
      unsigned Len = Scale - Shift;
      if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
        return -1;
    }

    // Shifts wider than 64 bits must use the whole-lane byte shifts.
    int ShiftEltBits = ScalarSizeInBits * Scale;
    bool ByteShift = ShiftEltBits > 64;
    Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
                  : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
    int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);

    // Normalize the scale for byte shifts to still produce an i64 element
    // type.
    Scale = ByteShift ? Scale / 2 : Scale;

    // We need to round trip through the appropriate type for the shift.
    MVT ShiftSVT = MVT::getIntegerVT(BitWidth: ScalarSizeInBits * Scale);
    ShiftVT = ByteShift ? MVT::getVectorVT(VT: MVT::i8, NumElements: SizeInBits / 8)
                        : MVT::getVectorVT(VT: ShiftSVT, NumElements: Size / Scale);
    return (int)ShiftAmt;
  };

  // Try every (scale, shift, direction) combination up to a 128-bit lane.
  unsigned MaxWidth = 128;
  for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
    for (int Shift = 1; Shift != Scale; ++Shift)
      for (bool Left : {true, false})
        if (CheckZeros(Shift, Scale, Left)) {
          int ShiftAmt = MatchShift(Shift, Scale, Left);
          if (0 < ShiftAmt)
            return ShiftAmt;
        }

  // no match
  return -1;
}
1432
/// Lower VECTOR_SHUFFLE as shift (if possible).
///
/// For example:
///   %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
///                      <4 x i32> <i32 4, i32 0, i32 1, i32 2>
/// is lowered to:
///      (VBSLL_V $v0, $v0, 4)
///
///   %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
///                      <4 x i32> <i32 4, i32 0, i32 4, i32 2>
/// is lowered to:
///      (VSLLI_D $v0, $v0, 32)
static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef<int> Mask,
                                          MVT VT, SDValue V1, SDValue V2,
                                          SelectionDAG &DAG,
                                          const LoongArchSubtarget &Subtarget,
                                          const APInt &Zeroable) {
  int Size = Mask.size();
  assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");

  MVT ShiftVT;
  SDValue V = V1;
  unsigned Opcode;

  // Try to match shuffle against V1 shift.
  int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, ScalarSizeInBits: VT.getScalarSizeInBits(),
                                     Mask, MaskOffset: 0, Zeroable);

  // If V1 failed, try to match shuffle against V2 shift.
  // (MaskOffset = Size selects indices referring to the second vector.)
  if (ShiftAmt < 0) {
    ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, ScalarSizeInBits: VT.getScalarSizeInBits(),
                                   Mask, MaskOffset: Size, Zeroable);
    V = V2;
  }

  if (ShiftAmt < 0)
    return SDValue();

  // Emit the shift in the matched type, then cast back to VT.
  assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
         "Illegal integer vector type");
  V = DAG.getBitcast(VT: ShiftVT, V);
  V = DAG.getNode(Opcode, DL, VT: ShiftVT, N1: V,
                  N2: DAG.getConstant(Val: ShiftAmt, DL, VT: Subtarget.getGRLenVT()));
  return DAG.getBitcast(VT, V);
}
1478
1479/// Determine whether a range fits a regular pattern of values.
1480/// This function accounts for the possibility of jumping over the End iterator.
1481template <typename ValType>
1482static bool
1483fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
1484 unsigned CheckStride,
1485 typename SmallVectorImpl<ValType>::const_iterator End,
1486 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
1487 auto &I = Begin;
1488
1489 while (I != End) {
1490 if (*I != -1 && *I != ExpectedIndex)
1491 return false;
1492 ExpectedIndex += ExpectedIndexStride;
1493
1494 // Incrementing past End is undefined behaviour so we must increment one
1495 // step at a time and check for End at each step.
1496 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
1497 ; // Empty loop body.
1498 }
1499 return true;
1500}
1501
/// Compute whether each element of a shuffle is zeroable.
///
/// A "zeroable" vector shuffle element is one which can be lowered to zero.
/// On return, bit i of KnownUndef is set when Mask[i] is undef (-1), and bit
/// i of KnownZero is set when the referenced source element is known zero.
static void computeZeroableShuffleElements(ArrayRef<int> Mask, SDValue V1,
                                           SDValue V2, APInt &KnownUndef,
                                           APInt &KnownZero) {
  int Size = Mask.size();
  KnownUndef = KnownZero = APInt::getZero(numBits: Size);

  // Look through bitcasts so all-zero build vectors are recognized.
  V1 = peekThroughBitcasts(V: V1);
  V2 = peekThroughBitcasts(V: V2);

  bool V1IsZero = ISD::isBuildVectorAllZeros(N: V1.getNode());
  bool V2IsZero = ISD::isBuildVectorAllZeros(N: V2.getNode());

  int VectorSizeInBits = V1.getValueSizeInBits();
  int ScalarSizeInBits = VectorSizeInBits / Size;
  assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
  (void)ScalarSizeInBits;

  for (int i = 0; i < Size; ++i) {
    int M = Mask[i];
    // Undef mask entry.
    if (M < 0) {
      KnownUndef.setBit(i);
      continue;
    }
    // Indices [0, Size) select from V1; [Size, 2*Size) select from V2.
    if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
      KnownZero.setBit(i);
      continue;
    }
  }
}
1534
/// Test whether a shuffle mask is equivalent within each sub-lane.
///
/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
/// non-trivial to compute in the face of undef lanes. The representation is
/// suitable for use with existing 128-bit shuffles as entries from the second
/// vector have been remapped to [LaneSize, 2*LaneSize).
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
                                  ArrayRef<int> Mask,
                                  SmallVectorImpl<int> &RepeatedMask) {
  auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
  RepeatedMask.assign(NumElts: LaneSize, Elt: -1);
  int Size = Mask.size();
  for (int i = 0; i < Size; ++i) {
    assert(Mask[i] == -1 || Mask[i] >= 0);
    // Undef entries match anything.
    if (Mask[i] < 0)
      continue;
    if ((Mask[i] % Size) / LaneSize != i / LaneSize)
      // This entry crosses lanes, so there is no way to model this shuffle.
      return false;

    // Ok, handle the in-lane shuffles by detecting if and when they repeat.
    // Adjust second vector indices to start at LaneSize instead of Size.
    int LocalM =
        Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
    if (RepeatedMask[i % LaneSize] < 0)
      // This is the first non-undef entry in this slot of a 128-bit lane.
      RepeatedMask[i % LaneSize] = LocalM;
    else if (RepeatedMask[i % LaneSize] != LocalM)
      // Found a mismatch with the repeated mask.
      return false;
  }
  return true;
}
1568
/// Attempts to match vector shuffle as byte rotation.
///
/// The mask must repeat within each 128-bit lane. On success \p V1 and \p V2
/// are updated to the (possibly identical) inputs feeding the low and high
/// halves of the rotation, and the rotation amount in bytes is returned.
/// Returns -1 if no byte rotation matches.
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
                                    ArrayRef<int> Mask) {

  SDValue Lo, Hi;
  SmallVector<int, 16> RepeatedMask;

  // A rotation is only expressible if the mask repeats per 128-bit lane.
  if (!isRepeatedShuffleMask(LaneSizeInBits: 128, VT, Mask, RepeatedMask))
    return -1;

  int NumElts = RepeatedMask.size();
  int Rotation = 0;
  // Bytes per element within a 128-bit lane; used to convert the element
  // rotation into a byte rotation at the end.
  int Scale = 16 / NumElts;

  for (int i = 0; i < NumElts; ++i) {
    int M = RepeatedMask[i];
    assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
           "Unexpected mask index.");
    // Undef elements are compatible with any rotation.
    if (M < 0)
      continue;

    // Determine where a rotated vector would have started.
    int StartIdx = i - (M % NumElts);
    // StartIdx == 0 is the identity rotation, which isn't interesting here.
    if (StartIdx == 0)
      return -1;

    // If we found the tail of a vector the rotation must be the missing
    // front. If we found the head of a vector, it must be how much of the
    // head.
    int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;

    // Every defined element must imply the same rotation amount.
    if (Rotation == 0)
      Rotation = CandidateRotation;
    else if (Rotation != CandidateRotation)
      return -1;

    // Compute which value this mask is pointing at.
    SDValue MaskV = M < NumElts ? V1 : V2;

    // Compute which of the two target values this index should be assigned
    // to. This reflects whether the high elements are remaining or the low
    // elements are remaining.
    SDValue &TargetV = StartIdx < 0 ? Hi : Lo;

    // Either set up this value if we've not encountered it before, or check
    // that it remains consistent.
    if (!TargetV)
      TargetV = MaskV;
    else if (TargetV != MaskV)
      return -1;
  }

  // Check that we successfully analyzed the mask, and normalize the results.
  assert(Rotation != 0 && "Failed to locate a viable rotation!");
  assert((Lo || Hi) && "Failed to find a rotated input vector!");
  // If one side was entirely undef, reuse the other input for it.
  if (!Lo)
    Lo = Hi;
  else if (!Hi)
    Hi = Lo;

  V1 = Lo;
  V2 = Hi;

  return Rotation * Scale;
}
1634
1635/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
1636///
1637/// For example:
1638/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
1639/// <2 x i32> <i32 3, i32 0>
1640/// is lowered to:
1641/// (VBSRL_V $v1, $v1, 8)
1642/// (VBSLL_V $v0, $v0, 8)
1643/// (VOR_V $v0, $V0, $v1)
1644static SDValue
1645lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1646 SDValue V1, SDValue V2, SelectionDAG &DAG,
1647 const LoongArchSubtarget &Subtarget) {
1648
1649 SDValue Lo = V1, Hi = V2;
1650 int ByteRotation = matchShuffleAsByteRotate(VT, V1&: Lo, V2&: Hi, Mask);
1651 if (ByteRotation <= 0)
1652 return SDValue();
1653
1654 MVT ByteVT = MVT::getVectorVT(VT: MVT::i8, NumElements: VT.getSizeInBits() / 8);
1655 Lo = DAG.getBitcast(VT: ByteVT, V: Lo);
1656 Hi = DAG.getBitcast(VT: ByteVT, V: Hi);
1657
1658 int LoByteShift = 16 - ByteRotation;
1659 int HiByteShift = ByteRotation;
1660 MVT GRLenVT = Subtarget.getGRLenVT();
1661
1662 SDValue LoShift = DAG.getNode(Opcode: LoongArchISD::VBSLL, DL, VT: ByteVT, N1: Lo,
1663 N2: DAG.getConstant(Val: LoByteShift, DL, VT: GRLenVT));
1664 SDValue HiShift = DAG.getNode(Opcode: LoongArchISD::VBSRL, DL, VT: ByteVT, N1: Hi,
1665 N2: DAG.getConstant(Val: HiByteShift, DL, VT: GRLenVT));
1666 return DAG.getBitcast(VT, V: DAG.getNode(Opcode: ISD::OR, DL, VT: ByteVT, N1: LoShift, N2: HiShift));
1667}
1668
/// Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
///
/// For example:
///   %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
///                      <4 x i32> <i32 0, i32 4, i32 1, i32 4>
///   %3 = bitcast <4 x i32> %2 to <2 x i64>
/// is lowered to:
///     (VREPLI $v1, 0)
///     (VILVL $v0, $v1, $v0)
static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL,
                                                    ArrayRef<int> Mask, MVT VT,
                                                    SDValue V1, SDValue V2,
                                                    SelectionDAG &DAG,
                                                    const APInt &Zeroable) {
  int Bits = VT.getSizeInBits();
  int EltBits = VT.getScalarSizeInBits();
  int NumElements = VT.getVectorNumElements();

  // Every element is zeroable: the whole shuffle is just a zero vector.
  if (Zeroable.isAllOnes())
    return DAG.getConstant(Val: 0, DL, VT);

  // Define a helper function to check a particular ext-scale and lower to it if
  // valid.
  auto Lower = [&](int Scale) -> SDValue {
    SDValue InputV;
    // AnyExt stays true while no extended slot is explicitly required to be
    // zero; in that case a frozen copy of the input may be interleaved
    // instead of a zero vector.
    bool AnyExt = true;
    int Offset = 0;
    for (int i = 0; i < NumElements; i++) {
      int M = Mask[i];
      if (M < 0)
        continue;
      if (i % Scale != 0) {
        // Each of the extended elements need to be zeroable.
        if (!Zeroable[i])
          return SDValue();

        AnyExt = false;
        continue;
      }

      // Each of the base elements needs to be consecutive indices into the
      // same input vector.
      SDValue V = M < NumElements ? V1 : V2;
      M = M % NumElements;
      if (!InputV) {
        InputV = V;
        Offset = M - (i / Scale);

        // These offset can't be handled
        if (Offset % (NumElements / Scale))
          return SDValue();
      } else if (InputV != V)
        return SDValue();

      if (M != (Offset + (i / Scale)))
        return SDValue(); // Non-consecutive strided elements.
    }

    // If we fail to find an input, we have a zero-shuffle which should always
    // have already been handled.
    if (!InputV)
      return SDValue();

    // Repeatedly interleave the input with zero (or a frozen copy for
    // any-extend), doubling the element width and halving the element count
    // each step, until the requested extension scale is reached.
    do {
      unsigned VilVLoHi = LoongArchISD::VILVL;
      // Offsets in the high half are realized by interleaving the high
      // elements instead of the low ones.
      if (Offset >= (NumElements / 2)) {
        VilVLoHi = LoongArchISD::VILVH;
        Offset -= (NumElements / 2);
      }

      MVT InputVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: EltBits), NumElements);
      SDValue Ext =
          AnyExt ? DAG.getFreeze(V: InputV) : DAG.getConstant(Val: 0, DL, VT: InputVT);
      InputV = DAG.getBitcast(VT: InputVT, V: InputV);
      InputV = DAG.getNode(Opcode: VilVLoHi, DL, VT: InputVT, N1: Ext, N2: InputV);
      Scale /= 2;
      EltBits *= 2;
      NumElements /= 2;
    } while (Scale > 1);
    return DAG.getBitcast(VT, V: InputV);
  };

  // Each iteration, try extending the elements half as much, but into twice as
  // many elements.
  for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
       NumExtElements *= 2) {
    if (SDValue V = Lower(NumElements / NumExtElements))
      return V;
  }
  return SDValue();
}
1760
1761/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
1762///
1763/// VREPLVEI performs vector broadcast based on an element specified by an
1764/// integer immediate, with its mask being similar to:
1765/// <x, x, x, ...>
1766/// where x is any valid index.
1767///
1768/// When undef's appear in the mask they are treated as if they were whatever
1769/// value is necessary in order to fit the above form.
1770static SDValue
1771lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1772 SDValue V1, SelectionDAG &DAG,
1773 const LoongArchSubtarget &Subtarget) {
1774 int SplatIndex = -1;
1775 for (const auto &M : Mask) {
1776 if (M != -1) {
1777 SplatIndex = M;
1778 break;
1779 }
1780 }
1781
1782 if (SplatIndex == -1)
1783 return DAG.getUNDEF(VT);
1784
1785 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1786 if (fitsRegularPattern<int>(Begin: Mask.begin(), CheckStride: 1, End: Mask.end(), ExpectedIndex: SplatIndex, ExpectedIndexStride: 0)) {
1787 return DAG.getNode(Opcode: LoongArchISD::VREPLVEI, DL, VT, N1: V1,
1788 N2: DAG.getConstant(Val: SplatIndex, DL, VT: Subtarget.getGRLenVT()));
1789 }
1790
1791 return SDValue();
1792}
1793
/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
///
/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
/// elements according to a <4 x i2> constant (encoded as an integer immediate).
///
/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
///   <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
/// When undef's appear they are treated as if they were whatever value is
/// necessary in order to fit the above forms.
///
/// For example:
///   %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
///                      <8 x i32> <i32 3, i32 2, i32 1, i32 0,
///                                 i32 7, i32 6, i32 5, i32 4>
/// is lowered to:
///     (VSHUF4I_H $v0, $v1, 27)
/// where the 27 comes from:
///   3 + (2 << 2) + (1 << 4) + (0 << 6)
static SDValue
lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
                            SDValue V1, SDValue V2, SelectionDAG &DAG,
                            const LoongArchSubtarget &Subtarget) {

  // v2i64/v2f64 uses VSHUF4I_D, which permutes pairs of 64-bit elements,
  // so the repeating block there is only two elements wide.
  unsigned SubVecSize = 4;
  if (VT == MVT::v2f64 || VT == MVT::v2i64)
    SubVecSize = 2;

  int SubMask[4] = {-1, -1, -1, -1};
  // For each position within the repeating block, collect the (single) index
  // that every block must agree on.
  for (unsigned i = 0; i < SubVecSize; ++i) {
    for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
      int M = Mask[j];

      // Convert from vector index to 4-element subvector index
      // If an index refers to an element outside of the subvector then give up
      if (M != -1) {
        M -= 4 * (j / SubVecSize);
        if (M < 0 || M >= 4)
          return SDValue();
      }

      // If the mask has an undef, replace it with the current index.
      // Note that it might still be undef if the current index is also undef
      if (SubMask[i] == -1)
        SubMask[i] = M;
      // Check that non-undef values are the same as in the mask. If they
      // aren't then give up
      else if (M != -1 && M != SubMask[i])
        return SDValue();
    }
  }

  // Calculate the immediate. Replace any remaining undefs with zero
  int Imm = 0;
  for (int i = SubVecSize - 1; i >= 0; --i) {
    int M = SubMask[i];

    if (M == -1)
      M = 0;

    // Two bits per element, element 0 in the least significant bits.
    Imm <<= 2;
    Imm |= M & 0x3;
  }

  MVT GRLenVT = Subtarget.getGRLenVT();

  // Return vshuf4i.d
  if (VT == MVT::v2f64 || VT == MVT::v2i64)
    return DAG.getNode(Opcode: LoongArchISD::VSHUF4I_D, DL, VT, N1: V1, N2: V2,
                       N3: DAG.getConstant(Val: Imm, DL, VT: GRLenVT));

  return DAG.getNode(Opcode: LoongArchISD::VSHUF4I, DL, VT, N1: V1,
                     N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT));
}
1867
1868/// Lower VECTOR_SHUFFLE whose result is the reversed source vector.
1869///
1870/// It is possible to do optimization for VECTOR_SHUFFLE performing vector
1871/// reverse whose mask likes:
1872/// <7, 6, 5, 4, 3, 2, 1, 0>
1873///
1874/// When undef's appear in the mask they are treated as if they were whatever
1875/// value is necessary in order to fit the above forms.
1876static SDValue
1877lowerVECTOR_SHUFFLE_IsReverse(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1878 SDValue V1, SelectionDAG &DAG,
1879 const LoongArchSubtarget &Subtarget) {
1880 // Only vectors with i8/i16 elements which cannot match other patterns
1881 // directly needs to do this.
1882 if (VT != MVT::v16i8 && VT != MVT::v8i16 && VT != MVT::v32i8 &&
1883 VT != MVT::v16i16)
1884 return SDValue();
1885
1886 if (!ShuffleVectorInst::isReverseMask(Mask, NumSrcElts: Mask.size()))
1887 return SDValue();
1888
1889 int WidenNumElts = VT.getVectorNumElements() / 4;
1890 SmallVector<int, 16> WidenMask(WidenNumElts, -1);
1891 for (int i = 0; i < WidenNumElts; ++i)
1892 WidenMask[i] = WidenNumElts - 1 - i;
1893
1894 MVT WidenVT = MVT::getVectorVT(
1895 VT: VT.getVectorElementType() == MVT::i8 ? MVT::i32 : MVT::i64, NumElements: WidenNumElts);
1896 SDValue NewV1 = DAG.getBitcast(VT: WidenVT, V: V1);
1897 SDValue WidenRev = DAG.getVectorShuffle(VT: WidenVT, dl: DL, N1: NewV1,
1898 N2: DAG.getUNDEF(VT: WidenVT), Mask: WidenMask);
1899
1900 return DAG.getNode(Opcode: LoongArchISD::VSHUF4I, DL, VT,
1901 N1: DAG.getBitcast(VT, V: WidenRev),
1902 N2: DAG.getConstant(Val: 27, DL, VT: Subtarget.getGRLenVT()));
1903}
1904
1905/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
1906///
1907/// VPACKEV interleaves the even elements from each vector.
1908///
1909/// It is possible to lower into VPACKEV when the mask consists of two of the
1910/// following forms interleaved:
1911/// <0, 2, 4, ...>
1912/// <n, n+2, n+4, ...>
1913/// where n is the number of elements in the vector.
1914/// For example:
1915/// <0, 0, 2, 2, 4, 4, ...>
1916/// <0, n, 2, n+2, 4, n+4, ...>
1917///
1918/// When undef's appear in the mask they are treated as if they were whatever
1919/// value is necessary in order to fit the above forms.
1920static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
1921 MVT VT, SDValue V1, SDValue V2,
1922 SelectionDAG &DAG) {
1923
1924 const auto &Begin = Mask.begin();
1925 const auto &End = Mask.end();
1926 SDValue OriV1 = V1, OriV2 = V2;
1927
1928 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 2))
1929 V1 = OriV1;
1930 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2))
1931 V1 = OriV2;
1932 else
1933 return SDValue();
1934
1935 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 2))
1936 V2 = OriV1;
1937 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2))
1938 V2 = OriV2;
1939 else
1940 return SDValue();
1941
1942 return DAG.getNode(Opcode: LoongArchISD::VPACKEV, DL, VT, N1: V2, N2: V1);
1943}
1944
1945/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
1946///
1947/// VPACKOD interleaves the odd elements from each vector.
1948///
1949/// It is possible to lower into VPACKOD when the mask consists of two of the
1950/// following forms interleaved:
1951/// <1, 3, 5, ...>
1952/// <n+1, n+3, n+5, ...>
1953/// where n is the number of elements in the vector.
1954/// For example:
1955/// <1, 1, 3, 3, 5, 5, ...>
1956/// <1, n+1, 3, n+3, 5, n+5, ...>
1957///
1958/// When undef's appear in the mask they are treated as if they were whatever
1959/// value is necessary in order to fit the above forms.
1960static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
1961 MVT VT, SDValue V1, SDValue V2,
1962 SelectionDAG &DAG) {
1963
1964 const auto &Begin = Mask.begin();
1965 const auto &End = Mask.end();
1966 SDValue OriV1 = V1, OriV2 = V2;
1967
1968 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 1, ExpectedIndexStride: 2))
1969 V1 = OriV1;
1970 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2))
1971 V1 = OriV2;
1972 else
1973 return SDValue();
1974
1975 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 1, ExpectedIndexStride: 2))
1976 V2 = OriV1;
1977 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2))
1978 V2 = OriV2;
1979 else
1980 return SDValue();
1981
1982 return DAG.getNode(Opcode: LoongArchISD::VPACKOD, DL, VT, N1: V2, N2: V1);
1983}
1984
1985/// Lower VECTOR_SHUFFLE into VILVH (if possible).
1986///
1987/// VILVH interleaves consecutive elements from the left (highest-indexed) half
1988/// of each vector.
1989///
1990/// It is possible to lower into VILVH when the mask consists of two of the
1991/// following forms interleaved:
1992/// <x, x+1, x+2, ...>
1993/// <n+x, n+x+1, n+x+2, ...>
1994/// where n is the number of elements in the vector and x is half n.
1995/// For example:
1996/// <x, x, x+1, x+1, x+2, x+2, ...>
1997/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
1998///
1999/// When undef's appear in the mask they are treated as if they were whatever
2000/// value is necessary in order to fit the above forms.
2001static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef<int> Mask,
2002 MVT VT, SDValue V1, SDValue V2,
2003 SelectionDAG &DAG) {
2004
2005 const auto &Begin = Mask.begin();
2006 const auto &End = Mask.end();
2007 unsigned HalfSize = Mask.size() / 2;
2008 SDValue OriV1 = V1, OriV2 = V2;
2009
2010 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1))
2011 V1 = OriV1;
2012 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 1))
2013 V1 = OriV2;
2014 else
2015 return SDValue();
2016
2017 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1))
2018 V2 = OriV1;
2019 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size() + HalfSize,
2020 ExpectedIndexStride: 1))
2021 V2 = OriV2;
2022 else
2023 return SDValue();
2024
2025 return DAG.getNode(Opcode: LoongArchISD::VILVH, DL, VT, N1: V2, N2: V1);
2026}
2027
2028/// Lower VECTOR_SHUFFLE into VILVL (if possible).
2029///
2030/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
2031/// of each vector.
2032///
2033/// It is possible to lower into VILVL when the mask consists of two of the
2034/// following forms interleaved:
2035/// <0, 1, 2, ...>
2036/// <n, n+1, n+2, ...>
2037/// where n is the number of elements in the vector.
2038/// For example:
2039/// <0, 0, 1, 1, 2, 2, ...>
2040/// <0, n, 1, n+1, 2, n+2, ...>
2041///
2042/// When undef's appear in the mask they are treated as if they were whatever
2043/// value is necessary in order to fit the above forms.
2044static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask,
2045 MVT VT, SDValue V1, SDValue V2,
2046 SelectionDAG &DAG) {
2047
2048 const auto &Begin = Mask.begin();
2049 const auto &End = Mask.end();
2050 SDValue OriV1 = V1, OriV2 = V2;
2051
2052 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 1))
2053 V1 = OriV1;
2054 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 1))
2055 V1 = OriV2;
2056 else
2057 return SDValue();
2058
2059 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 1))
2060 V2 = OriV1;
2061 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 1))
2062 V2 = OriV2;
2063 else
2064 return SDValue();
2065
2066 return DAG.getNode(Opcode: LoongArchISD::VILVL, DL, VT, N1: V2, N2: V1);
2067}
2068
2069/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
2070///
2071/// VPICKEV copies the even elements of each vector into the result vector.
2072///
2073/// It is possible to lower into VPICKEV when the mask consists of two of the
2074/// following forms concatenated:
2075/// <0, 2, 4, ...>
2076/// <n, n+2, n+4, ...>
2077/// where n is the number of elements in the vector.
2078/// For example:
2079/// <0, 2, 4, ..., 0, 2, 4, ...>
2080/// <0, 2, 4, ..., n, n+2, n+4, ...>
2081///
2082/// When undef's appear in the mask they are treated as if they were whatever
2083/// value is necessary in order to fit the above forms.
2084static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
2085 MVT VT, SDValue V1, SDValue V2,
2086 SelectionDAG &DAG) {
2087
2088 const auto &Begin = Mask.begin();
2089 const auto &Mid = Mask.begin() + Mask.size() / 2;
2090 const auto &End = Mask.end();
2091 SDValue OriV1 = V1, OriV2 = V2;
2092
2093 if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: 0, ExpectedIndexStride: 2))
2094 V1 = OriV1;
2095 else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2))
2096 V1 = OriV2;
2097 else
2098 return SDValue();
2099
2100 if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: 0, ExpectedIndexStride: 2))
2101 V2 = OriV1;
2102 else if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2))
2103 V2 = OriV2;
2104
2105 else
2106 return SDValue();
2107
2108 return DAG.getNode(Opcode: LoongArchISD::VPICKEV, DL, VT, N1: V2, N2: V1);
2109}
2110
2111/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
2112///
2113/// VPICKOD copies the odd elements of each vector into the result vector.
2114///
2115/// It is possible to lower into VPICKOD when the mask consists of two of the
2116/// following forms concatenated:
2117/// <1, 3, 5, ...>
2118/// <n+1, n+3, n+5, ...>
2119/// where n is the number of elements in the vector.
2120/// For example:
2121/// <1, 3, 5, ..., 1, 3, 5, ...>
2122/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
2123///
2124/// When undef's appear in the mask they are treated as if they were whatever
2125/// value is necessary in order to fit the above forms.
2126static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
2127 MVT VT, SDValue V1, SDValue V2,
2128 SelectionDAG &DAG) {
2129
2130 const auto &Begin = Mask.begin();
2131 const auto &Mid = Mask.begin() + Mask.size() / 2;
2132 const auto &End = Mask.end();
2133 SDValue OriV1 = V1, OriV2 = V2;
2134
2135 if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: 1, ExpectedIndexStride: 2))
2136 V1 = OriV1;
2137 else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2))
2138 V1 = OriV2;
2139 else
2140 return SDValue();
2141
2142 if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: 1, ExpectedIndexStride: 2))
2143 V2 = OriV1;
2144 else if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2))
2145 V2 = OriV2;
2146 else
2147 return SDValue();
2148
2149 return DAG.getNode(Opcode: LoongArchISD::VPICKOD, DL, VT, N1: V2, N2: V1);
2150}
2151
2152/// Lower VECTOR_SHUFFLE into VSHUF.
2153///
2154/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
2155/// adding it as an operand to the resulting VSHUF.
2156static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
2157 MVT VT, SDValue V1, SDValue V2,
2158 SelectionDAG &DAG,
2159 const LoongArchSubtarget &Subtarget) {
2160
2161 SmallVector<SDValue, 16> Ops;
2162 for (auto M : Mask)
2163 Ops.push_back(Elt: DAG.getSignedConstant(Val: M, DL, VT: Subtarget.getGRLenVT()));
2164
2165 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2166 SDValue MaskVec = DAG.getBuildVector(VT: MaskVecTy, DL, Ops);
2167
2168 // VECTOR_SHUFFLE concatenates the vectors in an vectorwise fashion.
2169 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
2170 // VSHF concatenates the vectors in a bitwise fashion:
2171 // <0b00, 0b01> + <0b10, 0b11> ->
2172 // 0b0100 + 0b1110 -> 0b01001110
2173 // <0b10, 0b11, 0b00, 0b01>
2174 // We must therefore swap the operands to get the correct result.
2175 return DAG.getNode(Opcode: LoongArchISD::VSHUF, DL, VT, N1: MaskVec, N2: V2, N3: V1);
2176}
2177
/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
///
/// This routine breaks down the specific type of 128-bit shuffle and
/// dispatches to the lowering routines accordingly. The first matcher that
/// returns a non-null node wins; lowerVECTOR_SHUFFLE_VSHUF at the end is the
/// general fallback.
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
                                  SDValue V1, SDValue V2, SelectionDAG &DAG,
                                  const LoongArchSubtarget &Subtarget) {
  assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
          VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
          VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
         "Vector type is unsupported for lsx!");
  assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
         "Two operands have different types!");
  assert(VT.getVectorNumElements() == Mask.size() &&
         "Unexpected mask size for shuffle!");
  assert(Mask.size() % 2 == 0 && "Expected even mask size.");

  // Elements that are undef or provably zero; used by the zero/any-extend and
  // shift matchers below.
  APInt KnownUndef, KnownZero;
  computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
  APInt Zeroable = KnownUndef | KnownZero;

  SDValue Result;
  // TODO: Add more comparison patterns.
  // Single-input patterns are only tried when the second operand is undef.
  if (V2.isUndef()) {
    if ((Result =
             lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
      return Result;
    if ((Result =
             lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
      return Result;
    if ((Result =
             lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
      return Result;

    // TODO: This comment may be enabled in the future to better match the
    // pattern for instruction selection.
    /* V2 = V1; */
  }

  // It is recommended not to change the pattern comparison order for better
  // performance.
  if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  // Two-operand VSHUF4I_D only exists for 64-bit elements.
  if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
      (Result =
           lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
                                                     Zeroable)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
                                           Zeroable)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
                                                Subtarget)))
    return Result;
  // Try again with a mask of half as many, twice as wide elements.
  if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
    return NewShuffle;
  // Generic fallback: VSHUF handles any mask.
  if ((Result =
           lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG, Subtarget)))
    return Result;
  return SDValue();
}
2251
2252/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
2253///
2254/// It is a XVREPLVEI when the mask is:
2255/// <x, x, x, ..., x+n, x+n, x+n, ...>
2256/// where the number of x is equal to n and n is half the length of vector.
2257///
2258/// When undef's appear in the mask they are treated as if they were whatever
2259/// value is necessary in order to fit the above form.
2260static SDValue
2261lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2262 SDValue V1, SelectionDAG &DAG,
2263 const LoongArchSubtarget &Subtarget) {
2264 int SplatIndex = -1;
2265 for (const auto &M : Mask) {
2266 if (M != -1) {
2267 SplatIndex = M;
2268 break;
2269 }
2270 }
2271
2272 if (SplatIndex == -1)
2273 return DAG.getUNDEF(VT);
2274
2275 const auto &Begin = Mask.begin();
2276 const auto &End = Mask.end();
2277 int HalfSize = Mask.size() / 2;
2278
2279 if (SplatIndex >= HalfSize)
2280 return SDValue();
2281
2282 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
2283 if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: End - HalfSize, ExpectedIndex: SplatIndex, ExpectedIndexStride: 0) &&
2284 fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 1, End, ExpectedIndex: SplatIndex + HalfSize,
2285 ExpectedIndexStride: 0)) {
2286 return DAG.getNode(Opcode: LoongArchISD::VREPLVEI, DL, VT, N1: V1,
2287 N2: DAG.getConstant(Val: SplatIndex, DL, VT: Subtarget.getGRLenVT()));
2288 }
2289
2290 return SDValue();
2291}
2292
2293/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
2294static SDValue
2295lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2296 SDValue V1, SDValue V2, SelectionDAG &DAG,
2297 const LoongArchSubtarget &Subtarget) {
2298 // When the size is less than or equal to 4, lower cost instructions may be
2299 // used.
2300 if (Mask.size() <= 4)
2301 return SDValue();
2302 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
2303}
2304
2305/// Lower VECTOR_SHUFFLE into XVPERMI (if possible).
2306static SDValue
2307lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2308 SDValue V1, SelectionDAG &DAG,
2309 const LoongArchSubtarget &Subtarget) {
2310 // Only consider XVPERMI_D.
2311 if (Mask.size() != 4 || (VT != MVT::v4i64 && VT != MVT::v4f64))
2312 return SDValue();
2313
2314 unsigned MaskImm = 0;
2315 for (unsigned i = 0; i < Mask.size(); ++i) {
2316 if (Mask[i] == -1)
2317 continue;
2318 MaskImm |= Mask[i] << (i * 2);
2319 }
2320
2321 return DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT, N1: V1,
2322 N2: DAG.getConstant(Val: MaskImm, DL, VT: Subtarget.getGRLenVT()));
2323}
2324
2325/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
2326static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask,
2327 MVT VT, SDValue V1, SelectionDAG &DAG,
2328 const LoongArchSubtarget &Subtarget) {
2329 // LoongArch LASX only have XVPERM_W.
2330 if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
2331 return SDValue();
2332
2333 unsigned NumElts = VT.getVectorNumElements();
2334 unsigned HalfSize = NumElts / 2;
2335 bool FrontLo = true, FrontHi = true;
2336 bool BackLo = true, BackHi = true;
2337
2338 auto inRange = [](int val, int low, int high) {
2339 return (val == -1) || (val >= low && val < high);
2340 };
2341
2342 for (unsigned i = 0; i < HalfSize; ++i) {
2343 int Fronti = Mask[i];
2344 int Backi = Mask[i + HalfSize];
2345
2346 FrontLo &= inRange(Fronti, 0, HalfSize);
2347 FrontHi &= inRange(Fronti, HalfSize, NumElts);
2348 BackLo &= inRange(Backi, 0, HalfSize);
2349 BackHi &= inRange(Backi, HalfSize, NumElts);
2350 }
2351
2352 // If both the lower and upper 128-bit parts access only one half of the
2353 // vector (either lower or upper), avoid using xvperm.w. The latency of
2354 // xvperm.w(3) is higher than using xvshuf(1) and xvori(1).
2355 if ((FrontLo || FrontHi) && (BackLo || BackHi))
2356 return SDValue();
2357
2358 SmallVector<SDValue, 8> Masks;
2359 MVT GRLenVT = Subtarget.getGRLenVT();
2360 for (unsigned i = 0; i < NumElts; ++i)
2361 Masks.push_back(Elt: Mask[i] == -1 ? DAG.getUNDEF(VT: GRLenVT)
2362 : DAG.getConstant(Val: Mask[i], DL, VT: GRLenVT));
2363 SDValue MaskVec = DAG.getBuildVector(VT: MVT::v8i32, DL, Ops: Masks);
2364
2365 return DAG.getNode(Opcode: LoongArchISD::XVPERM, DL, VT, N1: V1, N2: MaskVec);
2366}
2367
/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
///
/// The 256-bit pack-even pattern is position-for-position identical to the
/// 128-bit one, so the LSX matcher is reused directly.
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {
  return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
}
2374
/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
///
/// The 256-bit pack-odd pattern is position-for-position identical to the
/// 128-bit one, so the LSX matcher is reused directly.
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {
  return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
}
2381
/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
                                          MVT VT, SDValue V1, SDValue V2,
                                          SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  // A 256-bit xvilvh interleaves within each 128-bit lane, so each half of
  // the mask (HalfSize elements describing one lane's result) is matched
  // independently. LeftSize is the number of elements each source
  // contributes to one lane.
  unsigned HalfSize = Mask.size() / 2;
  unsigned LeftSize = HalfSize / 2;
  SDValue OriV1 = V1, OriV2 = V2;

  // Even mask positions select the first interleave source. They must form
  // consecutive runs starting at the upper quarter of each lane, taken from
  // either V1 (indices < Mask.size()) or V2 (indices offset by Mask.size()).
  if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize, ExpectedIndex: HalfSize - LeftSize,
                              ExpectedIndexStride: 1) &&
      fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize + LeftSize, ExpectedIndexStride: 1))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize,
                                   ExpectedIndex: Mask.size() + HalfSize - LeftSize, ExpectedIndexStride: 1) &&
           fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End,
                                   ExpectedIndex: Mask.size() + HalfSize + LeftSize, ExpectedIndexStride: 1))
    V1 = OriV2;
  else
    return SDValue();

  // Odd mask positions select the second interleave source the same way.
  if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize, ExpectedIndex: HalfSize - LeftSize,
                              ExpectedIndexStride: 1) &&
      fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize + LeftSize,
                              ExpectedIndexStride: 1))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize,
                                   ExpectedIndex: Mask.size() + HalfSize - LeftSize, ExpectedIndexStride: 1) &&
           fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End,
                                   ExpectedIndex: Mask.size() + HalfSize + LeftSize, ExpectedIndexStride: 1))
    V2 = OriV2;
  else
    return SDValue();

  // Note the operand order: the even-position source ends up as N2.
  return DAG.getNode(Opcode: LoongArchISD::VILVH, DL, VT, N1: V2, N2: V1);
}
2420
/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
                                          MVT VT, SDValue V1, SDValue V2,
                                          SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  // A 256-bit xvilvl interleaves the low half of each 128-bit lane, so each
  // half of the mask is matched independently against per-lane runs.
  unsigned HalfSize = Mask.size() / 2;
  SDValue OriV1 = V1, OriV2 = V2;

  // Even mask positions select the first interleave source: consecutive
  // elements starting at the bottom of each lane, taken from either V1
  // (indices < Mask.size()) or V2 (indices offset by Mask.size()).
  if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize, ExpectedIndex: 0, ExpectedIndexStride: 1) &&
      fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize, ExpectedIndex: Mask.size(), ExpectedIndexStride: 1) &&
           fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End,
                                   ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 1))
    V1 = OriV2;
  else
    return SDValue();

  // Odd mask positions select the second interleave source the same way.
  if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize, ExpectedIndex: 0, ExpectedIndexStride: 1) &&
      fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize, ExpectedIndex: Mask.size(),
                                   ExpectedIndexStride: 1) &&
           fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End,
                                   ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 1))
    V2 = OriV2;
  else
    return SDValue();

  // Note the operand order: the even-position source ends up as N2.
  return DAG.getNode(Opcode: LoongArchISD::VILVL, DL, VT, N1: V2, N2: V1);
}
2454
/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {

  // xvpickev operates per 128-bit lane, so the mask is split into quarters:
  // [Begin, LeftMid) and [Mid, RightMid) come from one source, the other two
  // quarters from the other source.
  const auto &Begin = Mask.begin();
  const auto &LeftMid = Mask.begin() + Mask.size() / 4;
  const auto &Mid = Mask.begin() + Mask.size() / 2;
  const auto &RightMid = Mask.end() - Mask.size() / 4;
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;
  SDValue OriV1 = V1, OriV2 = V2;

  // First and third quarters must pick the even-indexed (stride-2) elements
  // of one lane of either V1 or V2 (V2 indices are offset by Mask.size()).
  if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: 0, ExpectedIndexStride: 2) &&
      fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: HalfSize, ExpectedIndexStride: 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2) &&
           fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 2))
    V1 = OriV2;
  else
    return SDValue();

  // Second and fourth quarters select the other operand the same way.
  if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: 0, ExpectedIndexStride: 2) &&
      fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2) &&
           fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 2))
    V2 = OriV2;

  else
    return SDValue();

  // Note the operand order: the first-quarter source ends up as N2.
  return DAG.getNode(Opcode: LoongArchISD::VPICKEV, DL, VT, N1: V2, N2: V1);
}
2489
/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {

  // Same quarter-wise structure as XVPICKEV, but matching the odd-indexed
  // (base + 1, stride 2) elements of each lane.
  const auto &Begin = Mask.begin();
  const auto &LeftMid = Mask.begin() + Mask.size() / 4;
  const auto &Mid = Mask.begin() + Mask.size() / 2;
  const auto &RightMid = Mask.end() - Mask.size() / 4;
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;
  SDValue OriV1 = V1, OriV2 = V2;

  // First and third quarters must pick the odd elements of one lane of
  // either V1 or V2 (V2 indices are offset by Mask.size()).
  if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: 1, ExpectedIndexStride: 2) &&
      fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: HalfSize + 1, ExpectedIndexStride: 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2) &&
           fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: Mask.size() + HalfSize + 1,
                                   ExpectedIndexStride: 2))
    V1 = OriV2;
  else
    return SDValue();

  // Second and fourth quarters select the other operand the same way.
  if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: 1, ExpectedIndexStride: 2) &&
      fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: HalfSize + 1, ExpectedIndexStride: 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2) &&
           fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: Mask.size() + HalfSize + 1,
                                   ExpectedIndexStride: 2))
    V2 = OriV2;
  else
    return SDValue();

  // Note the operand order: the first-quarter source ends up as N2.
  return DAG.getNode(Opcode: LoongArchISD::VPICKOD, DL, VT, N1: V2, N2: V1);
}
2525
/// Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
///
/// xvinsve0 inserts element 0 of its second operand into a chosen element of
/// its first operand, so this matches masks that are an identity of one
/// source except for a single element replaced by the other source's lowest
/// element.
static SDValue
lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
                             SDValue V1, SDValue V2, SelectionDAG &DAG,
                             const LoongArchSubtarget &Subtarget) {
  // LoongArch LASX only supports xvinsve0.{w/d}.
  if (VT != MVT::v8i32 && VT != MVT::v8f32 && VT != MVT::v4i64 &&
      VT != MVT::v4f64)
    return SDValue();

  MVT GRLenVT = Subtarget.getGRLenVT();
  int MaskSize = Mask.size();
  assert(MaskSize == (int)VT.getVectorNumElements() && "Unexpected mask size");

  // Check if exactly one element of the Mask is replaced by 'Replaced', while
  // all other elements are either 'Base + i' or undef (-1). On success, return
  // the index of the replaced element. Otherwise, just return -1.
  auto checkReplaceOne = [&](int Base, int Replaced) -> int {
    int Idx = -1;
    for (int i = 0; i < MaskSize; ++i) {
      if (Mask[i] == Base + i || Mask[i] == -1)
        continue;
      if (Mask[i] != Replaced)
        return -1;
      // A second replaced element disqualifies the pattern.
      if (Idx == -1)
        Idx = i;
      else
        return -1;
    }
    return Idx;
  };

  // Case 1: the lowest element of V2 replaces one element in V1.
  int Idx = checkReplaceOne(0, MaskSize);
  if (Idx != -1)
    return DAG.getNode(Opcode: LoongArchISD::XVINSVE0, DL, VT, N1: V1, N2: V2,
                       N3: DAG.getConstant(Val: Idx, DL, VT: GRLenVT));

  // Case 2: the lowest element of V1 replaces one element in V2.
  Idx = checkReplaceOne(MaskSize, 0);
  if (Idx != -1)
    return DAG.getNode(Opcode: LoongArchISD::XVINSVE0, DL, VT, N1: V2, N2: V1,
                       N3: DAG.getConstant(Val: Idx, DL, VT: GRLenVT));

  return SDValue();
}
2572
/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
                                          MVT VT, SDValue V1, SDValue V2,
                                          SelectionDAG &DAG) {

  int MaskSize = Mask.size();
  int HalfSize = Mask.size() / 2;
  const auto &Begin = Mask.begin();
  const auto &Mid = Mask.begin() + HalfSize;
  const auto &End = Mask.end();

  // VECTOR_SHUFFLE concatenates the vectors:
  // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
  // shuffling ->
  // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
  //
  // XVSHUF concatenates the vectors:
  // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
  // shuffling ->
  // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
  SmallVector<SDValue, 8> MaskAlloc;
  // First half of the mask: only elements from the low 128-bit lane of either
  // source are representable; rebase them into [0, HalfSize).
  for (auto it = Begin; it < Mid; it++) {
    if (*it < 0) // UNDEF
      MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: 0, DL, VT: MVT::i64));
    else if ((*it >= 0 && *it < HalfSize) ||
             (*it >= MaskSize && *it < MaskSize + HalfSize)) {
      int M = *it < HalfSize ? *it : *it - HalfSize;
      MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: M, DL, VT: MVT::i64));
    } else
      return SDValue();
  }
  assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");

  // Second half: only elements from the high 128-bit lane of either source
  // are representable; rebase them the same way.
  for (auto it = Mid; it < End; it++) {
    if (*it < 0) // UNDEF
      MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: 0, DL, VT: MVT::i64));
    else if ((*it >= HalfSize && *it < MaskSize) ||
             (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
      int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
      MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: M, DL, VT: MVT::i64));
    } else
      return SDValue();
  }
  assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");

  // Materialize the rewritten mask as an integer build_vector of the same
  // element count and feed it to VSHUF (mask, second source, first source).
  EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
  SDValue MaskVec = DAG.getBuildVector(VT: MaskVecTy, DL, Ops: MaskAlloc);
  return DAG.getNode(Opcode: LoongArchISD::VSHUF, DL, VT, N1: MaskVec, N2: V2, N3: V1);
}
2622
/// Shuffle vectors by lane to generate more optimized instructions.
/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
///
/// Therefore, except for the following four cases, other cases are regarded
/// as cross-lane shuffles, where optimization is relatively limited.
///
/// - Shuffle high, low lanes of two inputs vector
///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
/// - Shuffle low, high lanes of two inputs vector
///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
/// - Shuffle low, low lanes of two inputs vector
///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
/// - Shuffle high, high lanes of two inputs vector
///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
///
/// The first case is the closest to LoongArch instructions and the other
/// cases need to be converted to it for processing.
///
/// This function will return true for the last three cases above and will
/// modify V1, V2 and Mask. Otherwise, return false for the first case and
/// cross-lane shuffle cases.
static bool canonicalizeShuffleVectorByLane(
    const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
    SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {

  enum HalfMaskType { HighLaneTy, LowLaneTy, None };

  int MaskSize = Mask.size();
  int HalfSize = Mask.size() / 2;
  MVT GRLenVT = Subtarget.getGRLenVT();

  HalfMaskType preMask = None, postMask = None;

  // Classify the first half of the mask: HighLaneTy if every element reads
  // from the low lane of V1 or V2 (undef allowed), LowLaneTy if every
  // element reads from the high lane.
  if (std::all_of(first: Mask.begin(), last: Mask.begin() + HalfSize, pred: [&](int M) {
        return M < 0 || (M >= 0 && M < HalfSize) ||
               (M >= MaskSize && M < MaskSize + HalfSize);
      }))
    preMask = HighLaneTy;
  else if (std::all_of(first: Mask.begin(), last: Mask.begin() + HalfSize, pred: [&](int M) {
             return M < 0 || (M >= HalfSize && M < MaskSize) ||
                    (M >= MaskSize + HalfSize && M < MaskSize * 2);
           }))
    preMask = LowLaneTy;

  // Classify the second half of the mask the same way.
  if (std::all_of(first: Mask.begin() + HalfSize, last: Mask.end(), pred: [&](int M) {
        return M < 0 || (M >= HalfSize && M < MaskSize) ||
               (M >= MaskSize + HalfSize && M < MaskSize * 2);
      }))
    postMask = LowLaneTy;
  else if (std::all_of(first: Mask.begin() + HalfSize, last: Mask.end(), pred: [&](int M) {
             return M < 0 || (M >= 0 && M < HalfSize) ||
                    (M >= MaskSize && M < MaskSize + HalfSize);
           }))
    postMask = HighLaneTy;

  // The pre-half of mask is high lane type, and the post-half of mask
  // is low lane type, which is closest to the LoongArch instructions.
  //
  // Note: In the LoongArch architecture, the high lane of mask corresponds
  // to the lower 128-bit of vector register, and the low lane of mask
  // corresponds to the higher 128-bit of vector register.
  if (preMask == HighLaneTy && postMask == LowLaneTy) {
    return false;
  }
  // Case "low, high": swap the two 128-bit lanes of both sources
  // (xvpermi imm 0b01001110 = lane order <2,3,0,1> on v4i64) and rebase the
  // mask so it becomes the canonical "high, low" form.
  if (preMask == LowLaneTy && postMask == HighLaneTy) {
    V1 = DAG.getBitcast(VT: MVT::v4i64, V: V1);
    V1 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V1,
                     N2: DAG.getConstant(Val: 0b01001110, DL, VT: GRLenVT));
    V1 = DAG.getBitcast(VT, V: V1);

    if (!V2.isUndef()) {
      V2 = DAG.getBitcast(VT: MVT::v4i64, V: V2);
      V2 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V2,
                       N2: DAG.getConstant(Val: 0b01001110, DL, VT: GRLenVT));
      V2 = DAG.getBitcast(VT, V: V2);
    }

    for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
      *it = *it < 0 ? *it : *it - HalfSize;
    }
    for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
      *it = *it < 0 ? *it : *it + HalfSize;
    }
  // Case "low, low": broadcast the high lane of both sources into both
  // lanes (imm 0b11101110) and rebase only the first half of the mask.
  } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
    V1 = DAG.getBitcast(VT: MVT::v4i64, V: V1);
    V1 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V1,
                     N2: DAG.getConstant(Val: 0b11101110, DL, VT: GRLenVT));
    V1 = DAG.getBitcast(VT, V: V1);

    if (!V2.isUndef()) {
      V2 = DAG.getBitcast(VT: MVT::v4i64, V: V2);
      V2 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V2,
                       N2: DAG.getConstant(Val: 0b11101110, DL, VT: GRLenVT));
      V2 = DAG.getBitcast(VT, V: V2);
    }

    for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
      *it = *it < 0 ? *it : *it - HalfSize;
    }
  // Case "high, high": broadcast the low lane of both sources into both
  // lanes (imm 0b01000100) and rebase only the second half of the mask.
  } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
    V1 = DAG.getBitcast(VT: MVT::v4i64, V: V1);
    V1 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V1,
                     N2: DAG.getConstant(Val: 0b01000100, DL, VT: GRLenVT));
    V1 = DAG.getBitcast(VT, V: V1);

    if (!V2.isUndef()) {
      V2 = DAG.getBitcast(VT: MVT::v4i64, V: V2);
      V2 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V2,
                       N2: DAG.getConstant(Val: 0b01000100, DL, VT: GRLenVT));
      V2 = DAG.getBitcast(VT, V: V2);
    }

    for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
      *it = *it < 0 ? *it : *it + HalfSize;
    }
  } else { // cross-lane
    return false;
  }

  return true;
}
2744
/// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
/// Only for 256-bit vector.
///
/// For example:
///   %2 = shufflevector <4 x i64> %0, <4 x i64> poison,
///                      <4 x i64> <i32 0, i32 3, i32 2, i32 0>
/// is lowered to:
///   (XVPERMI $xr2, $xr0, 78)
///   (XVSHUF $xr1, $xr2, $xr0)
///   (XVORI $xr0, $xr1, 0)
static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL,
                                                          ArrayRef<int> Mask,
                                                          MVT VT, SDValue V1,
                                                          SDValue V2,
                                                          SelectionDAG &DAG) {
  assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
  int Size = Mask.size();
  int LaneSize = Size / 2;

  // Record, per destination lane, whether any element is pulled from the
  // other source lane.
  bool LaneCrossing[2] = {false, false};
  for (int i = 0; i < Size; ++i)
    if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
      LaneCrossing[(Mask[i] % Size) / LaneSize] = true;

  // If no element crosses lanes, this transform is not needed.
  if (!LaneCrossing[0] && !LaneCrossing[1])
    return SDValue();

  // Rewrite cross-lane references to point into a lane-swapped copy of V1
  // (indices >= Size select from the second shuffle operand below).
  SmallVector<int> InLaneMask;
  InLaneMask.assign(in_start: Mask.begin(), in_end: Mask.end());
  for (int i = 0; i < Size; ++i) {
    int &M = InLaneMask[i];
    if (M < 0)
      continue;
    if (((M % Size) / LaneSize) != (i / LaneSize))
      M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
  }

  // Build the lane-swapped copy of V1 and emit an in-lane shuffle of
  // (V1, Flipped) with the rewritten mask.
  SDValue Flipped = DAG.getBitcast(VT: MVT::v4i64, V: V1);
  Flipped = DAG.getVectorShuffle(VT: MVT::v4i64, dl: DL, N1: Flipped,
                                 N2: DAG.getUNDEF(VT: MVT::v4i64), Mask: {2, 3, 0, 1});
  Flipped = DAG.getBitcast(VT, V: Flipped);
  return DAG.getVectorShuffle(VT, dl: DL, N1: V1, N2: Flipped, Mask: InLaneMask);
}
2789
/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
///
/// This routine breaks down the specific type of 256-bit shuffle and
/// dispatches to the lowering routines accordingly.
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
                                  SDValue V1, SDValue V2, SelectionDAG &DAG,
                                  const LoongArchSubtarget &Subtarget) {
  assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
          VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
          VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
         "Vector type is unsupported for lasx!");
  assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
         "Two operands have different types!");
  assert(VT.getVectorNumElements() == Mask.size() &&
         "Unexpected mask size for shuffle!");
  assert(Mask.size() % 2 == 0 && "Expected even mask size.");
  assert(Mask.size() >= 4 && "Mask size is less than 4.");

  // Elements known to be undef or zero let the shift-based matcher fold
  // boundary elements away.
  APInt KnownUndef, KnownZero;
  computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
  APInt Zeroable = KnownUndef | KnownZero;

  SDValue Result;
  // TODO: Add more comparison patterns.
  // Single-input patterns are tried first when V2 is undef.
  if (V2.isUndef()) {
    if ((Result =
             lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
      return Result;
    if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
                                               Subtarget)))
      return Result;
    // Try to widen vectors to gain more optimization opportunities.
    if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
      return NewShuffle;
    if ((Result =
             lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, DAG, Subtarget)))
      return Result;
    if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
      return Result;
    if ((Result =
             lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
      return Result;

    // TODO: This comment may be enabled in the future to better match the
    // pattern for instruction selection.
    /* V2 = V1; */
  }

  // It is recommended not to change the pattern comparison order for better
  // performance.
  if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
                                           Zeroable)))
    return Result;
  if ((Result =
           lowerVECTOR_SHUFFLE_XVINSVE0(DL, Mask, VT, V1, V2, DAG, Subtarget)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
                                                Subtarget)))
    return Result;

  // canonicalize non cross-lane shuffle vector
  // On success the mask and sources are rewritten to the canonical
  // "high, low" lane form, and the whole dispatcher is retried once.
  SmallVector<int> NewMask(Mask);
  if (canonicalizeShuffleVectorByLane(DL, Mask: NewMask, VT, V1, V2, DAG, Subtarget))
    return lower256BitShuffle(DL, Mask: NewMask, VT, V1, V2, DAG, Subtarget);

  // FIXME: Handling the remaining cases earlier can degrade performance
  // in some situations. Further analysis is required to enable more
  // effective optimizations.
  if (V2.isUndef()) {
    if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, Mask: NewMask, VT,
                                                             V1, V2, DAG)))
      return Result;
  }

  // Last resorts: widen the mask, then fall back to the generic XVSHUF form.
  if (SDValue NewShuffle = widenShuffleMask(DL, Mask: NewMask, VT, V1, V2, DAG))
    return NewShuffle;
  if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, Mask: NewMask, VT, V1, V2, DAG)))
    return Result;

  return SDValue();
}
2883
/// Custom lowering entry point for ISD::VECTOR_SHUFFLE: normalizes the node
/// (undef operands, out-of-range indices) and dispatches to the 128-bit or
/// 256-bit shuffle lowering routine.
SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
                                                     SelectionDAG &DAG) const {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Val&: Op);
  ArrayRef<int> OrigMask = SVOp->getMask();
  SDValue V1 = Op.getOperand(i: 0);
  SDValue V2 = Op.getOperand(i: 1);
  MVT VT = Op.getSimpleValueType();
  int NumElements = VT.getVectorNumElements();
  SDLoc DL(Op);

  bool V1IsUndef = V1.isUndef();
  bool V2IsUndef = V2.isUndef();
  if (V1IsUndef && V2IsUndef)
    return DAG.getUNDEF(VT);

  // When we create a shuffle node we put the UNDEF node to second operand,
  // but in some cases the first operand may be transformed to UNDEF.
  // In this case we should just commute the node.
  if (V1IsUndef)
    return DAG.getCommutedVectorShuffle(SV: *SVOp);

  // Check for non-undef masks pointing at an undef vector and make the masks
  // undef as well. This makes it easier to match the shuffle based solely on
  // the mask.
  if (V2IsUndef &&
      any_of(Range&: OrigMask, P: [NumElements](int M) { return M >= NumElements; })) {
    SmallVector<int, 8> NewMask(OrigMask);
    for (int &M : NewMask)
      if (M >= NumElements)
        M = -1;
    return DAG.getVectorShuffle(VT, dl: DL, N1: V1, N2: V2, Mask: NewMask);
  }

  // Check for illegal shuffle mask element index values.
  int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
  (void)MaskUpperLimit;
  assert(llvm::all_of(OrigMask,
                      [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
         "Out of bounds shuffle index");

  // For each vector width, delegate to a specialized lowering routine.
  if (VT.is128BitVector())
    return lower128BitShuffle(DL, Mask: OrigMask, VT, V1, V2, DAG, Subtarget);

  if (VT.is256BitVector())
    return lower256BitShuffle(DL, Mask: OrigMask, VT, V1, V2, DAG, Subtarget);

  return SDValue();
}
2933
2934SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
2935 SelectionDAG &DAG) const {
2936 // Custom lower to ensure the libcall return is passed in an FPR on hard
2937 // float ABIs.
2938 SDLoc DL(Op);
2939 MakeLibCallOptions CallOptions;
2940 SDValue Op0 = Op.getOperand(i: 0);
2941 SDValue Chain = SDValue();
2942 RTLIB::Libcall LC = RTLIB::getFPROUND(OpVT: Op0.getValueType(), RetVT: MVT::f16);
2943 SDValue Res;
2944 std::tie(args&: Res, args&: Chain) =
2945 makeLibCall(DAG, LC, RetVT: MVT::f32, Ops: Op0, CallOptions, dl: DL, Chain);
2946 if (Subtarget.is64Bit())
2947 return DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Res);
2948 return DAG.getBitcast(VT: MVT::i32, V: Res);
2949}
2950
2951SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
2952 SelectionDAG &DAG) const {
2953 // Custom lower to ensure the libcall argument is passed in an FPR on hard
2954 // float ABIs.
2955 SDLoc DL(Op);
2956 MakeLibCallOptions CallOptions;
2957 SDValue Op0 = Op.getOperand(i: 0);
2958 SDValue Chain = SDValue();
2959 SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64,
2960 DL, VT: MVT::f32, Operand: Op0)
2961 : DAG.getBitcast(VT: MVT::f32, V: Op0);
2962 SDValue Res;
2963 std::tie(args&: Res, args&: Chain) = makeLibCall(DAG, LC: RTLIB::FPEXT_F16_F32, RetVT: MVT::f32, Ops: Arg,
2964 CallOptions, dl: DL, Chain);
2965 return Res;
2966}
2967
2968SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
2969 SelectionDAG &DAG) const {
2970 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2971 SDLoc DL(Op);
2972 MakeLibCallOptions CallOptions;
2973 RTLIB::Libcall LC =
2974 RTLIB::getFPROUND(OpVT: Op.getOperand(i: 0).getValueType(), RetVT: MVT::bf16);
2975 SDValue Res =
2976 makeLibCall(DAG, LC, RetVT: MVT::f32, Ops: Op.getOperand(i: 0), CallOptions, dl: DL).first;
2977 if (Subtarget.is64Bit())
2978 return DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Res);
2979 return DAG.getBitcast(VT: MVT::i32, V: Res);
2980}
2981
2982SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
2983 SelectionDAG &DAG) const {
2984 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2985 MVT VT = Op.getSimpleValueType();
2986 SDLoc DL(Op);
2987 Op = DAG.getNode(
2988 Opcode: ISD::SHL, DL, VT: Op.getOperand(i: 0).getValueType(), N1: Op.getOperand(i: 0),
2989 N2: DAG.getShiftAmountConstant(Val: 16, VT: Op.getOperand(i: 0).getValueType(), DL));
2990 SDValue Res = Subtarget.is64Bit() ? DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64,
2991 DL, VT: MVT::f32, Operand: Op)
2992 : DAG.getBitcast(VT: MVT::f32, V: Op);
2993 if (VT != MVT::f32)
2994 return DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT, Operand: Res);
2995 return Res;
2996}
2997
2998// Lower BUILD_VECTOR as broadcast load (if possible).
2999// For example:
3000// %a = load i8, ptr %ptr
3001// %b = build_vector %a, %a, %a, %a
3002// is lowered to :
3003// (VLDREPL_B $a0, 0)
3004static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
3005 const SDLoc &DL,
3006 SelectionDAG &DAG) {
3007 MVT VT = BVOp->getSimpleValueType(ResNo: 0);
3008 int NumOps = BVOp->getNumOperands();
3009
3010 assert((VT.is128BitVector() || VT.is256BitVector()) &&
3011 "Unsupported vector type for broadcast.");
3012
3013 SDValue IdentitySrc;
3014 bool IsIdeneity = true;
3015
3016 for (int i = 0; i != NumOps; i++) {
3017 SDValue Op = BVOp->getOperand(Num: i);
3018 if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
3019 IsIdeneity = false;
3020 break;
3021 }
3022 IdentitySrc = BVOp->getOperand(Num: 0);
3023 }
3024
3025 // make sure that this load is valid and only has one user.
3026 if (!IsIdeneity || !IdentitySrc || !BVOp->isOnlyUserOf(N: IdentitySrc.getNode()))
3027 return SDValue();
3028
3029 auto *LN = cast<LoadSDNode>(Val&: IdentitySrc);
3030 auto ExtType = LN->getExtensionType();
3031
3032 if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
3033 VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
3034 // Indexed loads and stores are not supported on LoongArch.
3035 assert(LN->isUnindexed() && "Unexpected indexed load.");
3036
3037 SDVTList Tys = DAG.getVTList(VT1: VT, VT2: MVT::Other);
3038 // The offset operand of unindexed load is always undefined, so there is
3039 // no need to pass it to VLDREPL.
3040 SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
3041 SDValue BCast = DAG.getNode(Opcode: LoongArchISD::VLDREPL, DL, VTList: Tys, Ops);
3042 DAG.ReplaceAllUsesOfValueWith(From: SDValue(LN, 1), To: BCast.getValue(R: 1));
3043 return BCast;
3044 }
3045 return SDValue();
3046}
3047
3048// Sequentially insert elements from Ops into Vector, from low to high indices.
3049// Note: Ops can have fewer elements than Vector.
3050static void fillVector(ArrayRef<SDValue> Ops, SelectionDAG &DAG, SDLoc DL,
3051 const LoongArchSubtarget &Subtarget, SDValue &Vector,
3052 EVT ResTy) {
3053 assert(Ops.size() <= ResTy.getVectorNumElements());
3054
3055 SDValue Op0 = Ops[0];
3056 if (!Op0.isUndef())
3057 Vector = DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL, VT: ResTy, Operand: Op0);
3058 for (unsigned i = 1; i < Ops.size(); ++i) {
3059 SDValue Opi = Ops[i];
3060 if (Opi.isUndef())
3061 continue;
3062 Vector = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: ResTy, N1: Vector, N2: Opi,
3063 N3: DAG.getConstant(Val: i, DL, VT: Subtarget.getGRLenVT()));
3064 }
3065}
3066
3067// Build a ResTy subvector from Node, taking NumElts elements starting at index
3068// 'first'.
3069static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node,
3070 SelectionDAG &DAG, SDLoc DL,
3071 const LoongArchSubtarget &Subtarget,
3072 EVT ResTy, unsigned first) {
3073 unsigned NumElts = ResTy.getVectorNumElements();
3074
3075 assert(first + NumElts <= Node->getSimpleValueType(0).getVectorNumElements());
3076
3077 SmallVector<SDValue, 16> Ops(Node->op_begin() + first,
3078 Node->op_begin() + first + NumElts);
3079 SDValue Vector = DAG.getUNDEF(VT: ResTy);
3080 fillVector(Ops, DAG, DL, Subtarget, Vector, ResTy);
3081 return Vector;
3082}
3083
// Custom lowering for BUILD_VECTOR on LSX (128-bit) / LASX (256-bit)
// vectors. Tries, in order: a broadcast load, constant-splat
// materialization, passthrough for non-constant splats, a same-constant
// splat patched with INSERT_VECTOR_ELT, and finally repeated-sequence
// broadcast or element-wise insertion (splitting 256-bit vectors in half).
SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                                   SelectionDAG &DAG) const {
  BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Val&: Op);
  MVT VT = Node->getSimpleValueType(ResNo: 0);
  EVT ResTy = Op->getValueType(ResNo: 0);
  unsigned NumElts = ResTy.getVectorNumElements();
  SDLoc DL(Op);
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  bool IsConstant = false;
  bool UseSameConstant = true;
  SDValue ConstantValue;
  bool Is128Vec = ResTy.is128BitVector();
  bool Is256Vec = ResTy.is256BitVector();

  // Bail out unless the vector width matches an enabled SIMD extension.
  if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
      (!Subtarget.hasExtLASX() || !Is256Vec))
    return SDValue();

  // Prefer a single broadcast load when the whole vector can come from memory.
  if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(BVOp: Node, DL, DAG))
    return Result;

  if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                            /*MinSplatBits=*/8) &&
      SplatBitSize <= 64) {
    // We can only cope with 8, 16, 32, or 64-bit elements.
    if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
        SplatBitSize != 64)
      return SDValue();

    if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
      // We can only handle 64-bit elements that are within
      // the signed 10-bit range or match vldi patterns on 32-bit targets.
      // See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
      if (!SplatValue.isSignedIntN(N: 10) &&
          !isImmVLDILegalForMode1(SplatValue, SplatBitSize).first)
        return SDValue();
      if ((Is128Vec && ResTy == MVT::v4i32) ||
          (Is256Vec && ResTy == MVT::v8i32))
        return Op;
    }

    EVT ViaVecTy;

    // Pick the integer vector type whose element width matches the splat.
    switch (SplatBitSize) {
    default:
      return SDValue();
    case 8:
      ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
      break;
    case 16:
      ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
      break;
    case 32:
      ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
      break;
    case 64:
      ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
      break;
    }

    // SelectionDAG::getConstant will promote SplatValue appropriately.
    SDValue Result = DAG.getConstant(Val: SplatValue, DL, VT: ViaVecTy);

    // Bitcast to the type we originally wanted.
    if (ViaVecTy != ResTy)
      Result = DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(Node), VT: ResTy, Operand: Result);

    return Result;
  }

  // A non-constant splat (no undefs) is left as-is for pattern selection.
  if (DAG.isSplatValue(V: Op, /*AllowUndefs=*/false))
    return Op;

  // Record whether any element is a constant and whether all constant
  // elements are the very same node.
  for (unsigned i = 0; i < NumElts; ++i) {
    SDValue Opi = Node->getOperand(Num: i);
    if (isIntOrFPConstant(V: Opi)) {
      IsConstant = true;
      if (!ConstantValue.getNode())
        ConstantValue = Opi;
      else if (ConstantValue != Opi)
        UseSameConstant = false;
    }
  }

  // If the type of BUILD_VECTOR is v2f64, custom legalizing it has no benefits.
  if (IsConstant && UseSameConstant && ResTy != MVT::v2f64) {
    // Splat the shared constant, then overwrite the non-constant lanes.
    SDValue Result = DAG.getSplatBuildVector(VT: ResTy, DL, Op: ConstantValue);
    for (unsigned i = 0; i < NumElts; ++i) {
      SDValue Opi = Node->getOperand(Num: i);
      if (!isIntOrFPConstant(V: Opi))
        Result = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: ResTy, N1: Result, N2: Opi,
                             N3: DAG.getConstant(Val: i, DL, VT: Subtarget.getGRLenVT()));
    }
    return Result;
  }

  if (!IsConstant) {
    // If the BUILD_VECTOR has a repeated pattern, use INSERT_VECTOR_ELT to fill
    // the sub-sequence of the vector and then broadcast the sub-sequence.
    //
    // TODO: If the BUILD_VECTOR contains undef elements, consider falling
    // back to use INSERT_VECTOR_ELT to materialize the vector, because it
    // generates worse code in some cases. This could be further optimized
    // with more consideration.
    SmallVector<SDValue> Sequence;
    BitVector UndefElements;
    if (Node->getRepeatedSequence(Sequence, UndefElements: &UndefElements) &&
        UndefElements.count() == 0) {
      // Using LSX instructions to fill the sub-sequence of 256-bits vector,
      // because the high part can be simply treated as undef.
      SDValue Vector = DAG.getUNDEF(VT: ResTy);
      EVT FillTy = Is256Vec
                       ? ResTy.getHalfNumVectorElementsVT(Context&: *DAG.getContext())
                       : ResTy;
      SDValue FillVec =
          Is256Vec ? DAG.getExtractSubvector(DL, VT: FillTy, Vec: Vector, Idx: 0) : Vector;

      fillVector(Ops: Sequence, DAG, DL, Subtarget, Vector&: FillVec, ResTy: FillTy);

      // View the vector as SplatLen lanes of SeqLen*EltSize bits each, so a
      // single lane holds one full copy of the repeated sub-sequence.
      unsigned SeqLen = Sequence.size();
      unsigned SplatLen = NumElts / SeqLen;
      MVT SplatEltTy = MVT::getIntegerVT(BitWidth: VT.getScalarSizeInBits() * SeqLen);
      MVT SplatTy = MVT::getVectorVT(VT: SplatEltTy, NumElements: SplatLen);

      // If size of the sub-sequence is half of a 256-bits vector, bitcast the
      // vector to v4i64 type in order to match the pattern of XVREPLVE0Q.
      if (SplatEltTy == MVT::i128)
        SplatTy = MVT::v4i64;

      SDValue SplatVec;
      SDValue SrcVec = DAG.getBitcast(
          VT: SplatTy,
          V: Is256Vec ? DAG.getInsertSubvector(DL, Vec: Vector, SubVec: FillVec, Idx: 0) : FillVec);
      if (Is256Vec) {
        SplatVec =
            DAG.getNode(Opcode: (SplatEltTy == MVT::i128) ? LoongArchISD::XVREPLVE0Q
                                                 : LoongArchISD::XVREPLVE0,
                        DL, VT: SplatTy, Operand: SrcVec);
      } else {
        SplatVec = DAG.getNode(Opcode: LoongArchISD::VREPLVEI, DL, VT: SplatTy, N1: SrcVec,
                               N2: DAG.getConstant(Val: 0, DL, VT: Subtarget.getGRLenVT()));
      }

      return DAG.getBitcast(VT: ResTy, V: SplatVec);
    }

    // Use INSERT_VECTOR_ELT operations rather than expand to stores, because
    // using memory operations is much lower.
    //
    // For 256-bit vectors, normally split into two halves and concatenate.
    // Special case: for v8i32/v8f32/v4i64/v4f64, if the upper half has only
    // one non-undef element, skip spliting to avoid a worse result.
    if (ResTy == MVT::v8i32 || ResTy == MVT::v8f32 || ResTy == MVT::v4i64 ||
        ResTy == MVT::v4f64) {
      unsigned NonUndefCount = 0;
      for (unsigned i = NumElts / 2; i < NumElts; ++i) {
        if (!Node->getOperand(Num: i).isUndef()) {
          ++NonUndefCount;
          if (NonUndefCount > 1)
            break;
        }
      }
      if (NonUndefCount == 1)
        return fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, ResTy, first: 0);
    }

    // Build the low half (or the whole 128-bit vector) element by element.
    EVT VecTy =
        Is256Vec ? ResTy.getHalfNumVectorElementsVT(Context&: *DAG.getContext()) : ResTy;
    SDValue Vector =
        fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, ResTy: VecTy, first: 0);

    if (Is128Vec)
      return Vector;

    // Build the high half and join the two halves.
    SDValue VectorHi = fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget,
                                                    ResTy: VecTy, first: NumElts / 2);

    return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: ResTy, N1: Vector, N2: VectorHi);
  }

  return SDValue();
}
3268
// Custom lowering for a 256-bit CONCAT_VECTORS with exactly two operands.
// Classifies each subvector (undef / freeze(undef) / all-zeros / other)
// and then either rebuilds each half separately or assembles the result
// with INSERT_SUBVECTOR over a zero/freeze-undef/undef base vector.
SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT ResVT = Op.getSimpleValueType();
  assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);

  unsigned NumOperands = Op.getNumOperands();
  unsigned NumFreezeUndef = 0;
  unsigned NumZero = 0;
  unsigned NumNonZero = 0;
  unsigned NonZeros = 0; // Bitmask of the operand indices classified non-zero.
  SmallSet<SDValue, 4> Undefs;
  for (unsigned i = 0; i != NumOperands; ++i) {
    SDValue SubVec = Op.getOperand(i);
    if (SubVec.isUndef())
      continue;
    if (ISD::isFreezeUndef(N: SubVec.getNode())) {
      // If the freeze(undef) has multiple uses then we must fold to zero.
      if (SubVec.hasOneUse()) {
        ++NumFreezeUndef;
      } else {
        ++NumZero;
        Undefs.insert(V: SubVec);
      }
    } else if (ISD::isBuildVectorAllZeros(N: SubVec.getNode()))
      ++NumZero;
    else {
      assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
      NonZeros |= 1 << i;
      ++NumNonZero;
    }
  }

  // If we have more than 2 non-zeros, build each half separately.
  if (NumNonZero > 2) {
    MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
    ArrayRef<SDUse> Ops = Op->ops();
    SDValue Lo = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: HalfVT,
                             Ops: Ops.slice(N: 0, M: NumOperands / 2));
    SDValue Hi = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: HalfVT,
                             Ops: Ops.slice(N: NumOperands / 2));
    return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: ResVT, N1: Lo, N2: Hi);
  }

  // Otherwise, build it up through insert_subvectors.
  // The base vector is zero when any operand was zero-classified, else a
  // frozen undef when one was freeze(undef), else plain undef.
  SDValue Vec = NumZero ? DAG.getConstant(Val: 0, DL, VT: ResVT)
                        : (NumFreezeUndef ? DAG.getFreeze(V: DAG.getUNDEF(VT: ResVT))
                                          : DAG.getUNDEF(VT: ResVT));

  // Replace Undef operands with ZeroVector.
  for (SDValue U : Undefs)
    DAG.ReplaceAllUsesWith(From: U, To: DAG.getConstant(Val: 0, DL, VT: U.getSimpleValueType()));

  MVT SubVT = Op.getOperand(i: 0).getSimpleValueType();
  unsigned NumSubElems = SubVT.getVectorNumElements();
  for (unsigned i = 0; i != NumOperands; ++i) {
    if ((NonZeros & (1 << i)) == 0)
      continue;

    Vec = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: ResVT, N1: Vec, N2: Op.getOperand(i),
                      N3: DAG.getVectorIdxConstant(Val: i * NumSubElems, DL));
  }

  return Vec;
}
3334
// Custom lowering for EXTRACT_VECTOR_ELT with a non-constant index on
// 256-bit (LASX) vectors. Constant indices are left for pattern selection.
SDValue
LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                 SelectionDAG &DAG) const {
  MVT EltVT = Op.getSimpleValueType();
  SDValue Vec = Op->getOperand(Num: 0);
  EVT VecTy = Vec->getValueType(ResNo: 0);
  SDValue Idx = Op->getOperand(Num: 1);
  SDLoc DL(Op);
  MVT GRLenVT = Subtarget.getGRLenVT();

  assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");

  // Constant indices are handled directly by instruction patterns.
  if (isa<ConstantSDNode>(Val: Idx))
    return Op;

  switch (VecTy.getSimpleVT().SimpleTy) {
  default:
    llvm_unreachable("Unexpected type");
  case MVT::v32i8:
  case MVT::v16i16:
  case MVT::v4i64:
  case MVT::v4f64: {
    // Extract the high half subvector and place it to the low half of a new
    // vector. It doesn't matter what the high half of the new vector is.
    EVT HalfTy = VecTy.getHalfNumVectorElementsVT(Context&: *DAG.getContext());
    SDValue VecHi =
        DAG.getExtractSubvector(DL, VT: HalfTy, Vec, Idx: HalfTy.getVectorNumElements());
    SDValue TmpVec =
        DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: VecTy, N1: DAG.getUNDEF(VT: VecTy),
                    N2: VecHi, N3: DAG.getConstant(Val: 0, DL, VT: GRLenVT));

    // Shuffle the origin Vec and the TmpVec using MaskVec, the lowest element
    // of MaskVec is Idx, the rest do not matter. ResVec[0] will hold the
    // desired element.
    // Move the index value into an FP register first (via MOVGR2FR on LA64,
    // a plain bitcast on LA32) so it can seed a v8f32 vector.
    SDValue IdxCp =
        Subtarget.is64Bit()
            ? DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64, DL, VT: MVT::f32, Operand: Idx)
            : DAG.getBitcast(VT: MVT::f32, V: Idx);
    SDValue IdxVec = DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL, VT: MVT::v8f32, Operand: IdxCp);
    SDValue MaskVec =
        DAG.getBitcast(VT: (VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, V: IdxVec);
    SDValue ResVec =
        DAG.getNode(Opcode: LoongArchISD::VSHUF, DL, VT: VecTy, N1: MaskVec, N2: TmpVec, N3: Vec);

    // Read back lane 0, which now holds the requested element.
    return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: EltVT, N1: ResVec,
                       N2: DAG.getConstant(Val: 0, DL, VT: GRLenVT));
  }
  case MVT::v8i32:
  case MVT::v8f32: {
    // Splat the index and permute; lane 0 of the permuted vector holds the
    // selected element.
    SDValue SplatIdx = DAG.getSplatBuildVector(VT: MVT::v8i32, DL, Op: Idx);
    SDValue SplatValue =
        DAG.getNode(Opcode: LoongArchISD::XVPERM, DL, VT: VecTy, N1: Vec, N2: SplatIdx);

    return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: EltVT, N1: SplatValue,
                       N2: DAG.getConstant(Val: 0, DL, VT: GRLenVT));
  }
  }
}
3393
// Custom lowering for INSERT_VECTOR_ELT with a non-constant index: lower to
// a VSELECT between a splat of the new element and the original vector,
// keyed on a lane-index equality compare. Constant indices are left for
// pattern selection.
SDValue
LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                SelectionDAG &DAG) const {
  MVT VT = Op.getSimpleValueType();
  MVT EltVT = VT.getVectorElementType();
  unsigned NumElts = VT.getVectorNumElements();
  unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(i: 0); // Source vector.
  SDValue Op1 = Op.getOperand(i: 1); // Element to insert.
  SDValue Op2 = Op.getOperand(i: 2); // Insertion index.

  if (isa<ConstantSDNode>(Val: Op2))
    return Op;

  // Integer vector type with the same lane geometry, used for the compare.
  MVT IdxTy = MVT::getIntegerVT(BitWidth: EltSizeInBits);
  MVT IdxVTy = MVT::getVectorVT(VT: IdxTy, NumElements: NumElts);

  if (!isTypeLegal(VT) || !isTypeLegal(VT: IdxVTy))
    return SDValue();

  SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op: Op1);
  SmallVector<SDValue, 32> RawIndices;
  SDValue SplatIdx;
  SDValue Indices;

  if (!Subtarget.is64Bit() && IdxTy == MVT::i64) {
    // On LA32 an i64 lane must be assembled from i32 pairs: the index value
    // occupies the first word of each pair and the second word is zero.
    MVT PairVTy = MVT::getVectorVT(VT: MVT::i32, NumElements: NumElts * 2);
    for (unsigned i = 0; i < NumElts; ++i) {
      RawIndices.push_back(Elt: Op2);
      RawIndices.push_back(Elt: DAG.getConstant(Val: 0, DL, VT: MVT::i32));
    }
    SplatIdx = DAG.getBuildVector(VT: PairVTy, DL, Ops: RawIndices);
    SplatIdx = DAG.getBitcast(VT: IdxVTy, V: SplatIdx);

    // Same pair construction for the constant lane numbers {0,1,2,...}.
    RawIndices.clear();
    for (unsigned i = 0; i < NumElts; ++i) {
      RawIndices.push_back(Elt: DAG.getConstant(Val: i, DL, VT: MVT::i32));
      RawIndices.push_back(Elt: DAG.getConstant(Val: 0, DL, VT: MVT::i32));
    }
    Indices = DAG.getBuildVector(VT: PairVTy, DL, Ops: RawIndices);
    Indices = DAG.getBitcast(VT: IdxVTy, V: Indices);
  } else {
    SplatIdx = DAG.getSplatBuildVector(VT: IdxVTy, DL, Op: Op2);

    for (unsigned i = 0; i < NumElts; ++i)
      RawIndices.push_back(Elt: DAG.getConstant(Val: i, DL, VT: Subtarget.getGRLenVT()));
    Indices = DAG.getBuildVector(VT: IdxVTy, DL, Ops: RawIndices);
  }

  // insert vec, elt, idx
  // =>
  // select (splatidx == {0,1,2...}) ? splatelt : vec
  SDValue SelectCC =
      DAG.getSetCC(DL, VT: IdxVTy, LHS: SplatIdx, RHS: Indices, Cond: ISD::CondCode::SETEQ);
  return DAG.getNode(Opcode: ISD::VSELECT, DL, VT, N1: SelectCC, N2: SplatElt, N3: Op0);
}
3451
3452SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3453 SelectionDAG &DAG) const {
3454 SDLoc DL(Op);
3455 SyncScope::ID FenceSSID =
3456 static_cast<SyncScope::ID>(Op.getConstantOperandVal(i: 2));
3457
3458 // singlethread fences only synchronize with signal handlers on the same
3459 // thread and thus only need to preserve instruction order, not actually
3460 // enforce memory ordering.
3461 if (FenceSSID == SyncScope::SingleThread)
3462 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3463 return DAG.getNode(Opcode: ISD::MEMBARRIER, DL, VT: MVT::Other, Operand: Op.getOperand(i: 0));
3464
3465 return Op;
3466}
3467
3468SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
3469 SelectionDAG &DAG) const {
3470
3471 if (Subtarget.is64Bit() && Op.getOperand(i: 2).getValueType() == MVT::i32) {
3472 DAG.getContext()->emitError(
3473 ErrorStr: "On LA64, only 64-bit registers can be written.");
3474 return Op.getOperand(i: 0);
3475 }
3476
3477 if (!Subtarget.is64Bit() && Op.getOperand(i: 2).getValueType() == MVT::i64) {
3478 DAG.getContext()->emitError(
3479 ErrorStr: "On LA32, only 32-bit registers can be written.");
3480 return Op.getOperand(i: 0);
3481 }
3482
3483 return Op;
3484}
3485
3486SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
3487 SelectionDAG &DAG) const {
3488 if (!isa<ConstantSDNode>(Val: Op.getOperand(i: 0))) {
3489 DAG.getContext()->emitError(ErrorStr: "argument to '__builtin_frame_address' must "
3490 "be a constant integer");
3491 return SDValue();
3492 }
3493
3494 MachineFunction &MF = DAG.getMachineFunction();
3495 MF.getFrameInfo().setFrameAddressIsTaken(true);
3496 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
3497 EVT VT = Op.getValueType();
3498 SDLoc DL(Op);
3499 SDValue FrameAddr = DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: DL, Reg: FrameReg, VT);
3500 unsigned Depth = Op.getConstantOperandVal(i: 0);
3501 int GRLenInBytes = Subtarget.getGRLen() / 8;
3502
3503 while (Depth--) {
3504 int Offset = -(GRLenInBytes * 2);
3505 SDValue Ptr = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: FrameAddr,
3506 N2: DAG.getSignedConstant(Val: Offset, DL, VT));
3507 FrameAddr =
3508 DAG.getLoad(VT, dl: DL, Chain: DAG.getEntryNode(), Ptr, PtrInfo: MachinePointerInfo());
3509 }
3510 return FrameAddr;
3511}
3512
3513SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
3514 SelectionDAG &DAG) const {
3515 // Currently only support lowering return address for current frame.
3516 if (Op.getConstantOperandVal(i: 0) != 0) {
3517 DAG.getContext()->emitError(
3518 ErrorStr: "return address can only be determined for the current frame");
3519 return SDValue();
3520 }
3521
3522 MachineFunction &MF = DAG.getMachineFunction();
3523 MF.getFrameInfo().setReturnAddressIsTaken(true);
3524 MVT GRLenVT = Subtarget.getGRLenVT();
3525
3526 // Return the value of the return address register, marking it an implicit
3527 // live-in.
3528 Register Reg = MF.addLiveIn(PReg: Subtarget.getRegisterInfo()->getRARegister(),
3529 RC: getRegClassFor(VT: GRLenVT));
3530 return DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: SDLoc(Op), Reg, VT: GRLenVT);
3531}
3532
3533SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
3534 SelectionDAG &DAG) const {
3535 MachineFunction &MF = DAG.getMachineFunction();
3536 auto Size = Subtarget.getGRLen() / 8;
3537 auto FI = MF.getFrameInfo().CreateFixedObject(Size, SPOffset: 0, IsImmutable: false);
3538 return DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout()));
3539}
3540
3541SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
3542 SelectionDAG &DAG) const {
3543 MachineFunction &MF = DAG.getMachineFunction();
3544 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
3545
3546 SDLoc DL(Op);
3547 SDValue FI = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(),
3548 VT: getPointerTy(DL: MF.getDataLayout()));
3549
3550 // vastart just stores the address of the VarArgsFrameIndex slot into the
3551 // memory location argument.
3552 const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue();
3553 return DAG.getStore(Chain: Op.getOperand(i: 0), dl: DL, Val: FI, Ptr: Op.getOperand(i: 1),
3554 PtrInfo: MachinePointerInfo(SV));
3555}
3556
3557SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
3558 SelectionDAG &DAG) const {
3559 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3560 !Subtarget.hasBasicD() && "unexpected target features");
3561
3562 SDLoc DL(Op);
3563 SDValue Op0 = Op.getOperand(i: 0);
3564 if (Op0->getOpcode() == ISD::AND) {
3565 auto *C = dyn_cast<ConstantSDNode>(Val: Op0.getOperand(i: 1));
3566 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
3567 return Op;
3568 }
3569
3570 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
3571 Op0.getConstantOperandVal(i: 1) < UINT64_C(0X1F) &&
3572 Op0.getConstantOperandVal(i: 2) == UINT64_C(0))
3573 return Op;
3574
3575 if (Op0.getOpcode() == ISD::AssertZext &&
3576 dyn_cast<VTSDNode>(Val: Op0.getOperand(i: 1))->getVT().bitsLT(VT: MVT::i32))
3577 return Op;
3578
3579 EVT OpVT = Op0.getValueType();
3580 EVT RetVT = Op.getValueType();
3581 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
3582 MakeLibCallOptions CallOptions;
3583 CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT);
3584 SDValue Chain = SDValue();
3585 SDValue Result;
3586 std::tie(args&: Result, args&: Chain) =
3587 makeLibCall(DAG, LC, RetVT: Op.getValueType(), Ops: Op0, CallOptions, dl: DL, Chain);
3588 return Result;
3589}
3590
3591SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
3592 SelectionDAG &DAG) const {
3593 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3594 !Subtarget.hasBasicD() && "unexpected target features");
3595
3596 SDLoc DL(Op);
3597 SDValue Op0 = Op.getOperand(i: 0);
3598
3599 if ((Op0.getOpcode() == ISD::AssertSext ||
3600 Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) &&
3601 dyn_cast<VTSDNode>(Val: Op0.getOperand(i: 1))->getVT().bitsLE(VT: MVT::i32))
3602 return Op;
3603
3604 EVT OpVT = Op0.getValueType();
3605 EVT RetVT = Op.getValueType();
3606 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
3607 MakeLibCallOptions CallOptions;
3608 CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT);
3609 SDValue Chain = SDValue();
3610 SDValue Result;
3611 std::tie(args&: Result, args&: Chain) =
3612 makeLibCall(DAG, LC, RetVT: Op.getValueType(), Ops: Op0, CallOptions, dl: DL, Chain);
3613 return Result;
3614}
3615
3616SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
3617 SelectionDAG &DAG) const {
3618
3619 SDLoc DL(Op);
3620 EVT VT = Op.getValueType();
3621 SDValue Op0 = Op.getOperand(i: 0);
3622 EVT Op0VT = Op0.getValueType();
3623
3624 if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
3625 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
3626 SDValue NewOp0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op0);
3627 return DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64, DL, VT: MVT::f32, Operand: NewOp0);
3628 }
3629 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
3630 SDValue Lo, Hi;
3631 std::tie(args&: Lo, args&: Hi) = DAG.SplitScalar(N: Op0, DL, LoVT: MVT::i32, HiVT: MVT::i32);
3632 return DAG.getNode(Opcode: LoongArchISD::BUILD_PAIR_F64, DL, VT: MVT::f64, N1: Lo, N2: Hi);
3633 }
3634 return Op;
3635}
3636
3637SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
3638 SelectionDAG &DAG) const {
3639
3640 SDLoc DL(Op);
3641 SDValue Op0 = Op.getOperand(i: 0);
3642
3643 if (Op0.getValueType() == MVT::f16)
3644 Op0 = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: MVT::f32, Operand: Op0);
3645
3646 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
3647 !Subtarget.hasBasicD()) {
3648 SDValue Dst = DAG.getNode(Opcode: LoongArchISD::FTINT, DL, VT: MVT::f32, Operand: Op0);
3649 return DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Dst);
3650 }
3651
3652 EVT FPTy = EVT::getFloatingPointVT(BitWidth: Op.getValueSizeInBits());
3653 SDValue Trunc = DAG.getNode(Opcode: LoongArchISD::FTINT, DL, VT: FPTy, Operand: Op0);
3654 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: Op.getValueType(), Operand: Trunc);
3655}
3656
3657static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
3658 SelectionDAG &DAG, unsigned Flags) {
3659 return DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: Flags);
3660}
3661
3662static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
3663 SelectionDAG &DAG, unsigned Flags) {
3664 return DAG.getTargetBlockAddress(BA: N->getBlockAddress(), VT: Ty, Offset: N->getOffset(),
3665 TargetFlags: Flags);
3666}
3667
3668static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
3669 SelectionDAG &DAG, unsigned Flags) {
3670 return DAG.getTargetConstantPool(C: N->getConstVal(), VT: Ty, Align: N->getAlign(),
3671 Offset: N->getOffset(), TargetFlags: Flags);
3672}
3673
3674static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
3675 SelectionDAG &DAG, unsigned Flags) {
3676 return DAG.getTargetJumpTable(JTI: N->getIndex(), VT: Ty, TargetFlags: Flags);
3677}
3678
// Materialize the address of a GlobalAddress / BlockAddress / ConstantPool /
// JumpTable node according to the code model. Local symbols use PC-relative
// addressing; non-local ones go through the GOT, and the resulting GOT load
// is tagged invariant so MachineLICM can hoist it.
template <class NodeTy>
SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                         CodeModel::Model M,
                                         bool IsLocal) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DL: DAG.getDataLayout());
  SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
  SDValue Load;

  switch (M) {
  default:
    report_fatal_error(reason: "Unsupported code model");

  case CodeModel::Large: {
    assert(Subtarget.is64Bit() && "Large code model requires LA64");

    // This is not actually used, but is necessary for successfully matching
    // the PseudoLA_*_LARGE nodes.
    SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty);
    if (IsLocal) {
      // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
      // eventually becomes the desired 5-insn code sequence.
      Load = SDValue(DAG.getMachineNode(Opcode: LoongArch::PseudoLA_PCREL_LARGE, dl: DL, VT: Ty,
                                        Op1: Tmp, Op2: Addr),
                     0);
    } else {
      // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
      // eventually becomes the desired 5-insn code sequence.
      Load = SDValue(
          DAG.getMachineNode(Opcode: LoongArch::PseudoLA_GOT_LARGE, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr),
          0);
    }
    break;
  }

  case CodeModel::Small:
  case CodeModel::Medium:
    if (IsLocal) {
      // This generates the pattern (PseudoLA_PCREL sym), which
      //
      // for la32r expands to:
      // (addi.w (pcaddu12i %pcadd_hi20(sym)) %pcadd_lo12(.Lpcadd_hi)).
      //
      // for la32s and la64 expands to:
      // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
      Load = SDValue(
          DAG.getMachineNode(Opcode: LoongArch::PseudoLA_PCREL, dl: DL, VT: Ty, Op1: Addr), 0);
    } else {
      // This generates the pattern (PseudoLA_GOT sym), which
      //
      // for la32r expands to:
      // (ld.w (pcaddu12i %got_pcadd_hi20(sym)) %pcadd_lo12(.Lpcadd_hi)).
      //
      // for la32s and la64 expands to:
      // (ld.w/d (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
      Load =
          SDValue(DAG.getMachineNode(Opcode: LoongArch::PseudoLA_GOT, dl: DL, VT: Ty, Op1: Addr), 0);
    }
  }

  if (!IsLocal) {
    // Mark the load instruction as invariant to enable hoisting in MachineLICM.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MemOp = MF.getMachineMemOperand(
        PtrInfo: MachinePointerInfo::getGOT(MF),
        f: MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
            MachineMemOperand::MOInvariant,
        MemTy: LLT(Ty.getSimpleVT()), base_alignment: Align(Ty.getFixedSizeInBits() / 8));
    DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Load.getNode()), NewMemRefs: {MemOp});
  }

  return Load;
}
3752
3753SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
3754 SelectionDAG &DAG) const {
3755 return getAddr(N: cast<BlockAddressSDNode>(Val&: Op), DAG,
3756 M: DAG.getTarget().getCodeModel());
3757}
3758
3759SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
3760 SelectionDAG &DAG) const {
3761 return getAddr(N: cast<JumpTableSDNode>(Val&: Op), DAG,
3762 M: DAG.getTarget().getCodeModel());
3763}
3764
3765SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
3766 SelectionDAG &DAG) const {
3767 return getAddr(N: cast<ConstantPoolSDNode>(Val&: Op), DAG,
3768 M: DAG.getTarget().getCodeModel());
3769}
3770
3771SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
3772 SelectionDAG &DAG) const {
3773 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Val&: Op);
3774 assert(N->getOffset() == 0 && "unexpected offset in global node");
3775 auto CM = DAG.getTarget().getCodeModel();
3776 const GlobalValue *GV = N->getGlobal();
3777
3778 if (GV->isDSOLocal() && isa<GlobalVariable>(Val: GV)) {
3779 if (auto GCM = dyn_cast<GlobalVariable>(Val: GV)->getCodeModel())
3780 CM = *GCM;
3781 }
3782
3783 return getAddr(N, DAG, M: CM, IsLocal: GV->isDSOLocal());
3784}
3785
// Lower a TLS address for the static TLS models (Initial-Exec and
// Local-Exec). Emits the matching PseudoLA_TLS_* machine node and, except
// for LE under normal/medium code models (where the pseudo expansion adds
// the thread pointer itself), adds $tp explicitly.
SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                                  SelectionDAG &DAG,
                                                  unsigned Opc, bool UseGOT,
                                                  bool Large) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DL: DAG.getDataLayout());
  MVT GRLenVT = Subtarget.getGRLenVT();

  // This is not actually used, but is necessary for successfully matching the
  // PseudoLA_*_LARGE nodes.
  SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty);
  SDValue Addr = DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: 0);

  // Only IE needs an extra argument for large code model.
  SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
                       ? SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), 0)
                       : SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Addr), 0);

  // If it is LE for normal/medium code model, the add tp operation will occur
  // during the pseudo-instruction expansion.
  if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
    return Offset;

  if (UseGOT) {
    // Mark the load instruction as invariant to enable hoisting in MachineLICM.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MemOp = MF.getMachineMemOperand(
        PtrInfo: MachinePointerInfo::getGOT(MF),
        f: MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
            MachineMemOperand::MOInvariant,
        MemTy: LLT(Ty.getSimpleVT()), base_alignment: Align(Ty.getFixedSizeInBits() / 8));
    DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Offset.getNode()), NewMemRefs: {MemOp});
  }

  // Add the thread pointer.
  return DAG.getNode(Opcode: ISD::ADD, DL, VT: Ty, N1: Offset,
                     N2: DAG.getRegister(Reg: LoongArch::R2, VT: GRLenVT));
}
3824
// Lower a TLS address for the dynamic TLS models (General-Dynamic and
// Local-Dynamic): materialize the GOT entry address via the given pseudo
// and call __tls_get_addr with it.
SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                                   SelectionDAG &DAG,
                                                   unsigned Opc,
                                                   bool Large) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DL: DAG.getDataLayout());
  // Integer type of pointer width used for the libcall argument/result.
  IntegerType *CallTy = Type::getIntNTy(C&: *DAG.getContext(), N: Ty.getSizeInBits());

  // This is not actually used, but is necessary for successfully matching the
  // PseudoLA_*_LARGE nodes.
  SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty);

  // Use a PC-relative addressing mode to access the dynamic GOT address.
  SDValue Addr = DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: 0);
  SDValue Load = Large ? SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), 0)
                       : SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Addr), 0);

  // Prepare argument list to generate call.
  ArgListTy Args;
  Args.emplace_back(args&: Load, args&: CallTy);

  // Setup call to __tls_get_addr.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CC: CallingConv::C, ResultType: CallTy,
                    Target: DAG.getExternalSymbol(Sym: "__tls_get_addr", VT: Ty),
                    ArgsList: std::move(Args));

  return LowerCallTo(CLI).first;
}
3856
3857SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
3858 SelectionDAG &DAG, unsigned Opc,
3859 bool Large) const {
3860 SDLoc DL(N);
3861 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
3862 const GlobalValue *GV = N->getGlobal();
3863
3864 // This is not actually used, but is necessary for successfully matching the
3865 // PseudoLA_*_LARGE nodes.
3866 SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty);
3867
3868 // Use a PC-relative addressing mode to access the global dynamic GOT address.
3869 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
3870 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: 0);
3871 return Large ? SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), 0)
3872 : SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Addr), 0);
3873}
3874
// Lower GlobalTLSAddress: dispatch on the TLS model to the dynamic
// (__tls_get_addr call), static (IE/LE), or TLSDESC lowering helpers.
SDValue
LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                               SelectionDAG &DAG) const {
  // TLS is unsupported under the GHC calling convention.
  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
      CallingConv::GHC)
    report_fatal_error(reason: "In GHC calling convention TLS is not supported");

  bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
  assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");

  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Val&: Op);
  assert(N->getOffset() == 0 && "unexpected offset in global node");

  if (DAG.getTarget().useEmulatedTLS())
    reportFatalUsageError(reason: "the emulated TLS is prohibited");

  // When TLSDESC is enabled, GD/LD fall through to the TLSDESC path below.
  bool IsDesc = DAG.getTarget().useTLSDESC();

  switch (getTargetMachine().getTLSModel(GV: N->getGlobal())) {
  case TLSModel::GeneralDynamic:
    // In this model, application code calls the dynamic linker function
    // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
    // runtime.
    if (!IsDesc)
      return getDynamicTLSAddr(N, DAG,
                               Opc: Large ? LoongArch::PseudoLA_TLS_GD_LARGE
                                        : LoongArch::PseudoLA_TLS_GD,
                               Large);
    break;
  case TLSModel::LocalDynamic:
    // Same as GeneralDynamic, except for assembly modifiers and relocation
    // records.
    if (!IsDesc)
      return getDynamicTLSAddr(N, DAG,
                               Opc: Large ? LoongArch::PseudoLA_TLS_LD_LARGE
                                        : LoongArch::PseudoLA_TLS_LD,
                               Large);
    break;
  case TLSModel::InitialExec:
    // This model uses the GOT to resolve TLS offsets.
    return getStaticTLSAddr(N, DAG,
                            Opc: Large ? LoongArch::PseudoLA_TLS_IE_LARGE
                                     : LoongArch::PseudoLA_TLS_IE,
                            /*UseGOT=*/true, Large);
  case TLSModel::LocalExec:
    // This model is used when static linking as the TLS offsets are resolved
    // during program linking.
    //
    // This node doesn't need an extra argument for the large code model.
    return getStaticTLSAddr(N, DAG, Opc: LoongArch::PseudoLA_TLS_LE,
                            /*UseGOT=*/false, Large);
  }

  return getTLSDescAddr(N, DAG,
                        Opc: Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
                                 : LoongArch::PseudoLA_TLS_DESC,
                        Large);
}
3933
3934template <unsigned N>
3935static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
3936 SelectionDAG &DAG, bool IsSigned = false) {
3937 auto *CImm = cast<ConstantSDNode>(Val: Op->getOperand(Num: ImmOp));
3938 // Check the ImmArg.
3939 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3940 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3941 DAG.getContext()->emitError(ErrorStr: Op->getOperationName(G: 0) +
3942 ": argument out of range.");
3943 return DAG.getNode(Opcode: ISD::UNDEF, DL: SDLoc(Op), VT: Op.getValueType());
3944 }
3945 return SDValue();
3946}
3947
// Custom-lower ISD::INTRINSIC_WO_CHAIN. Besides Intrinsic::thread_pointer,
// every case below only range-checks an immediate operand of an LSX/LASX
// intrinsic: checkIntrinsicImmArg<N> emits a diagnostic (and yields UNDEF)
// when the immediate does not fit in an N-bit field, and yields an empty
// SDValue otherwise so that default lowering proceeds.
SDValue
LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                 SelectionDAG &DAG) const {
  switch (Op.getConstantOperandVal(i: 0)) {
  default:
    return SDValue(); // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    // Materialize the thread pointer, which lives in register R2 ($tp).
    EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
    return DAG.getRegister(Reg: LoongArch::R2, VT: PtrVT);
  }
  // uimm1 immediate at operand 2.
  case Intrinsic::loongarch_lsx_vpickve2gr_d:
  case Intrinsic::loongarch_lsx_vpickve2gr_du:
  case Intrinsic::loongarch_lsx_vreplvei_d:
  case Intrinsic::loongarch_lasx_xvrepl128vei_d:
    return checkIntrinsicImmArg<1>(Op, ImmOp: 2, DAG);
  // uimm2 immediate at operand 2.
  case Intrinsic::loongarch_lsx_vreplvei_w:
  case Intrinsic::loongarch_lasx_xvrepl128vei_w:
  case Intrinsic::loongarch_lasx_xvpickve2gr_d:
  case Intrinsic::loongarch_lasx_xvpickve2gr_du:
  case Intrinsic::loongarch_lasx_xvpickve_d:
  case Intrinsic::loongarch_lasx_xvpickve_d_f:
    return checkIntrinsicImmArg<2>(Op, ImmOp: 2, DAG);
  // uimm2 immediate at operand 3.
  case Intrinsic::loongarch_lasx_xvinsve0_d:
    return checkIntrinsicImmArg<2>(Op, ImmOp: 3, DAG);
  // uimm3 immediate at operand 2.
  case Intrinsic::loongarch_lsx_vsat_b:
  case Intrinsic::loongarch_lsx_vsat_bu:
  case Intrinsic::loongarch_lsx_vrotri_b:
  case Intrinsic::loongarch_lsx_vsllwil_h_b:
  case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
  case Intrinsic::loongarch_lsx_vsrlri_b:
  case Intrinsic::loongarch_lsx_vsrari_b:
  case Intrinsic::loongarch_lsx_vreplvei_h:
  case Intrinsic::loongarch_lasx_xvsat_b:
  case Intrinsic::loongarch_lasx_xvsat_bu:
  case Intrinsic::loongarch_lasx_xvrotri_b:
  case Intrinsic::loongarch_lasx_xvsllwil_h_b:
  case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
  case Intrinsic::loongarch_lasx_xvsrlri_b:
  case Intrinsic::loongarch_lasx_xvsrari_b:
  case Intrinsic::loongarch_lasx_xvrepl128vei_h:
  case Intrinsic::loongarch_lasx_xvpickve_w:
  case Intrinsic::loongarch_lasx_xvpickve_w_f:
    return checkIntrinsicImmArg<3>(Op, ImmOp: 2, DAG);
  // uimm3 immediate at operand 3.
  case Intrinsic::loongarch_lasx_xvinsve0_w:
    return checkIntrinsicImmArg<3>(Op, ImmOp: 3, DAG);
  // uimm4 immediate at operand 2.
  case Intrinsic::loongarch_lsx_vsat_h:
  case Intrinsic::loongarch_lsx_vsat_hu:
  case Intrinsic::loongarch_lsx_vrotri_h:
  case Intrinsic::loongarch_lsx_vsllwil_w_h:
  case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
  case Intrinsic::loongarch_lsx_vsrlri_h:
  case Intrinsic::loongarch_lsx_vsrari_h:
  case Intrinsic::loongarch_lsx_vreplvei_b:
  case Intrinsic::loongarch_lasx_xvsat_h:
  case Intrinsic::loongarch_lasx_xvsat_hu:
  case Intrinsic::loongarch_lasx_xvrotri_h:
  case Intrinsic::loongarch_lasx_xvsllwil_w_h:
  case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
  case Intrinsic::loongarch_lasx_xvsrlri_h:
  case Intrinsic::loongarch_lasx_xvsrari_h:
  case Intrinsic::loongarch_lasx_xvrepl128vei_b:
    return checkIntrinsicImmArg<4>(Op, ImmOp: 2, DAG);
  // uimm4 immediate at operand 3 (narrowing shift-and-insert family).
  case Intrinsic::loongarch_lsx_vsrlni_b_h:
  case Intrinsic::loongarch_lsx_vsrani_b_h:
  case Intrinsic::loongarch_lsx_vsrlrni_b_h:
  case Intrinsic::loongarch_lsx_vsrarni_b_h:
  case Intrinsic::loongarch_lsx_vssrlni_b_h:
  case Intrinsic::loongarch_lsx_vssrani_b_h:
  case Intrinsic::loongarch_lsx_vssrlni_bu_h:
  case Intrinsic::loongarch_lsx_vssrani_bu_h:
  case Intrinsic::loongarch_lsx_vssrlrni_b_h:
  case Intrinsic::loongarch_lsx_vssrarni_b_h:
  case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
  case Intrinsic::loongarch_lsx_vssrarni_bu_h:
  case Intrinsic::loongarch_lasx_xvsrlni_b_h:
  case Intrinsic::loongarch_lasx_xvsrani_b_h:
  case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
  case Intrinsic::loongarch_lasx_xvsrarni_b_h:
  case Intrinsic::loongarch_lasx_xvssrlni_b_h:
  case Intrinsic::loongarch_lasx_xvssrani_b_h:
  case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
  case Intrinsic::loongarch_lasx_xvssrani_bu_h:
  case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
  case Intrinsic::loongarch_lasx_xvssrarni_b_h:
  case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
  case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
    return checkIntrinsicImmArg<4>(Op, ImmOp: 3, DAG);
  // uimm5 immediate at operand 2.
  case Intrinsic::loongarch_lsx_vsat_w:
  case Intrinsic::loongarch_lsx_vsat_wu:
  case Intrinsic::loongarch_lsx_vrotri_w:
  case Intrinsic::loongarch_lsx_vsllwil_d_w:
  case Intrinsic::loongarch_lsx_vsllwil_du_wu:
  case Intrinsic::loongarch_lsx_vsrlri_w:
  case Intrinsic::loongarch_lsx_vsrari_w:
  case Intrinsic::loongarch_lsx_vslei_bu:
  case Intrinsic::loongarch_lsx_vslei_hu:
  case Intrinsic::loongarch_lsx_vslei_wu:
  case Intrinsic::loongarch_lsx_vslei_du:
  case Intrinsic::loongarch_lsx_vslti_bu:
  case Intrinsic::loongarch_lsx_vslti_hu:
  case Intrinsic::loongarch_lsx_vslti_wu:
  case Intrinsic::loongarch_lsx_vslti_du:
  case Intrinsic::loongarch_lsx_vbsll_v:
  case Intrinsic::loongarch_lsx_vbsrl_v:
  case Intrinsic::loongarch_lasx_xvsat_w:
  case Intrinsic::loongarch_lasx_xvsat_wu:
  case Intrinsic::loongarch_lasx_xvrotri_w:
  case Intrinsic::loongarch_lasx_xvsllwil_d_w:
  case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
  case Intrinsic::loongarch_lasx_xvsrlri_w:
  case Intrinsic::loongarch_lasx_xvsrari_w:
  case Intrinsic::loongarch_lasx_xvslei_bu:
  case Intrinsic::loongarch_lasx_xvslei_hu:
  case Intrinsic::loongarch_lasx_xvslei_wu:
  case Intrinsic::loongarch_lasx_xvslei_du:
  case Intrinsic::loongarch_lasx_xvslti_bu:
  case Intrinsic::loongarch_lasx_xvslti_hu:
  case Intrinsic::loongarch_lasx_xvslti_wu:
  case Intrinsic::loongarch_lasx_xvslti_du:
  case Intrinsic::loongarch_lasx_xvbsll_v:
  case Intrinsic::loongarch_lasx_xvbsrl_v:
    return checkIntrinsicImmArg<5>(Op, ImmOp: 2, DAG);
  // simm5 immediate at operand 2 (signed comparison immediates).
  case Intrinsic::loongarch_lsx_vseqi_b:
  case Intrinsic::loongarch_lsx_vseqi_h:
  case Intrinsic::loongarch_lsx_vseqi_w:
  case Intrinsic::loongarch_lsx_vseqi_d:
  case Intrinsic::loongarch_lsx_vslei_b:
  case Intrinsic::loongarch_lsx_vslei_h:
  case Intrinsic::loongarch_lsx_vslei_w:
  case Intrinsic::loongarch_lsx_vslei_d:
  case Intrinsic::loongarch_lsx_vslti_b:
  case Intrinsic::loongarch_lsx_vslti_h:
  case Intrinsic::loongarch_lsx_vslti_w:
  case Intrinsic::loongarch_lsx_vslti_d:
  case Intrinsic::loongarch_lasx_xvseqi_b:
  case Intrinsic::loongarch_lasx_xvseqi_h:
  case Intrinsic::loongarch_lasx_xvseqi_w:
  case Intrinsic::loongarch_lasx_xvseqi_d:
  case Intrinsic::loongarch_lasx_xvslei_b:
  case Intrinsic::loongarch_lasx_xvslei_h:
  case Intrinsic::loongarch_lasx_xvslei_w:
  case Intrinsic::loongarch_lasx_xvslei_d:
  case Intrinsic::loongarch_lasx_xvslti_b:
  case Intrinsic::loongarch_lasx_xvslti_h:
  case Intrinsic::loongarch_lasx_xvslti_w:
  case Intrinsic::loongarch_lasx_xvslti_d:
    return checkIntrinsicImmArg<5>(Op, ImmOp: 2, DAG, /*IsSigned=*/true);
  // uimm5 immediate at operand 3.
  case Intrinsic::loongarch_lsx_vsrlni_h_w:
  case Intrinsic::loongarch_lsx_vsrani_h_w:
  case Intrinsic::loongarch_lsx_vsrlrni_h_w:
  case Intrinsic::loongarch_lsx_vsrarni_h_w:
  case Intrinsic::loongarch_lsx_vssrlni_h_w:
  case Intrinsic::loongarch_lsx_vssrani_h_w:
  case Intrinsic::loongarch_lsx_vssrlni_hu_w:
  case Intrinsic::loongarch_lsx_vssrani_hu_w:
  case Intrinsic::loongarch_lsx_vssrlrni_h_w:
  case Intrinsic::loongarch_lsx_vssrarni_h_w:
  case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
  case Intrinsic::loongarch_lsx_vssrarni_hu_w:
  case Intrinsic::loongarch_lsx_vfrstpi_b:
  case Intrinsic::loongarch_lsx_vfrstpi_h:
  case Intrinsic::loongarch_lasx_xvsrlni_h_w:
  case Intrinsic::loongarch_lasx_xvsrani_h_w:
  case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
  case Intrinsic::loongarch_lasx_xvsrarni_h_w:
  case Intrinsic::loongarch_lasx_xvssrlni_h_w:
  case Intrinsic::loongarch_lasx_xvssrani_h_w:
  case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
  case Intrinsic::loongarch_lasx_xvssrani_hu_w:
  case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
  case Intrinsic::loongarch_lasx_xvssrarni_h_w:
  case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
  case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
  case Intrinsic::loongarch_lasx_xvfrstpi_b:
  case Intrinsic::loongarch_lasx_xvfrstpi_h:
    return checkIntrinsicImmArg<5>(Op, ImmOp: 3, DAG);
  // uimm6 immediate at operand 2.
  case Intrinsic::loongarch_lsx_vsat_d:
  case Intrinsic::loongarch_lsx_vsat_du:
  case Intrinsic::loongarch_lsx_vrotri_d:
  case Intrinsic::loongarch_lsx_vsrlri_d:
  case Intrinsic::loongarch_lsx_vsrari_d:
  case Intrinsic::loongarch_lasx_xvsat_d:
  case Intrinsic::loongarch_lasx_xvsat_du:
  case Intrinsic::loongarch_lasx_xvrotri_d:
  case Intrinsic::loongarch_lasx_xvsrlri_d:
  case Intrinsic::loongarch_lasx_xvsrari_d:
    return checkIntrinsicImmArg<6>(Op, ImmOp: 2, DAG);
  // uimm6 immediate at operand 3.
  case Intrinsic::loongarch_lsx_vsrlni_w_d:
  case Intrinsic::loongarch_lsx_vsrani_w_d:
  case Intrinsic::loongarch_lsx_vsrlrni_w_d:
  case Intrinsic::loongarch_lsx_vsrarni_w_d:
  case Intrinsic::loongarch_lsx_vssrlni_w_d:
  case Intrinsic::loongarch_lsx_vssrani_w_d:
  case Intrinsic::loongarch_lsx_vssrlni_wu_d:
  case Intrinsic::loongarch_lsx_vssrani_wu_d:
  case Intrinsic::loongarch_lsx_vssrlrni_w_d:
  case Intrinsic::loongarch_lsx_vssrarni_w_d:
  case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
  case Intrinsic::loongarch_lsx_vssrarni_wu_d:
  case Intrinsic::loongarch_lasx_xvsrlni_w_d:
  case Intrinsic::loongarch_lasx_xvsrani_w_d:
  case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
  case Intrinsic::loongarch_lasx_xvsrarni_w_d:
  case Intrinsic::loongarch_lasx_xvssrlni_w_d:
  case Intrinsic::loongarch_lasx_xvssrani_w_d:
  case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
  case Intrinsic::loongarch_lasx_xvssrani_wu_d:
  case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
  case Intrinsic::loongarch_lasx_xvssrarni_w_d:
  case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
  case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
    return checkIntrinsicImmArg<6>(Op, ImmOp: 3, DAG);
  // uimm7 immediate at operand 3.
  case Intrinsic::loongarch_lsx_vsrlni_d_q:
  case Intrinsic::loongarch_lsx_vsrani_d_q:
  case Intrinsic::loongarch_lsx_vsrlrni_d_q:
  case Intrinsic::loongarch_lsx_vsrarni_d_q:
  case Intrinsic::loongarch_lsx_vssrlni_d_q:
  case Intrinsic::loongarch_lsx_vssrani_d_q:
  case Intrinsic::loongarch_lsx_vssrlni_du_q:
  case Intrinsic::loongarch_lsx_vssrani_du_q:
  case Intrinsic::loongarch_lsx_vssrlrni_d_q:
  case Intrinsic::loongarch_lsx_vssrarni_d_q:
  case Intrinsic::loongarch_lsx_vssrlrni_du_q:
  case Intrinsic::loongarch_lsx_vssrarni_du_q:
  case Intrinsic::loongarch_lasx_xvsrlni_d_q:
  case Intrinsic::loongarch_lasx_xvsrani_d_q:
  case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
  case Intrinsic::loongarch_lasx_xvsrarni_d_q:
  case Intrinsic::loongarch_lasx_xvssrlni_d_q:
  case Intrinsic::loongarch_lasx_xvssrani_d_q:
  case Intrinsic::loongarch_lasx_xvssrlni_du_q:
  case Intrinsic::loongarch_lasx_xvssrani_du_q:
  case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
  case Intrinsic::loongarch_lasx_xvssrarni_d_q:
  case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
  case Intrinsic::loongarch_lasx_xvssrarni_du_q:
    return checkIntrinsicImmArg<7>(Op, ImmOp: 3, DAG);
  // uimm8 immediate at operand 2.
  case Intrinsic::loongarch_lsx_vnori_b:
  case Intrinsic::loongarch_lsx_vshuf4i_b:
  case Intrinsic::loongarch_lsx_vshuf4i_h:
  case Intrinsic::loongarch_lsx_vshuf4i_w:
  case Intrinsic::loongarch_lasx_xvnori_b:
  case Intrinsic::loongarch_lasx_xvshuf4i_b:
  case Intrinsic::loongarch_lasx_xvshuf4i_h:
  case Intrinsic::loongarch_lasx_xvshuf4i_w:
  case Intrinsic::loongarch_lasx_xvpermi_d:
    return checkIntrinsicImmArg<8>(Op, ImmOp: 2, DAG);
  // uimm8 immediate at operand 3.
  case Intrinsic::loongarch_lsx_vshuf4i_d:
  case Intrinsic::loongarch_lsx_vpermi_w:
  case Intrinsic::loongarch_lsx_vbitseli_b:
  case Intrinsic::loongarch_lsx_vextrins_b:
  case Intrinsic::loongarch_lsx_vextrins_h:
  case Intrinsic::loongarch_lsx_vextrins_w:
  case Intrinsic::loongarch_lsx_vextrins_d:
  case Intrinsic::loongarch_lasx_xvshuf4i_d:
  case Intrinsic::loongarch_lasx_xvpermi_w:
  case Intrinsic::loongarch_lasx_xvpermi_q:
  case Intrinsic::loongarch_lasx_xvbitseli_b:
  case Intrinsic::loongarch_lasx_xvextrins_b:
  case Intrinsic::loongarch_lasx_xvextrins_h:
  case Intrinsic::loongarch_lasx_xvextrins_w:
  case Intrinsic::loongarch_lasx_xvextrins_d:
    return checkIntrinsicImmArg<8>(Op, ImmOp: 3, DAG);
  // simm10 immediate at operand 1 (element replication immediates).
  case Intrinsic::loongarch_lsx_vrepli_b:
  case Intrinsic::loongarch_lsx_vrepli_h:
  case Intrinsic::loongarch_lsx_vrepli_w:
  case Intrinsic::loongarch_lsx_vrepli_d:
  case Intrinsic::loongarch_lasx_xvrepli_b:
  case Intrinsic::loongarch_lasx_xvrepli_h:
  case Intrinsic::loongarch_lasx_xvrepli_w:
  case Intrinsic::loongarch_lasx_xvrepli_d:
    return checkIntrinsicImmArg<10>(Op, ImmOp: 1, DAG, /*IsSigned=*/true);
  // simm13 immediate at operand 1 (load-immediate pattern).
  case Intrinsic::loongarch_lsx_vldi:
  case Intrinsic::loongarch_lasx_xvldi:
    return checkIntrinsicImmArg<13>(Op, ImmOp: 1, DAG, /*IsSigned=*/true);
  }
}
4225
4226// Helper function that emits error message for intrinsics with chain and return
4227// merge values of a UNDEF and the chain.
4228static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
4229 StringRef ErrorMsg,
4230 SelectionDAG &DAG) {
4231 DAG.getContext()->emitError(ErrorStr: Op->getOperationName(G: 0) + ": " + ErrorMsg + ".");
4232 return DAG.getMergeValues(Ops: {DAG.getUNDEF(VT: Op.getValueType()), Op.getOperand(i: 0)},
4233 dl: SDLoc(Op));
4234}
4235
// Custom-lower ISD::INTRINSIC_W_CHAIN nodes (CRC, CSR, IOCSR, cpucfg, lddir,
// movfcsr2gr and LSX/LASX load intrinsics). Invalid uses are diagnosed via
// emitIntrinsicWithChainErrorMessage, which merges an UNDEF result with the
// incoming chain so both results of the node stay defined.
SDValue
LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT GRLenVT = Subtarget.getGRLenVT();
  EVT VT = Op.getValueType();
  SDValue Chain = Op.getOperand(i: 0);
  const StringRef ErrorMsgOOR = "argument out of range";
  const StringRef ErrorMsgReqLA64 = "requires loongarch64";
  const StringRef ErrorMsgReqF = "requires basic 'f' target feature";

  switch (Op.getConstantOperandVal(i: 1)) {
  default:
    return Op;
  // NOTE(review): these CRC cases diagnose unconditionally here, which
  // implies this path is only reached when the LA64 requirement is not met
  // (presumably the LA64 lowering happens elsewhere) — confirm against the
  // type-legalization path.
  case Intrinsic::loongarch_crc_w_b_w:
  case Intrinsic::loongarch_crc_w_h_w:
  case Intrinsic::loongarch_crc_w_w_w:
  case Intrinsic::loongarch_crc_w_d_w:
  case Intrinsic::loongarch_crcc_w_b_w:
  case Intrinsic::loongarch_crcc_w_h_w:
  case Intrinsic::loongarch_crcc_w_w_w:
  case Intrinsic::loongarch_crcc_w_d_w:
    return emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG);
  // csrrd: the CSR index (operand 2) must fit in uimm14.
  case Intrinsic::loongarch_csrrd_w:
  case Intrinsic::loongarch_csrrd_d: {
    unsigned Imm = Op.getConstantOperandVal(i: 2);
    return !isUInt<14>(x: Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
               : DAG.getNode(Opcode: LoongArchISD::CSRRD, DL, ResultTys: {GRLenVT, MVT::Other},
                             Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
  }
  // csrwr: write operand 2 to the CSR indexed by uimm14 operand 3.
  case Intrinsic::loongarch_csrwr_w:
  case Intrinsic::loongarch_csrwr_d: {
    unsigned Imm = Op.getConstantOperandVal(i: 3);
    return !isUInt<14>(x: Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
               : DAG.getNode(Opcode: LoongArchISD::CSRWR, DL, ResultTys: {GRLenVT, MVT::Other},
                             Ops: {Chain, Op.getOperand(i: 2),
                              DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
  }
  // csrxchg: exchange under mask; CSR index is uimm14 operand 4.
  case Intrinsic::loongarch_csrxchg_w:
  case Intrinsic::loongarch_csrxchg_d: {
    unsigned Imm = Op.getConstantOperandVal(i: 4);
    return !isUInt<14>(x: Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
               : DAG.getNode(Opcode: LoongArchISD::CSRXCHG, DL, ResultTys: {GRLenVT, MVT::Other},
                             Ops: {Chain, Op.getOperand(i: 2), Op.getOperand(i: 3),
                              DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
  }
  // iocsrrd.d: the address operand is any-extended to i64.
  case Intrinsic::loongarch_iocsrrd_d: {
    return DAG.getNode(
        Opcode: LoongArchISD::IOCSRRD_D, DL, ResultTys: {GRLenVT, MVT::Other},
        Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op.getOperand(i: 2))});
  }
// Expands each narrower IOCSR read into its target node, forwarding the
// address operand (operand 2) unchanged.
#define IOCSRRD_CASE(NAME, NODE)                                               \
  case Intrinsic::loongarch_##NAME: {                                          \
    return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other},          \
                       {Chain, Op.getOperand(2)});                             \
  }
    IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
    IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
    IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
#undef IOCSRRD_CASE
  case Intrinsic::loongarch_cpucfg: {
    return DAG.getNode(Opcode: LoongArchISD::CPUCFG, DL, ResultTys: {GRLenVT, MVT::Other},
                       Ops: {Chain, Op.getOperand(i: 2)});
  }
  // lddir: the level immediate (operand 3) must fit in uimm8.
  case Intrinsic::loongarch_lddir_d: {
    unsigned Imm = Op.getConstantOperandVal(i: 3);
    return !isUInt<8>(x: Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
               : Op;
  }
  // movfcsr2gr: requires the basic 'f' feature; FCSR index is uimm2.
  case Intrinsic::loongarch_movfcsr2gr: {
    if (!Subtarget.hasBasicF())
      return emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgReqF, DAG);
    unsigned Imm = Op.getConstantOperandVal(i: 2);
    return !isUInt<2>(x: Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
               : DAG.getNode(Opcode: LoongArchISD::MOVFCSR2GR, DL, ResultTys: {VT, MVT::Other},
                             Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
  }
  // Vector loads: the offset (operand 3) must be simm12.
  case Intrinsic::loongarch_lsx_vld:
  case Intrinsic::loongarch_lsx_vldrepl_b:
  case Intrinsic::loongarch_lasx_xvld:
  case Intrinsic::loongarch_lasx_xvldrepl_b:
    return !isInt<12>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue())
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
               : SDValue();
  // Load-and-replicate of halfwords: offset must be simm12 and 2-aligned.
  case Intrinsic::loongarch_lsx_vldrepl_h:
  case Intrinsic::loongarch_lasx_xvldrepl_h:
    return !isShiftedInt<11, 1>(
               x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue())
               ? emitIntrinsicWithChainErrorMessage(
                     Op, ErrorMsg: "argument out of range or not a multiple of 2", DAG)
               : SDValue();
  // Load-and-replicate of words: offset must be simm12 and 4-aligned.
  case Intrinsic::loongarch_lsx_vldrepl_w:
  case Intrinsic::loongarch_lasx_xvldrepl_w:
    return !isShiftedInt<10, 2>(
               x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue())
               ? emitIntrinsicWithChainErrorMessage(
                     Op, ErrorMsg: "argument out of range or not a multiple of 4", DAG)
               : SDValue();
  // Load-and-replicate of doublewords: offset must be simm12 and 8-aligned.
  case Intrinsic::loongarch_lsx_vldrepl_d:
  case Intrinsic::loongarch_lasx_xvldrepl_d:
    return !isShiftedInt<9, 3>(
               x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue())
               ? emitIntrinsicWithChainErrorMessage(
                     Op, ErrorMsg: "argument out of range or not a multiple of 8", DAG)
               : SDValue();
  }
}
4348
4349// Helper function that emits error message for intrinsics with void return
4350// value and return the chain.
4351static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
4352 SelectionDAG &DAG) {
4353
4354 DAG.getContext()->emitError(ErrorStr: Op->getOperationName(G: 0) + ": " + ErrorMsg + ".");
4355 return Op.getOperand(i: 0);
4356}
4357
// Custom-lower ISD::INTRINSIC_VOID nodes (cacop, barriers, break/syscall,
// CSR/IOCSR writes, ldpte and LSX/LASX stores). Invalid uses are diagnosed
// via emitIntrinsicErrorMessage, which returns just the incoming chain.
SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT GRLenVT = Subtarget.getGRLenVT();
  SDValue Chain = Op.getOperand(i: 0);
  uint64_t IntrinsicEnum = Op.getConstantOperandVal(i: 1);
  SDValue Op2 = Op.getOperand(i: 2);
  const StringRef ErrorMsgOOR = "argument out of range";
  const StringRef ErrorMsgReqLA64 = "requires loongarch64";
  const StringRef ErrorMsgReqLA32 = "requires loongarch32";
  const StringRef ErrorMsgReqF = "requires basic 'f' target feature";

  switch (IntrinsicEnum) {
  default:
    // TODO: Add more Intrinsics.
    return SDValue();
  // Cache operation: the .d/.w forms are tied to 64/32-bit subtargets
  // respectively, and the immediates must be uimm5 / simm12.
  case Intrinsic::loongarch_cacop_d:
  case Intrinsic::loongarch_cacop_w: {
    if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
      return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG);
    if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
      return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA32, DAG);
    // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
    unsigned Imm1 = Op2->getAsZExtVal();
    int Imm2 = cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue();
    if (!isUInt<5>(x: Imm1) || !isInt<12>(x: Imm2))
      return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG);
    return Op;
  }
  // dbar hint: uimm15.
  case Intrinsic::loongarch_dbar: {
    unsigned Imm = Op2->getAsZExtVal();
    return !isUInt<15>(x: Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
               : DAG.getNode(Opcode: LoongArchISD::DBAR, DL, VT: MVT::Other, N1: Chain,
                             N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT));
  }
  // ibar hint: uimm15.
  case Intrinsic::loongarch_ibar: {
    unsigned Imm = Op2->getAsZExtVal();
    return !isUInt<15>(x: Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
               : DAG.getNode(Opcode: LoongArchISD::IBAR, DL, VT: MVT::Other, N1: Chain,
                             N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT));
  }
  // break code: uimm15.
  case Intrinsic::loongarch_break: {
    unsigned Imm = Op2->getAsZExtVal();
    return !isUInt<15>(x: Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
               : DAG.getNode(Opcode: LoongArchISD::BREAK, DL, VT: MVT::Other, N1: Chain,
                             N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT));
  }
  // movgr2fcsr: requires 'f'; FCSR index is uimm2 and the value operand is
  // any-extended to GRLen width.
  case Intrinsic::loongarch_movgr2fcsr: {
    if (!Subtarget.hasBasicF())
      return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqF, DAG);
    unsigned Imm = Op2->getAsZExtVal();
    return !isUInt<2>(x: Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
               : DAG.getNode(Opcode: LoongArchISD::MOVGR2FCSR, DL, VT: MVT::Other, N1: Chain,
                             N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT),
                             N3: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: GRLenVT,
                                         Operand: Op.getOperand(i: 3)));
  }
  // syscall code: uimm15.
  case Intrinsic::loongarch_syscall: {
    unsigned Imm = Op2->getAsZExtVal();
    return !isUInt<15>(x: Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
               : DAG.getNode(Opcode: LoongArchISD::SYSCALL, DL, VT: MVT::Other, N1: Chain,
                             N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT));
  }
// Narrow IOCSR writes: on LA64 both the address (Op2) and the value (Op3)
// are any-extended to i64 first; on LA32 they are used as-is.
#define IOCSRWR_CASE(NAME, NODE)                                               \
  case Intrinsic::loongarch_##NAME: {                                          \
    SDValue Op3 = Op.getOperand(3);                                            \
    return Subtarget.is64Bit()                                                 \
               ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain,        \
                             DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),  \
                             DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3))  \
               : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2,   \
                             Op3);                                             \
  }
    IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
    IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
    IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
#undef IOCSRWR_CASE
  // iocsrwr.d: LA64 only; value operand (operand 3) is any-extended to i64.
  case Intrinsic::loongarch_iocsrwr_d: {
    return !Subtarget.is64Bit()
               ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG)
               : DAG.getNode(Opcode: LoongArchISD::IOCSRWR_D, DL, VT: MVT::Other, N1: Chain,
                             N2: Op2,
                             N3: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64,
                                         Operand: Op.getOperand(i: 3)));
  }
// Bound-check assertions are LA64-only; otherwise pass through unchanged.
#define ASRT_LE_GT_CASE(NAME)                                                  \
  case Intrinsic::loongarch_##NAME: {                                          \
    return !Subtarget.is64Bit()                                                \
               ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)           \
               : Op;                                                           \
  }
    ASRT_LE_GT_CASE(asrtle_d)
    ASRT_LE_GT_CASE(asrtgt_d)
#undef ASRT_LE_GT_CASE
  // ldpte: LA64 only; the sequence immediate (operand 3) must be uimm8.
  case Intrinsic::loongarch_ldpte_d: {
    unsigned Imm = Op.getConstantOperandVal(i: 3);
    return !Subtarget.is64Bit()
               ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG)
           : !isUInt<8>(x: Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
                              : Op;
  }
  // Vector stores: the offset (operand 4) must be simm12.
  case Intrinsic::loongarch_lsx_vst:
  case Intrinsic::loongarch_lasx_xvst:
    return !isInt<12>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue())
               ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
               : SDValue();
  // Store-element: operand 4 is the offset, operand 5 the element index. The
  // offset must be an appropriately aligned simm, and the index must fit the
  // element count of the vector (e.g. uimm5 for 32 bytes, uimm4 for 16).
  case Intrinsic::loongarch_lasx_xvstelm_b:
    return (!isInt<8>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
            !isUInt<5>(x: Op.getConstantOperandVal(i: 5)))
               ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vstelm_b:
    return (!isInt<8>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
            !isUInt<4>(x: Op.getConstantOperandVal(i: 5)))
               ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
               : SDValue();
  case Intrinsic::loongarch_lasx_xvstelm_h:
    return (!isShiftedInt<8, 1>(
                x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
            !isUInt<4>(x: Op.getConstantOperandVal(i: 5)))
               ? emitIntrinsicErrorMessage(
                     Op, ErrorMsg: "argument out of range or not a multiple of 2", DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vstelm_h:
    return (!isShiftedInt<8, 1>(
                x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
            !isUInt<3>(x: Op.getConstantOperandVal(i: 5)))
               ? emitIntrinsicErrorMessage(
                     Op, ErrorMsg: "argument out of range or not a multiple of 2", DAG)
               : SDValue();
  case Intrinsic::loongarch_lasx_xvstelm_w:
    return (!isShiftedInt<8, 2>(
                x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
            !isUInt<3>(x: Op.getConstantOperandVal(i: 5)))
               ? emitIntrinsicErrorMessage(
                     Op, ErrorMsg: "argument out of range or not a multiple of 4", DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vstelm_w:
    return (!isShiftedInt<8, 2>(
                x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
            !isUInt<2>(x: Op.getConstantOperandVal(i: 5)))
               ? emitIntrinsicErrorMessage(
                     Op, ErrorMsg: "argument out of range or not a multiple of 4", DAG)
               : SDValue();
  case Intrinsic::loongarch_lasx_xvstelm_d:
    return (!isShiftedInt<8, 3>(
                x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
            !isUInt<2>(x: Op.getConstantOperandVal(i: 5)))
               ? emitIntrinsicErrorMessage(
                     Op, ErrorMsg: "argument out of range or not a multiple of 8", DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vstelm_d:
    return (!isShiftedInt<8, 3>(
                x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
            !isUInt<1>(x: Op.getConstantOperandVal(i: 5)))
               ? emitIntrinsicErrorMessage(
                     Op, ErrorMsg: "argument out of range or not a multiple of 8", DAG)
               : SDValue();
  }
}
4523
4524SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
4525 SelectionDAG &DAG) const {
4526 SDLoc DL(Op);
4527 SDValue Lo = Op.getOperand(i: 0);
4528 SDValue Hi = Op.getOperand(i: 1);
4529 SDValue Shamt = Op.getOperand(i: 2);
4530 EVT VT = Lo.getValueType();
4531
4532 // if Shamt-GRLen < 0: // Shamt < GRLen
4533 // Lo = Lo << Shamt
4534 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
4535 // else:
4536 // Lo = 0
4537 // Hi = Lo << (Shamt-GRLen)
4538
4539 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
4540 SDValue One = DAG.getConstant(Val: 1, DL, VT);
4541 SDValue MinusGRLen =
4542 DAG.getSignedConstant(Val: -(int)Subtarget.getGRLen(), DL, VT);
4543 SDValue GRLenMinus1 = DAG.getConstant(Val: Subtarget.getGRLen() - 1, DL, VT);
4544 SDValue ShamtMinusGRLen = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shamt, N2: MinusGRLen);
4545 SDValue GRLenMinus1Shamt = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Shamt, N2: GRLenMinus1);
4546
4547 SDValue LoTrue = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Lo, N2: Shamt);
4548 SDValue ShiftRight1Lo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo, N2: One);
4549 SDValue ShiftRightLo =
4550 DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: ShiftRight1Lo, N2: GRLenMinus1Shamt);
4551 SDValue ShiftLeftHi = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi, N2: Shamt);
4552 SDValue HiTrue = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShiftLeftHi, N2: ShiftRightLo);
4553 SDValue HiFalse = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Lo, N2: ShamtMinusGRLen);
4554
4555 SDValue CC = DAG.getSetCC(DL, VT, LHS: ShamtMinusGRLen, RHS: Zero, Cond: ISD::SETLT);
4556
4557 Lo = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: LoTrue, N3: Zero);
4558 Hi = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: HiTrue, N3: HiFalse);
4559
4560 SDValue Parts[2] = {Lo, Hi};
4561 return DAG.getMergeValues(Ops: Parts, dl: DL);
4562}
4563
4564SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
4565 SelectionDAG &DAG,
4566 bool IsSRA) const {
4567 SDLoc DL(Op);
4568 SDValue Lo = Op.getOperand(i: 0);
4569 SDValue Hi = Op.getOperand(i: 1);
4570 SDValue Shamt = Op.getOperand(i: 2);
4571 EVT VT = Lo.getValueType();
4572
4573 // SRA expansion:
4574 // if Shamt-GRLen < 0: // Shamt < GRLen
4575 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4576 // Hi = Hi >>s Shamt
4577 // else:
4578 // Lo = Hi >>s (Shamt-GRLen);
4579 // Hi = Hi >>s (GRLen-1)
4580 //
4581 // SRL expansion:
4582 // if Shamt-GRLen < 0: // Shamt < GRLen
4583 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4584 // Hi = Hi >>u Shamt
4585 // else:
4586 // Lo = Hi >>u (Shamt-GRLen);
4587 // Hi = 0;
4588
4589 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
4590
4591 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
4592 SDValue One = DAG.getConstant(Val: 1, DL, VT);
4593 SDValue MinusGRLen =
4594 DAG.getSignedConstant(Val: -(int)Subtarget.getGRLen(), DL, VT);
4595 SDValue GRLenMinus1 = DAG.getConstant(Val: Subtarget.getGRLen() - 1, DL, VT);
4596 SDValue ShamtMinusGRLen = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shamt, N2: MinusGRLen);
4597 SDValue GRLenMinus1Shamt = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Shamt, N2: GRLenMinus1);
4598
4599 SDValue ShiftRightLo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo, N2: Shamt);
4600 SDValue ShiftLeftHi1 = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi, N2: One);
4601 SDValue ShiftLeftHi =
4602 DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: ShiftLeftHi1, N2: GRLenMinus1Shamt);
4603 SDValue LoTrue = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShiftRightLo, N2: ShiftLeftHi);
4604 SDValue HiTrue = DAG.getNode(Opcode: ShiftRightOp, DL, VT, N1: Hi, N2: Shamt);
4605 SDValue LoFalse = DAG.getNode(Opcode: ShiftRightOp, DL, VT, N1: Hi, N2: ShamtMinusGRLen);
4606 SDValue HiFalse =
4607 IsSRA ? DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: Hi, N2: GRLenMinus1) : Zero;
4608
4609 SDValue CC = DAG.getSetCC(DL, VT, LHS: ShamtMinusGRLen, RHS: Zero, Cond: ISD::SETLT);
4610
4611 Lo = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: LoTrue, N3: LoFalse);
4612 Hi = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: HiTrue, N3: HiFalse);
4613
4614 SDValue Parts[2] = {Lo, Hi};
4615 return DAG.getMergeValues(Ops: Parts, dl: DL);
4616}
4617
4618// Returns the opcode of the target-specific SDNode that implements the 32-bit
4619// form of the given Opcode.
4620static unsigned getLoongArchWOpcode(unsigned Opcode) {
4621 switch (Opcode) {
4622 default:
4623 llvm_unreachable("Unexpected opcode");
4624 case ISD::SDIV:
4625 return LoongArchISD::DIV_W;
4626 case ISD::UDIV:
4627 return LoongArchISD::DIV_WU;
4628 case ISD::SREM:
4629 return LoongArchISD::MOD_W;
4630 case ISD::UREM:
4631 return LoongArchISD::MOD_WU;
4632 case ISD::SHL:
4633 return LoongArchISD::SLL_W;
4634 case ISD::SRA:
4635 return LoongArchISD::SRA_W;
4636 case ISD::SRL:
4637 return LoongArchISD::SRL_W;
4638 case ISD::ROTL:
4639 case ISD::ROTR:
4640 return LoongArchISD::ROTR_W;
4641 case ISD::CTTZ:
4642 return LoongArchISD::CTZ_W;
4643 case ISD::CTLZ:
4644 return LoongArchISD::CLZ_W;
4645 }
4646}
4647
// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
// otherwise be promoted to i64, making it difficult to select the
// SLL_W/.../*W later on because the fact the operation was originally of
// type i8/i16/i32 is lost.
//
// \p NumOp is the number of operands the operation takes (1 or 2); each is
// widened to i64 with \p ExtOpc before being fed to the *_W node. The result
// is truncated back to the node's original type.
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
                                   unsigned ExtOpc = ISD::ANY_EXTEND) {
  SDLoc DL(N);
  // Map the generic opcode to its LoongArch 32-bit counterpart.
  unsigned WOpcode = getLoongArchWOpcode(Opcode: N->getOpcode());
  SDValue NewOp0, NewRes;

  switch (NumOp) {
  default:
    llvm_unreachable("Unexpected NumOp");
  case 1: {
    NewOp0 = DAG.getNode(Opcode: ExtOpc, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0));
    NewRes = DAG.getNode(Opcode: WOpcode, DL, VT: MVT::i64, Operand: NewOp0);
    break;
  }
  case 2: {
    NewOp0 = DAG.getNode(Opcode: ExtOpc, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0));
    SDValue NewOp1 = DAG.getNode(Opcode: ExtOpc, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1));
    // ROTL is implemented with ROTR_W, so rewrite the amount as (32 - amt).
    if (N->getOpcode() == ISD::ROTL) {
      SDValue TmpOp = DAG.getConstant(Val: 32, DL, VT: MVT::i64);
      NewOp1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: MVT::i64, N1: TmpOp, N2: NewOp1);
    }
    NewRes = DAG.getNode(Opcode: WOpcode, DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1);
    break;
  }
  // TODO:Handle more NumOp.
  }

  // ReplaceNodeResults requires we maintain the same type for the return
  // value.
  return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: NewRes);
}
4684
// Converts the given 32-bit operation to a i64 operation with signed extension
// semantic to reduce the signed extension instructions.
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  // Any-extend both operands: only the low 32 bits of the widened operation
  // are kept below, so the high bits don't matter.
  SDValue NewOp0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0));
  SDValue NewOp1 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1));
  SDValue NewWOp = DAG.getNode(Opcode: N->getOpcode(), DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1);
  // Sign-extend the low 32 bits in-register, then truncate back to i32 so the
  // replacement value matches the original node's type.
  SDValue NewRes = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: MVT::i64, N1: NewWOp,
                               N2: DAG.getValueType(MVT::i32));
  return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: NewRes);
}
4696
// Helper function that emits an error message for intrinsics with/without
// chain, and pushes UNDEF — plus, when \p WithChain is set, the incoming
// chain — as the replacement results.
static void emitErrorAndReplaceIntrinsicResults(
    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
    StringRef ErrorMsg, bool WithChain = true) {
  // Prefix the diagnostic with the node's operation name.
  DAG.getContext()->emitError(ErrorStr: N->getOperationName(G: 0) + ": " + ErrorMsg + ".");
  Results.push_back(Elt: DAG.getUNDEF(VT: N->getValueType(ResNo: 0)));
  if (!WithChain)
    return;
  // Pass the incoming chain (operand 0) through unchanged.
  Results.push_back(Elt: N->getOperand(Num: 0));
}
4708
// Replaces a vpickve2gr intrinsic node with a VPICK_{S,Z}EXT_ELT node
// (\p ResOp) extracting element Imm of the vector operand into a GRLen
// register. The template parameter \p N is the bit width allowed for the
// element index; an out-of-range index emits a diagnostic and yields UNDEF.
template <unsigned N>
static void
replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
                         SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
                         unsigned ResOp) {
  const StringRef ErrorMsgOOR = "argument out of range";
  // Operand 2 is the constant element index.
  unsigned Imm = Node->getConstantOperandVal(Num: 2);
  if (!isUInt<N>(Imm)) {
    emitErrorAndReplaceIntrinsicResults(N: Node, Results, DAG, ErrorMsg: ErrorMsgOOR,
                                        /*WithChain=*/false);
    return;
  }
  SDLoc DL(Node);
  SDValue Vec = Node->getOperand(Num: 1);

  // The third operand records the original element type so the extension
  // (sign or zero, per ResOp) knows how many bits are significant.
  SDValue PickElt =
      DAG.getNode(Opcode: ResOp, DL, VT: Subtarget.getGRLenVT(), N1: Vec,
                  N2: DAG.getConstant(Val: Imm, DL, VT: Subtarget.getGRLenVT()),
                  N3: DAG.getValueType(Vec.getValueType().getVectorElementType()));
  Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: Node->getValueType(ResNo: 0),
                               Operand: PickElt.getValue(R: 0)));
}
4731
// Replaces a vector condition intrinsic (bz/bnz family) with the
// target-specific node \p ResOp (VALL_ZERO / VANY_ZERO / VALL_NONZERO /
// VANY_NONZERO) computed in a GRLen register, truncated back to the node's
// original result type.
static void replaceVecCondBranchResults(SDNode *N,
                                        SmallVectorImpl<SDValue> &Results,
                                        SelectionDAG &DAG,
                                        const LoongArchSubtarget &Subtarget,
                                        unsigned ResOp) {
  SDLoc DL(N);
  // Operand 0 is the intrinsic ID; operand 1 is the vector to test.
  SDValue Vec = N->getOperand(Num: 1);

  SDValue CB = DAG.getNode(Opcode: ResOp, DL, VT: Subtarget.getGRLenVT(), Operand: Vec);
  Results.push_back(
      Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: CB.getValue(R: 0)));
}
4744
// Custom-legalizes the result of an INTRINSIC_WO_CHAIN node by dispatching on
// the intrinsic ID (constant operand 0).
static void
replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                                 SelectionDAG &DAG,
                                 const LoongArchSubtarget &Subtarget) {
  switch (N->getConstantOperandVal(Num: 0)) {
  default:
    llvm_unreachable("Unexpected Intrinsic.");
  // vpickve2gr with sign extension. The template argument is the bit width
  // of the valid index range, which shrinks as the element size grows.
  case Intrinsic::loongarch_lsx_vpickve2gr_b:
    replaceVPICKVE2GRResults<4>(Node: N, Results, DAG, Subtarget,
                                ResOp: LoongArchISD::VPICK_SEXT_ELT);
    break;
  case Intrinsic::loongarch_lsx_vpickve2gr_h:
  case Intrinsic::loongarch_lasx_xvpickve2gr_w:
    replaceVPICKVE2GRResults<3>(Node: N, Results, DAG, Subtarget,
                                ResOp: LoongArchISD::VPICK_SEXT_ELT);
    break;
  case Intrinsic::loongarch_lsx_vpickve2gr_w:
    replaceVPICKVE2GRResults<2>(Node: N, Results, DAG, Subtarget,
                                ResOp: LoongArchISD::VPICK_SEXT_ELT);
    break;
  // vpickve2gr with zero extension (the "_u" variants).
  case Intrinsic::loongarch_lsx_vpickve2gr_bu:
    replaceVPICKVE2GRResults<4>(Node: N, Results, DAG, Subtarget,
                                ResOp: LoongArchISD::VPICK_ZEXT_ELT);
    break;
  case Intrinsic::loongarch_lsx_vpickve2gr_hu:
  case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
    replaceVPICKVE2GRResults<3>(Node: N, Results, DAG, Subtarget,
                                ResOp: LoongArchISD::VPICK_ZEXT_ELT);
    break;
  case Intrinsic::loongarch_lsx_vpickve2gr_wu:
    replaceVPICKVE2GRResults<2>(Node: N, Results, DAG, Subtarget,
                                ResOp: LoongArchISD::VPICK_ZEXT_ELT);
    break;
  // Per-element "branch if zero" tests map to VALL_ZERO.
  case Intrinsic::loongarch_lsx_bz_b:
  case Intrinsic::loongarch_lsx_bz_h:
  case Intrinsic::loongarch_lsx_bz_w:
  case Intrinsic::loongarch_lsx_bz_d:
  case Intrinsic::loongarch_lasx_xbz_b:
  case Intrinsic::loongarch_lasx_xbz_h:
  case Intrinsic::loongarch_lasx_xbz_w:
  case Intrinsic::loongarch_lasx_xbz_d:
    replaceVecCondBranchResults(N, Results, DAG, Subtarget,
                                ResOp: LoongArchISD::VALL_ZERO);
    break;
  // Whole-vector "branch if zero" maps to VANY_ZERO.
  case Intrinsic::loongarch_lsx_bz_v:
  case Intrinsic::loongarch_lasx_xbz_v:
    replaceVecCondBranchResults(N, Results, DAG, Subtarget,
                                ResOp: LoongArchISD::VANY_ZERO);
    break;
  // Per-element "branch if nonzero" tests map to VALL_NONZERO.
  case Intrinsic::loongarch_lsx_bnz_b:
  case Intrinsic::loongarch_lsx_bnz_h:
  case Intrinsic::loongarch_lsx_bnz_w:
  case Intrinsic::loongarch_lsx_bnz_d:
  case Intrinsic::loongarch_lasx_xbnz_b:
  case Intrinsic::loongarch_lasx_xbnz_h:
  case Intrinsic::loongarch_lasx_xbnz_w:
  case Intrinsic::loongarch_lasx_xbnz_d:
    replaceVecCondBranchResults(N, Results, DAG, Subtarget,
                                ResOp: LoongArchISD::VALL_NONZERO);
    break;
  // Whole-vector "branch if nonzero" maps to VANY_NONZERO.
  case Intrinsic::loongarch_lsx_bnz_v:
  case Intrinsic::loongarch_lasx_xbnz_v:
    replaceVecCondBranchResults(N, Results, DAG, Subtarget,
                                ResOp: LoongArchISD::VANY_NONZERO);
    break;
  }
}
4812
// Lowers a 128-bit ATOMIC_CMP_SWAP into a PseudoCmpXchg128* machine node,
// splitting the i128 compare and new values into i64 halves and rebuilding the
// i128 result with BUILD_PAIR.
static void replaceCMP_XCHG_128Results(SDNode *N,
                                       SmallVectorImpl<SDValue> &Results,
                                       SelectionDAG &DAG) {
  assert(N->getValueType(0) == MVT::i128 &&
         "AtomicCmpSwap on types less than 128 should be legal");
  MachineMemOperand *MemOp = cast<MemSDNode>(Val: N)->getMemOperand();

  // Choose the pseudo variant by merged atomic ordering: anything with
  // acquire semantics uses the Acquire variant.
  unsigned Opcode;
  switch (MemOp->getMergedOrdering()) {
  case AtomicOrdering::Acquire:
  case AtomicOrdering::AcquireRelease:
  case AtomicOrdering::SequentiallyConsistent:
    Opcode = LoongArch::PseudoCmpXchg128Acquire;
    break;
  case AtomicOrdering::Monotonic:
  case AtomicOrdering::Release:
    Opcode = LoongArch::PseudoCmpXchg128;
    break;
  default:
    llvm_unreachable("Unexpected ordering!");
  }

  SDLoc DL(N);
  auto CmpVal = DAG.SplitScalar(N: N->getOperand(Num: 2), DL, LoVT: MVT::i64, HiVT: MVT::i64);
  auto NewVal = DAG.SplitScalar(N: N->getOperand(Num: 3), DL, LoVT: MVT::i64, HiVT: MVT::i64);
  // Operand order: pointer, cmp lo/hi, new lo/hi, chain.
  SDValue Ops[] = {N->getOperand(Num: 1), CmpVal.first, CmpVal.second,
                   NewVal.first, NewVal.second, N->getOperand(Num: 0)};

  SDNode *CmpSwap = DAG.getMachineNode(
      Opcode, dl: SDLoc(N), VTs: DAG.getVTList(VT1: MVT::i64, VT2: MVT::i64, VT3: MVT::i64, VT4: MVT::Other),
      Ops);
  DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: CmpSwap), NewMemRefs: {MemOp});
  // Results 0/1 are the low/high halves of the loaded value; result 2 is
  // unused here (presumably a scratch value — confirm against the pseudo's
  // definition); result 3 is the chain.
  Results.push_back(Elt: DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: MVT::i128,
                                N1: SDValue(CmpSwap, 0), N2: SDValue(CmpSwap, 1)));
  Results.push_back(Elt: SDValue(CmpSwap, 3));
}
4849
// Replace the results of node N, whose result type is illegal for the target,
// with legal-typed equivalents. Each replacement must keep the original
// result type — hence the TRUNCATEs back down from GRLen-width values.
void LoongArchTargetLowering::ReplaceNodeResults(
    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  SDLoc DL(N);
  EVT VT = N->getValueType(ResNo: 0);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Don't know how to legalize this operation");
  case ISD::ADD:
  case ISD::SUB:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    // Widen to i64 and sign-extend the low 32 bits of the result in-register.
    Results.push_back(Elt: customLegalizeToWOpWithSExt(N, DAG));
    break;
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::SREM:
  case ISD::UREM:
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    // With the Div32 feature ANY_EXTEND of the operands suffices; otherwise
    // they must be sign-extended to i64 first.
    Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 2,
                                          ExtOpc: Subtarget.hasDiv32() && VT == MVT::i32
                                              ? ISD::ANY_EXTEND
                                              : ISD::SIGN_EXTEND));
    break;
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    // Only non-constant shift amounts are rewritten to *_W nodes; constant
    // shifts fall through with no replacement pushed.
    if (N->getOperand(Num: 1).getOpcode() != ISD::Constant) {
      Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 2));
      break;
    }
    break;
  case ISD::ROTL:
  case ISD::ROTR:
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 2));
    break;
  case ISD::FP_TO_SINT: {
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    SDValue Src = N->getOperand(Num: 0);
    EVT FVT = EVT::getFloatingPointVT(BitWidth: N->getValueSizeInBits(ResNo: 0));
    if (getTypeAction(Context&: *DAG.getContext(), VT: Src.getValueType()) !=
        TargetLowering::TypeSoftenFloat) {
      if (!isTypeLegal(VT: Src.getValueType()))
        return;
      // f16 sources are first extended to f32 before FTINT.
      if (Src.getValueType() == MVT::f16)
        Src = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: MVT::f32, Operand: Src);
      SDValue Dst = DAG.getNode(Opcode: LoongArchISD::FTINT, DL, VT: FVT, Operand: Src);
      Results.push_back(Elt: DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Dst));
      return;
    }
    // If the FP type needs to be softened, emit a library call using the 'si'
    // version. If we left it to default legalization we'd end up with 'di'.
    RTLIB::Libcall LC;
    LC = RTLIB::getFPTOSINT(OpVT: Src.getValueType(), RetVT: VT);
    MakeLibCallOptions CallOptions;
    EVT OpVT = Src.getValueType();
    CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT: VT);
    SDValue Chain = SDValue();
    SDValue Result;
    std::tie(args&: Result, args&: Chain) =
        makeLibCall(DAG, LC, RetVT: VT, Ops: Src, CallOptions, dl: DL, Chain);
    Results.push_back(Elt: Result);
    break;
  }
  case ISD::BITCAST: {
    SDValue Src = N->getOperand(Num: 0);
    EVT SrcVT = Src.getValueType();
    // i32 <- f32 on LA64: move through an FPR-to-GPR node, then truncate.
    if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
        Subtarget.hasBasicF()) {
      SDValue Dst =
          DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Src);
      Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Dst));
    } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
      // i64 <- f64 on LA32: split the f64 into two i32 halves, then pair.
      SDValue NewReg = DAG.getNode(Opcode: LoongArchISD::SPLIT_PAIR_F64, DL,
                                   VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32), N: Src);
      SDValue RetReg = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: MVT::i64,
                                   N1: NewReg.getValue(R: 0), N2: NewReg.getValue(R: 1));
      Results.push_back(Elt: RetReg);
    }
    break;
  }
  case ISD::FP_TO_UINT: {
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    // Reuse the generic FP_TO_UINT expansion, then truncate to i32.
    auto &TLI = DAG.getTargetLoweringInfo();
    SDValue Tmp1, Tmp2;
    TLI.expandFP_TO_UINT(N, Result&: Tmp1, Chain&: Tmp2, DAG);
    Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Tmp1));
    break;
  }
  case ISD::BSWAP: {
    SDValue Src = N->getOperand(Num: 0);
    assert((VT == MVT::i16 || VT == MVT::i32) &&
           "Unexpected custom legalization");
    MVT GRLenVT = Subtarget.getGRLenVT();
    SDValue NewSrc = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: GRLenVT, Operand: Src);
    SDValue Tmp;
    switch (VT.getSizeInBits()) {
    default:
      llvm_unreachable("Unexpected operand width");
    case 16:
      Tmp = DAG.getNode(Opcode: LoongArchISD::REVB_2H, DL, VT: GRLenVT, Operand: NewSrc);
      break;
    case 32:
      // Only LA64 will get to here due to the size mismatch between VT and
      // GRLenVT, LA32 lowering is directly defined in LoongArchInstrInfo.
      Tmp = DAG.getNode(Opcode: LoongArchISD::REVB_2W, DL, VT: GRLenVT, Operand: NewSrc);
      break;
    }
    Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Tmp));
    break;
  }
  case ISD::BITREVERSE: {
    SDValue Src = N->getOperand(Num: 0);
    assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
           "Unexpected custom legalization");
    MVT GRLenVT = Subtarget.getGRLenVT();
    SDValue NewSrc = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: GRLenVT, Operand: Src);
    SDValue Tmp;
    switch (VT.getSizeInBits()) {
    default:
      llvm_unreachable("Unexpected operand width");
    case 8:
      Tmp = DAG.getNode(Opcode: LoongArchISD::BITREV_4B, DL, VT: GRLenVT, Operand: NewSrc);
      break;
    case 32:
      Tmp = DAG.getNode(Opcode: LoongArchISD::BITREV_W, DL, VT: GRLenVT, Operand: NewSrc);
      break;
    }
    Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Tmp));
    break;
  }
  case ISD::CTLZ:
  case ISD::CTTZ: {
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 1));
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    SDValue Chain = N->getOperand(Num: 0);
    SDValue Op2 = N->getOperand(Num: 2);
    MVT GRLenVT = Subtarget.getGRLenVT();
    const StringRef ErrorMsgOOR = "argument out of range";
    const StringRef ErrorMsgReqLA64 = "requires loongarch64";
    const StringRef ErrorMsgReqF = "requires basic 'f' target feature";

    // Dispatch on the intrinsic ID (constant operand 1).
    switch (N->getConstantOperandVal(Num: 1)) {
    default:
      llvm_unreachable("Unexpected Intrinsic.");
    case Intrinsic::loongarch_movfcsr2gr: {
      if (!Subtarget.hasBasicF()) {
        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgReqF);
        return;
      }
      // FCSR index must fit in 2 bits.
      unsigned Imm = Op2->getAsZExtVal();
      if (!isUInt<2>(x: Imm)) {
        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR);
        return;
      }
      SDValue MOVFCSR2GRResults = DAG.getNode(
          Opcode: LoongArchISD::MOVFCSR2GR, DL: SDLoc(N), ResultTys: {MVT::i64, MVT::Other},
          Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
      Results.push_back(
          Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: MOVFCSR2GRResults.getValue(R: 0)));
      Results.push_back(Elt: MOVFCSR2GRResults.getValue(R: 1));
      break;
    }
// CRC intrinsics with two GPR data operands: any-extend both to i64, emit the
// target node, and truncate the i64 result back to VT.
#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
  case Intrinsic::loongarch_##NAME: { \
    SDValue NODE = DAG.getNode( \
        LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
        {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
    Results.push_back(NODE.getValue(1)); \
    break; \
  }
    CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
    CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
    CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
    CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
    CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
    CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
#undef CRC_CASE_EXT_BINARYOP

// CRC intrinsics whose first data operand is already i64: only the second
// operand needs extending.
#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
  case Intrinsic::loongarch_##NAME: { \
    SDValue NODE = DAG.getNode( \
        LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
        {Chain, Op2, \
         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
    Results.push_back(NODE.getValue(1)); \
    break; \
  }
    CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
    CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
#undef CRC_CASE_EXT_UNARYOP
// 64-bit-only CSR intrinsics: diagnose on LA32, otherwise leave untouched.
#define CSR_CASE(ID) \
  case Intrinsic::loongarch_##ID: { \
    if (!Subtarget.is64Bit()) \
      emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
    break; \
  }
    CSR_CASE(csrrd_d);
    CSR_CASE(csrwr_d);
    CSR_CASE(csrxchg_d);
    CSR_CASE(iocsrrd_d);
#undef CSR_CASE
    case Intrinsic::loongarch_csrrd_w: {
      // CSR number must fit in 14 bits.
      unsigned Imm = Op2->getAsZExtVal();
      if (!isUInt<14>(x: Imm)) {
        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR);
        return;
      }
      SDValue CSRRDResults =
          DAG.getNode(Opcode: LoongArchISD::CSRRD, DL, ResultTys: {GRLenVT, MVT::Other},
                      Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
      Results.push_back(
          Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRRDResults.getValue(R: 0)));
      Results.push_back(Elt: CSRRDResults.getValue(R: 1));
      break;
    }
    case Intrinsic::loongarch_csrwr_w: {
      unsigned Imm = N->getConstantOperandVal(Num: 3);
      if (!isUInt<14>(x: Imm)) {
        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR);
        return;
      }
      SDValue CSRWRResults =
          DAG.getNode(Opcode: LoongArchISD::CSRWR, DL, ResultTys: {GRLenVT, MVT::Other},
                      Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op2),
                           DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
      Results.push_back(
          Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRWRResults.getValue(R: 0)));
      Results.push_back(Elt: CSRWRResults.getValue(R: 1));
      break;
    }
    case Intrinsic::loongarch_csrxchg_w: {
      unsigned Imm = N->getConstantOperandVal(Num: 4);
      if (!isUInt<14>(x: Imm)) {
        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR);
        return;
      }
      SDValue CSRXCHGResults = DAG.getNode(
          Opcode: LoongArchISD::CSRXCHG, DL, ResultTys: {GRLenVT, MVT::Other},
          Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op2),
               DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 3)),
               DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
      Results.push_back(
          Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRXCHGResults.getValue(R: 0)));
      Results.push_back(Elt: CSRXCHGResults.getValue(R: 1));
      break;
    }
// I/O CSR reads: any-extend the address operand, read into i64, truncate.
#define IOCSRRD_CASE(NAME, NODE) \
  case Intrinsic::loongarch_##NAME: { \
    SDValue IOCSRRDResults = \
        DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
                    {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
    Results.push_back( \
        DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
    Results.push_back(IOCSRRDResults.getValue(1)); \
    break; \
  }
    IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
    IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
    IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
#undef IOCSRRD_CASE
    case Intrinsic::loongarch_cpucfg: {
      SDValue CPUCFGResults =
          DAG.getNode(Opcode: LoongArchISD::CPUCFG, DL, ResultTys: {GRLenVT, MVT::Other},
                      Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op2)});
      Results.push_back(
          Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CPUCFGResults.getValue(R: 0)));
      Results.push_back(Elt: CPUCFGResults.getValue(R: 1));
      break;
    }
    case Intrinsic::loongarch_lddir_d: {
      // lddir.d is LA64-only; diagnose on LA32.
      if (!Subtarget.is64Bit()) {
        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgReqLA64);
        return;
      }
      break;
    }
    }
    break;
  }
  case ISD::READ_REGISTER: {
    // A READ_REGISTER needing custom legalization means the register width
    // doesn't match GRLen; emit a diagnostic and produce UNDEF + chain.
    if (Subtarget.is64Bit())
      DAG.getContext()->emitError(
          ErrorStr: "On LA64, only 64-bit registers can be read.");
    else
      DAG.getContext()->emitError(
          ErrorStr: "On LA32, only 32-bit registers can be read.");
    Results.push_back(Elt: DAG.getUNDEF(VT));
    Results.push_back(Elt: N->getOperand(Num: 0));
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
    break;
  }
  case ISD::LROUND: {
    // Lower via the lround libcall returning i64, then truncate to i32.
    SDValue Op0 = N->getOperand(Num: 0);
    EVT OpVT = Op0.getValueType();
    RTLIB::Libcall LC =
        OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
    MakeLibCallOptions CallOptions;
    CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT: MVT::i64);
    SDValue Result = makeLibCall(DAG, LC, RetVT: MVT::i64, Ops: Op0, CallOptions, dl: DL).first;
    Result = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Result);
    Results.push_back(Elt: Result);
    break;
  }
  case ISD::ATOMIC_CMP_SWAP: {
    replaceCMP_XCHG_128Results(N, Results, DAG);
    break;
  }
  case ISD::TRUNCATE: {
    MVT VT = N->getSimpleValueType(ResNo: 0); // NOTE: shadows the outer EVT VT.
    if (getTypeAction(Context&: *DAG.getContext(), VT) != TypeWidenVector)
      return;

    MVT WidenVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT).getSimpleVT();
    SDValue In = N->getOperand(Num: 0);
    EVT InVT = In.getValueType();
    EVT InEltVT = InVT.getVectorElementType();
    EVT EltVT = VT.getVectorElementType();
    unsigned MinElts = VT.getVectorNumElements();
    unsigned WidenNumElts = WidenVT.getVectorNumElements();
    unsigned InBits = InVT.getSizeInBits();

    if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
      if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
        // Express the truncate as a shuffle picking every Scale-th element of
        // the (bitcast) widened input; unused lanes stay undef (-1).
        int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
        SmallVector<int, 16> TruncMask(WidenNumElts, -1);
        for (unsigned I = 0; I < MinElts; ++I)
          TruncMask[I] = Scale * I;

        // NOTE(review): these declarations shadow the outer WidenNumElts/VT
        // with the input-side widened type used to build WidenIn.
        unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
        MVT SVT = In.getSimpleValueType().getScalarType();
        MVT VT = MVT::getVectorVT(VT: SVT, NumElements: WidenNumElts);
        SDValue WidenIn =
            DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT, N1: DAG.getUNDEF(VT), N2: In,
                        N3: DAG.getVectorIdxConstant(Val: 0, DL));
        assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
               "Illegal vector type in truncation");
        WidenIn = DAG.getBitcast(VT: WidenVT, V: WidenIn);
        Results.push_back(
            Elt: DAG.getVectorShuffle(VT: WidenVT, dl: DL, N1: WidenIn, N2: WidenIn, Mask: TruncMask));
        return;
      }
    }

    break;
  }
  }
}
5214
/// Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
static SDValue combineAndNotIntoVANDN(SDNode *N, const SDLoc &DL,
                                      SelectionDAG &DAG) {
  assert(N->getOpcode() == ISD::AND && "Unexpected opcode combine into ANDN");

  // Only combine for 128-bit and 256-bit vector types.
  MVT VT = N->getSimpleValueType(ResNo: 0);
  if (!VT.is128BitVector() && !VT.is256BitVector())
    return SDValue();

  SDValue X, Y;
  SDValue N0 = N->getOperand(Num: 0);
  SDValue N1 = N->getOperand(Num: 1);

  // AND is commutative, so try to find the NOT (xor with all-ones) on either
  // side; isNOT yields the inverted operand X on a match.
  if (SDValue Not = isNOT(V: N0, DAG)) {
    X = Not;
    Y = N1;
  } else if (SDValue Not = isNOT(V: N1, DAG)) {
    X = Not;
    Y = N0;
  } else
    return SDValue();

  // Normalize both operands to the result vector type before building VANDN.
  X = DAG.getBitcast(VT, V: X);
  Y = DAG.getBitcast(VT, V: Y);
  return DAG.getNode(Opcode: LoongArchISD::VANDN, DL, VT, N1: X, N2: Y);
}
5241
// Combine AND nodes: first try the vector (vandn) fold, then try to match a
// shifted-mask AND into LoongArchISD::BSTRPICK (bit-string pick), possibly
// followed by a SHL when the mask does not start at bit 0.
static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const LoongArchSubtarget &Subtarget) {
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  SDValue FirstOperand = N->getOperand(Num: 0);
  SDValue SecondOperand = N->getOperand(Num: 1);
  unsigned FirstOperandOpc = FirstOperand.getOpcode();
  EVT ValTy = N->getValueType(ResNo: 0);
  SDLoc DL(N);
  uint64_t lsb, msb;
  unsigned SMIdx, SMLen;
  ConstantSDNode *CN;
  SDValue NewOperand;
  MVT GRLenVT = Subtarget.getGRLenVT();

  if (SDValue R = combineAndNotIntoVANDN(N, DL, DAG))
    return R;

  // BSTRPICK requires the 32S feature.
  if (!Subtarget.has32S())
    return SDValue();

  // Op's second operand must be a shifted mask.
  if (!(CN = dyn_cast<ConstantSDNode>(Val&: SecondOperand)) ||
      !isShiftedMask_64(Value: CN->getZExtValue(), MaskIdx&: SMIdx, MaskLen&: SMLen))
    return SDValue();

  if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
    // Pattern match BSTRPICK.
    // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
    // => BSTRPICK $dst, $src, msb, lsb
    // where msb = lsb + len - 1

    // The second operand of the shift must be an immediate.
    if (!(CN = dyn_cast<ConstantSDNode>(Val: FirstOperand.getOperand(i: 1))))
      return SDValue();

    lsb = CN->getZExtValue();

    // Return if the shifted mask does not start at bit 0 or the sum of its
    // length and lsb exceeds the word's size.
    if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
      return SDValue();

    NewOperand = FirstOperand.getOperand(i: 0);
  } else {
    // Pattern match BSTRPICK.
    // $dst = and $src, (2**len - 1), if len > 12
    // => BSTRPICK $dst, $src, msb, lsb
    // where lsb = 0 and msb = len - 1

    // If the mask is <= 0xfff, andi can be used instead.
    if (CN->getZExtValue() <= 0xfff)
      return SDValue();

    // Return if the MSB exceeds.
    if (SMIdx + SMLen > ValTy.getSizeInBits())
      return SDValue();

    if (SMIdx > 0) {
      // Omit if the constant has more than 2 uses. This is a conservative
      // decision. Whether it is a win depends on the HW microarchitecture.
      // However it should always be better for 1 and 2 uses.
      if (CN->use_size() > 2)
        return SDValue();
      // Return if the constant can be composed by a single LU12I.W.
      if ((CN->getZExtValue() & 0xfff) == 0)
        return SDValue();
      // Return if the constant can be composed by a single ADDI with
      // the zero register.
      if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
        return SDValue();
    }

    lsb = SMIdx;
    NewOperand = FirstOperand;
  }

  msb = lsb + SMLen - 1;
  SDValue NR0 = DAG.getNode(Opcode: LoongArchISD::BSTRPICK, DL, VT: ValTy, N1: NewOperand,
                            N2: DAG.getConstant(Val: msb, DL, VT: GRLenVT),
                            N3: DAG.getConstant(Val: lsb, DL, VT: GRLenVT));
  if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
    return NR0;
  // Try to optimize to
  //   bstrpick $Rd, $Rs, msb, lsb
  //   slli     $Rd, $Rd, lsb
  return DAG.getNode(Opcode: ISD::SHL, DL, VT: ValTy, N1: NR0,
                     N2: DAG.getConstant(Val: lsb, DL, VT: GRLenVT));
}
5334
// Combine (srl (and $src, Mask), Shamt) into LoongArchISD::BSTRPICK when Mask
// is a shifted mask and the shift amount lies inside the mask's bit range.
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const LoongArchSubtarget &Subtarget) {
  // BSTRPICK requires the 32S feature.
  if (!Subtarget.has32S())
    return SDValue();

  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  // $dst = srl (and $src, Mask), Shamt
  // =>
  // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
  // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
  //

  SDValue FirstOperand = N->getOperand(Num: 0);
  ConstantSDNode *CN;
  EVT ValTy = N->getValueType(ResNo: 0);
  SDLoc DL(N);
  MVT GRLenVT = Subtarget.getGRLenVT();
  unsigned MaskIdx, MaskLen;
  uint64_t Shamt;

  // The first operand must be an AND and the second operand of the AND must be
  // a shifted mask.
  if (FirstOperand.getOpcode() != ISD::AND ||
      !(CN = dyn_cast<ConstantSDNode>(Val: FirstOperand.getOperand(i: 1))) ||
      !isShiftedMask_64(Value: CN->getZExtValue(), MaskIdx, MaskLen))
    return SDValue();

  // The second operand (shift amount) must be an immediate.
  if (!(CN = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1))))
    return SDValue();

  Shamt = CN->getZExtValue();
  // Shamt must fall inside [MaskIdx, MaskIdx+MaskLen-1] so that all surviving
  // bits come from the masked field.
  if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
    return DAG.getNode(Opcode: LoongArchISD::BSTRPICK, DL, VT: ValTy,
                       N1: FirstOperand->getOperand(Num: 0),
                       N2: DAG.getConstant(Val: MaskIdx + MaskLen - 1, DL, VT: GRLenVT),
                       N3: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT));

  return SDValue();
}
5379
// Helper to peek through bitops/trunc/setcc to determine size of source vector.
// Allows BITCASTCombine to determine what size vector generated a <X x i1>.
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
                                      unsigned Depth) {
  // Limit recursion.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return false;
  switch (Src.getOpcode()) {
  case ISD::SETCC:
  case ISD::TRUNCATE:
    // Terminal cases: the operand's size is the source vector size.
    return Src.getOperand(i: 0).getValueSizeInBits() == Size;
  case ISD::FREEZE:
    return checkBitcastSrcVectorSize(Src: Src.getOperand(i: 0), Size, Depth: Depth + 1);
  case ISD::AND:
  case ISD::XOR:
  case ISD::OR:
    // Both operands must trace back to a Size-bit source.
    return checkBitcastSrcVectorSize(Src: Src.getOperand(i: 0), Size, Depth: Depth + 1) &&
           checkBitcastSrcVectorSize(Src: Src.getOperand(i: 1), Size, Depth: Depth + 1);
  case ISD::SELECT:
  case ISD::VSELECT:
    // The condition must already be i1-elemented; check both select arms.
    return Src.getOperand(i: 0).getScalarValueSizeInBits() == 1 &&
           checkBitcastSrcVectorSize(Src: Src.getOperand(i: 1), Size, Depth: Depth + 1) &&
           checkBitcastSrcVectorSize(Src: Src.getOperand(i: 2), Size, Depth: Depth + 1);
  case ISD::BUILD_VECTOR:
    // All-zeros / all-ones constants can be re-materialized at any size.
    return ISD::isBuildVectorAllZeros(N: Src.getNode()) ||
           ISD::isBuildVectorAllOnes(N: Src.getNode());
  }
  return false;
}
5409
// Helper to push sign extension of vXi1 SETCC result through bitops.
// Mirrors the node kinds accepted by checkBitcastSrcVectorSize: leaves get a
// plain SIGN_EXTEND; bitops and selects are rebuilt at SExtVT with recursively
// extended operands.
static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT,
                                          SDValue Src, const SDLoc &DL) {
  switch (Src.getOpcode()) {
  case ISD::SETCC:
  case ISD::FREEZE:
  case ISD::TRUNCATE:
  case ISD::BUILD_VECTOR:
    return DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: SExtVT, Operand: Src);
  case ISD::AND:
  case ISD::XOR:
  case ISD::OR:
    return DAG.getNode(
        Opcode: Src.getOpcode(), DL, VT: SExtVT,
        N1: signExtendBitcastSrcVector(DAG, SExtVT, Src: Src.getOperand(i: 0), DL),
        N2: signExtendBitcastSrcVector(DAG, SExtVT, Src: Src.getOperand(i: 1), DL));
  case ISD::SELECT:
  case ISD::VSELECT:
    // The i1 condition is kept as-is; only the arms are widened.
    return DAG.getSelect(
        DL, VT: SExtVT, Cond: Src.getOperand(i: 0),
        LHS: signExtendBitcastSrcVector(DAG, SExtVT, Src: Src.getOperand(i: 1), DL),
        RHS: signExtendBitcastSrcVector(DAG, SExtVT, Src: Src.getOperand(i: 2), DL));
  }
  llvm_unreachable("Unexpected node type for vXi1 sign extension");
}
5435
// Try to fold (bitcast (setcc X, splat, CC)) into a single [X]VMSK* node
// that moves the per-element compare results into a scalar mask held in a
// GPR. Only comparisons of i8 (and, for SETLT/SETLE, i16/i32/i64) elements
// against an all-zeros or all-ones splat are handled, since only those map
// directly onto the LSX/LASX vmsk{eqz,gez,ltz,nez} instructions.
static SDValue
performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG,
                            TargetLowering::DAGCombinerInfo &DCI,
                            const LoongArchSubtarget &Subtarget) {
  SDLoc DL(N);
  EVT VT = N->getValueType(ResNo: 0);
  SDValue Src = N->getOperand(Num: 0);
  EVT SrcVT = Src.getValueType();

  // The SETCC must feed only this bitcast, otherwise the vector compare
  // result is still needed elsewhere and nothing is saved.
  if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
    return SDValue();

  bool UseLASX;
  unsigned Opc = ISD::DELETED_NODE; // Sentinel: no matching VMSK opcode yet.
  EVT CmpVT = Src.getOperand(i: 0).getValueType();
  EVT EltVT = CmpVT.getVectorElementType();

  // Pick the 128-bit (LSX) or 256-bit (LASX) variant from the compare width.
  if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
    UseLASX = false;
  else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
           CmpVT.getSizeInBits() == 256)
    UseLASX = true;
  else
    return SDValue();

  // Map the condition code + splat operand onto a VMSK opcode.
  SDValue SrcN1 = Src.getOperand(i: 1);
  switch (cast<CondCodeSDNode>(Val: Src.getOperand(i: 2))->get()) {
  default:
    break;
  case ISD::SETEQ:
    // x == 0 => not (vmsknez.b x)
    if (ISD::isBuildVectorAllZeros(N: SrcN1.getNode()) && EltVT == MVT::i8)
      Opc = UseLASX ? LoongArchISD::XVMSKEQZ : LoongArchISD::VMSKEQZ;
    break;
  case ISD::SETGT:
    // x > -1 => vmskgez.b x
    if (ISD::isBuildVectorAllOnes(N: SrcN1.getNode()) && EltVT == MVT::i8)
      Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
    break;
  case ISD::SETGE:
    // x >= 0 => vmskgez.b x
    if (ISD::isBuildVectorAllZeros(N: SrcN1.getNode()) && EltVT == MVT::i8)
      Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
    break;
  case ISD::SETLT:
    // x < 0 => vmskltz.{b,h,w,d} x
    if (ISD::isBuildVectorAllZeros(N: SrcN1.getNode()) &&
        (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
         EltVT == MVT::i64))
      Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
    break;
  case ISD::SETLE:
    // x <= -1 => vmskltz.{b,h,w,d} x
    if (ISD::isBuildVectorAllOnes(N: SrcN1.getNode()) &&
        (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
         EltVT == MVT::i64))
      Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
    break;
  case ISD::SETNE:
    // x != 0 => vmsknez.b x
    if (ISD::isBuildVectorAllZeros(N: SrcN1.getNode()) && EltVT == MVT::i8)
      Opc = UseLASX ? LoongArchISD::XVMSKNEZ : LoongArchISD::VMSKNEZ;
    break;
  }

  if (Opc == ISD::DELETED_NODE)
    return SDValue();

  // Build the mask in a GRLen register, narrow it to one bit per element,
  // and bitcast to the type the original bitcast produced.
  SDValue V = DAG.getNode(Opcode: Opc, DL, VT: Subtarget.getGRLenVT(), Operand: Src.getOperand(i: 0));
  EVT T = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: SrcVT.getVectorNumElements());
  V = DAG.getZExtOrTrunc(Op: V, DL, VT: T);
  return DAG.getBitcast(VT, V);
}
5509
5510static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
5511 TargetLowering::DAGCombinerInfo &DCI,
5512 const LoongArchSubtarget &Subtarget) {
5513 SDLoc DL(N);
5514 EVT VT = N->getValueType(ResNo: 0);
5515 SDValue Src = N->getOperand(Num: 0);
5516 EVT SrcVT = Src.getValueType();
5517 MVT GRLenVT = Subtarget.getGRLenVT();
5518
5519 if (!DCI.isBeforeLegalizeOps())
5520 return SDValue();
5521
5522 if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
5523 return SDValue();
5524
5525 // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
5526 SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
5527 if (Res)
5528 return Res;
5529
5530 // Generate vXi1 using [X]VMSKLTZ
5531 MVT SExtVT;
5532 unsigned Opc;
5533 bool UseLASX = false;
5534 bool PropagateSExt = false;
5535
5536 if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
5537 EVT CmpVT = Src.getOperand(i: 0).getValueType();
5538 if (CmpVT.getSizeInBits() > 256)
5539 return SDValue();
5540 }
5541
5542 switch (SrcVT.getSimpleVT().SimpleTy) {
5543 default:
5544 return SDValue();
5545 case MVT::v2i1:
5546 SExtVT = MVT::v2i64;
5547 break;
5548 case MVT::v4i1:
5549 SExtVT = MVT::v4i32;
5550 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, Size: 256, Depth: 0)) {
5551 SExtVT = MVT::v4i64;
5552 UseLASX = true;
5553 PropagateSExt = true;
5554 }
5555 break;
5556 case MVT::v8i1:
5557 SExtVT = MVT::v8i16;
5558 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, Size: 256, Depth: 0)) {
5559 SExtVT = MVT::v8i32;
5560 UseLASX = true;
5561 PropagateSExt = true;
5562 }
5563 break;
5564 case MVT::v16i1:
5565 SExtVT = MVT::v16i8;
5566 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, Size: 256, Depth: 0)) {
5567 SExtVT = MVT::v16i16;
5568 UseLASX = true;
5569 PropagateSExt = true;
5570 }
5571 break;
5572 case MVT::v32i1:
5573 SExtVT = MVT::v32i8;
5574 UseLASX = true;
5575 break;
5576 };
5577 Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
5578 : DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: SExtVT, Operand: Src);
5579
5580 SDValue V;
5581 if (!Subtarget.has32S() || !Subtarget.hasExtLASX()) {
5582 if (Src.getSimpleValueType() == MVT::v32i8) {
5583 SDValue Lo, Hi;
5584 std::tie(args&: Lo, args&: Hi) = DAG.SplitVector(N: Src, DL);
5585 Lo = DAG.getNode(Opcode: LoongArchISD::VMSKLTZ, DL, VT: GRLenVT, Operand: Lo);
5586 Hi = DAG.getNode(Opcode: LoongArchISD::VMSKLTZ, DL, VT: GRLenVT, Operand: Hi);
5587 Hi = DAG.getNode(Opcode: ISD::SHL, DL, VT: GRLenVT, N1: Hi,
5588 N2: DAG.getShiftAmountConstant(Val: 16, VT: GRLenVT, DL));
5589 V = DAG.getNode(Opcode: ISD::OR, DL, VT: GRLenVT, N1: Lo, N2: Hi);
5590 } else if (UseLASX) {
5591 return SDValue();
5592 }
5593 }
5594
5595 if (!V) {
5596 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5597 V = DAG.getNode(Opcode: Opc, DL, VT: GRLenVT, Operand: Src);
5598 }
5599
5600 EVT T = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: SrcVT.getVectorNumElements());
5601 V = DAG.getZExtOrTrunc(Op: V, DL, VT: T);
5602 return DAG.getBitcast(VT, V);
5603}
5604
// Try to fold (or x, y) into a single BSTRINS (bit-string insert) node.
// Eight shifted-mask patterns are matched in turn; because OR is
// commutative, the operand pair is swapped once and the pattern set is
// retried (the Retry/Retry2 labels) before giving up. The patterns are
// ordered so that more specific forms are tried before more general ones
// (see the notes at patterns 6-8).
static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
                                TargetLowering::DAGCombinerInfo &DCI,
                                const LoongArchSubtarget &Subtarget) {
  MVT GRLenVT = Subtarget.getGRLenVT();
  EVT ValTy = N->getValueType(ResNo: 0);
  SDValue N0 = N->getOperand(Num: 0), N1 = N->getOperand(Num: 1);
  ConstantSDNode *CN0, *CN1;
  SDLoc DL(N);
  unsigned ValBits = ValTy.getSizeInBits();
  unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
  unsigned Shamt;
  bool SwapAndRetried = false;

  // BSTRPICK requires the 32S feature.
  if (!Subtarget.has32S())
    return SDValue();

  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  // BSTRINS only exists in 32- and 64-bit forms.
  if (ValBits != 32 && ValBits != 64)
    return SDValue();

Retry:
  // 1st pattern to match BSTRINS:
  // R = or (and X, mask0), (and (shl Y, lsb), mask1)
  // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
  // =>
  // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
  if (N0.getOpcode() == ISD::AND &&
      (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) &&
      isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
      N1.getOpcode() == ISD::AND && N1.getOperand(i: 0).getOpcode() == ISD::SHL &&
      (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
      isShiftedMask_64(Value: CN1->getZExtValue(), MaskIdx&: MaskIdx1, MaskLen&: MaskLen1) &&
      MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
      (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) &&
      (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
      (MaskIdx0 + MaskLen0 <= ValBits)) {
    LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
    return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0),
                       N2: N1.getOperand(i: 0).getOperand(i: 0),
                       N3: DAG.getConstant(Val: (MaskIdx0 + MaskLen0 - 1), DL, VT: GRLenVT),
                       N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT));
  }

  // 2nd pattern to match BSTRINS:
  // R = or (and X, mask0), (shl (and Y, mask1), lsb)
  // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
  // =>
  // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
  if (N0.getOpcode() == ISD::AND &&
      (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) &&
      isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
      N1.getOpcode() == ISD::SHL && N1.getOperand(i: 0).getOpcode() == ISD::AND &&
      (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
      (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
      (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) &&
      isShiftedMask_64(Value: CN1->getZExtValue(), MaskIdx&: MaskIdx1, MaskLen&: MaskLen1) &&
      MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
      (MaskIdx0 + MaskLen0 <= ValBits)) {
    LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
    return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0),
                       N2: N1.getOperand(i: 0).getOperand(i: 0),
                       N3: DAG.getConstant(Val: (MaskIdx0 + MaskLen0 - 1), DL, VT: GRLenVT),
                       N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT));
  }

  // 3rd pattern to match BSTRINS:
  // R = or (and X, mask0), (and Y, mask1)
  // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
  // =>
  // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
  // where msb = lsb + size - 1
  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
      (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) &&
      isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
      (MaskIdx0 + MaskLen0 <= 64) &&
      (CN1 = dyn_cast<ConstantSDNode>(Val: N1->getOperand(Num: 1))) &&
      (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
    LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
    return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0),
                       N2: DAG.getNode(Opcode: ISD::SRL, DL, VT: N1->getValueType(ResNo: 0), N1,
                                   N2: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT)),
                       N3: DAG.getConstant(Val: ValBits == 32
                                           ? (MaskIdx0 + (MaskLen0 & 31) - 1)
                                           : (MaskIdx0 + MaskLen0 - 1),
                                       DL, VT: GRLenVT),
                       N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT));
  }

  // 4th pattern to match BSTRINS:
  // R = or (and X, mask), (shl Y, shamt)
  // where mask = (2**shamt - 1)
  // =>
  // R = BSTRINS X, Y, ValBits - 1, shamt
  // where ValBits = 32 or 64
  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
      (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) &&
      isShiftedMask_64(Value: CN0->getZExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
      MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
      (Shamt = CN1->getZExtValue()) == MaskLen0 &&
      (MaskIdx0 + MaskLen0 <= ValBits)) {
    LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
    return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0),
                       N2: N1.getOperand(i: 0),
                       N3: DAG.getConstant(Val: (ValBits - 1), DL, VT: GRLenVT),
                       N4: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT));
  }

  // 5th pattern to match BSTRINS:
  // R = or (and X, mask), const
  // where ~mask = (2**size - 1) << lsb, mask & const = 0
  // =>
  // R = BSTRINS X, (const >> lsb), msb, lsb
  // where msb = lsb + size - 1
  if (N0.getOpcode() == ISD::AND &&
      (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) &&
      isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
      (CN1 = dyn_cast<ConstantSDNode>(Val&: N1)) &&
      (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
    LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
    return DAG.getNode(
        Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0),
        N2: DAG.getSignedConstant(Val: CN1->getSExtValue() >> MaskIdx0, DL, VT: ValTy),
        N3: DAG.getConstant(Val: ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
                                     : (MaskIdx0 + MaskLen0 - 1),
                        DL, VT: GRLenVT),
        N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT));
  }

  // 6th pattern.
  // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
  // by the incoming bits are known to be zero.
  // =>
  // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
  //
  // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th
  // pattern is more common than the 1st. So we put the 1st before the 6th in
  // order to match as many nodes as possible.
  ConstantSDNode *CNMask, *CNShamt;
  unsigned MaskIdx, MaskLen;
  if (N1.getOpcode() == ISD::SHL && N1.getOperand(i: 0).getOpcode() == ISD::AND &&
      (CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) &&
      isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen) &&
      MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
      CNShamt->getZExtValue() + MaskLen <= ValBits) {
    Shamt = CNShamt->getZExtValue();
    APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
    if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) {
      LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
      return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0,
                         N2: N1.getOperand(i: 0).getOperand(i: 0),
                         N3: DAG.getConstant(Val: Shamt + MaskLen - 1, DL, VT: GRLenVT),
                         N4: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT));
    }
  }

  // 7th pattern.
  // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
  // overwritten by the incoming bits are known to be zero.
  // =>
  // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
  //
  // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
  // before the 7th in order to match as many nodes as possible.
  if (N1.getOpcode() == ISD::AND &&
      (CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
      isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen) &&
      N1.getOperand(i: 0).getOpcode() == ISD::SHL &&
      (CNShamt = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) &&
      CNShamt->getZExtValue() == MaskIdx) {
    APInt ShMask(ValBits, CNMask->getZExtValue());
    if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) {
      LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
      return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0,
                         N2: N1.getOperand(i: 0).getOperand(i: 0),
                         N3: DAG.getConstant(Val: MaskIdx + MaskLen - 1, DL, VT: GRLenVT),
                         N4: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT));
    }
  }

  // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
  if (!SwapAndRetried) {
    std::swap(a&: N0, b&: N1);
    SwapAndRetried = true;
    goto Retry;
  }

  SwapAndRetried = false;
Retry2:
  // 8th pattern.
  // a = b | (c & shifted_mask), where all positions in b to be overwritten by
  // the incoming bits are known to be zero.
  // =>
  // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
  //
  // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
  // we put it here in order to match as many nodes as possible or generate less
  // instructions.
  if (N1.getOpcode() == ISD::AND &&
      (CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
      isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen)) {
    APInt ShMask(ValBits, CNMask->getZExtValue());
    if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) {
      LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
      return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0,
                         N2: DAG.getNode(Opcode: ISD::SRL, DL, VT: N1->getValueType(ResNo: 0),
                                     N1: N1->getOperand(Num: 0),
                                     N2: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT)),
                         N3: DAG.getConstant(Val: MaskIdx + MaskLen - 1, DL, VT: GRLenVT),
                         N4: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT));
    }
  }
  // Swap N0/N1 and retry.
  if (!SwapAndRetried) {
    std::swap(a&: N0, b&: N1);
    SwapAndRetried = true;
    goto Retry2;
  }

  return SDValue();
}
5828
5829static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
5830 ExtType = ISD::NON_EXTLOAD;
5831
5832 switch (V.getNode()->getOpcode()) {
5833 case ISD::LOAD: {
5834 LoadSDNode *LoadNode = cast<LoadSDNode>(Val: V.getNode());
5835 if ((LoadNode->getMemoryVT() == MVT::i8) ||
5836 (LoadNode->getMemoryVT() == MVT::i16)) {
5837 ExtType = LoadNode->getExtensionType();
5838 return true;
5839 }
5840 return false;
5841 }
5842 case ISD::AssertSext: {
5843 VTSDNode *TypeNode = cast<VTSDNode>(Val: V.getNode()->getOperand(Num: 1));
5844 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5845 ExtType = ISD::SEXTLOAD;
5846 return true;
5847 }
5848 return false;
5849 }
5850 case ISD::AssertZext: {
5851 VTSDNode *TypeNode = cast<VTSDNode>(Val: V.getNode()->getOperand(Num: 1));
5852 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5853 ExtType = ISD::ZEXTLOAD;
5854 return true;
5855 }
5856 return false;
5857 }
5858 default:
5859 return false;
5860 }
5861
5862 return false;
5863}
5864
5865// Eliminate redundant truncation and zero-extension nodes.
5866// * Case 1:
5867// +------------+ +------------+ +------------+
5868// | Input1 | | Input2 | | CC |
5869// +------------+ +------------+ +------------+
5870// | | |
5871// V V +----+
5872// +------------+ +------------+ |
5873// | TRUNCATE | | TRUNCATE | |
5874// +------------+ +------------+ |
5875// | | |
5876// V V |
5877// +------------+ +------------+ |
5878// | ZERO_EXT | | ZERO_EXT | |
5879// +------------+ +------------+ |
5880// | | |
5881// | +-------------+ |
5882// V V | |
5883// +----------------+ | |
5884// | AND | | |
5885// +----------------+ | |
5886// | | |
5887// +---------------+ | |
5888// | | |
5889// V V V
5890// +-------------+
5891// | CMP |
5892// +-------------+
5893// * Case 2:
5894// +------------+ +------------+ +-------------+ +------------+ +------------+
5895// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
5896// +------------+ +------------+ +-------------+ +------------+ +------------+
5897// | | | | |
5898// V | | | |
5899// +------------+ | | | |
5900// | XOR |<---------------------+ | |
5901// +------------+ | | |
5902// | | | |
5903// V V +---------------+ |
5904// +------------+ +------------+ | |
5905// | TRUNCATE | | TRUNCATE | | +-------------------------+
5906// +------------+ +------------+ | |
5907// | | | |
5908// V V | |
5909// +------------+ +------------+ | |
5910// | ZERO_EXT | | ZERO_EXT | | |
5911// +------------+ +------------+ | |
5912// | | | |
5913// V V | |
5914// +----------------+ | |
5915// | AND | | |
5916// +----------------+ | |
5917// | | |
5918// +---------------+ | |
5919// | | |
5920// V V V
5921// +-------------+
5922// | CMP |
5923// +-------------+
static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
                                   TargetLowering::DAGCombinerInfo &DCI,
                                   const LoongArchSubtarget &Subtarget) {
  ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get();

  // The compare's LHS must be the AND at the bottom of the diagrams above.
  SDNode *AndNode = N->getOperand(Num: 0).getNode();
  if (AndNode->getOpcode() != ISD::AND)
    return SDValue();

  // The AND's second input must come through a ZERO_EXTEND in both cases.
  SDValue AndInputValue2 = AndNode->getOperand(Num: 1);
  if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
    return SDValue();

  SDValue CmpInputValue = N->getOperand(Num: 1);
  SDValue AndInputValue1 = AndNode->getOperand(Num: 0);
  if (AndInputValue1.getOpcode() == ISD::XOR) {
    // Case 2: (xor Input1, -1) compared for eq/ne against constant zero.
    if (CC != ISD::SETEQ && CC != ISD::SETNE)
      return SDValue();
    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val: AndInputValue1.getOperand(i: 1));
    if (!CN || !CN->isAllOnes())
      return SDValue();
    CN = dyn_cast<ConstantSDNode>(Val&: CmpInputValue);
    if (!CN || !CN->isZero())
      return SDValue();
    // Look through the XOR to the ZERO_EXTEND underneath.
    AndInputValue1 = AndInputValue1.getOperand(i: 0);
    if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
      return SDValue();
  } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
    // Case 1: the compare RHS must be the same zero-extended Input2.
    if (AndInputValue2 != CmpInputValue)
      return SDValue();
  } else {
    return SDValue();
  }

  // Both ZERO_EXTEND inputs must be TRUNCATE nodes (see diagrams).
  SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(Num: 0);
  if (TruncValue1.getOpcode() != ISD::TRUNCATE)
    return SDValue();

  SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(Num: 0);
  if (TruncValue2.getOpcode() != ISD::TRUNCATE)
    return SDValue();

  // Both truncate inputs must be narrow (i8/i16) values whose extension
  // kind we can classify.
  SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(Num: 0);
  SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(Num: 0);
  ISD::LoadExtType ExtType1;
  ISD::LoadExtType ExtType2;

  if (!checkValueWidth(V: TruncInputValue1, ExtType&: ExtType1) ||
      !checkValueWidth(V: TruncInputValue2, ExtType&: ExtType2))
    return SDValue();

  // The inputs and the AND must all share one value type for the rewrite
  // to be a drop-in replacement.
  if (TruncInputValue1->getValueType(ResNo: 0) != TruncInputValue2->getValueType(ResNo: 0) ||
      AndNode->getValueType(ResNo: 0) != TruncInputValue1->getValueType(ResNo: 0))
    return SDValue();

  // Give up unless at least one input's extension kind (ZEXTLOAD on input 2,
  // or SEXTLOAD on either) proves the truncate/zero-extend pair redundant.
  if ((ExtType2 != ISD::ZEXTLOAD) &&
      ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
    return SDValue();

  // These truncation and zero-extension nodes are not necessary, remove them.
  SDValue NewAnd = DAG.getNode(Opcode: ISD::AND, DL: SDLoc(N), VT: AndNode->getValueType(ResNo: 0),
                               N1: TruncInputValue1, N2: TruncInputValue2);
  SDValue NewSetCC =
      DAG.getSetCC(DL: SDLoc(N), VT: N->getValueType(ResNo: 0), LHS: NewAnd, RHS: TruncInputValue2, Cond: CC);
  DAG.ReplaceAllUsesWith(From: N, To: NewSetCC.getNode());
  return SDValue(N, 0);
}
5991
5992// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
5993static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
5994 TargetLowering::DAGCombinerInfo &DCI,
5995 const LoongArchSubtarget &Subtarget) {
5996 if (DCI.isBeforeLegalizeOps())
5997 return SDValue();
5998
5999 SDValue Src = N->getOperand(Num: 0);
6000 if (Src.getOpcode() != LoongArchISD::REVB_2W)
6001 return SDValue();
6002
6003 return DAG.getNode(Opcode: LoongArchISD::BITREV_4B, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
6004 Operand: Src.getOperand(i: 0));
6005}
6006
// Perform common combines for BR_CC and SELECT_CC conditions. On success the
// LHS/RHS/CC operands are updated in place and true is returned so the
// caller can rebuild its node with the simplified condition; on failure
// false is returned and the operands are left untouched.
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
                       SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
  ISD::CondCode CCVal = cast<CondCodeSDNode>(Val&: CC)->get();

  // As far as arithmetic right shift always saves the sign,
  // shift can be omitted.
  // Fold setlt (sra X, N), 0 -> setlt X, 0 and
  // setge (sra X, N), 0 -> setge X, 0
  if (isNullConstant(V: RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
      LHS.getOpcode() == ISD::SRA) {
    LHS = LHS.getOperand(i: 0);
    return true;
  }

  // The remaining folds only apply to equality comparisons.
  if (!ISD::isIntEqualitySetCC(Code: CCVal))
    return false;

  // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
  // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
  if (LHS.getOpcode() == ISD::SETCC && isNullConstant(V: RHS) &&
      LHS.getOperand(i: 0).getValueType() == Subtarget.getGRLenVT()) {
    // If we're looking for eq 0 instead of ne 0, we need to invert the
    // condition.
    bool Invert = CCVal == ISD::SETEQ;
    CCVal = cast<CondCodeSDNode>(Val: LHS.getOperand(i: 2))->get();
    if (Invert)
      CCVal = ISD::getSetCCInverse(Operation: CCVal, Type: LHS.getValueType());

    RHS = LHS.getOperand(i: 1);
    LHS = LHS.getOperand(i: 0);
    translateSetCCForBranch(DL, LHS, RHS, CC&: CCVal, DAG);

    CC = DAG.getCondCode(Cond: CCVal);
    return true;
  }

  // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, GRLen-1-C), 0, ge/lt)
  if (isNullConstant(V: RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
      LHS.getOperand(i: 1).getOpcode() == ISD::Constant) {
    SDValue LHS0 = LHS.getOperand(i: 0);
    if (LHS0.getOpcode() == ISD::AND &&
        LHS0.getOperand(i: 1).getOpcode() == ISD::Constant) {
      uint64_t Mask = LHS0.getConstantOperandVal(i: 1);
      uint64_t ShAmt = LHS.getConstantOperandVal(i: 1);
      // Only a single-bit mask whose bit is the one being shifted down
      // qualifies; the tested bit then becomes the sign bit after SHL.
      if (isPowerOf2_64(Value: Mask) && Log2_64(Value: Mask) == ShAmt) {
        CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
        CC = DAG.getCondCode(Cond: CCVal);

        ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
        LHS = LHS0.getOperand(i: 0);
        if (ShAmt != 0)
          LHS =
              DAG.getNode(Opcode: ISD::SHL, DL, VT: LHS.getValueType(), N1: LHS0.getOperand(i: 0),
                          N2: DAG.getConstant(Val: ShAmt, DL, VT: LHS.getValueType()));
        return true;
      }
    }
  }

  // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
  // This can occur when legalizing some floating point comparisons.
  APInt Mask = APInt::getBitsSetFrom(numBits: LHS.getValueSizeInBits(), loBit: 1);
  if (isOneConstant(V: RHS) && DAG.MaskedValueIsZero(Op: LHS, Mask)) {
    CCVal = ISD::getSetCCInverse(Operation: CCVal, Type: LHS.getValueType());
    CC = DAG.getCondCode(Cond: CCVal);
    RHS = DAG.getConstant(Val: 0, DL, VT: LHS.getValueType());
    return true;
  }

  return false;
}
6079
6080static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG,
6081 TargetLowering::DAGCombinerInfo &DCI,
6082 const LoongArchSubtarget &Subtarget) {
6083 SDValue LHS = N->getOperand(Num: 1);
6084 SDValue RHS = N->getOperand(Num: 2);
6085 SDValue CC = N->getOperand(Num: 3);
6086 SDLoc DL(N);
6087
6088 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
6089 return DAG.getNode(Opcode: LoongArchISD::BR_CC, DL, VT: N->getValueType(ResNo: 0),
6090 N1: N->getOperand(Num: 0), N2: LHS, N3: RHS, N4: CC, N5: N->getOperand(Num: 4));
6091
6092 return SDValue();
6093}
6094
static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
                                       TargetLowering::DAGCombinerInfo &DCI,
                                       const LoongArchSubtarget &Subtarget) {
  // SELECT_CC operands: (lhs, rhs, cc, truev, falsev).
  SDValue LHS = N->getOperand(Num: 0);
  SDValue RHS = N->getOperand(Num: 1);
  SDValue CC = N->getOperand(Num: 2);
  ISD::CondCode CCVal = cast<CondCodeSDNode>(Val&: CC)->get();
  SDValue TrueV = N->getOperand(Num: 3);
  SDValue FalseV = N->getOperand(Num: 4);
  SDLoc DL(N);
  EVT VT = N->getValueType(ResNo: 0);

  // If the True and False values are the same, we don't need a select_cc.
  if (TrueV == FalseV)
    return TrueV;

  // (select (x < 0), y, z) -> x >> (GRLEN - 1) & (y - z) + z
  // (select (x >= 0), y, z) -> x >> (GRLEN - 1) & (z - y) + y
  if (isa<ConstantSDNode>(Val: TrueV) && isa<ConstantSDNode>(Val: FalseV) &&
      isNullConstant(V: RHS) &&
      (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
    // Normalize to the SETLT form; swapped back below if the branchless
    // rewrite does not apply.
    if (CCVal == ISD::CondCode::SETGE)
      std::swap(a&: TrueV, b&: FalseV);

    int64_t TrueSImm = cast<ConstantSDNode>(Val&: TrueV)->getSExtValue();
    int64_t FalseSImm = cast<ConstantSDNode>(Val&: FalseV)->getSExtValue();
    // Only handle simm12, if it is not in this range, it can be considered as
    // register.
    if (isInt<12>(x: TrueSImm) && isInt<12>(x: FalseSImm) &&
        isInt<12>(x: TrueSImm - FalseSImm)) {
      // sra produces all-ones when x < 0 and all-zeros otherwise, so the AND
      // selects between (y - z) and 0; adding z yields y or z.
      SDValue SRA =
          DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: LHS,
                      N2: DAG.getConstant(Val: Subtarget.getGRLen() - 1, DL, VT));
      SDValue AND =
          DAG.getNode(Opcode: ISD::AND, DL, VT, N1: SRA,
                      N2: DAG.getSignedConstant(Val: TrueSImm - FalseSImm, DL, VT));
      return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: AND, N2: FalseV);
    }

    // Restore the original arm order before trying combine_CC below.
    if (CCVal == ISD::CondCode::SETGE)
      std::swap(a&: TrueV, b&: FalseV);
  }

  // Rebuild the select with a simplified condition when combine_CC applies.
  if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
    return DAG.getNode(Opcode: LoongArchISD::SELECT_CC, DL, VT: N->getValueType(ResNo: 0),
                       Ops: {LHS, RHS, CC, TrueV, FalseV});

  return SDValue();
}
6145
6146template <unsigned N>
6147static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
6148 SelectionDAG &DAG,
6149 const LoongArchSubtarget &Subtarget,
6150 bool IsSigned = false) {
6151 SDLoc DL(Node);
6152 auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: ImmOp));
6153 // Check the ImmArg.
6154 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
6155 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
6156 DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) +
6157 ": argument out of range.");
6158 return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: Subtarget.getGRLenVT());
6159 }
6160 return DAG.getConstant(Val: CImm->getZExtValue(), DL, VT: Subtarget.getGRLenVT());
6161}
6162
6163template <unsigned N>
6164static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
6165 SelectionDAG &DAG, bool IsSigned = false) {
6166 SDLoc DL(Node);
6167 EVT ResTy = Node->getValueType(ResNo: 0);
6168 auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: ImmOp));
6169
6170 // Check the ImmArg.
6171 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
6172 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
6173 DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) +
6174 ": argument out of range.");
6175 return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy);
6176 }
6177 return DAG.getConstant(
6178 Val: APInt(ResTy.getScalarType().getSizeInBits(),
6179 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
6180 DL, VT: ResTy);
6181}
6182
6183static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
6184 SDLoc DL(Node);
6185 EVT ResTy = Node->getValueType(ResNo: 0);
6186 SDValue Vec = Node->getOperand(Num: 2);
6187 SDValue Mask = DAG.getConstant(Val: Vec.getScalarValueSizeInBits() - 1, DL, VT: ResTy);
6188 return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Vec, N2: Mask);
6189}
6190
6191static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
6192 SDLoc DL(Node);
6193 EVT ResTy = Node->getValueType(ResNo: 0);
6194 SDValue One = DAG.getConstant(Val: 1, DL, VT: ResTy);
6195 SDValue Bit =
6196 DAG.getNode(Opcode: ISD::SHL, DL, VT: ResTy, N1: One, N2: truncateVecElts(Node, DAG));
6197
6198 return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Node->getOperand(Num: 1),
6199 N2: DAG.getNOT(DL, Val: Bit, VT: ResTy));
6200}
6201
6202template <unsigned N>
6203static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
6204 SDLoc DL(Node);
6205 EVT ResTy = Node->getValueType(ResNo: 0);
6206 auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2));
6207 // Check the unsigned ImmArg.
6208 if (!isUInt<N>(CImm->getZExtValue())) {
6209 DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) +
6210 ": argument out of range.");
6211 return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy);
6212 }
6213
6214 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6215 SDValue Mask = DAG.getConstant(Val: ~BitImm, DL, VT: ResTy);
6216
6217 return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Node->getOperand(Num: 1), N2: Mask);
6218}
6219
6220template <unsigned N>
6221static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
6222 SDLoc DL(Node);
6223 EVT ResTy = Node->getValueType(ResNo: 0);
6224 auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2));
6225 // Check the unsigned ImmArg.
6226 if (!isUInt<N>(CImm->getZExtValue())) {
6227 DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) +
6228 ": argument out of range.");
6229 return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy);
6230 }
6231
6232 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6233 SDValue BitImm = DAG.getConstant(Val: Imm, DL, VT: ResTy);
6234 return DAG.getNode(Opcode: ISD::OR, DL, VT: ResTy, N1: Node->getOperand(Num: 1), N2: BitImm);
6235}
6236
6237template <unsigned N>
6238static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
6239 SDLoc DL(Node);
6240 EVT ResTy = Node->getValueType(ResNo: 0);
6241 auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2));
6242 // Check the unsigned ImmArg.
6243 if (!isUInt<N>(CImm->getZExtValue())) {
6244 DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) +
6245 ": argument out of range.");
6246 return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy);
6247 }
6248
6249 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6250 SDValue BitImm = DAG.getConstant(Val: Imm, DL, VT: ResTy);
6251 return DAG.getNode(Opcode: ISD::XOR, DL, VT: ResTy, N1: Node->getOperand(Num: 1), N2: BitImm);
6252}
6253
6254template <unsigned W>
6255static SDValue lowerVectorPickVE2GR(SDNode *N, SelectionDAG &DAG,
6256 unsigned ResOp) {
6257 unsigned Imm = N->getConstantOperandVal(Num: 2);
6258 if (!isUInt<W>(Imm)) {
6259 const StringRef ErrorMsg = "argument out of range";
6260 DAG.getContext()->emitError(ErrorStr: N->getOperationName(G: 0) + ": " + ErrorMsg + ".");
6261 return DAG.getUNDEF(VT: N->getValueType(ResNo: 0));
6262 }
6263 SDLoc DL(N);
6264 SDValue Vec = N->getOperand(Num: 1);
6265 SDValue Idx = DAG.getConstant(Val: Imm, DL, VT: MVT::i32);
6266 SDValue EltVT = DAG.getValueType(Vec.getValueType().getVectorElementType());
6267 return DAG.getNode(Opcode: ResOp, DL, VT: N->getValueType(ResNo: 0), N1: Vec, N2: Idx, N3: EltVT);
6268}
6269
/// DAG combine for ISD::INTRINSIC_WO_CHAIN: lower LSX/LASX vector intrinsics
/// to generic ISD opcodes (or LoongArchISD nodes) so the generic combiner and
/// instruction selection can work on them. Immediate-operand variants go
/// through the lowerVector*Imm / lowerVectorSplatImm / legalizeIntrinsicImmArg
/// helpers, which range-check the ImmArg (emitting a diagnostic and returning
/// UNDEF when it is out of range). Returns SDValue() for intrinsics not
/// handled here.
static SDValue
performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const LoongArchSubtarget &Subtarget) {
  SDLoc DL(N);
  // Operand 0 of INTRINSIC_WO_CHAIN is the intrinsic ID.
  switch (N->getConstantOperandVal(Num: 0)) {
  default:
    break;
  // [x]vadd.{b/h/w/d}: element-wise integer add.
  case Intrinsic::loongarch_lsx_vadd_b:
  case Intrinsic::loongarch_lsx_vadd_h:
  case Intrinsic::loongarch_lsx_vadd_w:
  case Intrinsic::loongarch_lsx_vadd_d:
  case Intrinsic::loongarch_lasx_xvadd_b:
  case Intrinsic::loongarch_lasx_xvadd_h:
  case Intrinsic::loongarch_lasx_xvadd_w:
  case Intrinsic::loongarch_lasx_xvadd_d:
    return DAG.getNode(Opcode: ISD::ADD, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: N->getOperand(Num: 2));
  // [x]vaddi.{b/h/w/d}u: add a splatted ui5 immediate.
  case Intrinsic::loongarch_lsx_vaddi_bu:
  case Intrinsic::loongarch_lsx_vaddi_hu:
  case Intrinsic::loongarch_lsx_vaddi_wu:
  case Intrinsic::loongarch_lsx_vaddi_du:
  case Intrinsic::loongarch_lasx_xvaddi_bu:
  case Intrinsic::loongarch_lasx_xvaddi_hu:
  case Intrinsic::loongarch_lasx_xvaddi_wu:
  case Intrinsic::loongarch_lasx_xvaddi_du:
    return DAG.getNode(Opcode: ISD::ADD, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
  // [x]vsub.{b/h/w/d}: element-wise integer subtract.
  case Intrinsic::loongarch_lsx_vsub_b:
  case Intrinsic::loongarch_lsx_vsub_h:
  case Intrinsic::loongarch_lsx_vsub_w:
  case Intrinsic::loongarch_lsx_vsub_d:
  case Intrinsic::loongarch_lasx_xvsub_b:
  case Intrinsic::loongarch_lasx_xvsub_h:
  case Intrinsic::loongarch_lasx_xvsub_w:
  case Intrinsic::loongarch_lasx_xvsub_d:
    return DAG.getNode(Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: N->getOperand(Num: 2));
  // [x]vsubi.{b/h/w/d}u: subtract a splatted ui5 immediate.
  case Intrinsic::loongarch_lsx_vsubi_bu:
  case Intrinsic::loongarch_lsx_vsubi_hu:
  case Intrinsic::loongarch_lsx_vsubi_wu:
  case Intrinsic::loongarch_lsx_vsubi_du:
  case Intrinsic::loongarch_lasx_xvsubi_bu:
  case Intrinsic::loongarch_lasx_xvsubi_hu:
  case Intrinsic::loongarch_lasx_xvsubi_wu:
  case Intrinsic::loongarch_lasx_xvsubi_du:
    return DAG.getNode(Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
  // [x]vneg.{b/h/w/d}: negate == (0 - x).
  case Intrinsic::loongarch_lsx_vneg_b:
  case Intrinsic::loongarch_lsx_vneg_h:
  case Intrinsic::loongarch_lsx_vneg_w:
  case Intrinsic::loongarch_lsx_vneg_d:
  case Intrinsic::loongarch_lasx_xvneg_b:
  case Intrinsic::loongarch_lasx_xvneg_h:
  case Intrinsic::loongarch_lasx_xvneg_w:
  case Intrinsic::loongarch_lasx_xvneg_d:
    return DAG.getNode(
        Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: 0),
        N1: DAG.getConstant(
            Val: APInt(N->getValueType(ResNo: 0).getScalarType().getSizeInBits(), 0,
                  /*isSigned=*/true),
            DL: SDLoc(N), VT: N->getValueType(ResNo: 0)),
        N2: N->getOperand(Num: 1));
  // [x]vmax.{b/h/w/d}: signed element-wise max.
  case Intrinsic::loongarch_lsx_vmax_b:
  case Intrinsic::loongarch_lsx_vmax_h:
  case Intrinsic::loongarch_lsx_vmax_w:
  case Intrinsic::loongarch_lsx_vmax_d:
  case Intrinsic::loongarch_lasx_xvmax_b:
  case Intrinsic::loongarch_lasx_xvmax_h:
  case Intrinsic::loongarch_lasx_xvmax_w:
  case Intrinsic::loongarch_lasx_xvmax_d:
    return DAG.getNode(Opcode: ISD::SMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: N->getOperand(Num: 2));
  // [x]vmax.{b/h/w/d}u: unsigned element-wise max.
  case Intrinsic::loongarch_lsx_vmax_bu:
  case Intrinsic::loongarch_lsx_vmax_hu:
  case Intrinsic::loongarch_lsx_vmax_wu:
  case Intrinsic::loongarch_lsx_vmax_du:
  case Intrinsic::loongarch_lasx_xvmax_bu:
  case Intrinsic::loongarch_lasx_xvmax_hu:
  case Intrinsic::loongarch_lasx_xvmax_wu:
  case Intrinsic::loongarch_lasx_xvmax_du:
    return DAG.getNode(Opcode: ISD::UMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: N->getOperand(Num: 2));
  // [x]vmaxi.{b/h/w/d}: signed max with a splatted si5 immediate.
  case Intrinsic::loongarch_lsx_vmaxi_b:
  case Intrinsic::loongarch_lsx_vmaxi_h:
  case Intrinsic::loongarch_lsx_vmaxi_w:
  case Intrinsic::loongarch_lsx_vmaxi_d:
  case Intrinsic::loongarch_lasx_xvmaxi_b:
  case Intrinsic::loongarch_lasx_xvmaxi_h:
  case Intrinsic::loongarch_lasx_xvmaxi_w:
  case Intrinsic::loongarch_lasx_xvmaxi_d:
    return DAG.getNode(Opcode: ISD::SMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG, /*IsSigned=*/true));
  // [x]vmaxi.{b/h/w/d}u: unsigned max with a splatted ui5 immediate.
  case Intrinsic::loongarch_lsx_vmaxi_bu:
  case Intrinsic::loongarch_lsx_vmaxi_hu:
  case Intrinsic::loongarch_lsx_vmaxi_wu:
  case Intrinsic::loongarch_lsx_vmaxi_du:
  case Intrinsic::loongarch_lasx_xvmaxi_bu:
  case Intrinsic::loongarch_lasx_xvmaxi_hu:
  case Intrinsic::loongarch_lasx_xvmaxi_wu:
  case Intrinsic::loongarch_lasx_xvmaxi_du:
    return DAG.getNode(Opcode: ISD::UMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
  // [x]vmin.{b/h/w/d}: signed element-wise min.
  case Intrinsic::loongarch_lsx_vmin_b:
  case Intrinsic::loongarch_lsx_vmin_h:
  case Intrinsic::loongarch_lsx_vmin_w:
  case Intrinsic::loongarch_lsx_vmin_d:
  case Intrinsic::loongarch_lasx_xvmin_b:
  case Intrinsic::loongarch_lasx_xvmin_h:
  case Intrinsic::loongarch_lasx_xvmin_w:
  case Intrinsic::loongarch_lasx_xvmin_d:
    return DAG.getNode(Opcode: ISD::SMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: N->getOperand(Num: 2));
  // [x]vmin.{b/h/w/d}u: unsigned element-wise min.
  case Intrinsic::loongarch_lsx_vmin_bu:
  case Intrinsic::loongarch_lsx_vmin_hu:
  case Intrinsic::loongarch_lsx_vmin_wu:
  case Intrinsic::loongarch_lsx_vmin_du:
  case Intrinsic::loongarch_lasx_xvmin_bu:
  case Intrinsic::loongarch_lasx_xvmin_hu:
  case Intrinsic::loongarch_lasx_xvmin_wu:
  case Intrinsic::loongarch_lasx_xvmin_du:
    return DAG.getNode(Opcode: ISD::UMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: N->getOperand(Num: 2));
  // [x]vmini.{b/h/w/d}: signed min with a splatted si5 immediate.
  case Intrinsic::loongarch_lsx_vmini_b:
  case Intrinsic::loongarch_lsx_vmini_h:
  case Intrinsic::loongarch_lsx_vmini_w:
  case Intrinsic::loongarch_lsx_vmini_d:
  case Intrinsic::loongarch_lasx_xvmini_b:
  case Intrinsic::loongarch_lasx_xvmini_h:
  case Intrinsic::loongarch_lasx_xvmini_w:
  case Intrinsic::loongarch_lasx_xvmini_d:
    return DAG.getNode(Opcode: ISD::SMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG, /*IsSigned=*/true));
  // [x]vmini.{b/h/w/d}u: unsigned min with a splatted ui5 immediate.
  case Intrinsic::loongarch_lsx_vmini_bu:
  case Intrinsic::loongarch_lsx_vmini_hu:
  case Intrinsic::loongarch_lsx_vmini_wu:
  case Intrinsic::loongarch_lsx_vmini_du:
  case Intrinsic::loongarch_lasx_xvmini_bu:
  case Intrinsic::loongarch_lasx_xvmini_hu:
  case Intrinsic::loongarch_lasx_xvmini_wu:
  case Intrinsic::loongarch_lasx_xvmini_du:
    return DAG.getNode(Opcode: ISD::UMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
  // [x]vmul.{b/h/w/d}: element-wise multiply.
  case Intrinsic::loongarch_lsx_vmul_b:
  case Intrinsic::loongarch_lsx_vmul_h:
  case Intrinsic::loongarch_lsx_vmul_w:
  case Intrinsic::loongarch_lsx_vmul_d:
  case Intrinsic::loongarch_lasx_xvmul_b:
  case Intrinsic::loongarch_lasx_xvmul_h:
  case Intrinsic::loongarch_lasx_xvmul_w:
  case Intrinsic::loongarch_lasx_xvmul_d:
    return DAG.getNode(Opcode: ISD::MUL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: N->getOperand(Num: 2));
  // [x]vmadd.{b/h/w/d}: multiply-accumulate, acc + (a * b).
  case Intrinsic::loongarch_lsx_vmadd_b:
  case Intrinsic::loongarch_lsx_vmadd_h:
  case Intrinsic::loongarch_lsx_vmadd_w:
  case Intrinsic::loongarch_lsx_vmadd_d:
  case Intrinsic::loongarch_lasx_xvmadd_b:
  case Intrinsic::loongarch_lasx_xvmadd_h:
  case Intrinsic::loongarch_lasx_xvmadd_w:
  case Intrinsic::loongarch_lasx_xvmadd_d: {
    EVT ResTy = N->getValueType(ResNo: 0);
    return DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 1),
                       N2: DAG.getNode(Opcode: ISD::MUL, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 2),
                                   N2: N->getOperand(Num: 3)));
  }
  // [x]vmsub.{b/h/w/d}: multiply-subtract, acc - (a * b).
  case Intrinsic::loongarch_lsx_vmsub_b:
  case Intrinsic::loongarch_lsx_vmsub_h:
  case Intrinsic::loongarch_lsx_vmsub_w:
  case Intrinsic::loongarch_lsx_vmsub_d:
  case Intrinsic::loongarch_lasx_xvmsub_b:
  case Intrinsic::loongarch_lasx_xvmsub_h:
  case Intrinsic::loongarch_lasx_xvmsub_w:
  case Intrinsic::loongarch_lasx_xvmsub_d: {
    EVT ResTy = N->getValueType(ResNo: 0);
    return DAG.getNode(Opcode: ISD::SUB, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 1),
                       N2: DAG.getNode(Opcode: ISD::MUL, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 2),
                                   N2: N->getOperand(Num: 3)));
  }
  // [x]vdiv.{b/h/w/d}[u], [x]vmod.{b/h/w/d}[u]: signed/unsigned div and rem.
  case Intrinsic::loongarch_lsx_vdiv_b:
  case Intrinsic::loongarch_lsx_vdiv_h:
  case Intrinsic::loongarch_lsx_vdiv_w:
  case Intrinsic::loongarch_lsx_vdiv_d:
  case Intrinsic::loongarch_lasx_xvdiv_b:
  case Intrinsic::loongarch_lasx_xvdiv_h:
  case Intrinsic::loongarch_lasx_xvdiv_w:
  case Intrinsic::loongarch_lasx_xvdiv_d:
    return DAG.getNode(Opcode: ISD::SDIV, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: N->getOperand(Num: 2));
  case Intrinsic::loongarch_lsx_vdiv_bu:
  case Intrinsic::loongarch_lsx_vdiv_hu:
  case Intrinsic::loongarch_lsx_vdiv_wu:
  case Intrinsic::loongarch_lsx_vdiv_du:
  case Intrinsic::loongarch_lasx_xvdiv_bu:
  case Intrinsic::loongarch_lasx_xvdiv_hu:
  case Intrinsic::loongarch_lasx_xvdiv_wu:
  case Intrinsic::loongarch_lasx_xvdiv_du:
    return DAG.getNode(Opcode: ISD::UDIV, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: N->getOperand(Num: 2));
  case Intrinsic::loongarch_lsx_vmod_b:
  case Intrinsic::loongarch_lsx_vmod_h:
  case Intrinsic::loongarch_lsx_vmod_w:
  case Intrinsic::loongarch_lsx_vmod_d:
  case Intrinsic::loongarch_lasx_xvmod_b:
  case Intrinsic::loongarch_lasx_xvmod_h:
  case Intrinsic::loongarch_lasx_xvmod_w:
  case Intrinsic::loongarch_lasx_xvmod_d:
    return DAG.getNode(Opcode: ISD::SREM, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: N->getOperand(Num: 2));
  case Intrinsic::loongarch_lsx_vmod_bu:
  case Intrinsic::loongarch_lsx_vmod_hu:
  case Intrinsic::loongarch_lsx_vmod_wu:
  case Intrinsic::loongarch_lsx_vmod_du:
  case Intrinsic::loongarch_lasx_xvmod_bu:
  case Intrinsic::loongarch_lasx_xvmod_hu:
  case Intrinsic::loongarch_lasx_xvmod_wu:
  case Intrinsic::loongarch_lasx_xvmod_du:
    return DAG.getNode(Opcode: ISD::UREM, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: N->getOperand(Num: 2));
  // Whole-register bitwise ops: [x]v{and,or,xor,nor}.v and the byte-splat
  // immediate forms [x]v{and,or,xor}i.b.
  case Intrinsic::loongarch_lsx_vand_v:
  case Intrinsic::loongarch_lasx_xvand_v:
    return DAG.getNode(Opcode: ISD::AND, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: N->getOperand(Num: 2));
  case Intrinsic::loongarch_lsx_vor_v:
  case Intrinsic::loongarch_lasx_xvor_v:
    return DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: N->getOperand(Num: 2));
  case Intrinsic::loongarch_lsx_vxor_v:
  case Intrinsic::loongarch_lasx_xvxor_v:
    return DAG.getNode(Opcode: ISD::XOR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: N->getOperand(Num: 2));
  case Intrinsic::loongarch_lsx_vnor_v:
  case Intrinsic::loongarch_lasx_xvnor_v: {
    // nor(a, b) == not(or(a, b)).
    SDValue Res = DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                              N2: N->getOperand(Num: 2));
    return DAG.getNOT(DL, Val: Res, VT: Res->getValueType(ResNo: 0));
  }
  case Intrinsic::loongarch_lsx_vandi_b:
  case Intrinsic::loongarch_lasx_xvandi_b:
    return DAG.getNode(Opcode: ISD::AND, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: lowerVectorSplatImm<8>(Node: N, ImmOp: 2, DAG));
  case Intrinsic::loongarch_lsx_vori_b:
  case Intrinsic::loongarch_lasx_xvori_b:
    return DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: lowerVectorSplatImm<8>(Node: N, ImmOp: 2, DAG));
  case Intrinsic::loongarch_lsx_vxori_b:
  case Intrinsic::loongarch_lasx_xvxori_b:
    return DAG.getNode(Opcode: ISD::XOR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: lowerVectorSplatImm<8>(Node: N, ImmOp: 2, DAG));
  // Shifts: register forms mask the per-element shift amount via
  // truncateVecElts; immediate forms use the element-width-dependent
  // ui3/ui4/ui5/ui6 splat helpers.
  case Intrinsic::loongarch_lsx_vsll_b:
  case Intrinsic::loongarch_lsx_vsll_h:
  case Intrinsic::loongarch_lsx_vsll_w:
  case Intrinsic::loongarch_lsx_vsll_d:
  case Intrinsic::loongarch_lasx_xvsll_b:
  case Intrinsic::loongarch_lasx_xvsll_h:
  case Intrinsic::loongarch_lasx_xvsll_w:
  case Intrinsic::loongarch_lasx_xvsll_d:
    return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: truncateVecElts(Node: N, DAG));
  case Intrinsic::loongarch_lsx_vslli_b:
  case Intrinsic::loongarch_lasx_xvslli_b:
    return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: lowerVectorSplatImm<3>(Node: N, ImmOp: 2, DAG));
  case Intrinsic::loongarch_lsx_vslli_h:
  case Intrinsic::loongarch_lasx_xvslli_h:
    return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: lowerVectorSplatImm<4>(Node: N, ImmOp: 2, DAG));
  case Intrinsic::loongarch_lsx_vslli_w:
  case Intrinsic::loongarch_lasx_xvslli_w:
    return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
  case Intrinsic::loongarch_lsx_vslli_d:
  case Intrinsic::loongarch_lasx_xvslli_d:
    return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: lowerVectorSplatImm<6>(Node: N, ImmOp: 2, DAG));
  case Intrinsic::loongarch_lsx_vsrl_b:
  case Intrinsic::loongarch_lsx_vsrl_h:
  case Intrinsic::loongarch_lsx_vsrl_w:
  case Intrinsic::loongarch_lsx_vsrl_d:
  case Intrinsic::loongarch_lasx_xvsrl_b:
  case Intrinsic::loongarch_lasx_xvsrl_h:
  case Intrinsic::loongarch_lasx_xvsrl_w:
  case Intrinsic::loongarch_lasx_xvsrl_d:
    return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: truncateVecElts(Node: N, DAG));
  case Intrinsic::loongarch_lsx_vsrli_b:
  case Intrinsic::loongarch_lasx_xvsrli_b:
    return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: lowerVectorSplatImm<3>(Node: N, ImmOp: 2, DAG));
  case Intrinsic::loongarch_lsx_vsrli_h:
  case Intrinsic::loongarch_lasx_xvsrli_h:
    return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: lowerVectorSplatImm<4>(Node: N, ImmOp: 2, DAG));
  case Intrinsic::loongarch_lsx_vsrli_w:
  case Intrinsic::loongarch_lasx_xvsrli_w:
    return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
  case Intrinsic::loongarch_lsx_vsrli_d:
  case Intrinsic::loongarch_lasx_xvsrli_d:
    return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: lowerVectorSplatImm<6>(Node: N, ImmOp: 2, DAG))
;
  case Intrinsic::loongarch_lsx_vsra_b:
  case Intrinsic::loongarch_lsx_vsra_h:
  case Intrinsic::loongarch_lsx_vsra_w:
  case Intrinsic::loongarch_lsx_vsra_d:
  case Intrinsic::loongarch_lasx_xvsra_b:
  case Intrinsic::loongarch_lasx_xvsra_h:
  case Intrinsic::loongarch_lasx_xvsra_w:
  case Intrinsic::loongarch_lasx_xvsra_d:
    return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: truncateVecElts(Node: N, DAG));
  case Intrinsic::loongarch_lsx_vsrai_b:
  case Intrinsic::loongarch_lasx_xvsrai_b:
    return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: lowerVectorSplatImm<3>(Node: N, ImmOp: 2, DAG));
  case Intrinsic::loongarch_lsx_vsrai_h:
  case Intrinsic::loongarch_lasx_xvsrai_h:
    return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: lowerVectorSplatImm<4>(Node: N, ImmOp: 2, DAG));
  case Intrinsic::loongarch_lsx_vsrai_w:
  case Intrinsic::loongarch_lasx_xvsrai_w:
    return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
  case Intrinsic::loongarch_lsx_vsrai_d:
  case Intrinsic::loongarch_lasx_xvsrai_d:
    return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: lowerVectorSplatImm<6>(Node: N, ImmOp: 2, DAG));
  // [x]vclz / [x]vpcnt: count leading zeros / population count.
  case Intrinsic::loongarch_lsx_vclz_b:
  case Intrinsic::loongarch_lsx_vclz_h:
  case Intrinsic::loongarch_lsx_vclz_w:
  case Intrinsic::loongarch_lsx_vclz_d:
  case Intrinsic::loongarch_lasx_xvclz_b:
  case Intrinsic::loongarch_lasx_xvclz_h:
  case Intrinsic::loongarch_lasx_xvclz_w:
  case Intrinsic::loongarch_lasx_xvclz_d:
    return DAG.getNode(Opcode: ISD::CTLZ, DL, VT: N->getValueType(ResNo: 0), Operand: N->getOperand(Num: 1));
  case Intrinsic::loongarch_lsx_vpcnt_b:
  case Intrinsic::loongarch_lsx_vpcnt_h:
  case Intrinsic::loongarch_lsx_vpcnt_w:
  case Intrinsic::loongarch_lsx_vpcnt_d:
  case Intrinsic::loongarch_lasx_xvpcnt_b:
  case Intrinsic::loongarch_lasx_xvpcnt_h:
  case Intrinsic::loongarch_lasx_xvpcnt_w:
  case Intrinsic::loongarch_lasx_xvpcnt_d:
    return DAG.getNode(Opcode: ISD::CTPOP, DL, VT: N->getValueType(ResNo: 0), Operand: N->getOperand(Num: 1));
  // Bit clear/set/reverse; immediate widths track the element size.
  case Intrinsic::loongarch_lsx_vbitclr_b:
  case Intrinsic::loongarch_lsx_vbitclr_h:
  case Intrinsic::loongarch_lsx_vbitclr_w:
  case Intrinsic::loongarch_lsx_vbitclr_d:
  case Intrinsic::loongarch_lasx_xvbitclr_b:
  case Intrinsic::loongarch_lasx_xvbitclr_h:
  case Intrinsic::loongarch_lasx_xvbitclr_w:
  case Intrinsic::loongarch_lasx_xvbitclr_d:
    return lowerVectorBitClear(Node: N, DAG);
  case Intrinsic::loongarch_lsx_vbitclri_b:
  case Intrinsic::loongarch_lasx_xvbitclri_b:
    return lowerVectorBitClearImm<3>(Node: N, DAG);
  case Intrinsic::loongarch_lsx_vbitclri_h:
  case Intrinsic::loongarch_lasx_xvbitclri_h:
    return lowerVectorBitClearImm<4>(Node: N, DAG);
  case Intrinsic::loongarch_lsx_vbitclri_w:
  case Intrinsic::loongarch_lasx_xvbitclri_w:
    return lowerVectorBitClearImm<5>(Node: N, DAG);
  case Intrinsic::loongarch_lsx_vbitclri_d:
  case Intrinsic::loongarch_lasx_xvbitclri_d:
    return lowerVectorBitClearImm<6>(Node: N, DAG);
  case Intrinsic::loongarch_lsx_vbitset_b:
  case Intrinsic::loongarch_lsx_vbitset_h:
  case Intrinsic::loongarch_lsx_vbitset_w:
  case Intrinsic::loongarch_lsx_vbitset_d:
  case Intrinsic::loongarch_lasx_xvbitset_b:
  case Intrinsic::loongarch_lasx_xvbitset_h:
  case Intrinsic::loongarch_lasx_xvbitset_w:
  case Intrinsic::loongarch_lasx_xvbitset_d: {
    // bitset(x, y) == x | (1 << (y mod EltBits)).
    EVT VecTy = N->getValueType(ResNo: 0);
    SDValue One = DAG.getConstant(Val: 1, DL, VT: VecTy);
    return DAG.getNode(
        Opcode: ISD::OR, DL, VT: VecTy, N1: N->getOperand(Num: 1),
        N2: DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: One, N2: truncateVecElts(Node: N, DAG)));
  }
  case Intrinsic::loongarch_lsx_vbitseti_b:
  case Intrinsic::loongarch_lasx_xvbitseti_b:
    return lowerVectorBitSetImm<3>(Node: N, DAG);
  case Intrinsic::loongarch_lsx_vbitseti_h:
  case Intrinsic::loongarch_lasx_xvbitseti_h:
    return lowerVectorBitSetImm<4>(Node: N, DAG);
  case Intrinsic::loongarch_lsx_vbitseti_w:
  case Intrinsic::loongarch_lasx_xvbitseti_w:
    return lowerVectorBitSetImm<5>(Node: N, DAG);
  case Intrinsic::loongarch_lsx_vbitseti_d:
  case Intrinsic::loongarch_lasx_xvbitseti_d:
    return lowerVectorBitSetImm<6>(Node: N, DAG);
  case Intrinsic::loongarch_lsx_vbitrev_b:
  case Intrinsic::loongarch_lsx_vbitrev_h:
  case Intrinsic::loongarch_lsx_vbitrev_w:
  case Intrinsic::loongarch_lsx_vbitrev_d:
  case Intrinsic::loongarch_lasx_xvbitrev_b:
  case Intrinsic::loongarch_lasx_xvbitrev_h:
  case Intrinsic::loongarch_lasx_xvbitrev_w:
  case Intrinsic::loongarch_lasx_xvbitrev_d: {
    // bitrev(x, y) == x ^ (1 << (y mod EltBits)).
    EVT VecTy = N->getValueType(ResNo: 0);
    SDValue One = DAG.getConstant(Val: 1, DL, VT: VecTy);
    return DAG.getNode(
        Opcode: ISD::XOR, DL, VT: VecTy, N1: N->getOperand(Num: 1),
        N2: DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: One, N2: truncateVecElts(Node: N, DAG)));
  }
  case Intrinsic::loongarch_lsx_vbitrevi_b:
  case Intrinsic::loongarch_lasx_xvbitrevi_b:
    return lowerVectorBitRevImm<3>(Node: N, DAG);
  case Intrinsic::loongarch_lsx_vbitrevi_h:
  case Intrinsic::loongarch_lasx_xvbitrevi_h:
    return lowerVectorBitRevImm<4>(Node: N, DAG);
  case Intrinsic::loongarch_lsx_vbitrevi_w:
  case Intrinsic::loongarch_lasx_xvbitrevi_w:
    return lowerVectorBitRevImm<5>(Node: N, DAG);
  case Intrinsic::loongarch_lsx_vbitrevi_d:
  case Intrinsic::loongarch_lasx_xvbitrevi_d:
    return lowerVectorBitRevImm<6>(Node: N, DAG);
  // Floating-point arithmetic lowers to generic FP nodes.
  case Intrinsic::loongarch_lsx_vfadd_s:
  case Intrinsic::loongarch_lsx_vfadd_d:
  case Intrinsic::loongarch_lasx_xvfadd_s:
  case Intrinsic::loongarch_lasx_xvfadd_d:
    return DAG.getNode(Opcode: ISD::FADD, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: N->getOperand(Num: 2));
  case Intrinsic::loongarch_lsx_vfsub_s:
  case Intrinsic::loongarch_lsx_vfsub_d:
  case Intrinsic::loongarch_lasx_xvfsub_s:
  case Intrinsic::loongarch_lasx_xvfsub_d:
    return DAG.getNode(Opcode: ISD::FSUB, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: N->getOperand(Num: 2));
  case Intrinsic::loongarch_lsx_vfmul_s:
  case Intrinsic::loongarch_lsx_vfmul_d:
  case Intrinsic::loongarch_lasx_xvfmul_s:
  case Intrinsic::loongarch_lasx_xvfmul_d:
    return DAG.getNode(Opcode: ISD::FMUL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: N->getOperand(Num: 2));
  case Intrinsic::loongarch_lsx_vfdiv_s:
  case Intrinsic::loongarch_lsx_vfdiv_d:
  case Intrinsic::loongarch_lasx_xvfdiv_s:
  case Intrinsic::loongarch_lasx_xvfdiv_d:
    return DAG.getNode(Opcode: ISD::FDIV, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: N->getOperand(Num: 2));
  case Intrinsic::loongarch_lsx_vfmadd_s:
  case Intrinsic::loongarch_lsx_vfmadd_d:
  case Intrinsic::loongarch_lasx_xvfmadd_s:
  case Intrinsic::loongarch_lasx_xvfmadd_d:
    return DAG.getNode(Opcode: ISD::FMA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
                       N2: N->getOperand(Num: 2), N3: N->getOperand(Num: 3));
  // [x]vinsgr2vr: insert a GPR into a vector lane. The index immediate width
  // shrinks as the element width grows (16 x i8 -> ui4 ... 2 x i64 -> ui1).
  case Intrinsic::loongarch_lsx_vinsgr2vr_b:
    return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
                       N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2),
                       N3: legalizeIntrinsicImmArg<4>(Node: N, ImmOp: 3, DAG, Subtarget));
  case Intrinsic::loongarch_lsx_vinsgr2vr_h:
  case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
    return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
                       N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2),
                       N3: legalizeIntrinsicImmArg<3>(Node: N, ImmOp: 3, DAG, Subtarget));
  case Intrinsic::loongarch_lsx_vinsgr2vr_w:
  case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
    return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
                       N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2),
                       N3: legalizeIntrinsicImmArg<2>(Node: N, ImmOp: 3, DAG, Subtarget));
  case Intrinsic::loongarch_lsx_vinsgr2vr_d:
    return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
                       N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2),
                       N3: legalizeIntrinsicImmArg<1>(Node: N, ImmOp: 3, DAG, Subtarget));
  // [x]vreplgr2vr: splat a GPR; extend the scalar to GRLen first.
  case Intrinsic::loongarch_lsx_vreplgr2vr_b:
  case Intrinsic::loongarch_lsx_vreplgr2vr_h:
  case Intrinsic::loongarch_lsx_vreplgr2vr_w:
  case Intrinsic::loongarch_lsx_vreplgr2vr_d:
  case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
  case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
  case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
  case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
    return DAG.getNode(Opcode: LoongArchISD::VREPLGR2VR, DL, VT: N->getValueType(ResNo: 0),
                       Operand: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getGRLenVT(),
                                        Operand: N->getOperand(Num: 1)));
  // [x]vreplve: splat the lane selected by a GPR index.
  case Intrinsic::loongarch_lsx_vreplve_b:
  case Intrinsic::loongarch_lsx_vreplve_h:
  case Intrinsic::loongarch_lsx_vreplve_w:
  case Intrinsic::loongarch_lsx_vreplve_d:
  case Intrinsic::loongarch_lasx_xvreplve_b:
  case Intrinsic::loongarch_lasx_xvreplve_h:
  case Intrinsic::loongarch_lasx_xvreplve_w:
  case Intrinsic::loongarch_lasx_xvreplve_d:
    return DAG.getNode(Opcode: LoongArchISD::VREPLVE, DL, VT: N->getValueType(ResNo: 0),
                       N1: N->getOperand(Num: 1),
                       N2: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getGRLenVT(),
                                   Operand: N->getOperand(Num: 2)));
  // [x]vpickve2gr: only combined here on LA32; the LA64 path is handled
  // elsewhere.
  case Intrinsic::loongarch_lsx_vpickve2gr_b:
    if (!Subtarget.is64Bit())
      return lowerVectorPickVE2GR<4>(N, DAG, ResOp: LoongArchISD::VPICK_SEXT_ELT);
    break;
  case Intrinsic::loongarch_lsx_vpickve2gr_h:
  case Intrinsic::loongarch_lasx_xvpickve2gr_w:
    if (!Subtarget.is64Bit())
      return lowerVectorPickVE2GR<3>(N, DAG, ResOp: LoongArchISD::VPICK_SEXT_ELT);
    break;
  case Intrinsic::loongarch_lsx_vpickve2gr_w:
    if (!Subtarget.is64Bit())
      return lowerVectorPickVE2GR<2>(N, DAG, ResOp: LoongArchISD::VPICK_SEXT_ELT);
    break;
  case Intrinsic::loongarch_lsx_vpickve2gr_bu:
    if (!Subtarget.is64Bit())
      return lowerVectorPickVE2GR<4>(N, DAG, ResOp: LoongArchISD::VPICK_ZEXT_ELT);
    break;
  case Intrinsic::loongarch_lsx_vpickve2gr_hu:
  case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
    if (!Subtarget.is64Bit())
      return lowerVectorPickVE2GR<3>(N, DAG, ResOp: LoongArchISD::VPICK_ZEXT_ELT);
    break;
  case Intrinsic::loongarch_lsx_vpickve2gr_wu:
    if (!Subtarget.is64Bit())
      return lowerVectorPickVE2GR<2>(N, DAG, ResOp: LoongArchISD::VPICK_ZEXT_ELT);
    break;
  // Vector branch conditions ([x]bz / [x]bnz); likewise only on LA32 here.
  case Intrinsic::loongarch_lsx_bz_b:
  case Intrinsic::loongarch_lsx_bz_h:
  case Intrinsic::loongarch_lsx_bz_w:
  case Intrinsic::loongarch_lsx_bz_d:
  case Intrinsic::loongarch_lasx_xbz_b:
  case Intrinsic::loongarch_lasx_xbz_h:
  case Intrinsic::loongarch_lasx_xbz_w:
  case Intrinsic::loongarch_lasx_xbz_d:
    if (!Subtarget.is64Bit())
      return DAG.getNode(Opcode: LoongArchISD::VALL_ZERO, DL, VT: N->getValueType(ResNo: 0),
                         Operand: N->getOperand(Num: 1));
    break;
  case Intrinsic::loongarch_lsx_bz_v:
  case Intrinsic::loongarch_lasx_xbz_v:
    if (!Subtarget.is64Bit())
      return DAG.getNode(Opcode: LoongArchISD::VANY_ZERO, DL, VT: N->getValueType(ResNo: 0),
                         Operand: N->getOperand(Num: 1));
    break;
  case Intrinsic::loongarch_lsx_bnz_b:
  case Intrinsic::loongarch_lsx_bnz_h:
  case Intrinsic::loongarch_lsx_bnz_w:
  case Intrinsic::loongarch_lsx_bnz_d:
  case Intrinsic::loongarch_lasx_xbnz_b:
  case Intrinsic::loongarch_lasx_xbnz_h:
  case Intrinsic::loongarch_lasx_xbnz_w:
  case Intrinsic::loongarch_lasx_xbnz_d:
    if (!Subtarget.is64Bit())
      return DAG.getNode(Opcode: LoongArchISD::VALL_NONZERO, DL, VT: N->getValueType(ResNo: 0),
                         Operand: N->getOperand(Num: 1));
    break;
  case Intrinsic::loongarch_lsx_bnz_v:
  case Intrinsic::loongarch_lasx_xbnz_v:
    if (!Subtarget.is64Bit())
      return DAG.getNode(Opcode: LoongArchISD::VANY_NONZERO, DL, VT: N->getValueType(ResNo: 0),
                         Operand: N->getOperand(Num: 1));
    break;
  // Concatenate two 128-bit vectors into a 256-bit LASX vector.
  case Intrinsic::loongarch_lasx_concat_128_s:
  case Intrinsic::loongarch_lasx_concat_128_d:
  case Intrinsic::loongarch_lasx_concat_128:
    return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: N->getValueType(ResNo: 0),
                       N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2));
  }
  return SDValue();
}
6829
6830static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG,
6831 TargetLowering::DAGCombinerInfo &DCI,
6832 const LoongArchSubtarget &Subtarget) {
6833 // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 the the
6834 // conversion is unnecessary and can be replaced with the
6835 // MOVFR2GR_S_LA64 operand.
6836 SDValue Op0 = N->getOperand(Num: 0);
6837 if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64)
6838 return Op0.getOperand(i: 0);
6839 return SDValue();
6840}
6841
6842static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG,
6843 TargetLowering::DAGCombinerInfo &DCI,
6844 const LoongArchSubtarget &Subtarget) {
6845 // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
6846 // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
6847 // operand.
6848 SDValue Op0 = N->getOperand(Num: 0);
6849 if (Op0->getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) {
6850 assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) &&
6851 "Unexpected value type!");
6852 return Op0.getOperand(i: 0);
6853 }
6854 return SDValue();
6855}
6856
6857static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG,
6858 TargetLowering::DAGCombinerInfo &DCI,
6859 const LoongArchSubtarget &Subtarget) {
6860 MVT VT = N->getSimpleValueType(ResNo: 0);
6861 unsigned NumBits = VT.getScalarSizeInBits();
6862
6863 // Simplify the inputs.
6864 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6865 APInt DemandedMask(APInt::getAllOnes(numBits: NumBits));
6866 if (TLI.SimplifyDemandedBits(Op: SDValue(N, 0), DemandedBits: DemandedMask, DCI))
6867 return SDValue(N, 0);
6868
6869 return SDValue();
6870}
6871
// Do target-specific dag combines on LoongArchISD::SPLIT_PAIR_F64 nodes,
// which split an f64 into two i32 results (lo, hi).
static SDValue
performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG,
                             TargetLowering::DAGCombinerInfo &DCI,
                             const LoongArchSubtarget &Subtarget) {
  SDValue Op0 = N->getOperand(Num: 0);
  SDLoc DL(N);

  // If the input to SplitPairF64 is just BuildPairF64 then the operation is
  // redundant. Instead, use BuildPairF64's operands directly.
  if (Op0->getOpcode() == LoongArchISD::BUILD_PAIR_F64)
    return DCI.CombineTo(N, Res0: Op0.getOperand(i: 0), Res1: Op0.getOperand(i: 1));

  // Splitting an undef double simply yields two undef words.
  if (Op0->isUndef()) {
    SDValue Lo = DAG.getUNDEF(VT: MVT::i32);
    SDValue Hi = DAG.getUNDEF(VT: MVT::i32);
    return DCI.CombineTo(N, Res0: Lo, Res1: Hi);
  }

  // It's cheaper to materialise two 32-bit integers than to load a double
  // from the constant pool and transfer it to integer registers through the
  // stack.
  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val&: Op0)) {
    // Bitcast the FP constant and slice it into low and high 32-bit halves.
    APInt V = C->getValueAPF().bitcastToAPInt();
    SDValue Lo = DAG.getConstant(Val: V.trunc(width: 32), DL, VT: MVT::i32);
    SDValue Hi = DAG.getConstant(Val: V.lshr(shiftAmt: 32).trunc(width: 32), DL, VT: MVT::i32);
    return DCI.CombineTo(N, Res0: Lo, Res1: Hi);
  }

  return SDValue();
}
6902
/// Do target-specific dag combines on LoongArchISD::VANDN nodes.
/// VANDN(x, y) computes AND(NOT(x), y); the folds below exploit constant or
/// inverted operands. The order of the checks matters: cheaper/stronger
/// folds are tried first.
static SDValue performVANDNCombine(SDNode *N, SelectionDAG &DAG,
                                   TargetLowering::DAGCombinerInfo &DCI,
                                   const LoongArchSubtarget &Subtarget) {
  SDValue N0 = N->getOperand(Num: 0);
  SDValue N1 = N->getOperand(Num: 1);
  MVT VT = N->getSimpleValueType(ResNo: 0);
  SDLoc DL(N);

  // VANDN(undef, x) -> 0
  // VANDN(x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(Val: 0, DL, VT);

  // VANDN(0, x) -> x
  // (NOT(0) is all-ones, so the AND passes x through unchanged.)
  if (ISD::isBuildVectorAllZeros(N: N0.getNode()))
    return N1;

  // VANDN(x, 0) -> 0
  if (ISD::isBuildVectorAllZeros(N: N1.getNode()))
    return DAG.getConstant(Val: 0, DL, VT);

  // VANDN(x, -1) -> NOT(x) -> XOR(x, -1)
  if (ISD::isBuildVectorAllOnes(N: N1.getNode()))
    return DAG.getNOT(DL, Val: N0, VT);

  // Turn VANDN back to AND if input is inverted: the two NOTs cancel.
  if (SDValue Not = isNOT(V: N0, DAG))
    return DAG.getNode(Opcode: ISD::AND, DL, VT, N1: DAG.getBitcast(VT, V: Not), N2: N1);

  // Folds for better commutativity:
  // (Guarded by hasOneUse so the rewritten N1 does not duplicate work for
  // other users.)
  if (N1->hasOneUse()) {
    // VANDN(x,NOT(y)) -> AND(NOT(x),NOT(y)) -> NOT(OR(X,Y)).
    if (SDValue Not = isNOT(V: N1, DAG))
      return DAG.getNOT(
          DL, Val: DAG.getNode(Opcode: ISD::OR, DL, VT, N1: N0, N2: DAG.getBitcast(VT, V: Not)), VT);

    // VANDN(x, SplatVector(Imm)) -> AND(NOT(x), NOT(SplatVector(~Imm)))
    // -> NOT(OR(x, SplatVector(-Imm))
    // Combination is performed only when VT is v16i8/v32i8, using `vnori.b` to
    // gain benefits.
    if (!DCI.isBeforeLegalizeOps() && (VT == MVT::v16i8 || VT == MVT::v32i8) &&
        N1.getOpcode() == ISD::BUILD_VECTOR) {
      if (SDValue SplatValue =
              cast<BuildVectorSDNode>(Val: N1.getNode())->getSplatValue()) {
        // Bail out if the splat scalar has other users besides N1; rewriting
        // would not remove it.
        if (!N1->isOnlyUserOf(N: SplatValue.getNode()))
          return SDValue();

        if (auto *C = dyn_cast<ConstantSDNode>(Val&: SplatValue)) {
          // Build the bitwise complement of the splat immediate, then apply
          // the NOT(OR(...)) rewrite shown above.
          uint8_t NCVal = static_cast<uint8_t>(~(C->getSExtValue()));
          SDValue Not =
              DAG.getSplat(VT, DL, Op: DAG.getTargetConstant(Val: NCVal, DL, VT: MVT::i8));
          return DAG.getNOT(
              DL, Val: DAG.getNode(Opcode: ISD::OR, DL, VT, N1: N0, N2: DAG.getBitcast(VT, V: Not)),
              VT);
        }
      }
    }
  }

  return SDValue();
}
6965
// Combine (sint_to_fp (load x)) into a same-width FP load followed by
// LoongArchISD::SITOF, removing the GPR->FPR transfer.
static SDValue performSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
                                        TargetLowering::DAGCombinerInfo &DCI,
                                        const LoongArchSubtarget &Subtarget) {
  SDLoc DL(N);
  EVT VT = N->getValueType(ResNo: 0);

  // Only f32/f64 are handled, and only if the corresponding FP feature is
  // available on the subtarget.
  if (VT != MVT::f32 && VT != MVT::f64)
    return SDValue();
  if (VT == MVT::f32 && !Subtarget.hasBasicF())
    return SDValue();
  if (VT == MVT::f64 && !Subtarget.hasBasicD())
    return SDValue();

  // Only optimize when the source and destination types have the same width.
  if (VT.getSizeInBits() != N->getOperand(Num: 0).getValueSizeInBits())
    return SDValue();

  SDValue Src = N->getOperand(Num: 0);
  // If the result of an integer load is only used by an integer-to-float
  // conversion, use a fp load instead. This eliminates an integer-to-float-move
  // (movgr2fr) instruction.
  if (ISD::isNormalLoad(N: Src.getNode()) && Src.hasOneUse() &&
      // Do not change the width of a volatile load. This condition check is
      // inspired by AArch64.
      !cast<LoadSDNode>(Val&: Src)->isVolatile()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(Val&: Src);
    // Re-issue the load with the FP result type but the same address,
    // alignment and memory-operand flags.
    SDValue Load = DAG.getLoad(VT, dl: DL, Chain: LN0->getChain(), Ptr: LN0->getBasePtr(),
                               PtrInfo: LN0->getPointerInfo(), Alignment: LN0->getAlign(),
                               MMOFlags: LN0->getMemOperand()->getFlags());

    // Make sure successors of the original load stay after it by updating them
    // to use the new Chain.
    DAG.ReplaceAllUsesOfValueWith(From: SDValue(LN0, 1), To: Load.getValue(R: 1));
    return DAG.getNode(Opcode: LoongArchISD::SITOF, DL: SDLoc(N), VT, Operand: Load);
  }

  return SDValue();
}
7004
// Try to widen AND, OR and XOR nodes to VT in order to remove casts around
// logical operations, like in the example below.
// or (and (truncate x, truncate y)),
//     (xor (truncate z, build_vector (constants)))
// Given a target type \p VT, we generate
// or (and x, y), (xor z, zext(build_vector (constants)))
// given x, y and z are of type \p VT. We can do so, if operands are either
// truncates from VT types, the second operand is a vector of constants, can
// be recursively promoted or is an existing extension we can extend further.
//
// Returns the widened node of type \p VT, or an empty SDValue when the tree
// cannot be promoted. \p Depth guards the recursion.
static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL, EVT VT,
                                     SelectionDAG &DAG,
                                     const LoongArchSubtarget &Subtarget,
                                     unsigned Depth) {
  // Limit recursion to avoid excessive compile times.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Only AND/OR/XOR trees are promoted.
  if (!ISD::isBitwiseLogicOp(Opcode: N.getOpcode()))
    return SDValue();

  SDValue N0 = N.getOperand(i: 0);
  SDValue N1 = N.getOperand(i: 1);

  // The widened logic op must itself be legal (or promotable) at VT.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.isOperationLegalOrPromote(Op: N.getOpcode(), VT))
    return SDValue();

  // First try to promote the LHS recursively as a nested logic op.
  if (SDValue NN0 =
          PromoteMaskArithmetic(N: N0, DL, VT, DAG, Subtarget, Depth: Depth + 1))
    N0 = NN0;
  else {
    // The left side has to be a 'trunc'.
    bool LHSTrunc = N0.getOpcode() == ISD::TRUNCATE &&
                    N0.getOperand(i: 0).getValueType() == VT;
    if (LHSTrunc)
      N0 = N0.getOperand(i: 0);
    else
      return SDValue();
  }

  // Then promote the RHS, which permits more forms than the LHS.
  if (SDValue NN1 =
          PromoteMaskArithmetic(N: N1, DL, VT, DAG, Subtarget, Depth: Depth + 1))
    N1 = NN1;
  else {
    // The right side has to be a 'trunc', a (foldable) constant or an
    // existing extension we can extend further.
    bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE &&
                    N1.getOperand(i: 0).getValueType() == VT;
    if (RHSTrunc)
      N1 = N1.getOperand(i: 0);
    else if (ISD::isExtVecInRegOpcode(Opcode: N1.getOpcode()) && VT.is256BitVector() &&
             Subtarget.hasExtLASX() && N1.hasOneUse())
      // Re-issue the in-register extension at the wider type.
      N1 = DAG.getNode(Opcode: N1.getOpcode(), DL, VT, Operand: N1.getOperand(i: 0));
    // On 32-bit platform, i64 is an illegal integer scalar type, and
    // FoldConstantArithmetic will fail for v4i64. This may be optimized in the
    // future.
    else if (SDValue Cst =
                 DAG.FoldConstantArithmetic(Opcode: ISD::ZERO_EXTEND, DL, VT, Ops: {N1}))
      N1 = Cst;
    else
      return SDValue();
  }

  // Both operands are now of type VT; rebuild the logic op at that type.
  return DAG.getNode(Opcode: N.getOpcode(), DL, VT, N1: N0, N2: N1);
}
7070
// On LASX the type v4i1/v8i1/v16i1 may be legalized to v4i32/v8i16/v16i8, which
// is LSX-sized register. In most cases we actually compare or select LASX-sized
// registers and mixing the two types creates horrible code. This method
// optimizes some of the transition sequences.
//
// \p N is an ANY/ZERO/SIGN_EXTEND whose operand is the narrow mask
// arithmetic; on success the widened arithmetic (with the extension semantics
// re-applied in-register) is returned.
static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL,
                                     SelectionDAG &DAG,
                                     const LoongArchSubtarget &Subtarget) {
  EVT VT = N.getValueType();
  assert(VT.isVector() && "Expected vector type");
  assert((N.getOpcode() == ISD::ANY_EXTEND ||
          N.getOpcode() == ISD::ZERO_EXTEND ||
          N.getOpcode() == ISD::SIGN_EXTEND) &&
         "Invalid Node");

  // Only worthwhile when widening to a 256-bit (LASX) register.
  if (!Subtarget.hasExtLASX() || !VT.is256BitVector())
    return SDValue();

  SDValue Narrow = N.getOperand(i: 0);
  EVT NarrowVT = Narrow.getValueType();

  // Generate the wide operation.
  SDValue Op = PromoteMaskArithmetic(N: Narrow, DL, VT, DAG, Subtarget, Depth: 0);
  if (!Op)
    return SDValue();
  // Re-establish the original extension semantics on the widened value.
  switch (N.getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode");
  case ISD::ANY_EXTEND:
    // Any-extend: the upper bits are unspecified, so no fixup is needed.
    return Op;
  case ISD::ZERO_EXTEND:
    return DAG.getZeroExtendInReg(Op, DL, VT: NarrowVT);
  case ISD::SIGN_EXTEND:
    return DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT, N1: Op,
                       N2: DAG.getValueType(NarrowVT));
  }
}
7107
7108static SDValue performEXTENDCombine(SDNode *N, SelectionDAG &DAG,
7109 TargetLowering::DAGCombinerInfo &DCI,
7110 const LoongArchSubtarget &Subtarget) {
7111 EVT VT = N->getValueType(ResNo: 0);
7112 SDLoc DL(N);
7113
7114 if (VT.isVector())
7115 if (SDValue R = PromoteMaskArithmetic(N: SDValue(N, 0), DL, DAG, Subtarget))
7116 return R;
7117
7118 return SDValue();
7119}
7120
// Central dispatch for LoongArch target-specific DAG combines: forwards each
// supported opcode to its dedicated perform*Combine helper. An empty SDValue
// means no combine applied.
SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
                                                   DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  switch (N->getOpcode()) {
  default:
    break;
  case ISD::AND:
    return performANDCombine(N, DAG, DCI, Subtarget);
  case ISD::OR:
    return performORCombine(N, DAG, DCI, Subtarget);
  case ISD::SETCC:
    return performSETCCCombine(N, DAG, DCI, Subtarget);
  case ISD::SRL:
    return performSRLCombine(N, DAG, DCI, Subtarget);
  case ISD::BITCAST:
    return performBITCASTCombine(N, DAG, DCI, Subtarget);
  case ISD::ANY_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::SIGN_EXTEND:
    return performEXTENDCombine(N, DAG, DCI, Subtarget);
  case ISD::SINT_TO_FP:
    return performSINT_TO_FPCombine(N, DAG, DCI, Subtarget);
  case LoongArchISD::BITREV_W:
    return performBITREV_WCombine(N, DAG, DCI, Subtarget);
  case LoongArchISD::BR_CC:
    return performBR_CCCombine(N, DAG, DCI, Subtarget);
  case LoongArchISD::SELECT_CC:
    return performSELECT_CCCombine(N, DAG, DCI, Subtarget);
  case ISD::INTRINSIC_WO_CHAIN:
    return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
  case LoongArchISD::MOVGR2FR_W_LA64:
    return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
  case LoongArchISD::MOVFR2GR_S_LA64:
    return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
  case LoongArchISD::VMSKLTZ:
  case LoongArchISD::XVMSKLTZ:
    return performVMSKLTZCombine(N, DAG, DCI, Subtarget);
  case LoongArchISD::SPLIT_PAIR_F64:
    return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
  case LoongArchISD::VANDN:
    return performVANDNCombine(N, DAG, DCI, Subtarget);
  }
  return SDValue();
}
7165
// Insert a divide-by-zero check after a div/mod instruction: branch over a
// BREAK trap when the divisor is non-zero. Controlled by the ZeroDivCheck
// flag (defined earlier in this file); when disabled, the block is left
// untouched. Returns the block the remaining instructions now live in.
static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
                                              MachineBasicBlock *MBB) {
  if (!ZeroDivCheck)
    return MBB;

  // Build instructions:
  // MBB:
  //   div(or mod)   $dst, $dividend, $divisor
  //   bne           $divisor, $zero, SinkMBB
  // BreakMBB:
  //   break         7 // BRK_DIVZERO
  // SinkMBB:
  //   fallthrough
  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
  MachineFunction::iterator It = ++MBB->getIterator();
  MachineFunction *MF = MBB->getParent();
  auto BreakMBB = MF->CreateMachineBasicBlock(BB: LLVM_BB);
  auto SinkMBB = MF->CreateMachineBasicBlock(BB: LLVM_BB);
  MF->insert(MBBI: It, MBB: BreakMBB);
  MF->insert(MBBI: It, MBB: SinkMBB);

  // Transfer the remainder of MBB and its successor edges to SinkMBB.
  SinkMBB->splice(Where: SinkMBB->end(), Other: MBB, From: std::next(x: MI.getIterator()), To: MBB->end());
  SinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB);

  const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();
  MachineOperand &Divisor = MI.getOperand(i: 2);
  Register DivisorReg = Divisor.getReg();

  // MBB:
  BuildMI(BB: MBB, MIMD: DL, MCID: TII.get(Opcode: LoongArch::BNE))
      .addReg(RegNo: DivisorReg, Flags: getKillRegState(B: Divisor.isKill()))
      .addReg(RegNo: LoongArch::R0)
      .addMBB(MBB: SinkMBB);
  MBB->addSuccessor(Succ: BreakMBB);
  MBB->addSuccessor(Succ: SinkMBB);

  // BreakMBB:
  // See linux header file arch/loongarch/include/uapi/asm/break.h for the
  // definition of BRK_DIVZERO.
  BuildMI(BB: BreakMBB, MIMD: DL, MCID: TII.get(Opcode: LoongArch::BREAK)).addImm(Val: 7 /*BRK_DIVZERO*/);
  BreakMBB->addSuccessor(Succ: SinkMBB);

  // Clear Divisor's kill flag. The BNE above now holds the last use, so the
  // div instruction itself must no longer be marked as killing the register.
  Divisor.setIsKill(false);

  return SinkMBB;
}
7215
// Expand the PseudoVBZ/PseudoVBNZ (and XV*) vector-branch pseudos into real
// control flow that materializes an i32 0/1 result:
//   BB:       $fcc = vset<cond> $vec ; bcnez $fcc, TrueBB
//   FalseBB:  $rd1 = addi.w $zero, 0 ; b SinkBB
//   TrueBB:   $rd2 = addi.w $zero, 1 ; (fallthrough)
//   SinkBB:   $dst = phi [$rd1, FalseBB], [$rd2, TrueBB]
static MachineBasicBlock *
emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
                        const LoongArchSubtarget &Subtarget) {
  // Map each pseudo to the condition-setting vector instruction it tests.
  unsigned CondOpc;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode");
  case LoongArch::PseudoVBZ:
    CondOpc = LoongArch::VSETEQZ_V;
    break;
  case LoongArch::PseudoVBZ_B:
    CondOpc = LoongArch::VSETANYEQZ_B;
    break;
  case LoongArch::PseudoVBZ_H:
    CondOpc = LoongArch::VSETANYEQZ_H;
    break;
  case LoongArch::PseudoVBZ_W:
    CondOpc = LoongArch::VSETANYEQZ_W;
    break;
  case LoongArch::PseudoVBZ_D:
    CondOpc = LoongArch::VSETANYEQZ_D;
    break;
  case LoongArch::PseudoVBNZ:
    CondOpc = LoongArch::VSETNEZ_V;
    break;
  case LoongArch::PseudoVBNZ_B:
    CondOpc = LoongArch::VSETALLNEZ_B;
    break;
  case LoongArch::PseudoVBNZ_H:
    CondOpc = LoongArch::VSETALLNEZ_H;
    break;
  case LoongArch::PseudoVBNZ_W:
    CondOpc = LoongArch::VSETALLNEZ_W;
    break;
  case LoongArch::PseudoVBNZ_D:
    CondOpc = LoongArch::VSETALLNEZ_D;
    break;
  case LoongArch::PseudoXVBZ:
    CondOpc = LoongArch::XVSETEQZ_V;
    break;
  case LoongArch::PseudoXVBZ_B:
    CondOpc = LoongArch::XVSETANYEQZ_B;
    break;
  case LoongArch::PseudoXVBZ_H:
    CondOpc = LoongArch::XVSETANYEQZ_H;
    break;
  case LoongArch::PseudoXVBZ_W:
    CondOpc = LoongArch::XVSETANYEQZ_W;
    break;
  case LoongArch::PseudoXVBZ_D:
    CondOpc = LoongArch::XVSETANYEQZ_D;
    break;
  case LoongArch::PseudoXVBNZ:
    CondOpc = LoongArch::XVSETNEZ_V;
    break;
  case LoongArch::PseudoXVBNZ_B:
    CondOpc = LoongArch::XVSETALLNEZ_B;
    break;
  case LoongArch::PseudoXVBNZ_H:
    CondOpc = LoongArch::XVSETALLNEZ_H;
    break;
  case LoongArch::PseudoXVBNZ_W:
    CondOpc = LoongArch::XVSETALLNEZ_W;
    break;
  case LoongArch::PseudoXVBNZ_D:
    CondOpc = LoongArch::XVSETALLNEZ_D;
    break;
  }

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  MachineFunction::iterator It = ++BB->getIterator();

  MachineFunction *F = BB->getParent();
  MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
  MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
  MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(BB: LLVM_BB);

  F->insert(MBBI: It, MBB: FalseBB);
  F->insert(MBBI: It, MBB: TrueBB);
  F->insert(MBBI: It, MBB: SinkBB);

  // Transfer the remainder of MBB and its successor edges to Sink.
  SinkBB->splice(Where: SinkBB->end(), Other: BB, From: std::next(x: MI.getIterator()), To: BB->end());
  SinkBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);

  // Insert the real instruction to BB: set a condition-flag register from
  // the vector operand.
  Register FCC = MRI.createVirtualRegister(RegClass: &LoongArch::CFRRegClass);
  BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: CondOpc), DestReg: FCC).addReg(RegNo: MI.getOperand(i: 1).getReg());

  // Insert branch: taken (condition non-zero) goes to TrueBB, otherwise
  // fall through to FalseBB.
  BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::BCNEZ)).addReg(RegNo: FCC).addMBB(MBB: TrueBB);
  BB->addSuccessor(Succ: FalseBB);
  BB->addSuccessor(Succ: TrueBB);

  // FalseBB: materialize 0 and jump over TrueBB to the sink.
  Register RD1 = MRI.createVirtualRegister(RegClass: &LoongArch::GPRRegClass);
  BuildMI(BB: FalseBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::ADDI_W), DestReg: RD1)
      .addReg(RegNo: LoongArch::R0)
      .addImm(Val: 0);
  BuildMI(BB: FalseBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::PseudoBR)).addMBB(MBB: SinkBB);
  FalseBB->addSuccessor(Succ: SinkBB);

  // TrueBB: materialize 1; falls through to SinkBB.
  Register RD2 = MRI.createVirtualRegister(RegClass: &LoongArch::GPRRegClass);
  BuildMI(BB: TrueBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::ADDI_W), DestReg: RD2)
      .addReg(RegNo: LoongArch::R0)
      .addImm(Val: 1);
  TrueBB->addSuccessor(Succ: SinkBB);

  // SinkBB: merge the results.
  BuildMI(BB&: *SinkBB, I: SinkBB->begin(), MIMD: DL, MCID: TII->get(Opcode: LoongArch::PHI),
          DestReg: MI.getOperand(i: 0).getReg())
      .addReg(RegNo: RD1)
      .addMBB(MBB: FalseBB)
      .addReg(RegNo: RD2)
      .addMBB(MBB: TrueBB);

  // The pseudo instruction is gone now.
  MI.eraseFromParent();
  return SinkBB;
}
7340
// Expand PseudoXVINSGR2VR_{B,H}: insert a GPR element into a 256-bit (LASX)
// vector, which has no single-instruction byte/halfword insert. Two
// strategies are used depending on whether the upper source half matters.
static MachineBasicBlock *
emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
                     const LoongArchSubtarget &Subtarget) {
  unsigned InsOp;
  unsigned BroadcastOp;
  unsigned HalfSize; // number of elements in one 128-bit half
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode");
  case LoongArch::PseudoXVINSGR2VR_B:
    HalfSize = 16;
    BroadcastOp = LoongArch::XVREPLGR2VR_B;
    InsOp = LoongArch::XVEXTRINS_B;
    break;
  case LoongArch::PseudoXVINSGR2VR_H:
    HalfSize = 8;
    BroadcastOp = LoongArch::XVREPLGR2VR_H;
    InsOp = LoongArch::XVEXTRINS_H;
    break;
  }
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
  const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
  DebugLoc DL = MI.getDebugLoc();
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  // XDst = vector_insert XSrc, Elt, Idx
  Register XDst = MI.getOperand(i: 0).getReg();
  Register XSrc = MI.getOperand(i: 1).getReg();
  Register Elt = MI.getOperand(i: 2).getReg();
  unsigned Idx = MI.getOperand(i: 3).getImm();

  // Fast path: the source is IMPLICIT_DEF (its contents don't matter) and
  // the index lies in the low 128-bit half, so an LSX-level insert on the
  // low subregister suffices.
  if (XSrc.isVirtual() && MRI.getVRegDef(Reg: XSrc)->isImplicitDef() &&
      Idx < HalfSize) {
    Register ScratchSubReg1 = MRI.createVirtualRegister(RegClass: SubRC);
    Register ScratchSubReg2 = MRI.createVirtualRegister(RegClass: SubRC);

    // Work on the low 128-bit subregister of the source.
    BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::COPY), DestReg: ScratchSubReg1)
        .addReg(RegNo: XSrc, Flags: {}, SubReg: LoongArch::sub_128);
    BuildMI(BB&: *BB, I&: MI, MIMD: DL,
            MCID: TII->get(Opcode: HalfSize == 8 ? LoongArch::VINSGR2VR_H
                                     : LoongArch::VINSGR2VR_B),
            DestReg: ScratchSubReg2)
        .addReg(RegNo: ScratchSubReg1)
        .addReg(RegNo: Elt)
        .addImm(Val: Idx);

    // Widen the 128-bit result back to a 256-bit register.
    BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::SUBREG_TO_REG), DestReg: XDst)
        .addReg(RegNo: ScratchSubReg2)
        .addImm(Val: LoongArch::sub_128);
  } else {
    // General path: broadcast the element across a vector, shuffle the
    // relevant 128-bit half into place, then overwrite one lane of XSrc.
    Register ScratchReg1 = MRI.createVirtualRegister(RegClass: RC);
    Register ScratchReg2 = MRI.createVirtualRegister(RegClass: RC);

    BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: BroadcastOp), DestReg: ScratchReg1).addReg(RegNo: Elt);

    // XVPERMI_Q immediate selects which 128-bit halves of the two sources
    // form the result (48 vs 18 depending on the target half) — see the ISA
    // manual for the encoding.
    BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::XVPERMI_Q), DestReg: ScratchReg2)
        .addReg(RegNo: ScratchReg1)
        .addReg(RegNo: XSrc)
        .addImm(Val: Idx >= HalfSize ? 48 : 18);

    // XVEXTRINS immediate: (in-half index) * 17 = 0x11 * idx, i.e. the same
    // index in both nibbles (destination and source element index).
    BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: InsOp), DestReg: XDst)
        .addReg(RegNo: XSrc)
        .addReg(RegNo: ScratchReg2)
        .addImm(Val: (Idx >= HalfSize ? Idx - HalfSize : Idx) * 17);
  }

  MI.eraseFromParent();
  return BB;
}
7410
// Expand a scalar CTPOP pseudo using LSX: move the GPR into element 0 of a
// zeroed vector, run the vector population-count, and move the result back.
static MachineBasicBlock *emitPseudoCTPOP(MachineInstr &MI,
                                          MachineBasicBlock *BB,
                                          const LoongArchSubtarget &Subtarget) {
  assert(Subtarget.hasExtLSX());
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
  DebugLoc DL = MI.getDebugLoc();
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  Register Dst = MI.getOperand(i: 0).getReg();
  Register Src = MI.getOperand(i: 1).getReg();
  Register ScratchReg1 = MRI.createVirtualRegister(RegClass: RC);
  Register ScratchReg2 = MRI.createVirtualRegister(RegClass: RC);
  Register ScratchReg3 = MRI.createVirtualRegister(RegClass: RC);

  // Zero the vector, then insert the scalar into element 0. Element width
  // (D vs W) follows the native GPR width.
  BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::VLDI), DestReg: ScratchReg1).addImm(Val: 0);
  BuildMI(BB&: *BB, I&: MI, MIMD: DL,
          MCID: TII->get(Opcode: Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
                                          : LoongArch::VINSGR2VR_W),
          DestReg: ScratchReg2)
      .addReg(RegNo: ScratchReg1)
      .addReg(RegNo: Src)
      .addImm(Val: 0);
  // Vector popcount; only element 0 holds meaningful data.
  BuildMI(
      BB&: *BB, I&: MI, MIMD: DL,
      MCID: TII->get(Opcode: Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
      DestReg: ScratchReg3)
      .addReg(RegNo: ScratchReg2);
  // Extract element 0 back into the destination GPR.
  BuildMI(BB&: *BB, I&: MI, MIMD: DL,
          MCID: TII->get(Opcode: Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
                                          : LoongArch::VPICKVE2GR_W),
          DestReg: Dst)
      .addReg(RegNo: ScratchReg3)
      .addImm(Val: 0);

  MI.eraseFromParent();
  return BB;
}
7448
// Expand the PseudoVMSK*/PseudoXVMSK* pseudos: compute a per-element
// condition mask with a [X]VMSK instruction (optionally inverted via NOR for
// the EQZ variants, which are implemented as NOT(VMSKNZ)), then move the
// mask bits into a GPR. For 256-bit vectors the two 128-bit mask words are
// extracted separately and merged with BSTRINS.
static MachineBasicBlock *
emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB,
                   const LoongArchSubtarget &Subtarget) {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
  const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  Register Dst = MI.getOperand(i: 0).getReg();
  Register Src = MI.getOperand(i: 1).getReg();
  DebugLoc DL = MI.getDebugLoc();
  unsigned EleBits = 8;  // element width in bits; selects the BSTRINS range
  unsigned NotOpc = 0;   // non-zero => invert the mask with this NOR opcode
  unsigned MskOpc;
  
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode");
  case LoongArch::PseudoVMSKLTZ_B:
    MskOpc = LoongArch::VMSKLTZ_B;
    break;
  case LoongArch::PseudoVMSKLTZ_H:
    MskOpc = LoongArch::VMSKLTZ_H;
    EleBits = 16;
    break;
  case LoongArch::PseudoVMSKLTZ_W:
    MskOpc = LoongArch::VMSKLTZ_W;
    EleBits = 32;
    break;
  case LoongArch::PseudoVMSKLTZ_D:
    MskOpc = LoongArch::VMSKLTZ_D;
    EleBits = 64;
    break;
  case LoongArch::PseudoVMSKGEZ_B:
    MskOpc = LoongArch::VMSKGEZ_B;
    break;
  case LoongArch::PseudoVMSKEQZ_B:
    // EQZ has no direct instruction: use NOT(VMSKNZ).
    MskOpc = LoongArch::VMSKNZ_B;
    NotOpc = LoongArch::VNOR_V;
    break;
  case LoongArch::PseudoVMSKNEZ_B:
    MskOpc = LoongArch::VMSKNZ_B;
    break;
  case LoongArch::PseudoXVMSKLTZ_B:
    MskOpc = LoongArch::XVMSKLTZ_B;
    RC = &LoongArch::LASX256RegClass;
    break;
  case LoongArch::PseudoXVMSKLTZ_H:
    MskOpc = LoongArch::XVMSKLTZ_H;
    RC = &LoongArch::LASX256RegClass;
    EleBits = 16;
    break;
  case LoongArch::PseudoXVMSKLTZ_W:
    MskOpc = LoongArch::XVMSKLTZ_W;
    RC = &LoongArch::LASX256RegClass;
    EleBits = 32;
    break;
  case LoongArch::PseudoXVMSKLTZ_D:
    MskOpc = LoongArch::XVMSKLTZ_D;
    RC = &LoongArch::LASX256RegClass;
    EleBits = 64;
    break;
  case LoongArch::PseudoXVMSKGEZ_B:
    MskOpc = LoongArch::XVMSKGEZ_B;
    RC = &LoongArch::LASX256RegClass;
    break;
  case LoongArch::PseudoXVMSKEQZ_B:
    // EQZ has no direct instruction: use NOT(XVMSKNZ).
    MskOpc = LoongArch::XVMSKNZ_B;
    NotOpc = LoongArch::XVNOR_V;
    RC = &LoongArch::LASX256RegClass;
    break;
  case LoongArch::PseudoXVMSKNEZ_B:
    MskOpc = LoongArch::XVMSKNZ_B;
    RC = &LoongArch::LASX256RegClass;
    break;
  }

  // Compute the mask, inverting it via NOR(x, x) when required.
  Register Msk = MRI.createVirtualRegister(RegClass: RC);
  if (NotOpc) {
    Register Tmp = MRI.createVirtualRegister(RegClass: RC);
    BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: MskOpc), DestReg: Tmp).addReg(RegNo: Src);
    BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: NotOpc), DestReg: Msk)
        .addReg(RegNo: Tmp, Flags: RegState::Kill)
        .addReg(RegNo: Tmp, Flags: RegState::Kill);
  } else {
    BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: MskOpc), DestReg: Msk).addReg(RegNo: Src);
  }

  if (TRI->getRegSizeInBits(RC: *RC) > 128) {
    // 256-bit case: pull the mask word out of each 128-bit half, then insert
    // the high bits above the low bits with BSTRINS.
    Register Lo = MRI.createVirtualRegister(RegClass: &LoongArch::GPRRegClass);
    Register Hi = MRI.createVirtualRegister(RegClass: &LoongArch::GPRRegClass);
    BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::XVPICKVE2GR_WU), DestReg: Lo)
        .addReg(RegNo: Msk)
        .addImm(Val: 0);
    BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::XVPICKVE2GR_WU), DestReg: Hi)
        .addReg(RegNo: Msk, Flags: RegState::Kill)
        .addImm(Val: 4);
    // Bit range [256/EleBits - 1 : 128/EleBits] receives the high half's
    // mask bits.
    BuildMI(BB&: *BB, I&: MI, MIMD: DL,
            MCID: TII->get(Opcode: Subtarget.is64Bit() ? LoongArch::BSTRINS_D
                                            : LoongArch::BSTRINS_W),
            DestReg: Dst)
        .addReg(RegNo: Lo, Flags: RegState::Kill)
        .addReg(RegNo: Hi, Flags: RegState::Kill)
        .addImm(Val: 256 / EleBits - 1)
        .addImm(Val: 128 / EleBits);
  } else {
    // 128-bit case: the whole mask fits in one halfword extract.
    BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::VPICKVE2GR_HU), DestReg: Dst)
        .addReg(RegNo: Msk, Flags: RegState::Kill)
        .addImm(Val: 0);
  }

  MI.eraseFromParent();
  return BB;
}
7562
7563static MachineBasicBlock *
7564emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB,
7565 const LoongArchSubtarget &Subtarget) {
7566 assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
7567 "Unexpected instruction");
7568
7569 MachineFunction &MF = *BB->getParent();
7570 DebugLoc DL = MI.getDebugLoc();
7571 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
7572 Register LoReg = MI.getOperand(i: 0).getReg();
7573 Register HiReg = MI.getOperand(i: 1).getReg();
7574 Register SrcReg = MI.getOperand(i: 2).getReg();
7575
7576 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: LoongArch::MOVFR2GR_S_64), DestReg: LoReg).addReg(RegNo: SrcReg);
7577 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: LoongArch::MOVFRH2GR_S), DestReg: HiReg)
7578 .addReg(RegNo: SrcReg, Flags: getKillRegState(B: MI.getOperand(i: 2).isKill()));
7579 MI.eraseFromParent(); // The pseudo instruction is gone now.
7580 return BB;
7581}
7582
// Expand BuildPairF64Pseudo: assemble an f64 FPR from two GPRs by writing
// the low word (MOVGR2FR_W_64) and then the high word (MOVGR2FRH_W) through
// a temporary register.
static MachineBasicBlock *
emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB,
                       const LoongArchSubtarget &Subtarget) {
  assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
         "Unexpected instruction");

  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  Register TmpReg = MRI.createVirtualRegister(RegClass: &LoongArch::FPR64RegClass);
  Register DstReg = MI.getOperand(i: 0).getReg();
  Register LoReg = MI.getOperand(i: 1).getReg();
  Register HiReg = MI.getOperand(i: 2).getReg();

  // Write the low word into a fresh FPR, forwarding the operand kill flag.
  BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: LoongArch::MOVGR2FR_W_64), DestReg: TmpReg)
      .addReg(RegNo: LoReg, Flags: getKillRegState(B: MI.getOperand(i: 1).isKill()));
  // Write the high word on top of the temporary; the temporary dies here.
  BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: LoongArch::MOVGR2FRH_W), DestReg: DstReg)
      .addReg(RegNo: TmpReg, Flags: RegState::Kill)
      .addReg(RegNo: HiReg, Flags: getKillRegState(B: MI.getOperand(i: 2).isKill()));
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
7606
7607static bool isSelectPseudo(MachineInstr &MI) {
7608 switch (MI.getOpcode()) {
7609 default:
7610 return false;
7611 case LoongArch::Select_GPR_Using_CC_GPR:
7612 return true;
7613 }
7614}
7615
// Expand Select_* pseudo-instructions into a triangle of basic blocks with a
// PHI per select, fusing a run of selects that share the same condition into
// a single branch. Returns the tail block that now holds the fall-through
// code.
static MachineBasicBlock *
emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB,
                 const LoongArchSubtarget &Subtarget) {
  // To "insert" Select_* instructions, we actually have to insert the triangle
  // control-flow pattern. The incoming instructions know the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and the condcode to use to select the appropriate branch.
  //
  // We produce the following control flow:
  //     HeadMBB
  //     |  \
  //     |  IfFalseMBB
  //     | /
  //    TailMBB
  //
  // When we find a sequence of selects we attempt to optimize their emission
  // by sharing the control flow. Currently we only handle cases where we have
  // multiple selects with the exact same condition (same LHS, RHS and CC).
  // The selects may be interleaved with other instructions if the other
  // instructions meet some requirements we deem safe:
  // - They are not pseudo instructions.
  // - They are debug instructions. Otherwise,
  // - They do not have side-effects, do not access memory and their inputs do
  //   not depend on the results of the select pseudo-instructions.
  // The TrueV/FalseV operands of the selects cannot depend on the result of
  // previous selects in the sequence.
  // These conditions could be further relaxed. See the X86 target for a
  // related approach and more information.

  Register LHS = MI.getOperand(i: 1).getReg();
  Register RHS;
  if (MI.getOperand(i: 2).isReg())
    RHS = MI.getOperand(i: 2).getReg();
  // The CC operand is itself the branch opcode to emit.
  auto CC = static_cast<unsigned>(MI.getOperand(i: 3).getImm());

  SmallVector<MachineInstr *, 4> SelectDebugValues;
  SmallSet<Register, 4> SelectDests;
  SelectDests.insert(V: MI.getOperand(i: 0).getReg());

  // Scan forward from MI to find the last select that can share this branch,
  // per the safety rules described above.
  MachineInstr *LastSelectPseudo = &MI;
  for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
       SequenceMBBI != E; ++SequenceMBBI) {
    if (SequenceMBBI->isDebugInstr())
      continue;
    if (isSelectPseudo(MI&: *SequenceMBBI)) {
      // Fuse only selects with the identical condition whose true/false
      // inputs don't read any earlier select's result.
      if (SequenceMBBI->getOperand(i: 1).getReg() != LHS ||
          !SequenceMBBI->getOperand(i: 2).isReg() ||
          SequenceMBBI->getOperand(i: 2).getReg() != RHS ||
          SequenceMBBI->getOperand(i: 3).getImm() != CC ||
          SelectDests.count(V: SequenceMBBI->getOperand(i: 4).getReg()) ||
          SelectDests.count(V: SequenceMBBI->getOperand(i: 5).getReg()))
        break;
      LastSelectPseudo = &*SequenceMBBI;
      SequenceMBBI->collectDebugValues(DbgValues&: SelectDebugValues);
      SelectDests.insert(V: SequenceMBBI->getOperand(i: 0).getReg());
      continue;
    }
    // Non-select instruction: it may stay interleaved only if it is safe.
    if (SequenceMBBI->hasUnmodeledSideEffects() ||
        SequenceMBBI->mayLoadOrStore() ||
        SequenceMBBI->usesCustomInsertionHook())
      break;
    if (llvm::any_of(Range: SequenceMBBI->operands(), P: [&](MachineOperand &MO) {
          return MO.isReg() && MO.isUse() && SelectDests.count(V: MO.getReg());
        }))
      break;
  }

  const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction::iterator I = ++BB->getIterator();

  MachineBasicBlock *HeadMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
  MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);

  F->insert(MBBI: I, MBB: IfFalseMBB);
  F->insert(MBBI: I, MBB: TailMBB);

  // Set the call frame size on entry to the new basic blocks.
  unsigned CallFrameSize = TII.getCallFrameSizeAt(MI&: *LastSelectPseudo);
  IfFalseMBB->setCallFrameSize(CallFrameSize);
  TailMBB->setCallFrameSize(CallFrameSize);

  // Transfer debug instructions associated with the selects to TailMBB.
  for (MachineInstr *DebugInstr : SelectDebugValues) {
    TailMBB->push_back(MI: DebugInstr->removeFromParent());
  }

  // Move all instructions after the sequence to TailMBB.
  TailMBB->splice(Where: TailMBB->end(), Other: HeadMBB,
                  From: std::next(x: LastSelectPseudo->getIterator()), To: HeadMBB->end());
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi nodes for the selects.
  TailMBB->transferSuccessorsAndUpdatePHIs(FromMBB: HeadMBB);
  // Set the successors for HeadMBB.
  HeadMBB->addSuccessor(Succ: IfFalseMBB);
  HeadMBB->addSuccessor(Succ: TailMBB);

  // Insert appropriate branch (register-register or register-immediate form).
  if (MI.getOperand(i: 2).isImm())
    BuildMI(BB: HeadMBB, MIMD: DL, MCID: TII.get(Opcode: CC))
        .addReg(RegNo: LHS)
        .addImm(Val: MI.getOperand(i: 2).getImm())
        .addMBB(MBB: TailMBB);
  else
    BuildMI(BB: HeadMBB, MIMD: DL, MCID: TII.get(Opcode: CC)).addReg(RegNo: LHS).addReg(RegNo: RHS).addMBB(MBB: TailMBB);

  // IfFalseMBB just falls through to TailMBB.
  IfFalseMBB->addSuccessor(Succ: TailMBB);

  // Create PHIs for all of the select pseudo-instructions.
  auto SelectMBBI = MI.getIterator();
  auto SelectEnd = std::next(x: LastSelectPseudo->getIterator());
  auto InsertionPoint = TailMBB->begin();
  while (SelectMBBI != SelectEnd) {
    auto Next = std::next(x: SelectMBBI);
    if (isSelectPseudo(MI&: *SelectMBBI)) {
      // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
      BuildMI(BB&: *TailMBB, I: InsertionPoint, MIMD: SelectMBBI->getDebugLoc(),
              MCID: TII.get(Opcode: LoongArch::PHI), DestReg: SelectMBBI->getOperand(i: 0).getReg())
          .addReg(RegNo: SelectMBBI->getOperand(i: 4).getReg())
          .addMBB(MBB: HeadMBB)
          .addReg(RegNo: SelectMBBI->getOperand(i: 5).getReg())
          .addMBB(MBB: IfFalseMBB);
      SelectMBBI->eraseFromParent();
    }
    SelectMBBI = Next;
  }

  // PHIs were introduced, so the function no longer has the NoPHIs property.
  F->getProperties().resetNoPHIs();
  return TailMBB;
}
7750
7751MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
7752 MachineInstr &MI, MachineBasicBlock *BB) const {
7753 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7754 DebugLoc DL = MI.getDebugLoc();
7755
7756 switch (MI.getOpcode()) {
7757 default:
7758 llvm_unreachable("Unexpected instr type to insert");
7759 case LoongArch::DIV_W:
7760 case LoongArch::DIV_WU:
7761 case LoongArch::MOD_W:
7762 case LoongArch::MOD_WU:
7763 case LoongArch::DIV_D:
7764 case LoongArch::DIV_DU:
7765 case LoongArch::MOD_D:
7766 case LoongArch::MOD_DU:
7767 return insertDivByZeroTrap(MI, MBB: BB);
7768 break;
7769 case LoongArch::WRFCSR: {
7770 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::MOVGR2FCSR),
7771 DestReg: LoongArch::FCSR0 + MI.getOperand(i: 0).getImm())
7772 .addReg(RegNo: MI.getOperand(i: 1).getReg());
7773 MI.eraseFromParent();
7774 return BB;
7775 }
7776 case LoongArch::RDFCSR: {
7777 MachineInstr *ReadFCSR =
7778 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::MOVFCSR2GR),
7779 DestReg: MI.getOperand(i: 0).getReg())
7780 .addReg(RegNo: LoongArch::FCSR0 + MI.getOperand(i: 1).getImm());
7781 ReadFCSR->getOperand(i: 1).setIsUndef();
7782 MI.eraseFromParent();
7783 return BB;
7784 }
7785 case LoongArch::Select_GPR_Using_CC_GPR:
7786 return emitSelectPseudo(MI, BB, Subtarget);
7787 case LoongArch::BuildPairF64Pseudo:
7788 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
7789 case LoongArch::SplitPairF64Pseudo:
7790 return emitSplitPairF64Pseudo(MI, BB, Subtarget);
7791 case LoongArch::PseudoVBZ:
7792 case LoongArch::PseudoVBZ_B:
7793 case LoongArch::PseudoVBZ_H:
7794 case LoongArch::PseudoVBZ_W:
7795 case LoongArch::PseudoVBZ_D:
7796 case LoongArch::PseudoVBNZ:
7797 case LoongArch::PseudoVBNZ_B:
7798 case LoongArch::PseudoVBNZ_H:
7799 case LoongArch::PseudoVBNZ_W:
7800 case LoongArch::PseudoVBNZ_D:
7801 case LoongArch::PseudoXVBZ:
7802 case LoongArch::PseudoXVBZ_B:
7803 case LoongArch::PseudoXVBZ_H:
7804 case LoongArch::PseudoXVBZ_W:
7805 case LoongArch::PseudoXVBZ_D:
7806 case LoongArch::PseudoXVBNZ:
7807 case LoongArch::PseudoXVBNZ_B:
7808 case LoongArch::PseudoXVBNZ_H:
7809 case LoongArch::PseudoXVBNZ_W:
7810 case LoongArch::PseudoXVBNZ_D:
7811 return emitVecCondBranchPseudo(MI, BB, Subtarget);
7812 case LoongArch::PseudoXVINSGR2VR_B:
7813 case LoongArch::PseudoXVINSGR2VR_H:
7814 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
7815 case LoongArch::PseudoCTPOP:
7816 return emitPseudoCTPOP(MI, BB, Subtarget);
7817 case LoongArch::PseudoVMSKLTZ_B:
7818 case LoongArch::PseudoVMSKLTZ_H:
7819 case LoongArch::PseudoVMSKLTZ_W:
7820 case LoongArch::PseudoVMSKLTZ_D:
7821 case LoongArch::PseudoVMSKGEZ_B:
7822 case LoongArch::PseudoVMSKEQZ_B:
7823 case LoongArch::PseudoVMSKNEZ_B:
7824 case LoongArch::PseudoXVMSKLTZ_B:
7825 case LoongArch::PseudoXVMSKLTZ_H:
7826 case LoongArch::PseudoXVMSKLTZ_W:
7827 case LoongArch::PseudoXVMSKLTZ_D:
7828 case LoongArch::PseudoXVMSKGEZ_B:
7829 case LoongArch::PseudoXVMSKEQZ_B:
7830 case LoongArch::PseudoXVMSKNEZ_B:
7831 return emitPseudoVMSKCOND(MI, BB, Subtarget);
7832 case TargetOpcode::STATEPOINT:
7833 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
7834 // while bl call instruction (where statepoint will be lowered at the
7835 // end) has implicit def. This def is early-clobber as it will be set at
7836 // the moment of the call and earlier than any use is read.
7837 // Add this implicit dead def here as a workaround.
7838 MI.addOperand(MF&: *MI.getMF(),
7839 Op: MachineOperand::CreateReg(
7840 Reg: LoongArch::R1, /*isDef*/ true,
7841 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
7842 /*isUndef*/ false, /*isEarlyClobber*/ true));
7843 if (!Subtarget.is64Bit())
7844 report_fatal_error(reason: "STATEPOINT is only supported on 64-bit targets");
7845 return emitPatchPoint(MI, MBB: BB);
7846 }
7847}
7848
7849bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
7850 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
7851 unsigned *Fast) const {
7852 if (!Subtarget.hasUAL())
7853 return false;
7854
7855 // TODO: set reasonable speed number.
7856 if (Fast)
7857 *Fast = 1;
7858 return true;
7859}
7860
7861//===----------------------------------------------------------------------===//
7862// Calling Convention Implementation
7863//===----------------------------------------------------------------------===//
7864
// Eight general-purpose registers a0-a7 used for passing integer arguments,
// with a0-a1 reused to return values. Generally, the GPRs are used to pass
// fixed-point arguments, and floating-point arguments when no FPR is available
// or with soft float ABI.
const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
                             LoongArch::R7, LoongArch::R8, LoongArch::R9,
                             LoongArch::R10, LoongArch::R11};

// PreserveNone calling convention:
// Arguments may be passed in any general-purpose registers except:
// - R1 : return address register
// - R22 : frame pointer
// - R31 : base pointer
//
// All general-purpose registers are treated as caller-saved,
// except R1 (RA) and R22 (FP).
//
// Non-volatile registers are allocated first so that a function
// can call normal functions without having to spill and reload
// argument registers.
const MCPhysReg PreserveNoneArgGPRs[] = {
    LoongArch::R23, LoongArch::R24, LoongArch::R25, LoongArch::R26,
    LoongArch::R27, LoongArch::R28, LoongArch::R29, LoongArch::R30,
    LoongArch::R4, LoongArch::R5, LoongArch::R6, LoongArch::R7,
    LoongArch::R8, LoongArch::R9, LoongArch::R10, LoongArch::R11,
    LoongArch::R12, LoongArch::R13, LoongArch::R14, LoongArch::R15,
    LoongArch::R16, LoongArch::R17, LoongArch::R18, LoongArch::R19,
    LoongArch::R20};

// Eight floating-point registers fa0-fa7 used for passing floating-point
// arguments, and fa0-fa1 are also used to return values.
const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
                               LoongArch::F3, LoongArch::F4, LoongArch::F5,
                               LoongArch::F6, LoongArch::F7};
// FPR32 and FPR64 alias each other.
const MCPhysReg ArgFPR64s[] = {
    LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
    LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};

// Eight 128-bit LSX vector registers vr0-vr7 used for passing vector
// arguments.
const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
                            LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
                            LoongArch::VR6, LoongArch::VR7};

// Eight 256-bit LASX vector registers xr0-xr7 used for passing vector
// arguments.
const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
                            LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
                            LoongArch::XR6, LoongArch::XR7};
7911
7912static Register allocateArgGPR(CCState &State) {
7913 switch (State.getCallingConv()) {
7914 case CallingConv::PreserveNone:
7915 if (!State.isVarArg())
7916 return State.AllocateReg(Regs: PreserveNoneArgGPRs);
7917 [[fallthrough]];
7918 default:
7919 return State.AllocateReg(Regs: ArgGPRs);
7920 }
7921}
7922
7923// Pass a 2*GRLen argument that has been split into two GRLen values through
7924// registers or the stack as necessary.
7925static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
7926 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
7927 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
7928 ISD::ArgFlagsTy ArgFlags2) {
7929 unsigned GRLenInBytes = GRLen / 8;
7930 if (Register Reg = allocateArgGPR(State)) {
7931 // At least one half can be passed via register.
7932 State.addLoc(V: CCValAssign::getReg(ValNo: VA1.getValNo(), ValVT: VA1.getValVT(), Reg,
7933 LocVT: VA1.getLocVT(), HTP: CCValAssign::Full));
7934 } else {
7935 // Both halves must be passed on the stack, with proper alignment.
7936 Align StackAlign =
7937 std::max(a: Align(GRLenInBytes), b: ArgFlags1.getNonZeroOrigAlign());
7938 State.addLoc(
7939 V: CCValAssign::getMem(ValNo: VA1.getValNo(), ValVT: VA1.getValVT(),
7940 Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: StackAlign),
7941 LocVT: VA1.getLocVT(), HTP: CCValAssign::Full));
7942 State.addLoc(V: CCValAssign::getMem(
7943 ValNo: ValNo2, ValVT: ValVT2, Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: Align(GRLenInBytes)),
7944 LocVT: LocVT2, HTP: CCValAssign::Full));
7945 return false;
7946 }
7947 if (Register Reg = allocateArgGPR(State)) {
7948 // The second half can also be passed via register.
7949 State.addLoc(
7950 V: CCValAssign::getReg(ValNo: ValNo2, ValVT: ValVT2, Reg, LocVT: LocVT2, HTP: CCValAssign::Full));
7951 } else {
7952 // The second half is passed via the stack, without additional alignment.
7953 State.addLoc(V: CCValAssign::getMem(
7954 ValNo: ValNo2, ValVT: ValVT2, Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: Align(GRLenInBytes)),
7955 LocVT: LocVT2, HTP: CCValAssign::Full));
7956 }
7957 return false;
7958}
7959
7960// Implements the LoongArch calling convention. Returns true upon failure.
7961static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
7962 unsigned ValNo, MVT ValVT,
7963 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
7964 CCState &State, bool IsRet, Type *OrigTy) {
7965 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
7966 assert((GRLen == 32 || GRLen == 64) && "Unspport GRLen");
7967 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
7968 MVT LocVT = ValVT;
7969
7970 // Any return value split into more than two values can't be returned
7971 // directly.
7972 if (IsRet && ValNo > 1)
7973 return true;
7974
7975 // If passing a variadic argument, or if no FPR is available.
7976 bool UseGPRForFloat = true;
7977
7978 switch (ABI) {
7979 default:
7980 llvm_unreachable("Unexpected ABI");
7981 break;
7982 case LoongArchABI::ABI_ILP32F:
7983 case LoongArchABI::ABI_LP64F:
7984 case LoongArchABI::ABI_ILP32D:
7985 case LoongArchABI::ABI_LP64D:
7986 UseGPRForFloat = ArgFlags.isVarArg();
7987 break;
7988 case LoongArchABI::ABI_ILP32S:
7989 case LoongArchABI::ABI_LP64S:
7990 break;
7991 }
7992
7993 // If this is a variadic argument, the LoongArch calling convention requires
7994 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
7995 // byte alignment. An aligned register should be used regardless of whether
7996 // the original argument was split during legalisation or not. The argument
7997 // will not be passed by registers if the original type is larger than
7998 // 2*GRLen, so the register alignment rule does not apply.
7999 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
8000 if (ArgFlags.isVarArg() &&
8001 ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
8002 DL.getTypeAllocSize(Ty: OrigTy) == TwoGRLenInBytes) {
8003 unsigned RegIdx = State.getFirstUnallocated(Regs: ArgGPRs);
8004 // Skip 'odd' register if necessary.
8005 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
8006 State.AllocateReg(Regs: ArgGPRs);
8007 }
8008
8009 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
8010 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
8011 State.getPendingArgFlags();
8012
8013 assert(PendingLocs.size() == PendingArgFlags.size() &&
8014 "PendingLocs and PendingArgFlags out of sync");
8015
8016 // FPR32 and FPR64 alias each other.
8017 if (State.getFirstUnallocated(Regs: ArgFPR32s) == std::size(ArgFPR32s))
8018 UseGPRForFloat = true;
8019
8020 if (UseGPRForFloat && ValVT == MVT::f32) {
8021 LocVT = GRLenVT;
8022 LocInfo = CCValAssign::BCvt;
8023 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
8024 LocVT = MVT::i64;
8025 LocInfo = CCValAssign::BCvt;
8026 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
8027 // Handle passing f64 on LA32D with a soft float ABI or when floating point
8028 // registers are exhausted.
8029 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
8030 // Depending on available argument GPRS, f64 may be passed in a pair of
8031 // GPRs, split between a GPR and the stack, or passed completely on the
8032 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
8033 // cases.
8034 MCRegister Reg = allocateArgGPR(State);
8035 if (!Reg) {
8036 int64_t StackOffset = State.AllocateStack(Size: 8, Alignment: Align(8));
8037 State.addLoc(
8038 V: CCValAssign::getMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo));
8039 return false;
8040 }
8041 LocVT = MVT::i32;
8042 State.addLoc(V: CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, HTP: LocInfo));
8043 MCRegister HiReg = allocateArgGPR(State);
8044 if (HiReg) {
8045 State.addLoc(
8046 V: CCValAssign::getCustomReg(ValNo, ValVT, Reg: HiReg, LocVT, HTP: LocInfo));
8047 } else {
8048 int64_t StackOffset = State.AllocateStack(Size: 4, Alignment: Align(4));
8049 State.addLoc(
8050 V: CCValAssign::getCustomMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo));
8051 }
8052 return false;
8053 }
8054
8055 // Split arguments might be passed indirectly, so keep track of the pending
8056 // values.
8057 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
8058 LocVT = GRLenVT;
8059 LocInfo = CCValAssign::Indirect;
8060 PendingLocs.push_back(
8061 Elt: CCValAssign::getPending(ValNo, ValVT, LocVT, HTP: LocInfo));
8062 PendingArgFlags.push_back(Elt: ArgFlags);
8063 if (!ArgFlags.isSplitEnd()) {
8064 return false;
8065 }
8066 }
8067
8068 // If the split argument only had two elements, it should be passed directly
8069 // in registers or on the stack.
8070 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
8071 PendingLocs.size() <= 2) {
8072 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
8073 // Apply the normal calling convention rules to the first half of the
8074 // split argument.
8075 CCValAssign VA = PendingLocs[0];
8076 ISD::ArgFlagsTy AF = PendingArgFlags[0];
8077 PendingLocs.clear();
8078 PendingArgFlags.clear();
8079 return CC_LoongArchAssign2GRLen(GRLen, State, VA1: VA, ArgFlags1: AF, ValNo2: ValNo, ValVT2: ValVT, LocVT2: LocVT,
8080 ArgFlags2: ArgFlags);
8081 }
8082
8083 // Allocate to a register if possible, or else a stack slot.
8084 Register Reg;
8085 unsigned StoreSizeBytes = GRLen / 8;
8086 Align StackAlign = Align(GRLen / 8);
8087
8088 if (ValVT == MVT::f32 && !UseGPRForFloat) {
8089 Reg = State.AllocateReg(Regs: ArgFPR32s);
8090 } else if (ValVT == MVT::f64 && !UseGPRForFloat) {
8091 Reg = State.AllocateReg(Regs: ArgFPR64s);
8092 } else if (ValVT.is128BitVector()) {
8093 Reg = State.AllocateReg(Regs: ArgVRs);
8094 UseGPRForFloat = false;
8095 StoreSizeBytes = 16;
8096 StackAlign = Align(16);
8097 } else if (ValVT.is256BitVector()) {
8098 Reg = State.AllocateReg(Regs: ArgXRs);
8099 UseGPRForFloat = false;
8100 StoreSizeBytes = 32;
8101 StackAlign = Align(32);
8102 } else {
8103 Reg = allocateArgGPR(State);
8104 }
8105
8106 unsigned StackOffset =
8107 Reg ? 0 : State.AllocateStack(Size: StoreSizeBytes, Alignment: StackAlign);
8108
8109 // If we reach this point and PendingLocs is non-empty, we must be at the
8110 // end of a split argument that must be passed indirectly.
8111 if (!PendingLocs.empty()) {
8112 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
8113 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
8114 for (auto &It : PendingLocs) {
8115 if (Reg)
8116 It.convertToReg(Reg);
8117 else
8118 It.convertToMem(Offset: StackOffset);
8119 State.addLoc(V: It);
8120 }
8121 PendingLocs.clear();
8122 PendingArgFlags.clear();
8123 return false;
8124 }
8125 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
8126 "Expected an GRLenVT at this stage");
8127
8128 if (Reg) {
8129 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, HTP: LocInfo));
8130 return false;
8131 }
8132
8133 // When a floating-point value is passed on the stack, no bit-cast is needed.
8134 if (ValVT.isFloatingPoint()) {
8135 LocVT = ValVT;
8136 LocInfo = CCValAssign::Full;
8137 }
8138
8139 State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo));
8140 return false;
8141}
8142
8143void LoongArchTargetLowering::analyzeInputArgs(
8144 MachineFunction &MF, CCState &CCInfo,
8145 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
8146 LoongArchCCAssignFn Fn) const {
8147 FunctionType *FType = MF.getFunction().getFunctionType();
8148 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
8149 MVT ArgVT = Ins[i].VT;
8150 Type *ArgTy = nullptr;
8151 if (IsRet)
8152 ArgTy = FType->getReturnType();
8153 else if (Ins[i].isOrigArg())
8154 ArgTy = FType->getParamType(i: Ins[i].getOrigArgIndex());
8155 LoongArchABI::ABI ABI =
8156 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8157 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
8158 CCInfo, IsRet, ArgTy)) {
8159 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
8160 << '\n');
8161 llvm_unreachable("");
8162 }
8163 }
8164}
8165
8166void LoongArchTargetLowering::analyzeOutputArgs(
8167 MachineFunction &MF, CCState &CCInfo,
8168 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
8169 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
8170 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8171 MVT ArgVT = Outs[i].VT;
8172 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
8173 LoongArchABI::ABI ABI =
8174 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8175 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
8176 CCInfo, IsRet, OrigTy)) {
8177 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
8178 << "\n");
8179 llvm_unreachable("");
8180 }
8181 }
8182}
8183
8184// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
8185// values.
8186static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
8187 const CCValAssign &VA, const SDLoc &DL) {
8188 switch (VA.getLocInfo()) {
8189 default:
8190 llvm_unreachable("Unexpected CCValAssign::LocInfo");
8191 case CCValAssign::Full:
8192 case CCValAssign::Indirect:
8193 break;
8194 case CCValAssign::BCvt:
8195 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
8196 Val = DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64, DL, VT: MVT::f32, Operand: Val);
8197 else
8198 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: VA.getValVT(), Operand: Val);
8199 break;
8200 }
8201 return Val;
8202}
8203
8204static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
8205 const CCValAssign &VA, const SDLoc &DL,
8206 const ISD::InputArg &In,
8207 const LoongArchTargetLowering &TLI) {
8208 MachineFunction &MF = DAG.getMachineFunction();
8209 MachineRegisterInfo &RegInfo = MF.getRegInfo();
8210 EVT LocVT = VA.getLocVT();
8211 SDValue Val;
8212 const TargetRegisterClass *RC = TLI.getRegClassFor(VT: LocVT.getSimpleVT());
8213 Register VReg = RegInfo.createVirtualRegister(RegClass: RC);
8214 RegInfo.addLiveIn(Reg: VA.getLocReg(), vreg: VReg);
8215 Val = DAG.getCopyFromReg(Chain, dl: DL, Reg: VReg, VT: LocVT);
8216
8217 // If input is sign extended from 32 bits, note it for the OptW pass.
8218 if (In.isOrigArg()) {
8219 Argument *OrigArg = MF.getFunction().getArg(i: In.getOrigArgIndex());
8220 if (OrigArg->getType()->isIntegerTy()) {
8221 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
8222 // An input zero extended from i31 can also be considered sign extended.
8223 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
8224 (BitWidth < 32 && In.Flags.isZExt())) {
8225 LoongArchMachineFunctionInfo *LAFI =
8226 MF.getInfo<LoongArchMachineFunctionInfo>();
8227 LAFI->addSExt32Register(Reg: VReg);
8228 }
8229 }
8230 }
8231
8232 return convertLocVTToValVT(DAG, Val, VA, DL);
8233}
8234
8235// The caller is responsible for loading the full value if the argument is
8236// passed with CCValAssign::Indirect.
8237static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
8238 const CCValAssign &VA, const SDLoc &DL) {
8239 MachineFunction &MF = DAG.getMachineFunction();
8240 MachineFrameInfo &MFI = MF.getFrameInfo();
8241 EVT ValVT = VA.getValVT();
8242 int FI = MFI.CreateFixedObject(Size: ValVT.getStoreSize(), SPOffset: VA.getLocMemOffset(),
8243 /*IsImmutable=*/true);
8244 SDValue FIN = DAG.getFrameIndex(
8245 FI, VT: MVT::getIntegerVT(BitWidth: DAG.getDataLayout().getPointerSizeInBits(AS: 0)));
8246
8247 ISD::LoadExtType ExtType;
8248 switch (VA.getLocInfo()) {
8249 default:
8250 llvm_unreachable("Unexpected CCValAssign::LocInfo");
8251 case CCValAssign::Full:
8252 case CCValAssign::Indirect:
8253 case CCValAssign::BCvt:
8254 ExtType = ISD::NON_EXTLOAD;
8255 break;
8256 }
8257 return DAG.getExtLoad(
8258 ExtType, dl: DL, VT: VA.getLocVT(), Chain, Ptr: FIN,
8259 PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI), MemVT: ValVT);
8260}
8261
8262static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain,
8263 const CCValAssign &VA,
8264 const CCValAssign &HiVA,
8265 const SDLoc &DL) {
8266 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
8267 "Unexpected VA");
8268 MachineFunction &MF = DAG.getMachineFunction();
8269 MachineFrameInfo &MFI = MF.getFrameInfo();
8270 MachineRegisterInfo &RegInfo = MF.getRegInfo();
8271
8272 assert(VA.isRegLoc() && "Expected register VA assignment");
8273
8274 Register LoVReg = RegInfo.createVirtualRegister(RegClass: &LoongArch::GPRRegClass);
8275 RegInfo.addLiveIn(Reg: VA.getLocReg(), vreg: LoVReg);
8276 SDValue Lo = DAG.getCopyFromReg(Chain, dl: DL, Reg: LoVReg, VT: MVT::i32);
8277 SDValue Hi;
8278 if (HiVA.isMemLoc()) {
8279 // Second half of f64 is passed on the stack.
8280 int FI = MFI.CreateFixedObject(Size: 4, SPOffset: HiVA.getLocMemOffset(),
8281 /*IsImmutable=*/true);
8282 SDValue FIN = DAG.getFrameIndex(FI, VT: MVT::i32);
8283 Hi = DAG.getLoad(VT: MVT::i32, dl: DL, Chain, Ptr: FIN,
8284 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI));
8285 } else {
8286 // Second half of f64 is passed in another GPR.
8287 Register HiVReg = RegInfo.createVirtualRegister(RegClass: &LoongArch::GPRRegClass);
8288 RegInfo.addLiveIn(Reg: HiVA.getLocReg(), vreg: HiVReg);
8289 Hi = DAG.getCopyFromReg(Chain, dl: DL, Reg: HiVReg, VT: MVT::i32);
8290 }
8291 return DAG.getNode(Opcode: LoongArchISD::BUILD_PAIR_F64, DL, VT: MVT::f64, N1: Lo, N2: Hi);
8292}
8293
8294static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
8295 const CCValAssign &VA, const SDLoc &DL) {
8296 EVT LocVT = VA.getLocVT();
8297
8298 switch (VA.getLocInfo()) {
8299 default:
8300 llvm_unreachable("Unexpected CCValAssign::LocInfo");
8301 case CCValAssign::Full:
8302 break;
8303 case CCValAssign::BCvt:
8304 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
8305 Val = DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Val);
8306 else
8307 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: LocVT, Operand: Val);
8308 break;
8309 }
8310 return Val;
8311}
8312
8313static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
8314 CCValAssign::LocInfo LocInfo,
8315 ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
8316 CCState &State) {
8317 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
8318 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
8319 // s0 s1 s2 s3 s4 s5 s6 s7 s8
8320 static const MCPhysReg GPRList[] = {
8321 LoongArch::R23, LoongArch::R24, LoongArch::R25,
8322 LoongArch::R26, LoongArch::R27, LoongArch::R28,
8323 LoongArch::R29, LoongArch::R30, LoongArch::R31};
8324 if (MCRegister Reg = State.AllocateReg(Regs: GPRList)) {
8325 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, HTP: LocInfo));
8326 return false;
8327 }
8328 }
8329
8330 if (LocVT == MVT::f32) {
8331 // Pass in STG registers: F1, F2, F3, F4
8332 // fs0,fs1,fs2,fs3
8333 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
8334 LoongArch::F26, LoongArch::F27};
8335 if (MCRegister Reg = State.AllocateReg(Regs: FPR32List)) {
8336 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, HTP: LocInfo));
8337 return false;
8338 }
8339 }
8340
8341 if (LocVT == MVT::f64) {
8342 // Pass in STG registers: D1, D2, D3, D4
8343 // fs4,fs5,fs6,fs7
8344 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
8345 LoongArch::F30_64, LoongArch::F31_64};
8346 if (MCRegister Reg = State.AllocateReg(Regs: FPR64List)) {
8347 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, HTP: LocInfo));
8348 return false;
8349 }
8350 }
8351
8352 report_fatal_error(reason: "No registers left in GHC calling convention");
8353 return true;
8354}
8355
// Transform physical registers into virtual registers.
//
// Lowers the incoming formal arguments described by Ins: assigns each a
// location via the calling convention, materializes loads/copies into InVals,
// and, for vararg functions, spills the unused argument registers into the
// vararg save area. Returns the (possibly updated) chain.
SDValue LoongArchTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  MachineFunction &MF = DAG.getMachineFunction();
  auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();

  switch (CallConv) {
  default:
    llvm_unreachable("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
  case CallingConv::PreserveNone:
  case CallingConv::PreserveMost:
    break;
  case CallingConv::GHC:
    // GHC uses FPRs unconditionally, so both float extensions are required.
    if (!MF.getSubtarget().hasFeature(Feature: LoongArch::FeatureBasicF) ||
        !MF.getSubtarget().hasFeature(Feature: LoongArch::FeatureBasicD))
      report_fatal_error(
          reason: "GHC calling convention requires the F and D extensions");
  }

  EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
  MVT GRLenVT = Subtarget.getGRLenVT();
  unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
  // Used with varargs to accumulate store chains.
  std::vector<SDValue> OutChains;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::GHC)
    CCInfo.AnalyzeFormalArguments(Ins, Fn: CC_LoongArch_GHC);
  else
    analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, Fn: CC_LoongArch);

  for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue;
    // Passing f64 on LA32D with a soft float ABI must be handled as a special
    // case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.needsCustom());
      // The f64 occupies two locations; consume the second (high-half) one.
      ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, HiVA: ArgLocs[++i], DL);
    } else if (VA.isRegLoc())
      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, In: Ins[InsIdx], TLI: *this);
    else
      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // If the original argument was split and passed by reference, we need to
      // load all parts of it here (using the same address).
      InVals.push_back(Elt: DAG.getLoad(VT: VA.getValVT(), dl: DL, Chain, Ptr: ArgValue,
                                   PtrInfo: MachinePointerInfo()));
      unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
      unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
      assert(ArgPartOffset == 0);
      // Load each remaining part at its offset from the shared base address.
      while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[i + 1];
        unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
        SDValue Offset = DAG.getIntPtrConstant(Val: PartOffset, DL);
        SDValue Address = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: ArgValue, N2: Offset);
        InVals.push_back(Elt: DAG.getLoad(VT: PartVA.getValVT(), dl: DL, Chain, Ptr: Address,
                                     PtrInfo: MachinePointerInfo()));
        ++i;
        ++InsIdx;
      }
      continue;
    }
    InVals.push_back(Elt: ArgValue);
    // Remember byval arguments for later use by the frame lowering code.
    if (Ins[InsIdx].Flags.isByVal())
      LoongArchFI->addIncomingByValArgs(Val: ArgValue);
  }

  if (IsVarArg) {
    ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
    unsigned Idx = CCInfo.getFirstUnallocated(Regs: ArgRegs);
    const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    // Offset of the first variable argument from stack pointer, and size of
    // the vararg save area. For now, the varargs save area is either zero or
    // large enough to hold a0-a7.
    int VaArgOffset, VarArgsSaveSize;

    // If all registers are allocated, then all varargs must be passed on the
    // stack and we don't need to save any argregs.
    if (ArgRegs.size() == Idx) {
      VaArgOffset = CCInfo.getStackSize();
      VarArgsSaveSize = 0;
    } else {
      VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
      VaArgOffset = -VarArgsSaveSize;
    }

    // Record the frame index of the first variable argument
    // which is a value necessary to VASTART.
    int FI = MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset, IsImmutable: true);
    LoongArchFI->setVarArgsFrameIndex(FI);

    // If saving an odd number of registers then create an extra stack slot to
    // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
    // offsets to even-numbered registered remain 2*GRLen-aligned.
    if (Idx % 2) {
      MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset - (int)GRLenInBytes,
                            IsImmutable: true);
      VarArgsSaveSize += GRLenInBytes;
    }

    // Copy the integer registers that may have been used for passing varargs
    // to the vararg save area.
    for (unsigned I = Idx; I < ArgRegs.size();
         ++I, VaArgOffset += GRLenInBytes) {
      const Register Reg = RegInfo.createVirtualRegister(RegClass: RC);
      RegInfo.addLiveIn(Reg: ArgRegs[I], vreg: Reg);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl: DL, Reg, VT: GRLenVT);
      FI = MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset, IsImmutable: true);
      SDValue PtrOff = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout()));
      SDValue Store = DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: PtrOff,
                                   PtrInfo: MachinePointerInfo::getFixedStack(MF, FI));
      // NOTE(review): the memory operand's IR Value is cleared here,
      // presumably because the store has no corresponding IR object — confirm.
      cast<StoreSDNode>(Val: Store.getNode())
          ->getMemOperand()
          ->setValue((Value *)nullptr);
      OutChains.push_back(x: Store);
    }
    LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
  }

  LoongArchFI->setArgumentStackSize(CCInfo.getStackSize());

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(x: Chain);
    Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
  }

  return Chain;
}
8498
8499bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
8500 return CI->isTailCall();
8501}
8502
8503// Check if the return value is used as only a return value, as otherwise
8504// we can't perform a tail-call.
8505bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
8506 SDValue &Chain) const {
8507 if (N->getNumValues() != 1)
8508 return false;
8509 if (!N->hasNUsesOfValue(NUses: 1, Value: 0))
8510 return false;
8511
8512 SDNode *Copy = *N->user_begin();
8513 if (Copy->getOpcode() != ISD::CopyToReg)
8514 return false;
8515
8516 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
8517 // isn't safe to perform a tail call.
8518 if (Copy->getGluedNode())
8519 return false;
8520
8521 // The copy must be used by a LoongArchISD::RET, and nothing else.
8522 bool HasRet = false;
8523 for (SDNode *Node : Copy->users()) {
8524 if (Node->getOpcode() != LoongArchISD::RET)
8525 return false;
8526 HasRet = true;
8527 }
8528
8529 if (!HasRet)
8530 return false;
8531
8532 Chain = Copy->getOperand(Num: 0);
8533 return true;
8534}
8535
8536// Check whether the call is eligible for tail call optimization.
8537bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
8538 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
8539 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
8540
8541 auto CalleeCC = CLI.CallConv;
8542 auto &Outs = CLI.Outs;
8543 auto &Caller = MF.getFunction();
8544 auto CallerCC = Caller.getCallingConv();
8545 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
8546
8547 // If the stack arguments for this call do not fit into our own save area then
8548 // the call cannot be made tail.
8549 if (CCInfo.getStackSize() > LoongArchFI->getArgumentStackSize())
8550 return false;
8551
8552 // Do not tail call opt if any parameters need to be passed indirectly.
8553 for (auto &VA : ArgLocs)
8554 if (VA.getLocInfo() == CCValAssign::Indirect)
8555 return false;
8556
8557 // Do not tail call opt if either caller or callee uses struct return
8558 // semantics.
8559 auto IsCallerStructRet = Caller.hasStructRetAttr();
8560 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
8561 if (IsCallerStructRet != IsCalleeStructRet)
8562 return false;
8563
8564 // Do not tail call opt if caller's and callee's byval arguments do not match.
8565 for (unsigned i = 0, j = 0; i < Outs.size(); i++) {
8566 if (!Outs[i].Flags.isByVal())
8567 continue;
8568 if (j++ >= LoongArchFI->getIncomingByValArgsSize())
8569 return false;
8570 if (LoongArchFI->getIncomingByValArgs(Idx: i).getValueType() != Outs[i].ArgVT)
8571 return false;
8572 }
8573
8574 // The callee has to preserve all registers the caller needs to preserve.
8575 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
8576 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
8577 if (CalleeCC != CallerCC) {
8578 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
8579 if (!TRI->regmaskSubsetEqual(mask0: CallerPreserved, mask1: CalleePreserved))
8580 return false;
8581 }
8582
8583 // If the callee takes no arguments then go on to check the results of the
8584 // call.
8585 const MachineRegisterInfo &MRI = MF.getRegInfo();
8586 const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
8587 if (!parametersInCSRMatch(MRI, CallerPreservedMask: CallerPreserved, ArgLocs, OutVals))
8588 return false;
8589
8590 return true;
8591}
8592
8593static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
8594 return DAG.getDataLayout().getPrefTypeAlign(
8595 Ty: VT.getTypeForEVT(Context&: *DAG.getContext()));
8596}
8597
// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
// and output parameter nodes.
SDValue
LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                   SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
  MVT GRLenVT = Subtarget.getGRLenVT();
  bool &IsTailCall = CLI.IsTailCall;

  MachineFunction &MF = DAG.getMachineFunction();
  auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();

  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign> ArgLocs;
  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::GHC)
    ArgCCInfo.AnalyzeCallOperands(Outs, Fn: CC_LoongArch_GHC);
  else
    analyzeOutputArgs(MF, CCInfo&: ArgCCInfo, Outs, /*IsRet=*/false, CLI: &CLI, Fn: CC_LoongArch);

  // Check if it's really possible to do a tail call.
  if (IsTailCall)
    IsTailCall = isEligibleForTailCallOptimization(CCInfo&: ArgCCInfo, CLI, MF, ArgLocs);

  if (IsTailCall)
    ++NumTailCalls;
  else if (CLI.CB && CLI.CB->isMustTailCall())
    report_fatal_error(reason: "failed to perform tail call elimination on a call "
                       "site marked musttail");

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getStackSize();

  // Create local copies for byval args.
  SmallVector<SDValue> ByValArgs;
  for (unsigned i = 0, j = 0, e = Outs.size(); i != e; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    if (!Flags.isByVal())
      continue;

    SDValue Arg = OutVals[i];
    unsigned Size = Flags.getByValSize();
    Align Alignment = Flags.getNonZeroByValAlign();
    SDValue SizeNode = DAG.getConstant(Val: Size, DL, VT: GRLenVT);
    SDValue Dst;

    if (IsTailCall) {
      // For a tail call the callee reuses the caller's frame, so the copy
      // goes into the caller's own incoming byval slot -- but only when the
      // source address is a constant (global / external symbol / frame
      // index). Otherwise Dst stays null and no copy is queued.
      SDValue CallerArg = LoongArchFI->getIncomingByValArgs(Idx: j++);
      if (isa<GlobalAddressSDNode>(Val: Arg) || isa<ExternalSymbolSDNode>(Val: Arg) ||
          isa<FrameIndexSDNode>(Val: Arg))
        Dst = CallerArg;
    } else {
      int FI =
          MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/isSpillSlot: false);
      Dst = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout()));
    }
    if (Dst) {
      Chain =
          DAG.getMemcpy(Chain, dl: DL, Dst, Src: Arg, Size: SizeNode, Alignment,
                        /*IsVolatile=*/isVol: false,
                        /*AlwaysInline=*/false, /*CI=*/nullptr, OverrideTailCall: std::nullopt,
                        DstPtrInfo: MachinePointerInfo(), SrcPtrInfo: MachinePointerInfo());
      ByValArgs.push_back(Elt: Dst);
    }
  }

  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: 0, DL: CLI.DL);

  // During a tail call, stores to the argument area must happen after all of
  // the function's incoming arguments have been loaded because they may alias.
  // This is done by folding in a TokenFactor from LowerFormalArguments, but
  // there's no point in doing so repeatedly so this tracks whether that's
  // happened yet.
  bool AfterFormalArgLoads = false;

  // Copy argument values to their designated locations.
  SmallVector<std::pair<Register, SDValue>> RegsToPass;
  SmallVector<SDValue> MemOpChains;
  SDValue StackPtr;
  for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
       ++i, ++OutIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue = OutVals[OutIdx];
    ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;

    // Handle passing f64 on LA32D with a soft float ABI as a special case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.isRegLoc() && "Expected register VA assignment");
      assert(VA.needsCustom());
      // Split the f64 into two i32 halves: Lo always goes in a GPR; Hi goes
      // in the next GPR or, if registers ran out, on the stack.
      SDValue SplitF64 =
          DAG.getNode(Opcode: LoongArchISD::SPLIT_PAIR_F64, DL,
                      VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32), N: ArgValue);
      SDValue Lo = SplitF64.getValue(R: 0);
      SDValue Hi = SplitF64.getValue(R: 1);

      Register RegLo = VA.getLocReg();
      RegsToPass.push_back(Elt: std::make_pair(x&: RegLo, y&: Lo));

      // Get the CCValAssign for the Hi part.
      CCValAssign &HiVA = ArgLocs[++i];

      if (HiVA.isMemLoc()) {
        // Second half of f64 is passed on the stack.
        if (!StackPtr.getNode())
          StackPtr = DAG.getCopyFromReg(Chain, dl: DL, Reg: LoongArch::R3, VT: PtrVT);
        SDValue Address =
            DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr,
                        N2: DAG.getIntPtrConstant(Val: HiVA.getLocMemOffset(), DL));
        // Emit the store.
        MemOpChains.push_back(Elt: DAG.getStore(
            Chain, dl: DL, Val: Hi, Ptr: Address,
            PtrInfo: MachinePointerInfo::getStack(MF, Offset: HiVA.getLocMemOffset())));
      } else {
        // Second half of f64 is passed in another GPR.
        Register RegHigh = HiVA.getLocReg();
        RegsToPass.push_back(Elt: std::make_pair(x&: RegHigh, y&: Hi));
      }
      continue;
    }

    // Promote the value if needed.
    // For now, only handle fully promoted and indirect arguments.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      Align StackAlign =
          std::max(a: getPrefTypeAlign(VT: Outs[OutIdx].ArgVT, DAG),
                   b: getPrefTypeAlign(VT: ArgValue.getValueType(), DAG));
      TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
      // If the original argument was split and passed by reference, we need to
      // store the required parts of it here (and pass just one address).
      unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
      unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
      assert(ArgPartOffset == 0);
      // Calculate the total size to store. We don't have access to what we're
      // actually storing other than performing the loop and collecting the
      // info.
      SmallVector<std::pair<SDValue, SDValue>> Parts;
      while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[OutIdx + 1];
        unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
        SDValue Offset = DAG.getIntPtrConstant(Val: PartOffset, DL);
        EVT PartVT = PartValue.getValueType();

        StoredSize += PartVT.getStoreSize();
        StackAlign = std::max(a: StackAlign, b: getPrefTypeAlign(VT: PartVT, DAG));
        Parts.push_back(Elt: std::make_pair(x&: PartValue, y&: Offset));
        ++i;
        ++OutIdx;
      }
      SDValue SpillSlot = DAG.CreateStackTemporary(Bytes: StoredSize, Alignment: StackAlign);
      int FI = cast<FrameIndexSDNode>(Val&: SpillSlot)->getIndex();
      MemOpChains.push_back(
          Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: SpillSlot,
                        PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)));
      // Store all the remaining parts at their offsets within the same
      // spill slot.
      for (const auto &Part : Parts) {
        SDValue PartValue = Part.first;
        SDValue PartOffset = Part.second;
        SDValue Address =
            DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: SpillSlot, N2: PartOffset);
        MemOpChains.push_back(
            Elt: DAG.getStore(Chain, dl: DL, Val: PartValue, Ptr: Address,
                          PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)));
      }
      ArgValue = SpillSlot;
    } else {
      ArgValue = convertValVTToLocVT(DAG, Val: ArgValue, VA, DL);
    }

    // Use local copy if it is a byval arg.
    // NOTE(review): this condition is meant to mirror the one under which
    // ByValArgs was populated above, but it tests ArgValue after
    // convertValVTToLocVT rather than the raw OutVal -- confirm the two
    // always agree, otherwise `j` could get out of sync with ByValArgs.
    if (Flags.isByVal()) {
      if (!IsTailCall || (isa<GlobalAddressSDNode>(Val: ArgValue) ||
                          isa<ExternalSymbolSDNode>(Val: ArgValue) ||
                          isa<FrameIndexSDNode>(Val: ArgValue)))
        ArgValue = ByValArgs[j++];
    }

    if (VA.isRegLoc()) {
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: ArgValue));
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");
      SDValue DstAddr;
      MachinePointerInfo DstInfo;
      int32_t Offset = VA.getLocMemOffset();

      // Work out the address of the stack slot.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, dl: DL, Reg: LoongArch::R3, VT: PtrVT);

      if (IsTailCall) {
        // Tail-call stack arguments overwrite the caller's own incoming
        // argument area, addressed via a fixed frame object.
        unsigned OpSize = divideCeil(Numerator: VA.getValVT().getSizeInBits(), Denominator: 8);
        int FI = MF.getFrameInfo().CreateFixedObject(Size: OpSize, SPOffset: Offset, IsImmutable: true);
        DstAddr = DAG.getFrameIndex(FI, VT: PtrVT);
        DstInfo = MachinePointerInfo::getFixedStack(MF, FI);
        if (!AfterFormalArgLoads) {
          Chain = DAG.getStackArgumentTokenFactor(Chain);
          AfterFormalArgLoads = true;
        }
      } else {
        SDValue PtrOff = DAG.getIntPtrConstant(Val: Offset, DL);
        DstAddr = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr, N2: PtrOff);
        DstInfo = MachinePointerInfo::getStack(MF, Offset);
      }

      // Emit the store.
      MemOpChains.push_back(
          Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: DstAddr, PtrInfo: DstInfo));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: MemOpChains);

  SDValue Glue;

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: Reg.first, N: Reg.second, Glue);
    Glue = Chain.getValue(R: 1);
  }

  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
  // split it and then direct call can be matched by PseudoCALL_SMALL.
  if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Val&: Callee)) {
    const GlobalValue *GV = S->getGlobal();
    unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
                           ? LoongArchII::MO_CALL
                           : LoongArchII::MO_CALL_PLT;
    Callee = DAG.getTargetGlobalAddress(GV: S->getGlobal(), DL, VT: PtrVT, offset: 0, TargetFlags: OpFlags);
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Val&: Callee)) {
    unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV: nullptr)
                           ? LoongArchII::MO_CALL
                           : LoongArchII::MO_CALL_PLT;
    Callee = DAG.getTargetExternalSymbol(Sym: S->getSymbol(), VT: PtrVT, TargetFlags: OpFlags);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue> Ops;
  Ops.push_back(Elt: Chain);
  Ops.push_back(Elt: Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(Elt: DAG.getRegister(Reg: Reg.first, VT: Reg.second.getValueType()));

  if (!IsTailCall) {
    // Add a register mask operand representing the call-preserved registers.
    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
    const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
    assert(Mask && "Missing call preserved mask for calling convention");
    Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask));
  }

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Elt: Glue);

  // Emit the call.
  // Pick the call/tail pseudo matching the code model so that the correct
  // relocation sequence is emitted.
  SDVTList NodeTys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue);
  unsigned Op;
  switch (DAG.getTarget().getCodeModel()) {
  default:
    report_fatal_error(reason: "Unsupported code model");
  case CodeModel::Small:
    Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
    break;
  case CodeModel::Medium:
    Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
    break;
  case CodeModel::Large:
    assert(Subtarget.is64Bit() && "Large code model requires LA64");
    Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
    break;
  }

  if (IsTailCall) {
    // A tail call ends the lowering here: no callseq_end and no return-value
    // copies are emitted.
    MF.getFrameInfo().setHasTailCall();
    SDValue Ret = DAG.getNode(Opcode: Op, DL, VTList: NodeTys, Ops);
    DAG.addNoMergeSiteInfo(Node: Ret.getNode(), NoMerge: CLI.NoMerge);
    return Ret;
  }

  Chain = DAG.getNode(Opcode: Op, DL, VTList: NodeTys, Ops);
  DAG.addNoMergeSiteInfo(Node: Chain.getNode(), NoMerge: CLI.NoMerge);
  Glue = Chain.getValue(R: 1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytes, Size2: 0, Glue, DL);
  Glue = Chain.getValue(R: 1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign> RVLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
  analyzeInputArgs(MF, CCInfo&: RetCCInfo, Ins, /*IsRet=*/true, Fn: CC_LoongArch);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    auto &VA = RVLocs[i];
    // Copy the value out.
    SDValue RetValue =
        DAG.getCopyFromReg(Chain, dl: DL, Reg: VA.getLocReg(), VT: VA.getLocVT(), Glue);
    // Glue the RetValue to the end of the call sequence.
    Chain = RetValue.getValue(R: 1);
    Glue = RetValue.getValue(R: 2);

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      // An f64 returned in a pair of i32 GPRs is reassembled with
      // BUILD_PAIR_F64 (mirrors the SPLIT_PAIR_F64 handling above).
      assert(VA.needsCustom());
      SDValue RetValue2 = DAG.getCopyFromReg(Chain, dl: DL, Reg: RVLocs[++i].getLocReg(),
                                             VT: MVT::i32, Glue);
      Chain = RetValue2.getValue(R: 1);
      Glue = RetValue2.getValue(R: 2);
      RetValue = DAG.getNode(Opcode: LoongArchISD::BUILD_PAIR_F64, DL, VT: MVT::f64,
                             N1: RetValue, N2: RetValue2);
    } else
      RetValue = convertLocVTToValVT(DAG, Val: RetValue, VA, DL);

    InVals.push_back(Elt: RetValue);
  }

  return Chain;
}
8933
8934bool LoongArchTargetLowering::CanLowerReturn(
8935 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
8936 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
8937 const Type *RetTy) const {
8938 SmallVector<CCValAssign> RVLocs;
8939 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
8940
8941 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8942 LoongArchABI::ABI ABI =
8943 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8944 if (CC_LoongArch(DL: MF.getDataLayout(), ABI, ValNo: i, ValVT: Outs[i].VT, LocInfo: CCValAssign::Full,
8945 ArgFlags: Outs[i].Flags, State&: CCInfo, /*IsRet=*/true, OrigTy: nullptr))
8946 return false;
8947 }
8948 return true;
8949}
8950
// Lower the return into a chain of CopyToReg nodes feeding a
// LoongArchISD::RET; the copies are glued together so they stay adjacent to
// the return.
SDValue LoongArchTargetLowering::LowerReturn(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
    SelectionDAG &DAG) const {
  // Stores the assignment of the return value to a location.
  SmallVector<CCValAssign> RVLocs;

  // Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  analyzeOutputArgs(MF&: DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
                    CLI: nullptr, Fn: CC_LoongArch);
  if (CallConv == CallingConv::GHC && !RVLocs.empty())
    report_fatal_error(reason: "GHC functions return void only");
  SDValue Glue;
  // RetOps[0] is the chain; register operands are appended below.
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
    SDValue Val = OutVals[OutIdx];
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      // Handle returning f64 on LA32D with a soft float ABI.
      assert(VA.isRegLoc() && "Expected return via registers");
      assert(VA.needsCustom());
      // Split the f64 into two i32 halves; each half is returned in its own
      // GPR (the second register comes from the next RVLoc).
      SDValue SplitF64 = DAG.getNode(Opcode: LoongArchISD::SPLIT_PAIR_F64, DL,
                                     VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32), N: Val);
      SDValue Lo = SplitF64.getValue(R: 0);
      SDValue Hi = SplitF64.getValue(R: 1);
      Register RegLo = VA.getLocReg();
      Register RegHi = RVLocs[++i].getLocReg();

      Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: RegLo, N: Lo, Glue);
      Glue = Chain.getValue(R: 1);
      RetOps.push_back(Elt: DAG.getRegister(Reg: RegLo, VT: MVT::i32));
      Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: RegHi, N: Hi, Glue);
      Glue = Chain.getValue(R: 1);
      RetOps.push_back(Elt: DAG.getRegister(Reg: RegHi, VT: MVT::i32));
    } else {
      // Handle a 'normal' return.
      Val = convertValVTToLocVT(DAG, Val, VA, DL);
      Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: VA.getLocReg(), N: Val, Glue);

      // Guarantee that all emitted copies are stuck together.
      Glue = Chain.getValue(R: 1);
      RetOps.push_back(Elt: DAG.getRegister(Reg: VA.getLocReg(), VT: VA.getLocVT()));
    }
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue node if we have it.
  if (Glue.getNode())
    RetOps.push_back(Elt: Glue);

  return DAG.getNode(Opcode: LoongArchISD::RET, DL, VT: MVT::Other, Ops: RetOps);
}
9012
// Check if a constant splat can be generated using [x]vldi, where imm[12] == 1.
// Note: The following prefixes are excluded:
// imm[11:8] == 4'b0000, 4'b0100, 4'b1000
// as they can be represented using [x]vrepli.[whb]
//
// Returns {true, imm} where imm is the 13-bit vldi immediate: bit 12 set,
// bits [11:8] selecting the replication pattern, bits [7:0] the payload.
// Returns {false, 0} when no mode-1 encoding matches the splat.
std::pair<bool, uint64_t> LoongArchTargetLowering::isImmVLDILegalForMode1(
    const APInt &SplatValue, const unsigned SplatBitSize) const {
  uint64_t RequiredImm = 0;
  uint64_t V = SplatValue.getZExtValue();
  // 16-bit splat of the form 0xXY00: payload is the high byte.
  if (SplatBitSize == 16 && !(V & 0x00FF)) {
    // 4'b0101
    RequiredImm = (0b10101 << 8) | (V >> 8);
    return {true, RequiredImm};
  } else if (SplatBitSize == 32) {
    // 4'b0001: single non-zero byte at bits [15:8].
    if (!(V & 0xFFFF00FF)) {
      RequiredImm = (0b10001 << 8) | (V >> 8);
      return {true, RequiredImm};
    }
    // 4'b0010: single non-zero byte at bits [23:16].
    if (!(V & 0xFF00FFFF)) {
      RequiredImm = (0b10010 << 8) | (V >> 16);
      return {true, RequiredImm};
    }
    // 4'b0011: single non-zero byte at bits [31:24].
    if (!(V & 0x00FFFFFF)) {
      RequiredImm = (0b10011 << 8) | (V >> 24);
      return {true, RequiredImm};
    }
    // 4'b0110: payload byte at bits [15:8] with the low byte all-ones.
    if ((V & 0xFFFF00FF) == 0xFF) {
      RequiredImm = (0b10110 << 8) | (V >> 8);
      return {true, RequiredImm};
    }
    // 4'b0111: payload byte at bits [23:16] with the low two bytes all-ones.
    if ((V & 0xFF00FFFF) == 0xFFFF) {
      RequiredImm = (0b10111 << 8) | (V >> 16);
      return {true, RequiredImm};
    }
    // 4'b1010: the same bit pattern accepted by isFPImmVLDILegal for f32;
    // payload packs the sign/exponent bits with mantissa bits [22:17].
    if ((V & 0x7E07FFFF) == 0x3E000000 || (V & 0x7E07FFFF) == 0x40000000) {
      RequiredImm =
          (0b11010 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
      return {true, RequiredImm};
    }
  } else if (SplatBitSize == 64) {
    // 4'b1011: the f32-shaped pattern above, held in the low 32 bits of a
    // 64-bit element (upper half must be zero).
    if ((V & 0xFFFFFFFF7E07FFFFULL) == 0x3E000000ULL ||
        (V & 0xFFFFFFFF7E07FFFFULL) == 0x40000000ULL) {
      RequiredImm =
          (0b11011 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
      return {true, RequiredImm};
    }
    // 4'b1100: the same bit pattern accepted by isFPImmVLDILegal for f64.
    if ((V & 0x7FC0FFFFFFFFFFFFULL) == 0x4000000000000000ULL ||
        (V & 0x7FC0FFFFFFFFFFFFULL) == 0x3FC0000000000000ULL) {
      RequiredImm =
          (0b11100 << 8) | (((V >> 56) & 0xC0) ^ 0x40) | ((V >> 48) & 0x3F);
      return {true, RequiredImm};
    }
    // 4'b1001: every byte of the value is either 0x00 or 0xFF; the payload
    // records, per byte, which of the two it is.
    // The lambda returns {true, bits} with bit i set iff byte i is 0xFF.
    auto sameBitsPreByte = [](uint64_t x) -> std::pair<bool, uint8_t> {
      uint8_t res = 0;
      for (int i = 0; i < 8; ++i) {
        uint8_t byte = x & 0xFF;
        if (byte == 0 || byte == 0xFF)
          res |= ((byte & 1) << i);
        else
          return {false, 0};
        x >>= 8;
      }
      return {true, res};
    };
    auto [IsSame, Suffix] = sameBitsPreByte(V);
    if (IsSame) {
      RequiredImm = (0b11001 << 8) | Suffix;
      return {true, RequiredImm};
    }
  }
  return {false, RequiredImm};
}
9093
9094bool LoongArchTargetLowering::isFPImmVLDILegal(const APFloat &Imm,
9095 EVT VT) const {
9096 if (!Subtarget.hasExtLSX())
9097 return false;
9098
9099 if (VT == MVT::f32) {
9100 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
9101 return (masked == 0x3e000000 || masked == 0x40000000);
9102 }
9103
9104 if (VT == MVT::f64) {
9105 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
9106 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
9107 }
9108
9109 return false;
9110}
9111
9112bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
9113 bool ForCodeSize) const {
9114 // TODO: Maybe need more checks here after vector extension is supported.
9115 if (VT == MVT::f32 && !Subtarget.hasBasicF())
9116 return false;
9117 if (VT == MVT::f64 && !Subtarget.hasBasicD())
9118 return false;
9119 return (Imm.isZero() || Imm.isExactlyValue(V: 1.0) || isFPImmVLDILegal(Imm, VT));
9120}
9121
// Unconditionally allow the generic combiner to speculate cttz on this
// target (TargetLowering hook).
bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const {
  return true;
}
9125
// Unconditionally allow the generic combiner to speculate ctlz on this
// target (TargetLowering hook).
bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const {
  return true;
}
9129
9130bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
9131 const Instruction *I) const {
9132 if (!Subtarget.is64Bit())
9133 return isa<LoadInst>(Val: I) || isa<StoreInst>(Val: I);
9134
9135 if (isa<LoadInst>(Val: I))
9136 return true;
9137
9138 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
9139 // require fences beacuse we can use amswap_db.[w/d].
9140 Type *Ty = I->getOperand(i: 0)->getType();
9141 if (isa<StoreInst>(Val: I) && Ty->isIntegerTy()) {
9142 unsigned Size = Ty->getIntegerBitWidth();
9143 return (Size == 8 || Size == 16);
9144 }
9145
9146 return false;
9147}
9148
9149EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
9150 LLVMContext &Context,
9151 EVT VT) const {
9152 if (!VT.isVector())
9153 return getPointerTy(DL);
9154 return VT.changeVectorElementTypeToInteger();
9155}
9156
9157bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
9158 EVT VT = Y.getValueType();
9159
9160 if (VT.isVector())
9161 return Subtarget.hasExtLSX() && VT.isInteger();
9162
9163 return VT.isScalarInteger() && !isa<ConstantSDNode>(Val: Y);
9164}
9165
9166void LoongArchTargetLowering::getTgtMemIntrinsic(
9167 SmallVectorImpl<IntrinsicInfo> &Infos, const CallBase &I,
9168 MachineFunction &MF, unsigned Intrinsic) const {
9169 switch (Intrinsic) {
9170 default:
9171 return;
9172 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
9173 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
9174 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
9175 case Intrinsic::loongarch_masked_atomicrmw_nand_i32: {
9176 IntrinsicInfo Info;
9177 Info.opc = ISD::INTRINSIC_W_CHAIN;
9178 Info.memVT = MVT::i32;
9179 Info.ptrVal = I.getArgOperand(i: 0);
9180 Info.offset = 0;
9181 Info.align = Align(4);
9182 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
9183 MachineMemOperand::MOVolatile;
9184 Infos.push_back(Elt: Info);
9185 return;
9186 // TODO: Add more Intrinsics later.
9187 }
9188 }
9189}
9190
// When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8,
// atomicrmw and/or/xor operations with operands less than 32 bits cannot be
// expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent
// regression, we need to implement it manually.
//
// The expansion widens the narrow atomic to a 32-bit word operation: it
// computes the aligned word containing the value, shifts the operand into
// the right lane, performs the word-sized atomicrmw, and extracts the old
// narrow value from the result.
void LoongArchTargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
  AtomicRMWInst::BinOp Op = AI->getOperation();

  assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
          Op == AtomicRMWInst::And) &&
         "Unable to expand");
  // Widen to a 4-byte (word) atomic.
  unsigned MinWordSize = 4;

  IRBuilder<> Builder(AI);
  LLVMContext &Ctx = Builder.getContext();
  const DataLayout &DL = AI->getDataLayout();
  Type *ValueType = AI->getType();
  Type *WordType = Type::getIntNTy(C&: Ctx, N: MinWordSize * 8);

  Value *Addr = AI->getPointerOperand();
  PointerType *PtrTy = cast<PointerType>(Val: Addr->getType());
  IntegerType *IntTy = DL.getIndexType(C&: Ctx, AddressSpace: PtrTy->getAddressSpace());

  // Round the address down to the containing aligned word.
  Value *AlignedAddr = Builder.CreateIntrinsic(
      ID: Intrinsic::ptrmask, Types: {PtrTy, IntTy},
      Args: {Addr, ConstantInt::get(Ty: IntTy, V: ~(uint64_t)(MinWordSize - 1))}, FMFSource: nullptr,
      Name: "AlignedAddr");

  // Bit offset of the narrow value within that word (byte offset * 8).
  Value *AddrInt = Builder.CreatePtrToInt(V: Addr, DestTy: IntTy);
  Value *PtrLSB = Builder.CreateAnd(LHS: AddrInt, RHS: MinWordSize - 1, Name: "PtrLSB");
  Value *ShiftAmt = Builder.CreateShl(LHS: PtrLSB, RHS: 3);
  ShiftAmt = Builder.CreateTrunc(V: ShiftAmt, DestTy: WordType, Name: "ShiftAmt");
  // Mask selecting the narrow value's bits inside the word.
  // NOTE(review): assumes ValueType is narrower than 32 bits (the asserted
  // ops only take this path for Size < 32 -- see shouldExpandAtomicRMWInIR);
  // a 32-bit ValueType would make `1 << 32` overflow int. Confirm.
  Value *Mask = Builder.CreateShl(
      LHS: ConstantInt::get(Ty: WordType,
                       V: (1 << (DL.getTypeStoreSize(Ty: ValueType) * 8)) - 1),
      RHS: ShiftAmt, Name: "Mask");
  Value *Inv_Mask = Builder.CreateNot(V: Mask, Name: "Inv_Mask");
  Value *ValOperand_Shifted =
      Builder.CreateShl(LHS: Builder.CreateZExt(V: AI->getValOperand(), DestTy: WordType),
                        RHS: ShiftAmt, Name: "ValOperand_Shifted");
  // For And, the bits outside the narrow lane must be set to all-ones so the
  // word-sized And preserves them; Or/Xor with zero bits already do.
  Value *NewOperand;
  if (Op == AtomicRMWInst::And)
    NewOperand = Builder.CreateOr(LHS: ValOperand_Shifted, RHS: Inv_Mask, Name: "AndOperand");
  else
    NewOperand = ValOperand_Shifted;

  // Perform the widened word-sized atomic with the original ordering and
  // sync scope.
  AtomicRMWInst *NewAI =
      Builder.CreateAtomicRMW(Op, Ptr: AlignedAddr, Val: NewOperand, Align: Align(MinWordSize),
                              Ordering: AI->getOrdering(), SSID: AI->getSyncScopeID());

  // Extract the narrow old value from the word result and replace the
  // original instruction.
  Value *Shift = Builder.CreateLShr(LHS: NewAI, RHS: ShiftAmt, Name: "shifted");
  Value *Trunc = Builder.CreateTrunc(V: Shift, DestTy: ValueType, Name: "extracted");
  Value *FinalOldResult = Builder.CreateBitCast(V: Trunc, DestTy: ValueType);
  AI->replaceAllUsesWith(V: FinalOldResult);
  AI->eraseFromParent();
}
9246
// Decide how AtomicExpandPass should lower each atomicrmw. The order of the
// checks below is significant: FP/wrap/sat ops first, then the LAM_BH and
// LAMCAS feature-specific cases, finally the generic narrow-size handling.
TargetLowering::AtomicExpansionKind
LoongArchTargetLowering::shouldExpandAtomicRMWInIR(
    const AtomicRMWInst *AI) const {
  // TODO: Add more AtomicRMWInst that needs to be extended.

  // Since floating-point operation requires a non-trivial set of data
  // operations, use CmpXChg to expand.
  if (AI->isFloatingPointOperation() ||
      AI->getOperation() == AtomicRMWInst::UIncWrap ||
      AI->getOperation() == AtomicRMWInst::UDecWrap ||
      AI->getOperation() == AtomicRMWInst::USubCond ||
      AI->getOperation() == AtomicRMWInst::USubSat)
    return AtomicExpansionKind::CmpXChg;

  // With LAM_BH on LA64, xchg/add/sub need no IR expansion at any width
  // (LAM_BH presumably supplies byte/halfword AM instructions -- confirm).
  if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
      (AI->getOperation() == AtomicRMWInst::Xchg ||
       AI->getOperation() == AtomicRMWInst::Add ||
       AI->getOperation() == AtomicRMWInst::Sub)) {
    return AtomicExpansionKind::None;
  }

  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (Subtarget.hasLAMCAS()) {
    // Narrow and/or/xor are expanded manually (see emitExpandAtomicRMW) so
    // they can still use the word-sized am{and,or,xor}[_db].w instructions.
    if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
                      AI->getOperation() == AtomicRMWInst::Or ||
                      AI->getOperation() == AtomicRMWInst::Xor))
      return AtomicExpansionKind::CustomExpand;
    // Nand (any width) and the remaining narrow operations go through a
    // compare-exchange loop.
    if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
      return AtomicExpansionKind::CmpXChg;
  }

  // Without LAMCAS, 8/16-bit operations use the masked atomic intrinsics.
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}
9282
9283static Intrinsic::ID
9284getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
9285 AtomicRMWInst::BinOp BinOp) {
9286 if (GRLen == 64) {
9287 switch (BinOp) {
9288 default:
9289 llvm_unreachable("Unexpected AtomicRMW BinOp");
9290 case AtomicRMWInst::Xchg:
9291 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
9292 case AtomicRMWInst::Add:
9293 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
9294 case AtomicRMWInst::Sub:
9295 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
9296 case AtomicRMWInst::Nand:
9297 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
9298 case AtomicRMWInst::UMax:
9299 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
9300 case AtomicRMWInst::UMin:
9301 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
9302 case AtomicRMWInst::Max:
9303 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
9304 case AtomicRMWInst::Min:
9305 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
9306 // TODO: support other AtomicRMWInst.
9307 }
9308 }
9309
9310 if (GRLen == 32) {
9311 switch (BinOp) {
9312 default:
9313 llvm_unreachable("Unexpected AtomicRMW BinOp");
9314 case AtomicRMWInst::Xchg:
9315 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
9316 case AtomicRMWInst::Add:
9317 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
9318 case AtomicRMWInst::Sub:
9319 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
9320 case AtomicRMWInst::Nand:
9321 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
9322 case AtomicRMWInst::UMax:
9323 return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
9324 case AtomicRMWInst::UMin:
9325 return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
9326 case AtomicRMWInst::Max:
9327 return Intrinsic::loongarch_masked_atomicrmw_max_i32;
9328 case AtomicRMWInst::Min:
9329 return Intrinsic::loongarch_masked_atomicrmw_min_i32;
9330 // TODO: support other AtomicRMWInst.
9331 }
9332 }
9333
9334 llvm_unreachable("Unexpected GRLen\n");
9335}
9336
9337TargetLowering::AtomicExpansionKind
9338LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
9339 const AtomicCmpXchgInst *CI) const {
9340
9341 if (Subtarget.hasLAMCAS())
9342 return AtomicExpansionKind::None;
9343
9344 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
9345 if (Size == 8 || Size == 16)
9346 return AtomicExpansionKind::MaskedIntrinsic;
9347 return AtomicExpansionKind::None;
9348}
9349
9350Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
9351 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
9352 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
9353 unsigned GRLen = Subtarget.getGRLen();
9354 AtomicOrdering FailOrd = CI->getFailureOrdering();
9355 Value *FailureOrdering =
9356 Builder.getIntN(N: Subtarget.getGRLen(), C: static_cast<uint64_t>(FailOrd));
9357 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
9358 if (GRLen == 64) {
9359 CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
9360 CmpVal = Builder.CreateSExt(V: CmpVal, DestTy: Builder.getInt64Ty());
9361 NewVal = Builder.CreateSExt(V: NewVal, DestTy: Builder.getInt64Ty());
9362 Mask = Builder.CreateSExt(V: Mask, DestTy: Builder.getInt64Ty());
9363 }
9364 Type *Tys[] = {AlignedAddr->getType()};
9365 Value *Result = Builder.CreateIntrinsic(
9366 ID: CmpXchgIntrID, Types: Tys, Args: {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
9367 if (GRLen == 64)
9368 Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty());
9369 return Result;
9370}
9371
// Emit the IR that implements a masked (sub-word) atomicrmw by calling the
// matching loongarch_masked_atomicrmw_* intrinsic on the containing aligned
// word, with peepholes for xchg of constant 0/-1.
Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
  // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
  // mask, as this produces better code than the LL/SC loop emitted by
  // int_loongarch_masked_atomicrmw_xchg.
  if (AI->getOperation() == AtomicRMWInst::Xchg &&
      isa<ConstantInt>(Val: AI->getValOperand())) {
    ConstantInt *CVal = cast<ConstantInt>(Val: AI->getValOperand());
    // xchg 0: clear the masked bits -> AND with ~Mask.
    if (CVal->isZero())
      return Builder.CreateAtomicRMW(Op: AtomicRMWInst::And, Ptr: AlignedAddr,
                                     Val: Builder.CreateNot(V: Mask, Name: "Inv_Mask"),
                                     Align: AI->getAlign(), Ordering: Ord);
    // xchg -1: set the masked bits -> OR with Mask.
    if (CVal->isMinusOne())
      return Builder.CreateAtomicRMW(Op: AtomicRMWInst::Or, Ptr: AlignedAddr, Val: Mask,
                                     Align: AI->getAlign(), Ordering: Ord);
  }

  unsigned GRLen = Subtarget.getGRLen();
  // The ordering is passed to the intrinsic as a GRLen-wide integer operand.
  Value *Ordering =
      Builder.getIntN(N: GRLen, C: static_cast<uint64_t>(AI->getOrdering()));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *LlwOpScwLoop = Intrinsic::getOrInsertDeclaration(
      M: AI->getModule(),
      id: getIntrinsicForMaskedAtomicRMWBinOp(GRLen, BinOp: AI->getOperation()), Tys);

  // The i64 intrinsic variants take 64-bit operands; sign-extend the inputs.
  if (GRLen == 64) {
    Incr = Builder.CreateSExt(V: Incr, DestTy: Builder.getInt64Ty());
    Mask = Builder.CreateSExt(V: Mask, DestTy: Builder.getInt64Ty());
    ShiftAmt = Builder.CreateSExt(V: ShiftAmt, DestTy: Builder.getInt64Ty());
  }

  Value *Result;

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
  // sign-extend.
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(Ty: AI->getValOperand()->getType());
    Value *SextShamt =
        Builder.CreateSub(LHS: Builder.getIntN(N: GRLen, C: GRLen - ValWidth), RHS: ShiftAmt);
    Result = Builder.CreateCall(Callee: LlwOpScwLoop,
                                Args: {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(Callee: LlwOpScwLoop, Args: {AlignedAddr, Incr, Mask, Ordering});
  }

  // Truncate the GRLen-wide intrinsic result back to i32.
  if (GRLen == 64)
    Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty());
  return Result;
}
9430
9431bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
9432 const MachineFunction &MF, EVT VT) const {
9433 VT = VT.getScalarType();
9434
9435 if (!VT.isSimple())
9436 return false;
9437
9438 switch (VT.getSimpleVT().SimpleTy) {
9439 case MVT::f32:
9440 case MVT::f64:
9441 return true;
9442 default:
9443 break;
9444 }
9445
9446 return false;
9447}
9448
// Register that carries the exception object pointer into a landing pad
// (R4, the first argument register in the LoongArch ABI).
Register LoongArchTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return LoongArch::R4;
}
9453
// Register that carries the exception type-selector into a landing pad (R5).
Register LoongArchTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return LoongArch::R5;
}
9458
9459//===----------------------------------------------------------------------===//
9460// Target Optimization Hooks
9461//===----------------------------------------------------------------------===//
9462
9463static int getEstimateRefinementSteps(EVT VT,
9464 const LoongArchSubtarget &Subtarget) {
9465 // Feature FRECIPE instrucions relative accuracy is 2^-14.
9466 // IEEE float has 23 digits and double has 52 digits.
9467 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
9468 return RefinementSteps;
9469}
9470
9471SDValue LoongArchTargetLowering::getSqrtEstimate(SDValue Operand,
9472 SelectionDAG &DAG, int Enabled,
9473 int &RefinementSteps,
9474 bool &UseOneConstNR,
9475 bool Reciprocal) const {
9476 if (Subtarget.hasFrecipe()) {
9477 SDLoc DL(Operand);
9478 EVT VT = Operand.getValueType();
9479
9480 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
9481 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
9482 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
9483 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
9484 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
9485
9486 if (RefinementSteps == ReciprocalEstimate::Unspecified)
9487 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9488
9489 SDValue Estimate = DAG.getNode(Opcode: LoongArchISD::FRSQRTE, DL, VT, Operand);
9490 if (Reciprocal)
9491 Estimate = DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: Operand, N2: Estimate);
9492
9493 return Estimate;
9494 }
9495 }
9496
9497 return SDValue();
9498}
9499
9500SDValue LoongArchTargetLowering::getRecipEstimate(SDValue Operand,
9501 SelectionDAG &DAG,
9502 int Enabled,
9503 int &RefinementSteps) const {
9504 if (Subtarget.hasFrecipe()) {
9505 SDLoc DL(Operand);
9506 EVT VT = Operand.getValueType();
9507
9508 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
9509 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
9510 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
9511 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
9512 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
9513
9514 if (RefinementSteps == ReciprocalEstimate::Unspecified)
9515 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9516
9517 return DAG.getNode(Opcode: LoongArchISD::FRECIPE, DL, VT, Operand);
9518 }
9519 }
9520
9521 return SDValue();
9522}
9523
9524//===----------------------------------------------------------------------===//
9525// LoongArch Inline Assembly Support
9526//===----------------------------------------------------------------------===//
9527
9528LoongArchTargetLowering::ConstraintType
9529LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
9530 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
9531 //
9532 // 'f': A floating-point register (if available).
9533 // 'k': A memory operand whose address is formed by a base register and
9534 // (optionally scaled) index register.
9535 // 'l': A signed 16-bit constant.
9536 // 'm': A memory operand whose address is formed by a base register and
9537 // offset that is suitable for use in instructions with the same
9538 // addressing mode as st.w and ld.w.
9539 // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg
9540 // instruction)
9541 // 'I': A signed 12-bit constant (for arithmetic instructions).
9542 // 'J': Integer zero.
9543 // 'K': An unsigned 12-bit constant (for logic instructions).
9544 // "ZB": An address that is held in a general-purpose register. The offset is
9545 // zero.
9546 // "ZC": A memory operand whose address is formed by a base register and
9547 // offset that is suitable for use in instructions with the same
9548 // addressing mode as ll.w and sc.w.
9549 if (Constraint.size() == 1) {
9550 switch (Constraint[0]) {
9551 default:
9552 break;
9553 case 'f':
9554 case 'q':
9555 return C_RegisterClass;
9556 case 'l':
9557 case 'I':
9558 case 'J':
9559 case 'K':
9560 return C_Immediate;
9561 case 'k':
9562 return C_Memory;
9563 }
9564 }
9565
9566 if (Constraint == "ZC" || Constraint == "ZB")
9567 return C_Memory;
9568
9569 // 'm' is handled here.
9570 return TargetLowering::getConstraintType(Constraint);
9571}
9572
9573InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
9574 StringRef ConstraintCode) const {
9575 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
9576 .Case(S: "k", Value: InlineAsm::ConstraintCode::k)
9577 .Case(S: "ZB", Value: InlineAsm::ConstraintCode::ZB)
9578 .Case(S: "ZC", Value: InlineAsm::ConstraintCode::ZC)
9579 .Default(Value: TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
9580}
9581
// Resolve an inline-asm register constraint (single-letter class constraints
// and explicit "{$rN}"/"{$fN}"/"{$vrN}"/"{$xrN}" register names) to a
// (register, register class) pair.
std::pair<unsigned, const TargetRegisterClass *>
LoongArchTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a LoongArch
  // register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      // TODO: Support fixed vectors up to GRLen?
      if (VT.isVector())
        break;
      return std::make_pair(x: 0U, y: &LoongArch::GPRRegClass);
    case 'q':
      // 'q': any GPR except $r0/$r1 (see getConstraintType).
      return std::make_pair(x: 0U, y: &LoongArch::GPRNoR0R1RegClass);
    case 'f':
      // 'f': pick the narrowest FP/vector class that can legally hold VT.
      if (Subtarget.hasBasicF() && VT == MVT::f32)
        return std::make_pair(x: 0U, y: &LoongArch::FPR32RegClass);
      if (Subtarget.hasBasicD() && VT == MVT::f64)
        return std::make_pair(x: 0U, y: &LoongArch::FPR64RegClass);
      if (Subtarget.hasExtLSX() &&
          TRI->isTypeLegalForClass(RC: LoongArch::LSX128RegClass, T: VT))
        return std::make_pair(x: 0U, y: &LoongArch::LSX128RegClass);
      if (Subtarget.hasExtLASX() &&
          TRI->isTypeLegalForClass(RC: LoongArch::LASX256RegClass, T: VT))
        return std::make_pair(x: 0U, y: &LoongArch::LASX256RegClass);
      break;
    default:
      break;
    }
  }

  // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
  // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
  // constraints while the official register name is prefixed with a '$'. So we
  // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
  // before it being parsed. And TargetLowering::getRegForInlineAsmConstraint is
  // case insensitive, so no need to convert the constraint to upper case here.
  //
  // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
  // decode the usage of register name aliases into their official names. And
  // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
  // official register names.
  if (Constraint.starts_with(Prefix: "{$r") || Constraint.starts_with(Prefix: "{$f") ||
      Constraint.starts_with(Prefix: "{$vr") || Constraint.starts_with(Prefix: "{$xr")) {
    bool IsFP = Constraint[2] == 'f';
    std::pair<StringRef, StringRef> Temp = Constraint.split(Separator: '$');
    std::pair<unsigned, const TargetRegisterClass *> R;
    R = TargetLowering::getRegForInlineAsmConstraint(
        TRI, Constraint: join_items(Separator: "", Items&: Temp.first, Items&: Temp.second), VT);
    // Match those names to the widest floating point register type available.
    if (IsFP) {
      unsigned RegNo = R.first;
      if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
        if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
          // Translate the 32-bit FPR number to the corresponding 64-bit FPR.
          unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
          return std::make_pair(x&: DReg, y: &LoongArch::FPR64RegClass);
        }
      }
    }
    return R;
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
9646
9647void LoongArchTargetLowering::LowerAsmOperandForConstraint(
9648 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
9649 SelectionDAG &DAG) const {
9650 // Currently only support length 1 constraints.
9651 if (Constraint.size() == 1) {
9652 switch (Constraint[0]) {
9653 case 'l':
9654 // Validate & create a 16-bit signed immediate operand.
9655 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) {
9656 uint64_t CVal = C->getSExtValue();
9657 if (isInt<16>(x: CVal))
9658 Ops.push_back(x: DAG.getSignedTargetConstant(Val: CVal, DL: SDLoc(Op),
9659 VT: Subtarget.getGRLenVT()));
9660 }
9661 return;
9662 case 'I':
9663 // Validate & create a 12-bit signed immediate operand.
9664 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) {
9665 uint64_t CVal = C->getSExtValue();
9666 if (isInt<12>(x: CVal))
9667 Ops.push_back(x: DAG.getSignedTargetConstant(Val: CVal, DL: SDLoc(Op),
9668 VT: Subtarget.getGRLenVT()));
9669 }
9670 return;
9671 case 'J':
9672 // Validate & create an integer zero operand.
9673 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op))
9674 if (C->getZExtValue() == 0)
9675 Ops.push_back(
9676 x: DAG.getTargetConstant(Val: 0, DL: SDLoc(Op), VT: Subtarget.getGRLenVT()));
9677 return;
9678 case 'K':
9679 // Validate & create a 12-bit unsigned immediate operand.
9680 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) {
9681 uint64_t CVal = C->getZExtValue();
9682 if (isUInt<12>(x: CVal))
9683 Ops.push_back(
9684 x: DAG.getTargetConstant(Val: CVal, DL: SDLoc(Op), VT: Subtarget.getGRLenVT()));
9685 }
9686 return;
9687 default:
9688 break;
9689 }
9690 }
9691 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
9692}
9693
9694#define GET_REGISTER_MATCHER
9695#include "LoongArchGenAsmMatcher.inc"
9696
// Look up a named register (for e.g. read_register/write_register and named
// globals). Strips the leading '$' prefix before matching, and only allows
// registers that are reserved by the target.
Register
LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                           const MachineFunction &MF) const {
  // split('$') yields (text-before-$, text-after-$); the official name uses
  // the part after the '$'.
  std::pair<StringRef, StringRef> Name = StringRef(RegName).split(Separator: '$');
  std::string NewRegName = Name.second.str();
  Register Reg = MatchRegisterAltName(Name: NewRegName);
  if (!Reg)
    Reg = MatchRegisterName(Name: NewRegName);
  if (!Reg)
    return Reg;
  // Only reserved registers may be accessed by name; anything allocatable
  // could be clobbered by register allocation.
  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
  if (!ReservedRegs.test(Idx: Reg))
    report_fatal_error(reason: Twine("Trying to obtain non-reserved register \"" +
                                StringRef(RegName) + "\"."));
  return Reg;
}
9713
// Returns true when a multiplication by constant C is profitably decomposed
// into shift/add-style instructions (SLLI/ADD/SUB/ALSL) instead of
// materializing the constant and using MUL.
bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
                                                     EVT VT, SDValue C) const {
  // TODO: Support vectors.
  if (!VT.isScalarInteger())
    return false;

  // Omit the optimization if the data size exceeds GRLen.
  if (VT.getSizeInBits() > Subtarget.getGRLen())
    return false;

  if (auto *ConstNode = dyn_cast<ConstantSDNode>(Val: C.getNode())) {
    const APInt &Imm = ConstNode->getAPIntValue();
    // Break MUL into (SLLI + ADD/SUB) or ALSL.
    if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
        (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
      return true;
    // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
    if (ConstNode->hasOneUse() &&
        ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
         (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
      return true;
    // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
    // in which the immediate has two set bits. Or Break (MUL x, imm)
    // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
    // equals to (1 << s0) - (1 << s1).
    if (ConstNode->hasOneUse() && !(Imm.sge(RHS: -2048) && Imm.sle(RHS: 4095))) {
      unsigned Shifts = Imm.countr_zero();
      // Reject immediates which can be composed via a single LUI.
      if (Shifts >= 12)
        return false;
      // Reject multiplications can be optimized to
      // (SLLI (ALSL x, x, 1/2/3/4), s).
      APInt ImmPop = Imm.ashr(ShiftAmt: Shifts);
      if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
        return false;
      // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
      // since it needs one more instruction than other 3 cases.
      APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
      if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
          (ImmSmall - Imm).isPowerOf2())
        return true;
    }
  }

  return false;
}
9760
9761bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
9762 const AddrMode &AM,
9763 Type *Ty, unsigned AS,
9764 Instruction *I) const {
9765 // LoongArch has four basic addressing modes:
9766 // 1. reg
9767 // 2. reg + 12-bit signed offset
9768 // 3. reg + 14-bit signed offset left-shifted by 2
9769 // 4. reg1 + reg2
9770 // TODO: Add more checks after support vector extension.
9771
9772 // No global is ever allowed as a base.
9773 if (AM.BaseGV)
9774 return false;
9775
9776 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
9777 // with `UAL` feature.
9778 if (!isInt<12>(x: AM.BaseOffs) &&
9779 !(isShiftedInt<14, 2>(x: AM.BaseOffs) && Subtarget.hasUAL()))
9780 return false;
9781
9782 switch (AM.Scale) {
9783 case 0:
9784 // "r+i" or just "i", depending on HasBaseReg.
9785 break;
9786 case 1:
9787 // "r+r+i" is not allowed.
9788 if (AM.HasBaseReg && AM.BaseOffs)
9789 return false;
9790 // Otherwise we have "r+r" or "r+i".
9791 break;
9792 case 2:
9793 // "2*r+r" or "2*r+i" is not allowed.
9794 if (AM.HasBaseReg || AM.BaseOffs)
9795 return false;
9796 // Allow "2*r" as "r+r".
9797 break;
9798 default:
9799 return false;
9800 }
9801
9802 return true;
9803}
9804
// A compare immediate is legal when it fits a signed 12-bit field.
bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<12>(x: Imm);
}
9808
// An add immediate is legal when it fits a signed 12-bit field.
bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<12>(x: Imm);
}
9812
9813bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
9814 // Zexts are free if they can be combined with a load.
9815 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
9816 // poorly with type legalization of compares preferring sext.
9817 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
9818 EVT MemVT = LD->getMemoryVT();
9819 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
9820 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
9821 LD->getExtensionType() == ISD::ZEXTLOAD))
9822 return true;
9823 }
9824
9825 return TargetLowering::isZExtFree(Val, VT2);
9826}
9827
// On LA64, prefer sign-extending i32 to i64 over zero-extending.
bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,
                                                    EVT DstVT) const {
  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}
9832
// On LA64, i32 constants are preferably materialized sign-extended.
bool LoongArchTargetLowering::signExtendConstant(const ConstantInt *CI) const {
  return Subtarget.is64Bit() && CI->getType()->isIntegerTy(Bitwidth: 32);
}
9836
9837bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
9838 // TODO: Support vectors.
9839 if (Y.getValueType().isVector())
9840 return false;
9841
9842 return !isa<ConstantSDNode>(Val: Y);
9843}
9844
9845ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
9846 // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
9847 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
9848}
9849
9850bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall(
9851 Type *Ty, bool IsSigned) const {
9852 if (Subtarget.is64Bit() && Ty->isIntegerTy(Bitwidth: 32))
9853 return true;
9854
9855 return IsSigned;
9856}
9857
9858bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
9859 // Return false to suppress the unnecessary extensions if the LibCall
9860 // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
9861 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
9862 Type.getSizeInBits() < Subtarget.getGRLen()))
9863 return false;
9864 return true;
9865}
9866
9867// memcpy, and other memory intrinsics, typically tries to use wider load/store
9868// if the source/dest is aligned and the copy size is large enough. We therefore
9869// want to align such objects passed to memory intrinsics.
9870bool LoongArchTargetLowering::shouldAlignPointerArgs(CallInst *CI,
9871 unsigned &MinSize,
9872 Align &PrefAlign) const {
9873 if (!isa<MemIntrinsic>(Val: CI))
9874 return false;
9875
9876 if (Subtarget.is64Bit()) {
9877 MinSize = 8;
9878 PrefAlign = Align(8);
9879 } else {
9880 MinSize = 4;
9881 PrefAlign = Align(4);
9882 }
9883
9884 return true;
9885}
9886
9887TargetLoweringBase::LegalizeTypeAction
9888LoongArchTargetLowering::getPreferredVectorAction(MVT VT) const {
9889 if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
9890 VT.getVectorElementType() != MVT::i1)
9891 return TypeWidenVector;
9892
9893 return TargetLoweringBase::getPreferredVectorAction(VT);
9894}
9895
// Custom register-part splitting for argument passing: f16/bf16 values
// passed in an f32 register are NaN-boxed (upper 16 bits set to all-ones).
// Returns true if the custom split was performed, false to use the default.
bool LoongArchTargetLowering::splitValueIntoRegisterParts(
    SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
    unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
  bool IsABIRegCopy = CC.has_value();
  EVT ValueVT = Val.getValueType();

  if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
      PartVT == MVT::f32) {
    // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
    // nan, and cast to f32.
    Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::i16, Operand: Val);
    Val = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i32, Operand: Val);
    Val = DAG.getNode(Opcode: ISD::OR, DL, VT: MVT::i32, N1: Val,
                      N2: DAG.getConstant(Val: 0xFFFF0000, DL, VT: MVT::i32));
    Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::f32, Operand: Val);
    Parts[0] = Val;
    return true;
  }

  return false;
}
9917
// Inverse of splitValueIntoRegisterParts: recover an f16/bf16 value from the
// f32 register it was passed in. Returns an empty SDValue to use the default
// joining logic.
SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
    SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
    MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
  bool IsABIRegCopy = CC.has_value();

  if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
      PartVT == MVT::f32) {
    SDValue Val = Parts[0];

    // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
    Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::i32, Operand: Val);
    Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i16, Operand: Val);
    Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ValueVT, Operand: Val);
    return Val;
  }

  return SDValue();
}
9936
9937MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
9938 CallingConv::ID CC,
9939 EVT VT) const {
9940 // Use f32 to pass f16.
9941 if (VT == MVT::f16 && Subtarget.hasBasicF())
9942 return MVT::f32;
9943
9944 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
9945}
9946
9947unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
9948 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
9949 // Use f32 to pass f16.
9950 if (VT == MVT::f16 && Subtarget.hasBasicF())
9951 return 1;
9952
9953 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
9954}
9955
// Target hook for demanded-bits simplification of LoongArch-specific nodes.
// Currently handles [X]VMSKLTZ, whose result packs one sign bit per source
// vector element into the low bits of the scalar result.
bool LoongArchTargetLowering::SimplifyDemandedBitsForTargetNode(
    SDValue Op, const APInt &OriginalDemandedBits,
    const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
    unsigned Depth) const {
  EVT VT = Op.getValueType();
  unsigned BitWidth = OriginalDemandedBits.getBitWidth();
  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default:
    break;
  case LoongArchISD::VMSKLTZ:
  case LoongArchISD::XVMSKLTZ: {
    SDValue Src = Op.getOperand(i: 0);
    MVT SrcVT = Src.getSimpleValueType();
    unsigned SrcBits = SrcVT.getScalarSizeInBits();
    unsigned NumElts = SrcVT.getVectorNumElements();

    // If we don't need the sign bits at all just return zero.
    if (OriginalDemandedBits.countr_zero() >= NumElts)
      return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: 0, DL: SDLoc(Op), VT));

    // Only demand the vector elements of the sign bits we need.
    // Result bit i comes from source element i, so the demanded-bits mask
    // translates directly into a demanded-elements mask.
    APInt KnownUndef, KnownZero;
    APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(width: NumElts);
    if (SimplifyDemandedVectorElts(Op: Src, DemandedEltMask: DemandedElts, KnownUndef, KnownZero,
                                   TLO, Depth: Depth + 1))
      return true;

    Known.Zero = KnownZero.zext(width: BitWidth);
    // Bits above the per-element results are always zero.
    Known.Zero.setHighBits(BitWidth - NumElts);

    // [X]VMSKLTZ only uses the MSB from each vector element.
    KnownBits KnownSrc;
    APInt DemandedSrcBits = APInt::getSignMask(BitWidth: SrcBits);
    if (SimplifyDemandedBits(Op: Src, DemandedBits: DemandedSrcBits, DemandedElts, Known&: KnownSrc, TLO,
                             Depth: Depth + 1))
      return true;

    // If the sign bit of every demanded element is known, so are the
    // corresponding low result bits.
    if (KnownSrc.One[SrcBits - 1])
      Known.One.setLowBits(NumElts);
    else if (KnownSrc.Zero[SrcBits - 1])
      Known.Zero.setLowBits(NumElts);

    // Attempt to avoid multi-use ops if we don't need anything from it.
    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
            Op: Src, DemandedBits: DemandedSrcBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
      return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: SDLoc(Op), VT, Operand: NewSrc));
    return false;
  }
  }

  return TargetLowering::SimplifyDemandedBitsForTargetNode(
      Op, DemandedBits: OriginalDemandedBits, DemandedElts: OriginalDemandedElts, Known, TLO, Depth);
}
10010
10011bool LoongArchTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
10012 unsigned Opc = VecOp.getOpcode();
10013
10014 // Assume target opcodes can't be scalarized.
10015 // TODO - do we have any exceptions?
10016 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opcode: Opc))
10017 return false;
10018
10019 // If the vector op is not supported, try to convert to scalar.
10020 EVT VecVT = VecOp.getValueType();
10021 if (!isOperationLegalOrCustomOrPromote(Op: Opc, VT: VecVT))
10022 return true;
10023
10024 // If the vector op is supported, but the scalar op is not, the transform may
10025 // not be worthwhile.
10026 EVT ScalarVT = VecVT.getScalarType();
10027 return isOperationLegalOrCustomOrPromote(Op: Opc, VT: ScalarVT);
10028}
10029
10030bool LoongArchTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
10031 unsigned Index) const {
10032 if (!isOperationLegalOrCustom(Op: ISD::EXTRACT_SUBVECTOR, VT: ResVT))
10033 return false;
10034
10035 // Extract a 128-bit subvector from index 0 of a 256-bit vector is free.
10036 return Index == 0;
10037}
10038
10039bool LoongArchTargetLowering::isExtractVecEltCheap(EVT VT,
10040 unsigned Index) const {
10041 EVT EltVT = VT.getScalarType();
10042
10043 // Extract a scalar FP value from index 0 of a vector is free.
10044 return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
10045}
10046