1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "LoongArchISelLowering.h"
15#include "LoongArch.h"
16#include "LoongArchMachineFunctionInfo.h"
17#include "LoongArchRegisterInfo.h"
18#include "LoongArchSubtarget.h"
19#include "LoongArchTargetMachine.h"
20#include "MCTargetDesc/LoongArchBaseInfo.h"
21#include "MCTargetDesc/LoongArchMCTargetDesc.h"
22#include "llvm/ADT/Statistic.h"
23#include "llvm/ADT/StringExtras.h"
24#include "llvm/CodeGen/ISDOpcodes.h"
25#include "llvm/CodeGen/RuntimeLibcallUtil.h"
26#include "llvm/CodeGen/SelectionDAGNodes.h"
27#include "llvm/IR/IRBuilder.h"
28#include "llvm/IR/IntrinsicsLoongArch.h"
29#include "llvm/Support/CodeGen.h"
30#include "llvm/Support/Debug.h"
31#include "llvm/Support/ErrorHandling.h"
32#include "llvm/Support/KnownBits.h"
33#include "llvm/Support/MathExtras.h"
34
35using namespace llvm;
36
37#define DEBUG_TYPE "loongarch-isel-lowering"
38
39STATISTIC(NumTailCalls, "Number of tail calls");
40
41static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
42 cl::desc("Trap on integer division by zero."),
43 cl::init(Val: false));
44
45LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
46 const LoongArchSubtarget &STI)
47 : TargetLowering(TM), Subtarget(STI) {
48
49 MVT GRLenVT = Subtarget.getGRLenVT();
50
51 // Set up the register classes.
52
53 addRegisterClass(VT: GRLenVT, RC: &LoongArch::GPRRegClass);
54 if (Subtarget.hasBasicF())
55 addRegisterClass(VT: MVT::f32, RC: &LoongArch::FPR32RegClass);
56 if (Subtarget.hasBasicD())
57 addRegisterClass(VT: MVT::f64, RC: &LoongArch::FPR64RegClass);
58
59 static const MVT::SimpleValueType LSXVTs[] = {
60 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
61 static const MVT::SimpleValueType LASXVTs[] = {
62 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
63
64 if (Subtarget.hasExtLSX())
65 for (MVT VT : LSXVTs)
66 addRegisterClass(VT, RC: &LoongArch::LSX128RegClass);
67
68 if (Subtarget.hasExtLASX())
69 for (MVT VT : LASXVTs)
70 addRegisterClass(VT, RC: &LoongArch::LASX256RegClass);
71
72 // Set operations for LA32 and LA64.
73
74 setLoadExtAction(ExtTypes: {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, ValVT: GRLenVT,
75 MemVT: MVT::i1, Action: Promote);
76
77 setOperationAction(Op: ISD::SHL_PARTS, VT: GRLenVT, Action: Custom);
78 setOperationAction(Op: ISD::SRA_PARTS, VT: GRLenVT, Action: Custom);
79 setOperationAction(Op: ISD::SRL_PARTS, VT: GRLenVT, Action: Custom);
80 setOperationAction(Op: ISD::FP_TO_SINT, VT: GRLenVT, Action: Custom);
81 setOperationAction(Op: ISD::ROTL, VT: GRLenVT, Action: Expand);
82 setOperationAction(Op: ISD::CTPOP, VT: GRLenVT, Action: Expand);
83
84 setOperationAction(Ops: {ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
85 ISD::JumpTable, ISD::GlobalTLSAddress},
86 VT: GRLenVT, Action: Custom);
87
88 setOperationAction(Op: ISD::EH_DWARF_CFA, VT: GRLenVT, Action: Custom);
89
90 setOperationAction(Op: ISD::DYNAMIC_STACKALLOC, VT: GRLenVT, Action: Expand);
91 setOperationAction(Ops: {ISD::STACKSAVE, ISD::STACKRESTORE}, VT: MVT::Other, Action: Expand);
92 setOperationAction(Op: ISD::VASTART, VT: MVT::Other, Action: Custom);
93 setOperationAction(Ops: {ISD::VAARG, ISD::VACOPY, ISD::VAEND}, VT: MVT::Other, Action: Expand);
94
95 setOperationAction(Op: ISD::DEBUGTRAP, VT: MVT::Other, Action: Legal);
96 setOperationAction(Op: ISD::TRAP, VT: MVT::Other, Action: Legal);
97
98 setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::Other, Action: Custom);
99 setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::Other, Action: Custom);
100 setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::Other, Action: Custom);
101
  // Expand bitreverse.i16 with native-width bitrev and shift for now, until we
  // know which of sll and revb.2h is faster.
104 setOperationAction(Op: ISD::BITREVERSE, VT: MVT::i8, Action: Custom);
105 setOperationAction(Op: ISD::BITREVERSE, VT: GRLenVT, Action: Legal);
106
107 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
108 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
109 // and i32 could still be byte-swapped relatively cheaply.
110 setOperationAction(Op: ISD::BSWAP, VT: MVT::i16, Action: Custom);
111
112 setOperationAction(Op: ISD::BR_JT, VT: MVT::Other, Action: Expand);
113 setOperationAction(Op: ISD::BR_CC, VT: GRLenVT, Action: Expand);
114 setOperationAction(Op: ISD::SELECT_CC, VT: GRLenVT, Action: Expand);
115 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::i1, Action: Expand);
116 setOperationAction(Ops: {ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT: GRLenVT, Action: Expand);
117
118 setOperationAction(Op: ISD::FP_TO_UINT, VT: GRLenVT, Action: Custom);
119 setOperationAction(Op: ISD::UINT_TO_FP, VT: GRLenVT, Action: Expand);
120
121 // Set operations for LA64 only.
122
123 if (Subtarget.is64Bit()) {
124 setOperationAction(Op: ISD::ADD, VT: MVT::i32, Action: Custom);
125 setOperationAction(Op: ISD::SUB, VT: MVT::i32, Action: Custom);
126 setOperationAction(Op: ISD::SHL, VT: MVT::i32, Action: Custom);
127 setOperationAction(Op: ISD::SRA, VT: MVT::i32, Action: Custom);
128 setOperationAction(Op: ISD::SRL, VT: MVT::i32, Action: Custom);
129 setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::i32, Action: Custom);
130 setOperationAction(Op: ISD::BITCAST, VT: MVT::i32, Action: Custom);
131 setOperationAction(Op: ISD::ROTR, VT: MVT::i32, Action: Custom);
132 setOperationAction(Op: ISD::ROTL, VT: MVT::i32, Action: Custom);
133 setOperationAction(Op: ISD::CTTZ, VT: MVT::i32, Action: Custom);
134 setOperationAction(Op: ISD::CTLZ, VT: MVT::i32, Action: Custom);
135 setOperationAction(Op: ISD::EH_DWARF_CFA, VT: MVT::i32, Action: Custom);
136 setOperationAction(Op: ISD::READ_REGISTER, VT: MVT::i32, Action: Custom);
137 setOperationAction(Op: ISD::WRITE_REGISTER, VT: MVT::i32, Action: Custom);
138 setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::i32, Action: Custom);
139 setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::i32, Action: Custom);
140 setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::i32, Action: Custom);
141
142 setOperationAction(Op: ISD::BITREVERSE, VT: MVT::i32, Action: Custom);
143 setOperationAction(Op: ISD::BSWAP, VT: MVT::i32, Action: Custom);
144 setOperationAction(Ops: {ISD::UDIV, ISD::UREM}, VT: MVT::i32, Action: Custom);
145 }
146
147 // Set operations for LA32 only.
148
149 if (!Subtarget.is64Bit()) {
150 setOperationAction(Op: ISD::READ_REGISTER, VT: MVT::i64, Action: Custom);
151 setOperationAction(Op: ISD::WRITE_REGISTER, VT: MVT::i64, Action: Custom);
152 setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::i64, Action: Custom);
153 setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::i64, Action: Custom);
154 setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::i64, Action: Custom);
155 }
156
157 setOperationAction(Op: ISD::ATOMIC_FENCE, VT: MVT::Other, Action: Custom);
158
159 static const ISD::CondCode FPCCToExpand[] = {
160 ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
161 ISD::SETGE, ISD::SETNE, ISD::SETGT};
162
163 // Set operations for 'F' feature.
164
165 if (Subtarget.hasBasicF()) {
166 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f32, MemVT: MVT::f16, Action: Expand);
167 setTruncStoreAction(ValVT: MVT::f32, MemVT: MVT::f16, Action: Expand);
168 setCondCodeAction(CCs: FPCCToExpand, VT: MVT::f32, Action: Expand);
169
170 setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f32, Action: Expand);
171 setOperationAction(Op: ISD::BR_CC, VT: MVT::f32, Action: Expand);
172 setOperationAction(Op: ISD::FMA, VT: MVT::f32, Action: Legal);
173 setOperationAction(Op: ISD::FMINNUM_IEEE, VT: MVT::f32, Action: Legal);
174 setOperationAction(Op: ISD::FMAXNUM_IEEE, VT: MVT::f32, Action: Legal);
175 setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::f32, Action: Legal);
176 setOperationAction(Op: ISD::STRICT_FSETCC, VT: MVT::f32, Action: Legal);
177 setOperationAction(Op: ISD::IS_FPCLASS, VT: MVT::f32, Action: Legal);
178 setOperationAction(Op: ISD::FSIN, VT: MVT::f32, Action: Expand);
179 setOperationAction(Op: ISD::FCOS, VT: MVT::f32, Action: Expand);
180 setOperationAction(Op: ISD::FSINCOS, VT: MVT::f32, Action: Expand);
181 setOperationAction(Op: ISD::FPOW, VT: MVT::f32, Action: Expand);
182 setOperationAction(Op: ISD::FREM, VT: MVT::f32, Action: Expand);
183 setOperationAction(Op: ISD::FP16_TO_FP, VT: MVT::f32, Action: Expand);
184 setOperationAction(Op: ISD::FP_TO_FP16, VT: MVT::f32, Action: Expand);
185
186 if (Subtarget.is64Bit())
187 setOperationAction(Op: ISD::FRINT, VT: MVT::f32, Action: Legal);
188
189 if (!Subtarget.hasBasicD()) {
190 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i32, Action: Custom);
191 if (Subtarget.is64Bit()) {
192 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i64, Action: Custom);
193 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i64, Action: Custom);
194 }
195 }
196 }
197
198 // Set operations for 'D' feature.
199
200 if (Subtarget.hasBasicD()) {
201 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f64, MemVT: MVT::f16, Action: Expand);
202 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f64, MemVT: MVT::f32, Action: Expand);
203 setTruncStoreAction(ValVT: MVT::f64, MemVT: MVT::f16, Action: Expand);
204 setTruncStoreAction(ValVT: MVT::f64, MemVT: MVT::f32, Action: Expand);
205 setCondCodeAction(CCs: FPCCToExpand, VT: MVT::f64, Action: Expand);
206
207 setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f64, Action: Expand);
208 setOperationAction(Op: ISD::BR_CC, VT: MVT::f64, Action: Expand);
209 setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::f64, Action: Legal);
210 setOperationAction(Op: ISD::STRICT_FSETCC, VT: MVT::f64, Action: Legal);
211 setOperationAction(Op: ISD::FMA, VT: MVT::f64, Action: Legal);
212 setOperationAction(Op: ISD::FMINNUM_IEEE, VT: MVT::f64, Action: Legal);
213 setOperationAction(Op: ISD::FMAXNUM_IEEE, VT: MVT::f64, Action: Legal);
214 setOperationAction(Op: ISD::IS_FPCLASS, VT: MVT::f64, Action: Legal);
215 setOperationAction(Op: ISD::FSIN, VT: MVT::f64, Action: Expand);
216 setOperationAction(Op: ISD::FCOS, VT: MVT::f64, Action: Expand);
217 setOperationAction(Op: ISD::FSINCOS, VT: MVT::f64, Action: Expand);
218 setOperationAction(Op: ISD::FPOW, VT: MVT::f64, Action: Expand);
219 setOperationAction(Op: ISD::FREM, VT: MVT::f64, Action: Expand);
220 setOperationAction(Op: ISD::FP16_TO_FP, VT: MVT::f64, Action: Expand);
221 setOperationAction(Op: ISD::FP_TO_FP16, VT: MVT::f64, Action: Expand);
222
223 if (Subtarget.is64Bit())
224 setOperationAction(Op: ISD::FRINT, VT: MVT::f64, Action: Legal);
225 }
226
227 // Set operations for 'LSX' feature.
228
229 if (Subtarget.hasExtLSX()) {
230 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
231 // Expand all truncating stores and extending loads.
232 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
233 setTruncStoreAction(ValVT: VT, MemVT: InnerVT, Action: Expand);
234 setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand);
235 setLoadExtAction(ExtType: ISD::ZEXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand);
236 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand);
237 }
238 // By default everything must be expanded. Then we will selectively turn
239 // on ones that can be effectively codegen'd.
240 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
241 setOperationAction(Op, VT, Action: Expand);
242 }
243
244 for (MVT VT : LSXVTs) {
245 setOperationAction(Ops: {ISD::LOAD, ISD::STORE}, VT, Action: Legal);
246 setOperationAction(Op: ISD::BITCAST, VT, Action: Legal);
247 setOperationAction(Op: ISD::UNDEF, VT, Action: Legal);
248
249 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT, Action: Custom);
250 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT, Action: Legal);
251 setOperationAction(Op: ISD::BUILD_VECTOR, VT, Action: Custom);
252
253 setOperationAction(Op: ISD::SETCC, VT, Action: Legal);
254 setOperationAction(Op: ISD::VSELECT, VT, Action: Legal);
255 setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT, Action: Custom);
256 }
257 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
258 setOperationAction(Ops: {ISD::ADD, ISD::SUB}, VT, Action: Legal);
259 setOperationAction(Ops: {ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
260 Action: Legal);
261 setOperationAction(Ops: {ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
262 VT, Action: Legal);
263 setOperationAction(Ops: {ISD::AND, ISD::OR, ISD::XOR}, VT, Action: Legal);
264 setOperationAction(Ops: {ISD::SHL, ISD::SRA, ISD::SRL}, VT, Action: Legal);
265 setOperationAction(Ops: {ISD::CTPOP, ISD::CTLZ}, VT, Action: Legal);
266 setOperationAction(Ops: {ISD::MULHS, ISD::MULHU}, VT, Action: Legal);
267 setCondCodeAction(
268 CCs: {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
269 Action: Expand);
270 }
271 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
272 setOperationAction(Ops: {ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Action: Legal);
273 setOperationAction(Ops: {ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Action: Legal);
274 }
275 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
276 setOperationAction(Ops: {ISD::FADD, ISD::FSUB}, VT, Action: Legal);
277 setOperationAction(Ops: {ISD::FMUL, ISD::FDIV}, VT, Action: Legal);
278 setOperationAction(Op: ISD::FMA, VT, Action: Legal);
279 setOperationAction(Op: ISD::FSQRT, VT, Action: Legal);
280 setOperationAction(Op: ISD::FNEG, VT, Action: Legal);
281 setCondCodeAction(CCs: {ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
282 ISD::SETUGE, ISD::SETUGT},
283 VT, Action: Expand);
284 }
285 }
286
287 // Set operations for 'LASX' feature.
288
289 if (Subtarget.hasExtLASX()) {
290 for (MVT VT : LASXVTs) {
291 setOperationAction(Ops: {ISD::LOAD, ISD::STORE}, VT, Action: Legal);
292 setOperationAction(Op: ISD::BITCAST, VT, Action: Legal);
293 setOperationAction(Op: ISD::UNDEF, VT, Action: Legal);
294
295 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT, Action: Custom);
296 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT, Action: Custom);
297 setOperationAction(Op: ISD::BUILD_VECTOR, VT, Action: Custom);
298
299 setOperationAction(Op: ISD::SETCC, VT, Action: Legal);
300 setOperationAction(Op: ISD::VSELECT, VT, Action: Legal);
301 setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT, Action: Custom);
302 }
303 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
304 setOperationAction(Ops: {ISD::ADD, ISD::SUB}, VT, Action: Legal);
305 setOperationAction(Ops: {ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
306 Action: Legal);
307 setOperationAction(Ops: {ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
308 VT, Action: Legal);
309 setOperationAction(Ops: {ISD::AND, ISD::OR, ISD::XOR}, VT, Action: Legal);
310 setOperationAction(Ops: {ISD::SHL, ISD::SRA, ISD::SRL}, VT, Action: Legal);
311 setOperationAction(Ops: {ISD::CTPOP, ISD::CTLZ}, VT, Action: Legal);
312 setOperationAction(Ops: {ISD::MULHS, ISD::MULHU}, VT, Action: Legal);
313 setCondCodeAction(
314 CCs: {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
315 Action: Expand);
316 }
317 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
318 setOperationAction(Ops: {ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Action: Legal);
319 setOperationAction(Ops: {ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Action: Legal);
320 }
321 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
322 setOperationAction(Ops: {ISD::FADD, ISD::FSUB}, VT, Action: Legal);
323 setOperationAction(Ops: {ISD::FMUL, ISD::FDIV}, VT, Action: Legal);
324 setOperationAction(Op: ISD::FMA, VT, Action: Legal);
325 setOperationAction(Op: ISD::FSQRT, VT, Action: Legal);
326 setOperationAction(Op: ISD::FNEG, VT, Action: Legal);
327 setCondCodeAction(CCs: {ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
328 ISD::SETUGE, ISD::SETUGT},
329 VT, Action: Expand);
330 }
331 }
332
333 // Set DAG combine for LA32 and LA64.
334
335 setTargetDAGCombine(ISD::AND);
336 setTargetDAGCombine(ISD::OR);
337 setTargetDAGCombine(ISD::SRL);
338 setTargetDAGCombine(ISD::SETCC);
339
340 // Set DAG combine for 'LSX' feature.
341
342 if (Subtarget.hasExtLSX())
343 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
344
345 // Compute derived properties from the register classes.
346 computeRegisterProperties(TRI: Subtarget.getRegisterInfo());
347
348 setStackPointerRegisterToSaveRestore(LoongArch::R3);
349
350 setBooleanContents(ZeroOrOneBooleanContent);
351 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
352
353 setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
354
355 setMinCmpXchgSizeInBits(32);
356
357 // Function alignments.
358 setMinFunctionAlignment(Align(4));
359 // Set preferred alignments.
360 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
361 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
362 setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
363}
364
365bool LoongArchTargetLowering::isOffsetFoldingLegal(
366 const GlobalAddressSDNode *GA) const {
367 // In order to maximise the opportunity for common subexpression elimination,
368 // keep a separate ADD node for the global address offset instead of folding
369 // it in the global address node. Later peephole optimisations may choose to
370 // fold it back in when profitable.
371 return false;
372}
373
374SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
375 SelectionDAG &DAG) const {
376 switch (Op.getOpcode()) {
377 case ISD::ATOMIC_FENCE:
378 return lowerATOMIC_FENCE(Op, DAG);
379 case ISD::EH_DWARF_CFA:
380 return lowerEH_DWARF_CFA(Op, DAG);
381 case ISD::GlobalAddress:
382 return lowerGlobalAddress(Op, DAG);
383 case ISD::GlobalTLSAddress:
384 return lowerGlobalTLSAddress(Op, DAG);
385 case ISD::INTRINSIC_WO_CHAIN:
386 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
387 case ISD::INTRINSIC_W_CHAIN:
388 return lowerINTRINSIC_W_CHAIN(Op, DAG);
389 case ISD::INTRINSIC_VOID:
390 return lowerINTRINSIC_VOID(Op, DAG);
391 case ISD::BlockAddress:
392 return lowerBlockAddress(Op, DAG);
393 case ISD::JumpTable:
394 return lowerJumpTable(Op, DAG);
395 case ISD::SHL_PARTS:
396 return lowerShiftLeftParts(Op, DAG);
397 case ISD::SRA_PARTS:
398 return lowerShiftRightParts(Op, DAG, IsSRA: true);
399 case ISD::SRL_PARTS:
400 return lowerShiftRightParts(Op, DAG, IsSRA: false);
401 case ISD::ConstantPool:
402 return lowerConstantPool(Op, DAG);
403 case ISD::FP_TO_SINT:
404 return lowerFP_TO_SINT(Op, DAG);
405 case ISD::BITCAST:
406 return lowerBITCAST(Op, DAG);
407 case ISD::UINT_TO_FP:
408 return lowerUINT_TO_FP(Op, DAG);
409 case ISD::SINT_TO_FP:
410 return lowerSINT_TO_FP(Op, DAG);
411 case ISD::VASTART:
412 return lowerVASTART(Op, DAG);
413 case ISD::FRAMEADDR:
414 return lowerFRAMEADDR(Op, DAG);
415 case ISD::RETURNADDR:
416 return lowerRETURNADDR(Op, DAG);
417 case ISD::WRITE_REGISTER:
418 return lowerWRITE_REGISTER(Op, DAG);
419 case ISD::INSERT_VECTOR_ELT:
420 return lowerINSERT_VECTOR_ELT(Op, DAG);
421 case ISD::EXTRACT_VECTOR_ELT:
422 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
423 case ISD::BUILD_VECTOR:
424 return lowerBUILD_VECTOR(Op, DAG);
425 case ISD::VECTOR_SHUFFLE:
426 return lowerVECTOR_SHUFFLE(Op, DAG);
427 }
428 return SDValue();
429}
430
431/// Determine whether a range fits a regular pattern of values.
432/// This function accounts for the possibility of jumping over the End iterator.
433template <typename ValType>
434static bool
435fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
436 unsigned CheckStride,
437 typename SmallVectorImpl<ValType>::const_iterator End,
438 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
439 auto &I = Begin;
440
441 while (I != End) {
442 if (*I != -1 && *I != ExpectedIndex)
443 return false;
444 ExpectedIndex += ExpectedIndexStride;
445
446 // Incrementing past End is undefined behaviour so we must increment one
447 // step at a time and check for End at each step.
448 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
449 ; // Empty loop body.
450 }
451 return true;
452}
453
454/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
455///
456/// VREPLVEI performs vector broadcast based on an element specified by an
457/// integer immediate, with its mask being similar to:
458/// <x, x, x, ...>
459/// where x is any valid index.
460///
461/// When undef's appear in the mask they are treated as if they were whatever
462/// value is necessary in order to fit the above form.
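///
/// For example (illustrative), the v4i32 mask <1, 1, 1, 1> broadcasts element
/// 1 of the first operand and is lowered to VREPLVEI with immediate 1.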
463static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask,
464 MVT VT, SDValue V1, SDValue V2,
465 SelectionDAG &DAG) {
466 int SplatIndex = -1;
467 for (const auto &M : Mask) {
468 if (M != -1) {
469 SplatIndex = M;
470 break;
471 }
472 }
473
474 if (SplatIndex == -1)
475 return DAG.getUNDEF(VT);
476
477 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
478 if (fitsRegularPattern<int>(Begin: Mask.begin(), CheckStride: 1, End: Mask.end(), ExpectedIndex: SplatIndex, ExpectedIndexStride: 0)) {
479 APInt Imm(64, SplatIndex);
480 return DAG.getNode(Opcode: LoongArchISD::VREPLVEI, DL, VT, N1: V1,
481 N2: DAG.getConstant(Val: Imm, DL, VT: MVT::i64));
482 }
483
484 return SDValue();
485}
486
487/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
488///
489/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
490/// elements according to a <4 x i2> constant (encoded as an integer immediate).
491///
492/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
493/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
494/// When undef's appear they are treated as if they were whatever value is
495/// necessary in order to fit the above forms.
496///
497/// For example:
498/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
499/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
500/// i32 7, i32 6, i32 5, i32 4>
501/// is lowered to:
502/// (VSHUF4I_H $v0, $v1, 27)
503/// where the 27 comes from:
504/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
505static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
506 MVT VT, SDValue V1, SDValue V2,
507 SelectionDAG &DAG) {
508
  // When the mask has fewer than 4 elements, lower-cost instructions may be
  // used.
510 if (Mask.size() < 4)
511 return SDValue();
512
513 int SubMask[4] = {-1, -1, -1, -1};
514 for (unsigned i = 0; i < 4; ++i) {
515 for (unsigned j = i; j < Mask.size(); j += 4) {
516 int Idx = Mask[j];
517
      // Convert from the vector index to the 4-element subvector index.
      // If an index refers to an element outside of the subvector, give up.
520 if (Idx != -1) {
521 Idx -= 4 * (j / 4);
522 if (Idx < 0 || Idx >= 4)
523 return SDValue();
524 }
525
      // If the mask has an undef, replace it with the current index.
      // Note that it might still be undef if the current index is also undef.
528 if (SubMask[i] == -1)
529 SubMask[i] = Idx;
      // Check that non-undef values are the same as in the mask. If they
      // aren't, give up.
532 else if (Idx != -1 && Idx != SubMask[i])
533 return SDValue();
534 }
535 }
536
  // Calculate the immediate. Replace any remaining undefs with zero.
538 APInt Imm(64, 0);
539 for (int i = 3; i >= 0; --i) {
540 int Idx = SubMask[i];
541
542 if (Idx == -1)
543 Idx = 0;
544
545 Imm <<= 2;
546 Imm |= Idx & 0x3;
547 }
548
549 return DAG.getNode(Opcode: LoongArchISD::VSHUF4I, DL, VT, N1: V1,
550 N2: DAG.getConstant(Val: Imm, DL, VT: MVT::i64));
551}
552
553/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
554///
555/// VPACKEV interleaves the even elements from each vector.
556///
557/// It is possible to lower into VPACKEV when the mask consists of two of the
558/// following forms interleaved:
559/// <0, 2, 4, ...>
560/// <n, n+2, n+4, ...>
561/// where n is the number of elements in the vector.
562/// For example:
563/// <0, 0, 2, 2, 4, 4, ...>
564/// <0, n, 2, n+2, 4, n+4, ...>
565///
566/// When undef's appear in the mask they are treated as if they were whatever
567/// value is necessary in order to fit the above forms.
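///
/// For example (illustrative), with v4i32 operands (n = 4) the mask
/// <0, 4, 2, 6> takes the even elements of both inputs and can be lowered to
/// VPACKEV.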
568static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
569 MVT VT, SDValue V1, SDValue V2,
570 SelectionDAG &DAG) {
571
572 const auto &Begin = Mask.begin();
573 const auto &End = Mask.end();
574 SDValue OriV1 = V1, OriV2 = V2;
575
576 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 2))
577 V1 = OriV1;
578 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2))
579 V1 = OriV2;
580 else
581 return SDValue();
582
583 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 2))
584 V2 = OriV1;
585 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2))
586 V2 = OriV2;
587 else
588 return SDValue();
589
590 return DAG.getNode(Opcode: LoongArchISD::VPACKEV, DL, VT, N1: V2, N2: V1);
591}
592
593/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
594///
595/// VPACKOD interleaves the odd elements from each vector.
596///
597/// It is possible to lower into VPACKOD when the mask consists of two of the
598/// following forms interleaved:
599/// <1, 3, 5, ...>
600/// <n+1, n+3, n+5, ...>
601/// where n is the number of elements in the vector.
602/// For example:
603/// <1, 1, 3, 3, 5, 5, ...>
604/// <1, n+1, 3, n+3, 5, n+5, ...>
605///
606/// When undef's appear in the mask they are treated as if they were whatever
607/// value is necessary in order to fit the above forms.
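///
/// For example (illustrative), with v4i32 operands (n = 4) the mask
/// <1, 5, 3, 7> takes the odd elements of both inputs and can be lowered to
/// VPACKOD.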
608static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
609 MVT VT, SDValue V1, SDValue V2,
610 SelectionDAG &DAG) {
611
612 const auto &Begin = Mask.begin();
613 const auto &End = Mask.end();
614 SDValue OriV1 = V1, OriV2 = V2;
615
616 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 1, ExpectedIndexStride: 2))
617 V1 = OriV1;
618 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2))
619 V1 = OriV2;
620 else
621 return SDValue();
622
623 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 1, ExpectedIndexStride: 2))
624 V2 = OriV1;
625 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2))
626 V2 = OriV2;
627 else
628 return SDValue();
629
630 return DAG.getNode(Opcode: LoongArchISD::VPACKOD, DL, VT, N1: V2, N2: V1);
631}
632
633/// Lower VECTOR_SHUFFLE into VILVH (if possible).
634///
635/// VILVH interleaves consecutive elements from the left (highest-indexed) half
636/// of each vector.
637///
638/// It is possible to lower into VILVH when the mask consists of two of the
639/// following forms interleaved:
640/// <x, x+1, x+2, ...>
641/// <n+x, n+x+1, n+x+2, ...>
642/// where n is the number of elements in the vector and x is half n.
643/// For example:
644/// <x, x, x+1, x+1, x+2, x+2, ...>
645/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
646///
647/// When undef's appear in the mask they are treated as if they were whatever
648/// value is necessary in order to fit the above forms.
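///
/// For example (illustrative), with v4i32 operands (n = 4, x = 2) the mask
/// <2, 6, 3, 7> interleaves the high halves of both inputs and can be lowered
/// to VILVH.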
649static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef<int> Mask,
650 MVT VT, SDValue V1, SDValue V2,
651 SelectionDAG &DAG) {
652
653 const auto &Begin = Mask.begin();
654 const auto &End = Mask.end();
655 unsigned HalfSize = Mask.size() / 2;
656 SDValue OriV1 = V1, OriV2 = V2;
657
658 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1))
659 V1 = OriV1;
660 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 1))
661 V1 = OriV2;
662 else
663 return SDValue();
664
665 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1))
666 V2 = OriV1;
667 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size() + HalfSize,
668 ExpectedIndexStride: 1))
669 V2 = OriV2;
670 else
671 return SDValue();
672
673 return DAG.getNode(Opcode: LoongArchISD::VILVH, DL, VT, N1: V2, N2: V1);
674}
675
676/// Lower VECTOR_SHUFFLE into VILVL (if possible).
677///
678/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
679/// of each vector.
680///
681/// It is possible to lower into VILVL when the mask consists of two of the
682/// following forms interleaved:
683/// <0, 1, 2, ...>
684/// <n, n+1, n+2, ...>
685/// where n is the number of elements in the vector.
686/// For example:
687/// <0, 0, 1, 1, 2, 2, ...>
688/// <0, n, 1, n+1, 2, n+2, ...>
689///
690/// When undef's appear in the mask they are treated as if they were whatever
691/// value is necessary in order to fit the above forms.
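///
/// For example (illustrative), with v4i32 operands (n = 4) the mask
/// <0, 4, 1, 5> interleaves the low halves of both inputs and can be lowered
/// to VILVL.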
692static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask,
693 MVT VT, SDValue V1, SDValue V2,
694 SelectionDAG &DAG) {
695
696 const auto &Begin = Mask.begin();
697 const auto &End = Mask.end();
698 SDValue OriV1 = V1, OriV2 = V2;
699
700 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 1))
701 V1 = OriV1;
702 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 1))
703 V1 = OriV2;
704 else
705 return SDValue();
706
707 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 1))
708 V2 = OriV1;
709 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 1))
710 V2 = OriV2;
711 else
712 return SDValue();
713
714 return DAG.getNode(Opcode: LoongArchISD::VILVL, DL, VT, N1: V2, N2: V1);
715}
716
717/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
718///
719/// VPICKEV copies the even elements of each vector into the result vector.
720///
721/// It is possible to lower into VPICKEV when the mask consists of two of the
722/// following forms concatenated:
723/// <0, 2, 4, ...>
724/// <n, n+2, n+4, ...>
725/// where n is the number of elements in the vector.
726/// For example:
727/// <0, 2, 4, ..., 0, 2, 4, ...>
728/// <0, 2, 4, ..., n, n+2, n+4, ...>
729///
730/// When undef's appear in the mask they are treated as if they were whatever
731/// value is necessary in order to fit the above forms.
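///
/// For example (illustrative), with v4i32 operands (n = 4) the mask
/// <0, 2, 4, 6> concatenates the even elements of both inputs and can be
/// lowered to VPICKEV.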
732static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
733 MVT VT, SDValue V1, SDValue V2,
734 SelectionDAG &DAG) {
735
736 const auto &Begin = Mask.begin();
737 const auto &Mid = Mask.begin() + Mask.size() / 2;
738 const auto &End = Mask.end();
739 SDValue OriV1 = V1, OriV2 = V2;
740
741 if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: 0, ExpectedIndexStride: 2))
742 V1 = OriV1;
743 else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2))
744 V1 = OriV2;
745 else
746 return SDValue();
747
748 if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: 0, ExpectedIndexStride: 2))
749 V2 = OriV1;
750 else if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2))
751 V2 = OriV2;
752
753 else
754 return SDValue();
755
756 return DAG.getNode(Opcode: LoongArchISD::VPICKEV, DL, VT, N1: V2, N2: V1);
757}
758
759/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
760///
761/// VPICKOD copies the odd elements of each vector into the result vector.
762///
763/// It is possible to lower into VPICKOD when the mask consists of two of the
764/// following forms concatenated:
765/// <1, 3, 5, ...>
766/// <n+1, n+3, n+5, ...>
767/// where n is the number of elements in the vector.
768/// For example:
769/// <1, 3, 5, ..., 1, 3, 5, ...>
770/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
771///
772/// When undef's appear in the mask they are treated as if they were whatever
773/// value is necessary in order to fit the above forms.
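///
/// For example (illustrative), with v4i32 operands (n = 4) the mask
/// <1, 3, 5, 7> concatenates the odd elements of both inputs and can be
/// lowered to VPICKOD.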
774static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
775 MVT VT, SDValue V1, SDValue V2,
776 SelectionDAG &DAG) {
777
778 const auto &Begin = Mask.begin();
779 const auto &Mid = Mask.begin() + Mask.size() / 2;
780 const auto &End = Mask.end();
781 SDValue OriV1 = V1, OriV2 = V2;
782
783 if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: 1, ExpectedIndexStride: 2))
784 V1 = OriV1;
785 else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2))
786 V1 = OriV2;
787 else
788 return SDValue();
789
790 if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: 1, ExpectedIndexStride: 2))
791 V2 = OriV1;
792 else if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2))
793 V2 = OriV2;
794 else
795 return SDValue();
796
797 return DAG.getNode(Opcode: LoongArchISD::VPICKOD, DL, VT, N1: V2, N2: V1);
798}
799
800/// Lower VECTOR_SHUFFLE into VSHUF.
801///
802/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
803/// adding it as an operand to the resulting VSHUF.
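///
/// For example (illustrative), a v4i32 shuffle with mask <0, 5, 2, 7> becomes
/// a VSHUF node whose first operand is the <0, 5, 2, 7> mask vector, followed
/// by the two source vectors in swapped order (see the comment in the body).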
804static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
805 MVT VT, SDValue V1, SDValue V2,
806 SelectionDAG &DAG) {
807
808 SmallVector<SDValue, 16> Ops;
809 for (auto M : Mask)
810 Ops.push_back(Elt: DAG.getConstant(Val: M, DL, VT: MVT::i64));
811
812 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
813 SDValue MaskVec = DAG.getBuildVector(VT: MaskVecTy, DL, Ops);
814
  // VECTOR_SHUFFLE concatenates the vectors in an elementwise fashion.
  //   <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
  // VSHUF concatenates the vectors in a bitwise fashion:
  //   <0b00, 0b01> + <0b10, 0b11> ->
  //   0b0100       + 0b1110      -> 0b01001110
  //                                 <0b10, 0b11, 0b00, 0b01>
  // We must therefore swap the operands to get the correct result.
822 return DAG.getNode(Opcode: LoongArchISD::VSHUF, DL, VT, N1: MaskVec, N2: V2, N3: V1);
823}
824
825/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
826///
827/// This routine breaks down the specific type of 128-bit shuffle and
828/// dispatches to the lowering routines accordingly.
829static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
830 SDValue V1, SDValue V2, SelectionDAG &DAG) {
831 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
832 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
833 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
834 "Vector type is unsupported for lsx!");
835 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
836 "Two operands have different types!");
837 assert(VT.getVectorNumElements() == Mask.size() &&
838 "Unexpected mask size for shuffle!");
839 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
840
841 SDValue Result;
842 // TODO: Add more comparison patterns.
843 if (V2.isUndef()) {
844 if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG)))
845 return Result;
846 if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
847 return Result;
848
    // TODO: This commented-out code may be enabled in the future to better
    // match the pattern for instruction selection.
    /* V2 = V1; */
852 }
853
  // For better performance, it is recommended not to change the order of these
  // pattern comparisons.
856 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
857 return Result;
858 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
859 return Result;
860 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
861 return Result;
862 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
863 return Result;
864 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
865 return Result;
866 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
867 return Result;
868 if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
869 return Result;
870
871 return SDValue();
872}
873
874/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
875///
/// It is an XVREPLVEI when the mask is:
/// <x, x, x, ..., x+n, x+n, x+n, ...>
/// where x appears n times and n is half the length of the vector.
879///
880/// When undef's appear in the mask they are treated as if they were whatever
881/// value is necessary in order to fit the above form.
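///
/// For example (illustrative), the v8i32 mask <1, 1, 1, 1, 5, 5, 5, 5> fits
/// this form: element 1 is broadcast within each 128-bit half, so the lowering
/// below uses immediate 1.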
882static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL,
883 ArrayRef<int> Mask, MVT VT,
884 SDValue V1, SDValue V2,
885 SelectionDAG &DAG) {
886 int SplatIndex = -1;
887 for (const auto &M : Mask) {
888 if (M != -1) {
889 SplatIndex = M;
890 break;
891 }
892 }
893
894 if (SplatIndex == -1)
895 return DAG.getUNDEF(VT);
896
897 const auto &Begin = Mask.begin();
898 const auto &End = Mask.end();
899 unsigned HalfSize = Mask.size() / 2;
900
901 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
902 if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: End - HalfSize, ExpectedIndex: SplatIndex, ExpectedIndexStride: 0) &&
903 fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 1, End, ExpectedIndex: SplatIndex + HalfSize,
904 ExpectedIndexStride: 0)) {
905 APInt Imm(64, SplatIndex);
906 return DAG.getNode(Opcode: LoongArchISD::VREPLVEI, DL, VT, N1: V1,
907 N2: DAG.getConstant(Val: Imm, DL, VT: MVT::i64));
908 }
909
910 return SDValue();
911}
912
913/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
914static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
915 MVT VT, SDValue V1, SDValue V2,
916 SelectionDAG &DAG) {
  // When the mask has 4 or fewer elements, lower-cost instructions may be
  // used.
919 if (Mask.size() <= 4)
920 return SDValue();
921 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG);
922}
923
924/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
925static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
926 MVT VT, SDValue V1, SDValue V2,
927 SelectionDAG &DAG) {
928 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
929}
930
931/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
932static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
933 MVT VT, SDValue V1, SDValue V2,
934 SelectionDAG &DAG) {
935 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
936}
937
938/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
939static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
940 MVT VT, SDValue V1, SDValue V2,
941 SelectionDAG &DAG) {
942
943 const auto &Begin = Mask.begin();
944 const auto &End = Mask.end();
945 unsigned HalfSize = Mask.size() / 2;
946 unsigned LeftSize = HalfSize / 2;
947 SDValue OriV1 = V1, OriV2 = V2;
948
949 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize, ExpectedIndex: HalfSize - LeftSize,
950 ExpectedIndexStride: 1) &&
951 fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize + LeftSize, ExpectedIndexStride: 1))
952 V1 = OriV1;
953 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize,
954 ExpectedIndex: Mask.size() + HalfSize - LeftSize, ExpectedIndexStride: 1) &&
955 fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End,
956 ExpectedIndex: Mask.size() + HalfSize + LeftSize, ExpectedIndexStride: 1))
957 V1 = OriV2;
958 else
959 return SDValue();
960
961 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize, ExpectedIndex: HalfSize - LeftSize,
962 ExpectedIndexStride: 1) &&
963 fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize + LeftSize,
964 ExpectedIndexStride: 1))
965 V2 = OriV1;
966 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize,
967 ExpectedIndex: Mask.size() + HalfSize - LeftSize, ExpectedIndexStride: 1) &&
968 fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End,
969 ExpectedIndex: Mask.size() + HalfSize + LeftSize, ExpectedIndexStride: 1))
970 V2 = OriV2;
971 else
972 return SDValue();
973
974 return DAG.getNode(Opcode: LoongArchISD::VILVH, DL, VT, N1: V2, N2: V1);
975}
976
977/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
978static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
979 MVT VT, SDValue V1, SDValue V2,
980 SelectionDAG &DAG) {
981
982 const auto &Begin = Mask.begin();
983 const auto &End = Mask.end();
984 unsigned HalfSize = Mask.size() / 2;
985 SDValue OriV1 = V1, OriV2 = V2;
986
987 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize, ExpectedIndex: 0, ExpectedIndexStride: 1) &&
988 fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1))
989 V1 = OriV1;
990 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize, ExpectedIndex: Mask.size(), ExpectedIndexStride: 1) &&
991 fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End,
992 ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 1))
993 V1 = OriV2;
994 else
995 return SDValue();
996
997 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize, ExpectedIndex: 0, ExpectedIndexStride: 1) &&
998 fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1))
999 V2 = OriV1;
1000 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize, ExpectedIndex: Mask.size(),
1001 ExpectedIndexStride: 1) &&
1002 fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End,
1003 ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 1))
1004 V2 = OriV2;
1005 else
1006 return SDValue();
1007
1008 return DAG.getNode(Opcode: LoongArchISD::VILVL, DL, VT, N1: V2, N2: V1);
1009}
1010
1011/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
1012static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
1013 MVT VT, SDValue V1, SDValue V2,
1014 SelectionDAG &DAG) {
1015
1016 const auto &Begin = Mask.begin();
1017 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
1018 const auto &Mid = Mask.begin() + Mask.size() / 2;
1019 const auto &RightMid = Mask.end() - Mask.size() / 4;
1020 const auto &End = Mask.end();
1021 unsigned HalfSize = Mask.size() / 2;
1022 SDValue OriV1 = V1, OriV2 = V2;
1023
1024 if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: 0, ExpectedIndexStride: 2) &&
1025 fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: HalfSize, ExpectedIndexStride: 2))
1026 V1 = OriV1;
1027 else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2) &&
1028 fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 2))
1029 V1 = OriV2;
1030 else
1031 return SDValue();
1032
1033 if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: 0, ExpectedIndexStride: 2) &&
1034 fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 2))
1035 V2 = OriV1;
1036 else if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2) &&
1037 fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 2))
1038 V2 = OriV2;
1039
1040 else
1041 return SDValue();
1042
1043 return DAG.getNode(Opcode: LoongArchISD::VPICKEV, DL, VT, N1: V2, N2: V1);
1044}
1045
1046/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
1047static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
1048 MVT VT, SDValue V1, SDValue V2,
1049 SelectionDAG &DAG) {
1050
1051 const auto &Begin = Mask.begin();
1052 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
1053 const auto &Mid = Mask.begin() + Mask.size() / 2;
1054 const auto &RightMid = Mask.end() - Mask.size() / 4;
1055 const auto &End = Mask.end();
1056 unsigned HalfSize = Mask.size() / 2;
1057 SDValue OriV1 = V1, OriV2 = V2;
1058
1059 if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: 1, ExpectedIndexStride: 2) &&
1060 fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: HalfSize + 1, ExpectedIndexStride: 2))
1061 V1 = OriV1;
1062 else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2) &&
1063 fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: Mask.size() + HalfSize + 1,
1064 ExpectedIndexStride: 2))
1065 V1 = OriV2;
1066 else
1067 return SDValue();
1068
1069 if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: 1, ExpectedIndexStride: 2) &&
1070 fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: HalfSize + 1, ExpectedIndexStride: 2))
1071 V2 = OriV1;
1072 else if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2) &&
1073 fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: Mask.size() + HalfSize + 1,
1074 ExpectedIndexStride: 2))
1075 V2 = OriV2;
1076 else
1077 return SDValue();
1078
1079 return DAG.getNode(Opcode: LoongArchISD::VPICKOD, DL, VT, N1: V2, N2: V1);
1080}
1081
1082/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
1083static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
1084 MVT VT, SDValue V1, SDValue V2,
1085 SelectionDAG &DAG) {
1086
1087 int MaskSize = Mask.size();
1088 int HalfSize = Mask.size() / 2;
1089 const auto &Begin = Mask.begin();
1090 const auto &Mid = Mask.begin() + HalfSize;
1091 const auto &End = Mask.end();
1092
1093 // VECTOR_SHUFFLE concatenates the vectors:
1094 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
1095 // shuffling ->
1096 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
1097 //
1098 // XVSHUF concatenates the vectors:
1099 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
1100 // shuffling ->
1101 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
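  //
  // For example (illustrative), for v8i32 the mask <0, 8, 1, 9, 4, 12, 5, 13>
  // is converted below into the per-lane XVSHUF mask <0, 4, 1, 5, 0, 4, 1, 5>.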
1102 SmallVector<SDValue, 8> MaskAlloc;
1103 for (auto it = Begin; it < Mid; it++) {
1104 if (*it < 0) // UNDEF
1105 MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: 0, DL, VT: MVT::i64));
1106 else if ((*it >= 0 && *it < HalfSize) ||
1107 (*it >= MaskSize && *it <= MaskSize + HalfSize)) {
1108 int M = *it < HalfSize ? *it : *it - HalfSize;
1109 MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: M, DL, VT: MVT::i64));
1110 } else
1111 return SDValue();
1112 }
1113 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
1114
1115 for (auto it = Mid; it < End; it++) {
1116 if (*it < 0) // UNDEF
1117 MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: 0, DL, VT: MVT::i64));
1118 else if ((*it >= HalfSize && *it < MaskSize) ||
1119 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
1120 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
1121 MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: M, DL, VT: MVT::i64));
1122 } else
1123 return SDValue();
1124 }
1125 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
1126
1127 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
1128 SDValue MaskVec = DAG.getBuildVector(VT: MaskVecTy, DL, Ops: MaskAlloc);
1129 return DAG.getNode(Opcode: LoongArchISD::VSHUF, DL, VT, N1: MaskVec, N2: V2, N3: V1);
1130}
1131
/// Shuffle vectors by lane to generate more optimized instructions.
/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
///
/// Therefore, all cases other than the following four are regarded as
/// cross-lane shuffles, where optimization is relatively limited.
///
/// - Shuffle high, low lanes of the two input vectors
/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
/// - Shuffle low, high lanes of the two input vectors
/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
/// - Shuffle low, low lanes of the two input vectors
/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
/// - Shuffle high, high lanes of the two input vectors
/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
///
/// The first case maps most directly to the LoongArch instructions; the other
/// cases are converted to it before further processing.
///
/// This function may modify V1, V2 and Mask.
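///
/// For example (illustrative), the v8i32 mask <4, 12, 5, 13, 0, 8, 1, 9>
/// matches the second ("low, high") case above: both inputs get their 128-bit
/// halves swapped via XVPERMI and the mask is rewritten to
/// <0, 8, 1, 9, 4, 12, 5, 13> before the per-pattern lowerings run.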
1151static void canonicalizeShuffleVectorByLane(const SDLoc &DL,
1152 MutableArrayRef<int> Mask, MVT VT,
1153 SDValue &V1, SDValue &V2,
1154 SelectionDAG &DAG) {
1155
1156 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
1157
1158 int MaskSize = Mask.size();
1159 int HalfSize = Mask.size() / 2;
1160
1161 HalfMaskType preMask = None, postMask = None;
1162
1163 if (std::all_of(first: Mask.begin(), last: Mask.begin() + HalfSize, pred: [&](int M) {
1164 return M < 0 || (M >= 0 && M < HalfSize) ||
1165 (M >= MaskSize && M < MaskSize + HalfSize);
1166 }))
1167 preMask = HighLaneTy;
1168 else if (std::all_of(first: Mask.begin(), last: Mask.begin() + HalfSize, pred: [&](int M) {
1169 return M < 0 || (M >= HalfSize && M < MaskSize) ||
1170 (M >= MaskSize + HalfSize && M < MaskSize * 2);
1171 }))
1172 preMask = LowLaneTy;
1173
1174 if (std::all_of(first: Mask.begin() + HalfSize, last: Mask.end(), pred: [&](int M) {
1175 return M < 0 || (M >= 0 && M < HalfSize) ||
1176 (M >= MaskSize && M < MaskSize + HalfSize);
1177 }))
1178 postMask = HighLaneTy;
1179 else if (std::all_of(first: Mask.begin() + HalfSize, last: Mask.end(), pred: [&](int M) {
1180 return M < 0 || (M >= HalfSize && M < MaskSize) ||
1181 (M >= MaskSize + HalfSize && M < MaskSize * 2);
1182 }))
1183 postMask = LowLaneTy;
1184
  // The first half of the mask is of high-lane type and the second half is of
  // low-lane type, which is the form closest to the LoongArch instructions.
  //
  // Note: In the LoongArch architecture, the high lane of the mask corresponds
  // to the lower 128 bits of the vector register, and the low lane of the mask
  // corresponds to the higher 128 bits.
1191 if (preMask == HighLaneTy && postMask == LowLaneTy) {
1192 return;
1193 }
1194 if (preMask == LowLaneTy && postMask == HighLaneTy) {
1195 V1 = DAG.getBitcast(VT: MVT::v4i64, V: V1);
1196 V1 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V1,
1197 N2: DAG.getConstant(Val: 0b01001110, DL, VT: MVT::i64));
1198 V1 = DAG.getBitcast(VT, V: V1);
1199
1200 if (!V2.isUndef()) {
1201 V2 = DAG.getBitcast(VT: MVT::v4i64, V: V2);
1202 V2 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V2,
1203 N2: DAG.getConstant(Val: 0b01001110, DL, VT: MVT::i64));
1204 V2 = DAG.getBitcast(VT, V: V2);
1205 }
1206
1207 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
1208 *it = *it < 0 ? *it : *it - HalfSize;
1209 }
1210 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
1211 *it = *it < 0 ? *it : *it + HalfSize;
1212 }
1213 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
1214 V1 = DAG.getBitcast(VT: MVT::v4i64, V: V1);
1215 V1 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V1,
1216 N2: DAG.getConstant(Val: 0b11101110, DL, VT: MVT::i64));
1217 V1 = DAG.getBitcast(VT, V: V1);
1218
1219 if (!V2.isUndef()) {
1220 V2 = DAG.getBitcast(VT: MVT::v4i64, V: V2);
1221 V2 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V2,
1222 N2: DAG.getConstant(Val: 0b11101110, DL, VT: MVT::i64));
1223 V2 = DAG.getBitcast(VT, V: V2);
1224 }
1225
1226 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
1227 *it = *it < 0 ? *it : *it - HalfSize;
1228 }
1229 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
1230 V1 = DAG.getBitcast(VT: MVT::v4i64, V: V1);
1231 V1 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V1,
1232 N2: DAG.getConstant(Val: 0b01000100, DL, VT: MVT::i64));
1233 V1 = DAG.getBitcast(VT, V: V1);
1234
1235 if (!V2.isUndef()) {
1236 V2 = DAG.getBitcast(VT: MVT::v4i64, V: V2);
1237 V2 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V2,
1238 N2: DAG.getConstant(Val: 0b01000100, DL, VT: MVT::i64));
1239 V2 = DAG.getBitcast(VT, V: V2);
1240 }
1241
1242 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
1243 *it = *it < 0 ? *it : *it + HalfSize;
1244 }
1245 } else { // cross-lane
1246 return;
1247 }
1248}
1249
1250/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
1251///
1252/// This routine breaks down the specific type of 256-bit shuffle and
1253/// dispatches to the lowering routines accordingly.
1254static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1255 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1256 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
1257 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
1258 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
1259 "Vector type is unsupported for lasx!");
1260 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
1261 "Two operands have different types!");
1262 assert(VT.getVectorNumElements() == Mask.size() &&
1263 "Unexpected mask size for shuffle!");
1264 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
1265 assert(Mask.size() >= 4 && "Mask size is less than 4.");
1266
  // Canonicalize non-cross-lane shuffle vectors.
1268 SmallVector<int> NewMask(Mask);
1269 canonicalizeShuffleVectorByLane(DL, Mask: NewMask, VT, V1, V2, DAG);
1270
1271 SDValue Result;
1272 // TODO: Add more comparison patterns.
1273 if (V2.isUndef()) {
1274 if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask: NewMask, VT, V1, V2, DAG)))
1275 return Result;
1276 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask: NewMask, VT, V1, V2, DAG)))
1277 return Result;
1278
    // TODO: This commented-out code may be enabled in the future to better
    // match the pattern for instruction selection.
    /* V2 = V1; */
1282 }
1283
  // For better performance, it is recommended not to change the order of these
  // pattern comparisons.
1286 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask: NewMask, VT, V1, V2, DAG)))
1287 return Result;
1288 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask: NewMask, VT, V1, V2, DAG)))
1289 return Result;
1290 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask: NewMask, VT, V1, V2, DAG)))
1291 return Result;
1292 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask: NewMask, VT, V1, V2, DAG)))
1293 return Result;
1294 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask: NewMask, VT, V1, V2, DAG)))
1295 return Result;
1296 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask: NewMask, VT, V1, V2, DAG)))
1297 return Result;
1298 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, Mask: NewMask, VT, V1, V2, DAG)))
1299 return Result;
1300
1301 return SDValue();
1302}
1303
1304SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
1305 SelectionDAG &DAG) const {
1306 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Val&: Op);
1307 ArrayRef<int> OrigMask = SVOp->getMask();
1308 SDValue V1 = Op.getOperand(i: 0);
1309 SDValue V2 = Op.getOperand(i: 1);
1310 MVT VT = Op.getSimpleValueType();
1311 int NumElements = VT.getVectorNumElements();
1312 SDLoc DL(Op);
1313
1314 bool V1IsUndef = V1.isUndef();
1315 bool V2IsUndef = V2.isUndef();
1316 if (V1IsUndef && V2IsUndef)
1317 return DAG.getUNDEF(VT);
1318
  // When we create a shuffle node we put the UNDEF node as the second operand,
  // but in some cases the first operand may be transformed to UNDEF.
  // In this case we should just commute the node.
1322 if (V1IsUndef)
1323 return DAG.getCommutedVectorShuffle(SV: *SVOp);
1324
1325 // Check for non-undef masks pointing at an undef vector and make the masks
1326 // undef as well. This makes it easier to match the shuffle based solely on
1327 // the mask.
1328 if (V2IsUndef &&
1329 any_of(Range&: OrigMask, P: [NumElements](int M) { return M >= NumElements; })) {
1330 SmallVector<int, 8> NewMask(OrigMask);
1331 for (int &M : NewMask)
1332 if (M >= NumElements)
1333 M = -1;
1334 return DAG.getVectorShuffle(VT, dl: DL, N1: V1, N2: V2, Mask: NewMask);
1335 }
1336
1337 // Check for illegal shuffle mask element index values.
1338 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
1339 (void)MaskUpperLimit;
1340 assert(llvm::all_of(OrigMask,
1341 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
1342 "Out of bounds shuffle index");
1343
1344 // For each vector width, delegate to a specialized lowering routine.
1345 if (VT.is128BitVector())
1346 return lower128BitShuffle(DL, Mask: OrigMask, VT, V1, V2, DAG);
1347
1348 if (VT.is256BitVector())
1349 return lower256BitShuffle(DL, Mask: OrigMask, VT, V1, V2, DAG);
1350
1351 return SDValue();
1352}
1353
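// Return true if Op is an UNDEF value or an integer/floating-point constant.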
1354static bool isConstantOrUndef(const SDValue Op) {
1355 if (Op->isUndef())
1356 return true;
1357 if (isa<ConstantSDNode>(Val: Op))
1358 return true;
1359 if (isa<ConstantFPSDNode>(Val: Op))
1360 return true;
1361 return false;
1362}
1363
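// Return true if *any* operand of the BUILD_VECTOR node is a constant or
// UNDEF; despite the name, this is an "any" query, not an "all" query.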
1364static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
1365 for (unsigned i = 0; i < Op->getNumOperands(); ++i)
1366 if (isConstantOrUndef(Op: Op->getOperand(Num: i)))
1367 return true;
1368 return false;
1369}
1370
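// Lower a BUILD_VECTOR node: constant splats are rebuilt as a constant of a
// same-sized integer vector type and bitcast back, non-constant splats are
// kept as-is for pattern selection, and fully non-constant vectors are
// assembled with a chain of INSERT_VECTOR_ELT nodes instead of being expanded
// to stores.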
1371SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
1372 SelectionDAG &DAG) const {
1373 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Val&: Op);
1374 EVT ResTy = Op->getValueType(ResNo: 0);
1375 SDLoc DL(Op);
1376 APInt SplatValue, SplatUndef;
1377 unsigned SplatBitSize;
1378 bool HasAnyUndefs;
1379 bool Is128Vec = ResTy.is128BitVector();
1380 bool Is256Vec = ResTy.is256BitVector();
1381
1382 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
1383 (!Subtarget.hasExtLASX() || !Is256Vec))
1384 return SDValue();
1385
1386 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
1387 /*MinSplatBits=*/8) &&
1388 SplatBitSize <= 64) {
1389 // We can only cope with 8, 16, 32, or 64-bit elements.
1390 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
1391 SplatBitSize != 64)
1392 return SDValue();
1393
1394 EVT ViaVecTy;
1395
1396 switch (SplatBitSize) {
1397 default:
1398 return SDValue();
1399 case 8:
1400 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
1401 break;
1402 case 16:
1403 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
1404 break;
1405 case 32:
1406 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
1407 break;
1408 case 64:
1409 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
1410 break;
1411 }
1412
1413 // SelectionDAG::getConstant will promote SplatValue appropriately.
1414 SDValue Result = DAG.getConstant(Val: SplatValue, DL, VT: ViaVecTy);
1415
1416 // Bitcast to the type we originally wanted.
1417 if (ViaVecTy != ResTy)
1418 Result = DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(Node), VT: ResTy, Operand: Result);
1419
1420 return Result;
1421 }
1422
1423 if (DAG.isSplatValue(V: Op, /*AllowUndefs=*/false))
1424 return Op;
1425
1426 if (!isConstantOrUndefBUILD_VECTOR(Op: Node)) {
1427    // Use INSERT_VECTOR_ELT operations rather than expanding to stores.
1428 // The resulting code is the same length as the expansion, but it doesn't
1429 // use memory operations.
1430 EVT ResTy = Node->getValueType(ResNo: 0);
1431
1432 assert(ResTy.isVector());
1433
1434 unsigned NumElts = ResTy.getVectorNumElements();
1435 SDValue Vector = DAG.getUNDEF(VT: ResTy);
1436 for (unsigned i = 0; i < NumElts; ++i) {
1437 Vector = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: ResTy, N1: Vector,
1438 N2: Node->getOperand(Num: i),
1439 N3: DAG.getConstant(Val: i, DL, VT: Subtarget.getGRLenVT()));
1440 }
1441 return Vector;
1442 }
1443
1444 return SDValue();
1445}
1446
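// EXTRACT_VECTOR_ELT is kept as-is (and matched by patterns later) only when
// the index is a constant and either the element type is 32/64-bit or the
// index lies in the lower half of the vector.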
1447SDValue
1448LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
1449 SelectionDAG &DAG) const {
1450 EVT VecTy = Op->getOperand(Num: 0)->getValueType(ResNo: 0);
1451 SDValue Idx = Op->getOperand(Num: 1);
1452 EVT EltTy = VecTy.getVectorElementType();
1453 unsigned NumElts = VecTy.getVectorNumElements();
1454
1455 if (isa<ConstantSDNode>(Val: Idx) &&
1456 (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
1457 EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2))
1458 return Op;
1459
1460 return SDValue();
1461}
1462
1463SDValue
1464LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
1465 SelectionDAG &DAG) const {
1466 if (isa<ConstantSDNode>(Val: Op->getOperand(Num: 2)))
1467 return Op;
1468 return SDValue();
1469}
1470
1471SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
1472 SelectionDAG &DAG) const {
1473 SDLoc DL(Op);
1474 SyncScope::ID FenceSSID =
1475 static_cast<SyncScope::ID>(Op.getConstantOperandVal(i: 2));
1476
1477 // singlethread fences only synchronize with signal handlers on the same
1478 // thread and thus only need to preserve instruction order, not actually
1479 // enforce memory ordering.
1480 if (FenceSSID == SyncScope::SingleThread)
1481 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
1482 return DAG.getNode(Opcode: ISD::MEMBARRIER, DL, VT: MVT::Other, Operand: Op.getOperand(i: 0));
1483
1484 return Op;
1485}
1486
1487SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
1488 SelectionDAG &DAG) const {
1489
1490 if (Subtarget.is64Bit() && Op.getOperand(i: 2).getValueType() == MVT::i32) {
1491 DAG.getContext()->emitError(
1492 ErrorStr: "On LA64, only 64-bit registers can be written.");
1493 return Op.getOperand(i: 0);
1494 }
1495
1496 if (!Subtarget.is64Bit() && Op.getOperand(i: 2).getValueType() == MVT::i64) {
1497 DAG.getContext()->emitError(
1498 ErrorStr: "On LA32, only 32-bit registers can be written.");
1499 return Op.getOperand(i: 0);
1500 }
1501
1502 return Op;
1503}
1504
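// Lower FRAMEADDR: depth 0 is simply a copy of the frame register. For deeper
// frames, each step loads the caller's frame pointer, which this lowering
// expects to find at offset -2*GRLen/8 from the current frame pointer.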
1505SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
1506 SelectionDAG &DAG) const {
1507 if (!isa<ConstantSDNode>(Val: Op.getOperand(i: 0))) {
1508 DAG.getContext()->emitError(ErrorStr: "argument to '__builtin_frame_address' must "
1509 "be a constant integer");
1510 return SDValue();
1511 }
1512
1513 MachineFunction &MF = DAG.getMachineFunction();
1514 MF.getFrameInfo().setFrameAddressIsTaken(true);
1515 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
1516 EVT VT = Op.getValueType();
1517 SDLoc DL(Op);
1518 SDValue FrameAddr = DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: DL, Reg: FrameReg, VT);
1519 unsigned Depth = Op.getConstantOperandVal(i: 0);
1520 int GRLenInBytes = Subtarget.getGRLen() / 8;
1521
1522 while (Depth--) {
1523 int Offset = -(GRLenInBytes * 2);
1524 SDValue Ptr = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: FrameAddr,
1525 N2: DAG.getIntPtrConstant(Val: Offset, DL));
1526 FrameAddr =
1527 DAG.getLoad(VT, dl: DL, Chain: DAG.getEntryNode(), Ptr, PtrInfo: MachinePointerInfo());
1528 }
1529 return FrameAddr;
1530}
1531
1532SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
1533 SelectionDAG &DAG) const {
1534 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
1535 return SDValue();
1536
1537  // We currently only support lowering the return address for the current frame.
1538 if (Op.getConstantOperandVal(i: 0) != 0) {
1539 DAG.getContext()->emitError(
1540 ErrorStr: "return address can only be determined for the current frame");
1541 return SDValue();
1542 }
1543
1544 MachineFunction &MF = DAG.getMachineFunction();
1545 MF.getFrameInfo().setReturnAddressIsTaken(true);
1546 MVT GRLenVT = Subtarget.getGRLenVT();
1547
1548 // Return the value of the return address register, marking it an implicit
1549 // live-in.
1550 Register Reg = MF.addLiveIn(PReg: Subtarget.getRegisterInfo()->getRARegister(),
1551 RC: getRegClassFor(VT: GRLenVT));
1552 return DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: SDLoc(Op), Reg, VT: GRLenVT);
1553}
1554
1555SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
1556 SelectionDAG &DAG) const {
1557 MachineFunction &MF = DAG.getMachineFunction();
1558 auto Size = Subtarget.getGRLen() / 8;
1559 auto FI = MF.getFrameInfo().CreateFixedObject(Size, SPOffset: 0, IsImmutable: false);
1560 return DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout()));
1561}
1562
1563SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
1564 SelectionDAG &DAG) const {
1565 MachineFunction &MF = DAG.getMachineFunction();
1566 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
1567
1568 SDLoc DL(Op);
1569 SDValue FI = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(),
1570 VT: getPointerTy(DL: MF.getDataLayout()));
1571
1572 // vastart just stores the address of the VarArgsFrameIndex slot into the
1573 // memory location argument.
1574 const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue();
1575 return DAG.getStore(Chain: Op.getOperand(i: 0), dl: DL, Val: FI, Ptr: Op.getOperand(i: 1),
1576 PtrInfo: MachinePointerInfo(SV));
1577}
1578
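// Lower UINT_TO_FP for LA64 with 'f' but without 'd': keep the node when the
// operand is already known to fit in 32 bits (via an AND mask, a BSTRPICK
// starting at bit 0, or an AssertZext from a type narrower than i32);
// otherwise soften it to the library call returned by RTLIB::getUINTTOFP.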
1579SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
1580 SelectionDAG &DAG) const {
1581 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
1582 !Subtarget.hasBasicD() && "unexpected target features");
1583
1584 SDLoc DL(Op);
1585 SDValue Op0 = Op.getOperand(i: 0);
1586 if (Op0->getOpcode() == ISD::AND) {
1587 auto *C = dyn_cast<ConstantSDNode>(Val: Op0.getOperand(i: 1));
1588 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
1589 return Op;
1590 }
1591
1592 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
1593 Op0.getConstantOperandVal(i: 1) < UINT64_C(0X1F) &&
1594 Op0.getConstantOperandVal(i: 2) == UINT64_C(0))
1595 return Op;
1596
1597 if (Op0.getOpcode() == ISD::AssertZext &&
1598 dyn_cast<VTSDNode>(Val: Op0.getOperand(i: 1))->getVT().bitsLT(VT: MVT::i32))
1599 return Op;
1600
1601 EVT OpVT = Op0.getValueType();
1602 EVT RetVT = Op.getValueType();
1603 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
1604 MakeLibCallOptions CallOptions;
1605 CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT, Value: true);
1606 SDValue Chain = SDValue();
1607 SDValue Result;
1608 std::tie(args&: Result, args&: Chain) =
1609 makeLibCall(DAG, LC, RetVT: Op.getValueType(), Ops: Op0, CallOptions, dl: DL, Chain);
1610 return Result;
1611}
1612
1613SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
1614 SelectionDAG &DAG) const {
1615 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
1616 !Subtarget.hasBasicD() && "unexpected target features");
1617
1618 SDLoc DL(Op);
1619 SDValue Op0 = Op.getOperand(i: 0);
1620
1621 if ((Op0.getOpcode() == ISD::AssertSext ||
1622 Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) &&
1623 dyn_cast<VTSDNode>(Val: Op0.getOperand(i: 1))->getVT().bitsLE(VT: MVT::i32))
1624 return Op;
1625
1626 EVT OpVT = Op0.getValueType();
1627 EVT RetVT = Op.getValueType();
1628 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
1629 MakeLibCallOptions CallOptions;
1630 CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT, Value: true);
1631 SDValue Chain = SDValue();
1632 SDValue Result;
1633 std::tie(args&: Result, args&: Chain) =
1634 makeLibCall(DAG, LC, RetVT: Op.getValueType(), Ops: Op0, CallOptions, dl: DL, Chain);
1635 return Result;
1636}
1637
1638SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
1639 SelectionDAG &DAG) const {
1640
1641 SDLoc DL(Op);
1642 SDValue Op0 = Op.getOperand(i: 0);
1643
1644 if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
1645 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
1646 SDValue NewOp0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op0);
1647 return DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64, DL, VT: MVT::f32, Operand: NewOp0);
1648 }
1649 return Op;
1650}
1651
1652SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
1653 SelectionDAG &DAG) const {
1654
1655 SDLoc DL(Op);
1656
1657 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
1658 !Subtarget.hasBasicD()) {
1659 SDValue Dst =
1660 DAG.getNode(Opcode: LoongArchISD::FTINT, DL, VT: MVT::f32, Operand: Op.getOperand(i: 0));
1661 return DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Dst);
1662 }
1663
1664 EVT FPTy = EVT::getFloatingPointVT(BitWidth: Op.getValueSizeInBits());
1665 SDValue Trunc = DAG.getNode(Opcode: LoongArchISD::FTINT, DL, VT: FPTy, Operand: Op.getOperand(i: 0));
1666 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: Op.getValueType(), Operand: Trunc);
1667}
1668
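// getTargetNode - Wrap the given addressable node (global address, block
// address, constant pool entry or jump table) in its target-specific
// counterpart, carrying the given target flags.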
1669static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
1670 SelectionDAG &DAG, unsigned Flags) {
1671 return DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: Flags);
1672}
1673
1674static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
1675 SelectionDAG &DAG, unsigned Flags) {
1676 return DAG.getTargetBlockAddress(BA: N->getBlockAddress(), VT: Ty, Offset: N->getOffset(),
1677 TargetFlags: Flags);
1678}
1679
1680static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
1681 SelectionDAG &DAG, unsigned Flags) {
1682 return DAG.getTargetConstantPool(C: N->getConstVal(), VT: Ty, Align: N->getAlign(),
1683 Offset: N->getOffset(), TargetFlags: Flags);
1684}
1685
1686static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
1687 SelectionDAG &DAG, unsigned Flags) {
1688 return DAG.getTargetJumpTable(JTI: N->getIndex(), VT: Ty, TargetFlags: Flags);
1689}
1690
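// Compute the address of a node according to the code model. The small and
// medium code models use a single PseudoLA_PCREL (for dso-local symbols) or
// PseudoLA_GOT node, while the large code model uses the corresponding
// PseudoLA_*_LARGE variant with a dummy operand. GOT loads are tagged with an
// invariant memory operand so MachineLICM can hoist them out of loops.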
1691template <class NodeTy>
1692SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
1693 CodeModel::Model M,
1694 bool IsLocal) const {
1695 SDLoc DL(N);
1696 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
1697 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
1698 SDValue Load;
1699
1700 switch (M) {
1701 default:
1702 report_fatal_error(reason: "Unsupported code model");
1703
1704 case CodeModel::Large: {
1705 assert(Subtarget.is64Bit() && "Large code model requires LA64");
1706
1707 // This is not actually used, but is necessary for successfully matching
1708 // the PseudoLA_*_LARGE nodes.
1709 SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty);
1710 if (IsLocal) {
1711 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
1712 // eventually becomes the desired 5-insn code sequence.
1713 Load = SDValue(DAG.getMachineNode(Opcode: LoongArch::PseudoLA_PCREL_LARGE, dl: DL, VT: Ty,
1714 Op1: Tmp, Op2: Addr),
1715 0);
1716 } else {
1717 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
1718 // eventually becomes the desired 5-insn code sequence.
1719 Load = SDValue(
1720 DAG.getMachineNode(Opcode: LoongArch::PseudoLA_GOT_LARGE, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr),
1721 0);
1722 }
1723 break;
1724 }
1725
1726 case CodeModel::Small:
1727 case CodeModel::Medium:
1728 if (IsLocal) {
1729 // This generates the pattern (PseudoLA_PCREL sym), which expands to
1730 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
1731 Load = SDValue(
1732 DAG.getMachineNode(Opcode: LoongArch::PseudoLA_PCREL, dl: DL, VT: Ty, Op1: Addr), 0);
1733 } else {
1734 // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
1735 // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
1736 Load =
1737 SDValue(DAG.getMachineNode(Opcode: LoongArch::PseudoLA_GOT, dl: DL, VT: Ty, Op1: Addr), 0);
1738 }
1739 }
1740
1741 if (!IsLocal) {
1742 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
1743 MachineFunction &MF = DAG.getMachineFunction();
1744 MachineMemOperand *MemOp = MF.getMachineMemOperand(
1745 PtrInfo: MachinePointerInfo::getGOT(MF),
1746 f: MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
1747 MachineMemOperand::MOInvariant,
1748 MemTy: LLT(Ty.getSimpleVT()), base_alignment: Align(Ty.getFixedSizeInBits() / 8));
1749 DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Load.getNode()), NewMemRefs: {MemOp});
1750 }
1751
1752 return Load;
1753}
1754
1755SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
1756 SelectionDAG &DAG) const {
1757 return getAddr(N: cast<BlockAddressSDNode>(Val&: Op), DAG,
1758 M: DAG.getTarget().getCodeModel());
1759}
1760
1761SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
1762 SelectionDAG &DAG) const {
1763 return getAddr(N: cast<JumpTableSDNode>(Val&: Op), DAG,
1764 M: DAG.getTarget().getCodeModel());
1765}
1766
1767SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
1768 SelectionDAG &DAG) const {
1769 return getAddr(N: cast<ConstantPoolSDNode>(Val&: Op), DAG,
1770 M: DAG.getTarget().getCodeModel());
1771}
1772
1773SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
1774 SelectionDAG &DAG) const {
1775 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Val&: Op);
1776 assert(N->getOffset() == 0 && "unexpected offset in global node");
1777 auto CM = DAG.getTarget().getCodeModel();
1778 const GlobalValue *GV = N->getGlobal();
1779
1780 if (GV->isDSOLocal() && isa<GlobalVariable>(Val: GV)) {
1781 if (auto GCM = dyn_cast<GlobalVariable>(Val: GV)->getCodeModel())
1782 CM = *GCM;
1783 }
1784
1785 return getAddr(N, DAG, M: CM, IsLocal: GV->isDSOLocal());
1786}
1787
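// Materialize a TLS address for the Initial-Exec (UseGOT) or Local-Exec
// model: the thread-local offset is produced by the given PseudoLA_TLS_*
// opcode (with an extra dummy operand in the large code model) and then added
// to the thread pointer register $r2.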
1788SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
1789 SelectionDAG &DAG,
1790 unsigned Opc, bool UseGOT,
1791 bool Large) const {
1792 SDLoc DL(N);
1793 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
1794 MVT GRLenVT = Subtarget.getGRLenVT();
1795
1796 // This is not actually used, but is necessary for successfully matching the
1797 // PseudoLA_*_LARGE nodes.
1798 SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty);
1799 SDValue Addr = DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: 0);
1800 SDValue Offset = Large
1801 ? SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), 0)
1802 : SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Addr), 0);
1803 if (UseGOT) {
1804 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
1805 MachineFunction &MF = DAG.getMachineFunction();
1806 MachineMemOperand *MemOp = MF.getMachineMemOperand(
1807 PtrInfo: MachinePointerInfo::getGOT(MF),
1808 f: MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
1809 MachineMemOperand::MOInvariant,
1810 MemTy: LLT(Ty.getSimpleVT()), base_alignment: Align(Ty.getFixedSizeInBits() / 8));
1811 DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Offset.getNode()), NewMemRefs: {MemOp});
1812 }
1813
1814 // Add the thread pointer.
1815 return DAG.getNode(Opcode: ISD::ADD, DL, VT: Ty, N1: Offset,
1816 N2: DAG.getRegister(Reg: LoongArch::R2, VT: GRLenVT));
1817}
1818
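// Materialize a TLS address for the General-Dynamic or Local-Dynamic model:
// compute the address of the symbol's GOT entry with the given PseudoLA_TLS_*
// opcode and pass it to a __tls_get_addr call.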
1819SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
1820 SelectionDAG &DAG,
1821 unsigned Opc,
1822 bool Large) const {
1823 SDLoc DL(N);
1824 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
1825 IntegerType *CallTy = Type::getIntNTy(C&: *DAG.getContext(), N: Ty.getSizeInBits());
1826
1827 // This is not actually used, but is necessary for successfully matching the
1828 // PseudoLA_*_LARGE nodes.
1829 SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty);
1830
1831 // Use a PC-relative addressing mode to access the dynamic GOT address.
1832 SDValue Addr = DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: 0);
1833 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), 0)
1834 : SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Addr), 0);
1835
1836 // Prepare argument list to generate call.
1837 ArgListTy Args;
1838 ArgListEntry Entry;
1839 Entry.Node = Load;
1840 Entry.Ty = CallTy;
1841 Args.push_back(x: Entry);
1842
1843 // Setup call to __tls_get_addr.
1844 TargetLowering::CallLoweringInfo CLI(DAG);
1845 CLI.setDebugLoc(DL)
1846 .setChain(DAG.getEntryNode())
1847 .setLibCallee(CC: CallingConv::C, ResultType: CallTy,
1848 Target: DAG.getExternalSymbol(Sym: "__tls_get_addr", VT: Ty),
1849 ArgsList: std::move(Args));
1850
1851 return LowerCallTo(CLI).first;
1852}
1853
1854SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
1855 SelectionDAG &DAG, unsigned Opc,
1856 bool Large) const {
1857 SDLoc DL(N);
1858 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
1859 const GlobalValue *GV = N->getGlobal();
1860
1861 // This is not actually used, but is necessary for successfully matching the
1862 // PseudoLA_*_LARGE nodes.
1863 SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty);
1864
1865 // Use a PC-relative addressing mode to access the global dynamic GOT address.
1866 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
1867 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: 0);
1868 return Large ? SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), 0)
1869 : SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Addr), 0);
1870}
1871
1872SDValue
1873LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
1874 SelectionDAG &DAG) const {
1875 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
1876 CallingConv::GHC)
1877 report_fatal_error(reason: "In GHC calling convention TLS is not supported");
1878
1879 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
1880 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
1881
1882 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Val&: Op);
1883 assert(N->getOffset() == 0 && "unexpected offset in global node");
1884
1885 if (DAG.getTarget().useEmulatedTLS())
1886 report_fatal_error(reason: "the emulated TLS is prohibited",
1887 /*GenCrashDiag=*/gen_crash_diag: false);
1888
1889 bool IsDesc = DAG.getTarget().useTLSDESC();
1890
1891 switch (getTargetMachine().getTLSModel(GV: N->getGlobal())) {
1892 case TLSModel::GeneralDynamic:
1893 // In this model, application code calls the dynamic linker function
1894 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
1895 // runtime.
1896 if (!IsDesc)
1897 return getDynamicTLSAddr(N, DAG,
1898 Opc: Large ? LoongArch::PseudoLA_TLS_GD_LARGE
1899 : LoongArch::PseudoLA_TLS_GD,
1900 Large);
1901 break;
1902 case TLSModel::LocalDynamic:
1903 // Same as GeneralDynamic, except for assembly modifiers and relocation
1904 // records.
1905 if (!IsDesc)
1906 return getDynamicTLSAddr(N, DAG,
1907 Opc: Large ? LoongArch::PseudoLA_TLS_LD_LARGE
1908 : LoongArch::PseudoLA_TLS_LD,
1909 Large);
1910 break;
1911 case TLSModel::InitialExec:
1912 // This model uses the GOT to resolve TLS offsets.
1913 return getStaticTLSAddr(N, DAG,
1914 Opc: Large ? LoongArch::PseudoLA_TLS_IE_LARGE
1915 : LoongArch::PseudoLA_TLS_IE,
1916 /*UseGOT=*/true, Large);
1917 case TLSModel::LocalExec:
1918 // This model is used when static linking as the TLS offsets are resolved
1919 // during program linking.
1920 //
1921 // This node doesn't need an extra argument for the large code model.
1922 return getStaticTLSAddr(N, DAG, Opc: LoongArch::PseudoLA_TLS_LE,
1923 /*UseGOT=*/false);
1924 }
1925
1926 return getTLSDescAddr(N, DAG,
1927 Opc: Large ? LoongArch::PseudoLA_TLS_DESC_PC_LARGE
1928 : LoongArch::PseudoLA_TLS_DESC_PC,
1929 Large);
1930}
1931
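// Verify that operand ImmOp of the intrinsic is an immediate that fits in N
// bits (signed when IsSigned). On failure this emits a diagnostic and returns
// an UNDEF of the result type; on success it returns an empty SDValue so the
// caller keeps the original node. For example, checkIntrinsicImmArg<5>(Op, 2,
// DAG) accepts unsigned immediates in [0, 31].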
1932template <unsigned N>
1933static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
1934 SelectionDAG &DAG, bool IsSigned = false) {
1935 auto *CImm = cast<ConstantSDNode>(Val: Op->getOperand(Num: ImmOp));
1936 // Check the ImmArg.
1937 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
1938 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
1939 DAG.getContext()->emitError(ErrorStr: Op->getOperationName(G: 0) +
1940 ": argument out of range.");
1941 return DAG.getNode(Opcode: ISD::UNDEF, DL: SDLoc(Op), VT: Op.getValueType());
1942 }
1943 return SDValue();
1944}
1945
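// Custom lowering of chainless intrinsics mostly performs immediate range
// checking: each intrinsic is grouped by the bit width (and signedness) of
// its immediate operand and dispatched to checkIntrinsicImmArg accordingly.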
1946SDValue
1947LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
1948 SelectionDAG &DAG) const {
1949 SDLoc DL(Op);
1950 switch (Op.getConstantOperandVal(i: 0)) {
1951 default:
1952 return SDValue(); // Don't custom lower most intrinsics.
1953 case Intrinsic::thread_pointer: {
1954 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
1955 return DAG.getRegister(Reg: LoongArch::R2, VT: PtrVT);
1956 }
1957 case Intrinsic::loongarch_lsx_vpickve2gr_d:
1958 case Intrinsic::loongarch_lsx_vpickve2gr_du:
1959 case Intrinsic::loongarch_lsx_vreplvei_d:
1960 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
1961 return checkIntrinsicImmArg<1>(Op, ImmOp: 2, DAG);
1962 case Intrinsic::loongarch_lsx_vreplvei_w:
1963 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
1964 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
1965 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
1966 case Intrinsic::loongarch_lasx_xvpickve_d:
1967 case Intrinsic::loongarch_lasx_xvpickve_d_f:
1968 return checkIntrinsicImmArg<2>(Op, ImmOp: 2, DAG);
1969 case Intrinsic::loongarch_lasx_xvinsve0_d:
1970 return checkIntrinsicImmArg<2>(Op, ImmOp: 3, DAG);
1971 case Intrinsic::loongarch_lsx_vsat_b:
1972 case Intrinsic::loongarch_lsx_vsat_bu:
1973 case Intrinsic::loongarch_lsx_vrotri_b:
1974 case Intrinsic::loongarch_lsx_vsllwil_h_b:
1975 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
1976 case Intrinsic::loongarch_lsx_vsrlri_b:
1977 case Intrinsic::loongarch_lsx_vsrari_b:
1978 case Intrinsic::loongarch_lsx_vreplvei_h:
1979 case Intrinsic::loongarch_lasx_xvsat_b:
1980 case Intrinsic::loongarch_lasx_xvsat_bu:
1981 case Intrinsic::loongarch_lasx_xvrotri_b:
1982 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
1983 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
1984 case Intrinsic::loongarch_lasx_xvsrlri_b:
1985 case Intrinsic::loongarch_lasx_xvsrari_b:
1986 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
1987 case Intrinsic::loongarch_lasx_xvpickve_w:
1988 case Intrinsic::loongarch_lasx_xvpickve_w_f:
1989 return checkIntrinsicImmArg<3>(Op, ImmOp: 2, DAG);
1990 case Intrinsic::loongarch_lasx_xvinsve0_w:
1991 return checkIntrinsicImmArg<3>(Op, ImmOp: 3, DAG);
1992 case Intrinsic::loongarch_lsx_vsat_h:
1993 case Intrinsic::loongarch_lsx_vsat_hu:
1994 case Intrinsic::loongarch_lsx_vrotri_h:
1995 case Intrinsic::loongarch_lsx_vsllwil_w_h:
1996 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
1997 case Intrinsic::loongarch_lsx_vsrlri_h:
1998 case Intrinsic::loongarch_lsx_vsrari_h:
1999 case Intrinsic::loongarch_lsx_vreplvei_b:
2000 case Intrinsic::loongarch_lasx_xvsat_h:
2001 case Intrinsic::loongarch_lasx_xvsat_hu:
2002 case Intrinsic::loongarch_lasx_xvrotri_h:
2003 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
2004 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
2005 case Intrinsic::loongarch_lasx_xvsrlri_h:
2006 case Intrinsic::loongarch_lasx_xvsrari_h:
2007 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
2008 return checkIntrinsicImmArg<4>(Op, ImmOp: 2, DAG);
2009 case Intrinsic::loongarch_lsx_vsrlni_b_h:
2010 case Intrinsic::loongarch_lsx_vsrani_b_h:
2011 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
2012 case Intrinsic::loongarch_lsx_vsrarni_b_h:
2013 case Intrinsic::loongarch_lsx_vssrlni_b_h:
2014 case Intrinsic::loongarch_lsx_vssrani_b_h:
2015 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
2016 case Intrinsic::loongarch_lsx_vssrani_bu_h:
2017 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
2018 case Intrinsic::loongarch_lsx_vssrarni_b_h:
2019 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
2020 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
2021 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
2022 case Intrinsic::loongarch_lasx_xvsrani_b_h:
2023 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
2024 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
2025 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
2026 case Intrinsic::loongarch_lasx_xvssrani_b_h:
2027 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
2028 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
2029 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
2030 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
2031 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
2032 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
2033 return checkIntrinsicImmArg<4>(Op, ImmOp: 3, DAG);
2034 case Intrinsic::loongarch_lsx_vsat_w:
2035 case Intrinsic::loongarch_lsx_vsat_wu:
2036 case Intrinsic::loongarch_lsx_vrotri_w:
2037 case Intrinsic::loongarch_lsx_vsllwil_d_w:
2038 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
2039 case Intrinsic::loongarch_lsx_vsrlri_w:
2040 case Intrinsic::loongarch_lsx_vsrari_w:
2041 case Intrinsic::loongarch_lsx_vslei_bu:
2042 case Intrinsic::loongarch_lsx_vslei_hu:
2043 case Intrinsic::loongarch_lsx_vslei_wu:
2044 case Intrinsic::loongarch_lsx_vslei_du:
2045 case Intrinsic::loongarch_lsx_vslti_bu:
2046 case Intrinsic::loongarch_lsx_vslti_hu:
2047 case Intrinsic::loongarch_lsx_vslti_wu:
2048 case Intrinsic::loongarch_lsx_vslti_du:
2049 case Intrinsic::loongarch_lsx_vbsll_v:
2050 case Intrinsic::loongarch_lsx_vbsrl_v:
2051 case Intrinsic::loongarch_lasx_xvsat_w:
2052 case Intrinsic::loongarch_lasx_xvsat_wu:
2053 case Intrinsic::loongarch_lasx_xvrotri_w:
2054 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
2055 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
2056 case Intrinsic::loongarch_lasx_xvsrlri_w:
2057 case Intrinsic::loongarch_lasx_xvsrari_w:
2058 case Intrinsic::loongarch_lasx_xvslei_bu:
2059 case Intrinsic::loongarch_lasx_xvslei_hu:
2060 case Intrinsic::loongarch_lasx_xvslei_wu:
2061 case Intrinsic::loongarch_lasx_xvslei_du:
2062 case Intrinsic::loongarch_lasx_xvslti_bu:
2063 case Intrinsic::loongarch_lasx_xvslti_hu:
2064 case Intrinsic::loongarch_lasx_xvslti_wu:
2065 case Intrinsic::loongarch_lasx_xvslti_du:
2066 case Intrinsic::loongarch_lasx_xvbsll_v:
2067 case Intrinsic::loongarch_lasx_xvbsrl_v:
2068 return checkIntrinsicImmArg<5>(Op, ImmOp: 2, DAG);
2069 case Intrinsic::loongarch_lsx_vseqi_b:
2070 case Intrinsic::loongarch_lsx_vseqi_h:
2071 case Intrinsic::loongarch_lsx_vseqi_w:
2072 case Intrinsic::loongarch_lsx_vseqi_d:
2073 case Intrinsic::loongarch_lsx_vslei_b:
2074 case Intrinsic::loongarch_lsx_vslei_h:
2075 case Intrinsic::loongarch_lsx_vslei_w:
2076 case Intrinsic::loongarch_lsx_vslei_d:
2077 case Intrinsic::loongarch_lsx_vslti_b:
2078 case Intrinsic::loongarch_lsx_vslti_h:
2079 case Intrinsic::loongarch_lsx_vslti_w:
2080 case Intrinsic::loongarch_lsx_vslti_d:
2081 case Intrinsic::loongarch_lasx_xvseqi_b:
2082 case Intrinsic::loongarch_lasx_xvseqi_h:
2083 case Intrinsic::loongarch_lasx_xvseqi_w:
2084 case Intrinsic::loongarch_lasx_xvseqi_d:
2085 case Intrinsic::loongarch_lasx_xvslei_b:
2086 case Intrinsic::loongarch_lasx_xvslei_h:
2087 case Intrinsic::loongarch_lasx_xvslei_w:
2088 case Intrinsic::loongarch_lasx_xvslei_d:
2089 case Intrinsic::loongarch_lasx_xvslti_b:
2090 case Intrinsic::loongarch_lasx_xvslti_h:
2091 case Intrinsic::loongarch_lasx_xvslti_w:
2092 case Intrinsic::loongarch_lasx_xvslti_d:
2093 return checkIntrinsicImmArg<5>(Op, ImmOp: 2, DAG, /*IsSigned=*/true);
2094 case Intrinsic::loongarch_lsx_vsrlni_h_w:
2095 case Intrinsic::loongarch_lsx_vsrani_h_w:
2096 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
2097 case Intrinsic::loongarch_lsx_vsrarni_h_w:
2098 case Intrinsic::loongarch_lsx_vssrlni_h_w:
2099 case Intrinsic::loongarch_lsx_vssrani_h_w:
2100 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
2101 case Intrinsic::loongarch_lsx_vssrani_hu_w:
2102 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
2103 case Intrinsic::loongarch_lsx_vssrarni_h_w:
2104 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
2105 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
2106 case Intrinsic::loongarch_lsx_vfrstpi_b:
2107 case Intrinsic::loongarch_lsx_vfrstpi_h:
2108 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
2109 case Intrinsic::loongarch_lasx_xvsrani_h_w:
2110 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
2111 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
2112 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
2113 case Intrinsic::loongarch_lasx_xvssrani_h_w:
2114 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
2115 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
2116 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
2117 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
2118 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
2119 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
2120 case Intrinsic::loongarch_lasx_xvfrstpi_b:
2121 case Intrinsic::loongarch_lasx_xvfrstpi_h:
2122 return checkIntrinsicImmArg<5>(Op, ImmOp: 3, DAG);
2123 case Intrinsic::loongarch_lsx_vsat_d:
2124 case Intrinsic::loongarch_lsx_vsat_du:
2125 case Intrinsic::loongarch_lsx_vrotri_d:
2126 case Intrinsic::loongarch_lsx_vsrlri_d:
2127 case Intrinsic::loongarch_lsx_vsrari_d:
2128 case Intrinsic::loongarch_lasx_xvsat_d:
2129 case Intrinsic::loongarch_lasx_xvsat_du:
2130 case Intrinsic::loongarch_lasx_xvrotri_d:
2131 case Intrinsic::loongarch_lasx_xvsrlri_d:
2132 case Intrinsic::loongarch_lasx_xvsrari_d:
2133 return checkIntrinsicImmArg<6>(Op, ImmOp: 2, DAG);
2134 case Intrinsic::loongarch_lsx_vsrlni_w_d:
2135 case Intrinsic::loongarch_lsx_vsrani_w_d:
2136 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
2137 case Intrinsic::loongarch_lsx_vsrarni_w_d:
2138 case Intrinsic::loongarch_lsx_vssrlni_w_d:
2139 case Intrinsic::loongarch_lsx_vssrani_w_d:
2140 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
2141 case Intrinsic::loongarch_lsx_vssrani_wu_d:
2142 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
2143 case Intrinsic::loongarch_lsx_vssrarni_w_d:
2144 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
2145 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
2146 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
2147 case Intrinsic::loongarch_lasx_xvsrani_w_d:
2148 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
2149 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
2150 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
2151 case Intrinsic::loongarch_lasx_xvssrani_w_d:
2152 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
2153 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
2154 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
2155 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
2156 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
2157 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
2158 return checkIntrinsicImmArg<6>(Op, ImmOp: 3, DAG);
2159 case Intrinsic::loongarch_lsx_vsrlni_d_q:
2160 case Intrinsic::loongarch_lsx_vsrani_d_q:
2161 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
2162 case Intrinsic::loongarch_lsx_vsrarni_d_q:
2163 case Intrinsic::loongarch_lsx_vssrlni_d_q:
2164 case Intrinsic::loongarch_lsx_vssrani_d_q:
2165 case Intrinsic::loongarch_lsx_vssrlni_du_q:
2166 case Intrinsic::loongarch_lsx_vssrani_du_q:
2167 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
2168 case Intrinsic::loongarch_lsx_vssrarni_d_q:
2169 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
2170 case Intrinsic::loongarch_lsx_vssrarni_du_q:
2171 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
2172 case Intrinsic::loongarch_lasx_xvsrani_d_q:
2173 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
2174 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
2175 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
2176 case Intrinsic::loongarch_lasx_xvssrani_d_q:
2177 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
2178 case Intrinsic::loongarch_lasx_xvssrani_du_q:
2179 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
2180 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
2181 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
2182 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
2183 return checkIntrinsicImmArg<7>(Op, ImmOp: 3, DAG);
2184 case Intrinsic::loongarch_lsx_vnori_b:
2185 case Intrinsic::loongarch_lsx_vshuf4i_b:
2186 case Intrinsic::loongarch_lsx_vshuf4i_h:
2187 case Intrinsic::loongarch_lsx_vshuf4i_w:
2188 case Intrinsic::loongarch_lasx_xvnori_b:
2189 case Intrinsic::loongarch_lasx_xvshuf4i_b:
2190 case Intrinsic::loongarch_lasx_xvshuf4i_h:
2191 case Intrinsic::loongarch_lasx_xvshuf4i_w:
2192 case Intrinsic::loongarch_lasx_xvpermi_d:
2193 return checkIntrinsicImmArg<8>(Op, ImmOp: 2, DAG);
2194 case Intrinsic::loongarch_lsx_vshuf4i_d:
2195 case Intrinsic::loongarch_lsx_vpermi_w:
2196 case Intrinsic::loongarch_lsx_vbitseli_b:
2197 case Intrinsic::loongarch_lsx_vextrins_b:
2198 case Intrinsic::loongarch_lsx_vextrins_h:
2199 case Intrinsic::loongarch_lsx_vextrins_w:
2200 case Intrinsic::loongarch_lsx_vextrins_d:
2201 case Intrinsic::loongarch_lasx_xvshuf4i_d:
2202 case Intrinsic::loongarch_lasx_xvpermi_w:
2203 case Intrinsic::loongarch_lasx_xvpermi_q:
2204 case Intrinsic::loongarch_lasx_xvbitseli_b:
2205 case Intrinsic::loongarch_lasx_xvextrins_b:
2206 case Intrinsic::loongarch_lasx_xvextrins_h:
2207 case Intrinsic::loongarch_lasx_xvextrins_w:
2208 case Intrinsic::loongarch_lasx_xvextrins_d:
2209 return checkIntrinsicImmArg<8>(Op, ImmOp: 3, DAG);
2210 case Intrinsic::loongarch_lsx_vrepli_b:
2211 case Intrinsic::loongarch_lsx_vrepli_h:
2212 case Intrinsic::loongarch_lsx_vrepli_w:
2213 case Intrinsic::loongarch_lsx_vrepli_d:
2214 case Intrinsic::loongarch_lasx_xvrepli_b:
2215 case Intrinsic::loongarch_lasx_xvrepli_h:
2216 case Intrinsic::loongarch_lasx_xvrepli_w:
2217 case Intrinsic::loongarch_lasx_xvrepli_d:
2218 return checkIntrinsicImmArg<10>(Op, ImmOp: 1, DAG, /*IsSigned=*/true);
2219 case Intrinsic::loongarch_lsx_vldi:
2220 case Intrinsic::loongarch_lasx_xvldi:
2221 return checkIntrinsicImmArg<13>(Op, ImmOp: 1, DAG, /*IsSigned=*/true);
2222 }
2223}
2224
2225// Helper function that emits an error message for intrinsics with a chain and
2226// returns the merge values of an UNDEF and the chain.
2227static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
2228 StringRef ErrorMsg,
2229 SelectionDAG &DAG) {
2230 DAG.getContext()->emitError(ErrorStr: Op->getOperationName(G: 0) + ": " + ErrorMsg + ".");
2231 return DAG.getMergeValues(Ops: {DAG.getUNDEF(VT: Op.getValueType()), Op.getOperand(i: 0)},
2232 dl: SDLoc(Op));
2233}
2234
2235SDValue
2236LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
2237 SelectionDAG &DAG) const {
2238 SDLoc DL(Op);
2239 MVT GRLenVT = Subtarget.getGRLenVT();
2240 EVT VT = Op.getValueType();
2241 SDValue Chain = Op.getOperand(i: 0);
2242 const StringRef ErrorMsgOOR = "argument out of range";
2243 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2244 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2245
2246 switch (Op.getConstantOperandVal(i: 1)) {
2247 default:
2248 return Op;
2249 case Intrinsic::loongarch_crc_w_b_w:
2250 case Intrinsic::loongarch_crc_w_h_w:
2251 case Intrinsic::loongarch_crc_w_w_w:
2252 case Intrinsic::loongarch_crc_w_d_w:
2253 case Intrinsic::loongarch_crcc_w_b_w:
2254 case Intrinsic::loongarch_crcc_w_h_w:
2255 case Intrinsic::loongarch_crcc_w_w_w:
2256 case Intrinsic::loongarch_crcc_w_d_w:
2257 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG);
2258 case Intrinsic::loongarch_csrrd_w:
2259 case Intrinsic::loongarch_csrrd_d: {
2260 unsigned Imm = Op.getConstantOperandVal(i: 2);
2261 return !isUInt<14>(x: Imm)
2262 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
2263 : DAG.getNode(Opcode: LoongArchISD::CSRRD, DL, ResultTys: {GRLenVT, MVT::Other},
2264 Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
2265 }
2266 case Intrinsic::loongarch_csrwr_w:
2267 case Intrinsic::loongarch_csrwr_d: {
2268 unsigned Imm = Op.getConstantOperandVal(i: 3);
2269 return !isUInt<14>(x: Imm)
2270 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
2271 : DAG.getNode(Opcode: LoongArchISD::CSRWR, DL, ResultTys: {GRLenVT, MVT::Other},
2272 Ops: {Chain, Op.getOperand(i: 2),
2273 DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
2274 }
2275 case Intrinsic::loongarch_csrxchg_w:
2276 case Intrinsic::loongarch_csrxchg_d: {
2277 unsigned Imm = Op.getConstantOperandVal(i: 4);
2278 return !isUInt<14>(x: Imm)
2279 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
2280 : DAG.getNode(Opcode: LoongArchISD::CSRXCHG, DL, ResultTys: {GRLenVT, MVT::Other},
2281 Ops: {Chain, Op.getOperand(i: 2), Op.getOperand(i: 3),
2282 DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
2283 }
2284 case Intrinsic::loongarch_iocsrrd_d: {
2285 return DAG.getNode(
2286 Opcode: LoongArchISD::IOCSRRD_D, DL, ResultTys: {GRLenVT, MVT::Other},
2287 Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op.getOperand(i: 2))});
2288 }
2289#define IOCSRRD_CASE(NAME, NODE) \
2290 case Intrinsic::loongarch_##NAME: { \
2291 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
2292 {Chain, Op.getOperand(2)}); \
2293 }
2294 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
2295 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
2296 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
2297#undef IOCSRRD_CASE
2298 case Intrinsic::loongarch_cpucfg: {
2299 return DAG.getNode(Opcode: LoongArchISD::CPUCFG, DL, ResultTys: {GRLenVT, MVT::Other},
2300 Ops: {Chain, Op.getOperand(i: 2)});
2301 }
2302 case Intrinsic::loongarch_lddir_d: {
2303 unsigned Imm = Op.getConstantOperandVal(i: 3);
2304 return !isUInt<8>(x: Imm)
2305 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
2306 : Op;
2307 }
2308 case Intrinsic::loongarch_movfcsr2gr: {
2309 if (!Subtarget.hasBasicF())
2310 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgReqF, DAG);
2311 unsigned Imm = Op.getConstantOperandVal(i: 2);
2312 return !isUInt<2>(x: Imm)
2313 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
2314 : DAG.getNode(Opcode: LoongArchISD::MOVFCSR2GR, DL, ResultTys: {VT, MVT::Other},
2315 Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
2316 }
2317 case Intrinsic::loongarch_lsx_vld:
2318 case Intrinsic::loongarch_lsx_vldrepl_b:
2319 case Intrinsic::loongarch_lasx_xvld:
2320 case Intrinsic::loongarch_lasx_xvldrepl_b:
2321 return !isInt<12>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue())
2322 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
2323 : SDValue();
2324 case Intrinsic::loongarch_lsx_vldrepl_h:
2325 case Intrinsic::loongarch_lasx_xvldrepl_h:
2326 return !isShiftedInt<11, 1>(
2327 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue())
2328 ? emitIntrinsicWithChainErrorMessage(
2329 Op, ErrorMsg: "argument out of range or not a multiple of 2", DAG)
2330 : SDValue();
2331 case Intrinsic::loongarch_lsx_vldrepl_w:
2332 case Intrinsic::loongarch_lasx_xvldrepl_w:
2333 return !isShiftedInt<10, 2>(
2334 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue())
2335 ? emitIntrinsicWithChainErrorMessage(
2336 Op, ErrorMsg: "argument out of range or not a multiple of 4", DAG)
2337 : SDValue();
2338 case Intrinsic::loongarch_lsx_vldrepl_d:
2339 case Intrinsic::loongarch_lasx_xvldrepl_d:
2340 return !isShiftedInt<9, 3>(
2341 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue())
2342 ? emitIntrinsicWithChainErrorMessage(
2343 Op, ErrorMsg: "argument out of range or not a multiple of 8", DAG)
2344 : SDValue();
2345 }
2346}
2347
2348// Helper function that emits an error message for intrinsics with a void
2349// return value and returns the chain.
2350static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
2351 SelectionDAG &DAG) {
2352
2353 DAG.getContext()->emitError(ErrorStr: Op->getOperationName(G: 0) + ": " + ErrorMsg + ".");
2354 return Op.getOperand(i: 0);
2355}
2356
2357SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
2358 SelectionDAG &DAG) const {
2359 SDLoc DL(Op);
2360 MVT GRLenVT = Subtarget.getGRLenVT();
2361 SDValue Chain = Op.getOperand(i: 0);
2362 uint64_t IntrinsicEnum = Op.getConstantOperandVal(i: 1);
2363 SDValue Op2 = Op.getOperand(i: 2);
2364 const StringRef ErrorMsgOOR = "argument out of range";
2365 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2366 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
2367 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2368
2369 switch (IntrinsicEnum) {
2370 default:
2371 // TODO: Add more Intrinsics.
2372 return SDValue();
2373 case Intrinsic::loongarch_cacop_d:
2374 case Intrinsic::loongarch_cacop_w: {
2375 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
2376 return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG);
2377 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
2378 return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA32, DAG);
2379 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
2380 unsigned Imm1 = Op2->getAsZExtVal();
2381 int Imm2 = cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue();
2382 if (!isUInt<5>(x: Imm1) || !isInt<12>(x: Imm2))
2383 return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG);
2384 return Op;
2385 }
2386 case Intrinsic::loongarch_dbar: {
2387 unsigned Imm = Op2->getAsZExtVal();
2388 return !isUInt<15>(x: Imm)
2389 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
2390 : DAG.getNode(Opcode: LoongArchISD::DBAR, DL, VT: MVT::Other, N1: Chain,
2391 N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT));
2392 }
2393 case Intrinsic::loongarch_ibar: {
2394 unsigned Imm = Op2->getAsZExtVal();
2395 return !isUInt<15>(x: Imm)
2396 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
2397 : DAG.getNode(Opcode: LoongArchISD::IBAR, DL, VT: MVT::Other, N1: Chain,
2398 N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT));
2399 }
2400 case Intrinsic::loongarch_break: {
2401 unsigned Imm = Op2->getAsZExtVal();
2402 return !isUInt<15>(x: Imm)
2403 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
2404 : DAG.getNode(Opcode: LoongArchISD::BREAK, DL, VT: MVT::Other, N1: Chain,
2405 N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT));
2406 }
2407 case Intrinsic::loongarch_movgr2fcsr: {
2408 if (!Subtarget.hasBasicF())
2409 return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqF, DAG);
2410 unsigned Imm = Op2->getAsZExtVal();
2411 return !isUInt<2>(x: Imm)
2412 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
2413 : DAG.getNode(Opcode: LoongArchISD::MOVGR2FCSR, DL, VT: MVT::Other, N1: Chain,
2414 N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT),
2415 N3: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: GRLenVT,
2416 Operand: Op.getOperand(i: 3)));
2417 }
2418 case Intrinsic::loongarch_syscall: {
2419 unsigned Imm = Op2->getAsZExtVal();
2420 return !isUInt<15>(x: Imm)
2421 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
2422 : DAG.getNode(Opcode: LoongArchISD::SYSCALL, DL, VT: MVT::Other, N1: Chain,
2423 N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT));
2424 }
2425#define IOCSRWR_CASE(NAME, NODE) \
2426 case Intrinsic::loongarch_##NAME: { \
2427 SDValue Op3 = Op.getOperand(3); \
2428 return Subtarget.is64Bit() \
2429 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
2430 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
2431 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
2432 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
2433 Op3); \
2434 }
2435 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
2436 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
2437 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
2438#undef IOCSRWR_CASE
2439 case Intrinsic::loongarch_iocsrwr_d: {
2440 return !Subtarget.is64Bit()
2441 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG)
2442 : DAG.getNode(Opcode: LoongArchISD::IOCSRWR_D, DL, VT: MVT::Other, N1: Chain,
2443 N2: Op2,
2444 N3: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64,
2445 Operand: Op.getOperand(i: 3)));
2446 }
2447#define ASRT_LE_GT_CASE(NAME) \
2448 case Intrinsic::loongarch_##NAME: { \
2449 return !Subtarget.is64Bit() \
2450 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
2451 : Op; \
2452 }
2453 ASRT_LE_GT_CASE(asrtle_d)
2454 ASRT_LE_GT_CASE(asrtgt_d)
2455#undef ASRT_LE_GT_CASE
2456 case Intrinsic::loongarch_ldpte_d: {
2457 unsigned Imm = Op.getConstantOperandVal(i: 3);
2458 return !Subtarget.is64Bit()
2459 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG)
2460 : !isUInt<8>(x: Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
2461 : Op;
2462 }
2463 case Intrinsic::loongarch_lsx_vst:
2464 case Intrinsic::loongarch_lasx_xvst:
2465 return !isInt<12>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue())
2466 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
2467 : SDValue();
2468 case Intrinsic::loongarch_lasx_xvstelm_b:
2469 return (!isInt<8>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
2470 !isUInt<5>(x: Op.getConstantOperandVal(i: 5)))
2471 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
2472 : SDValue();
2473 case Intrinsic::loongarch_lsx_vstelm_b:
2474 return (!isInt<8>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
2475 !isUInt<4>(x: Op.getConstantOperandVal(i: 5)))
2476 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
2477 : SDValue();
2478 case Intrinsic::loongarch_lasx_xvstelm_h:
2479 return (!isShiftedInt<8, 1>(
2480 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
2481 !isUInt<4>(x: Op.getConstantOperandVal(i: 5)))
2482 ? emitIntrinsicErrorMessage(
2483 Op, ErrorMsg: "argument out of range or not a multiple of 2", DAG)
2484 : SDValue();
2485 case Intrinsic::loongarch_lsx_vstelm_h:
2486 return (!isShiftedInt<8, 1>(
2487 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
2488 !isUInt<3>(x: Op.getConstantOperandVal(i: 5)))
2489 ? emitIntrinsicErrorMessage(
2490 Op, ErrorMsg: "argument out of range or not a multiple of 2", DAG)
2491 : SDValue();
2492 case Intrinsic::loongarch_lasx_xvstelm_w:
2493 return (!isShiftedInt<8, 2>(
2494 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
2495 !isUInt<3>(x: Op.getConstantOperandVal(i: 5)))
2496 ? emitIntrinsicErrorMessage(
2497 Op, ErrorMsg: "argument out of range or not a multiple of 4", DAG)
2498 : SDValue();
2499 case Intrinsic::loongarch_lsx_vstelm_w:
2500 return (!isShiftedInt<8, 2>(
2501 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
2502 !isUInt<2>(x: Op.getConstantOperandVal(i: 5)))
2503 ? emitIntrinsicErrorMessage(
2504 Op, ErrorMsg: "argument out of range or not a multiple of 4", DAG)
2505 : SDValue();
2506 case Intrinsic::loongarch_lasx_xvstelm_d:
2507 return (!isShiftedInt<8, 3>(
2508 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
2509 !isUInt<2>(x: Op.getConstantOperandVal(i: 5)))
2510 ? emitIntrinsicErrorMessage(
2511 Op, ErrorMsg: "argument out of range or not a multiple of 8", DAG)
2512 : SDValue();
2513 case Intrinsic::loongarch_lsx_vstelm_d:
2514 return (!isShiftedInt<8, 3>(
2515 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
2516 !isUInt<1>(x: Op.getConstantOperandVal(i: 5)))
2517 ? emitIntrinsicErrorMessage(
2518 Op, ErrorMsg: "argument out of range or not a multiple of 8", DAG)
2519 : SDValue();
2520 }
2521}
2522
2523SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
2524 SelectionDAG &DAG) const {
2525 SDLoc DL(Op);
2526 SDValue Lo = Op.getOperand(i: 0);
2527 SDValue Hi = Op.getOperand(i: 1);
2528 SDValue Shamt = Op.getOperand(i: 2);
2529 EVT VT = Lo.getValueType();
2530
2531 // if Shamt-GRLen < 0: // Shamt < GRLen
2532 // Lo = Lo << Shamt
2533 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
2534 // else:
2535 // Lo = 0
2536 // Hi = Lo << (Shamt-GRLen)
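  // E.g. with GRLen == 32: for Shamt == 4 this yields Lo = Lo << 4 and
  // Hi = (Hi << 4) | (Lo >> 28), while for Shamt == 40 it yields Lo = 0 and
  // Hi = Lo << 8.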
2537
2538 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
2539 SDValue One = DAG.getConstant(Val: 1, DL, VT);
2540 SDValue MinusGRLen = DAG.getConstant(Val: -(int)Subtarget.getGRLen(), DL, VT);
2541 SDValue GRLenMinus1 = DAG.getConstant(Val: Subtarget.getGRLen() - 1, DL, VT);
2542 SDValue ShamtMinusGRLen = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shamt, N2: MinusGRLen);
2543 SDValue GRLenMinus1Shamt = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Shamt, N2: GRLenMinus1);
2544
2545 SDValue LoTrue = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Lo, N2: Shamt);
2546 SDValue ShiftRight1Lo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo, N2: One);
2547 SDValue ShiftRightLo =
2548 DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: ShiftRight1Lo, N2: GRLenMinus1Shamt);
2549 SDValue ShiftLeftHi = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi, N2: Shamt);
2550 SDValue HiTrue = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShiftLeftHi, N2: ShiftRightLo);
2551 SDValue HiFalse = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Lo, N2: ShamtMinusGRLen);
2552
2553 SDValue CC = DAG.getSetCC(DL, VT, LHS: ShamtMinusGRLen, RHS: Zero, Cond: ISD::SETLT);
2554
2555 Lo = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: LoTrue, N3: Zero);
2556 Hi = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: HiTrue, N3: HiFalse);
2557
2558 SDValue Parts[2] = {Lo, Hi};
2559 return DAG.getMergeValues(Ops: Parts, dl: DL);
2560}
2561
2562SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
2563 SelectionDAG &DAG,
2564 bool IsSRA) const {
2565 SDLoc DL(Op);
2566 SDValue Lo = Op.getOperand(i: 0);
2567 SDValue Hi = Op.getOperand(i: 1);
2568 SDValue Shamt = Op.getOperand(i: 2);
2569 EVT VT = Lo.getValueType();
2570
2571 // SRA expansion:
2572 // if Shamt-GRLen < 0: // Shamt < GRLen
2573 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
2574 // Hi = Hi >>s Shamt
2575 // else:
2576 // Lo = Hi >>s (Shamt-GRLen);
2577 // Hi = Hi >>s (GRLen-1)
2578 //
2579 // SRL expansion:
2580 // if Shamt-GRLen < 0: // Shamt < GRLen
2581 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
2582 // Hi = Hi >>u Shamt
2583 // else:
2584 // Lo = Hi >>u (Shamt-GRLen);
2585 // Hi = 0;
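  // E.g. with GRLen == 32 and Shamt == 4, SRL yields Lo = (Lo >>u 4) |
  // (Hi << 28) and Hi = Hi >>u 4; with Shamt == 40 it yields Lo = Hi >>u 8
  // and Hi = 0.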
2586
2587 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
2588
2589 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
2590 SDValue One = DAG.getConstant(Val: 1, DL, VT);
2591 SDValue MinusGRLen = DAG.getConstant(Val: -(int)Subtarget.getGRLen(), DL, VT);
2592 SDValue GRLenMinus1 = DAG.getConstant(Val: Subtarget.getGRLen() - 1, DL, VT);
2593 SDValue ShamtMinusGRLen = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shamt, N2: MinusGRLen);
2594 SDValue GRLenMinus1Shamt = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Shamt, N2: GRLenMinus1);
2595
2596 SDValue ShiftRightLo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo, N2: Shamt);
2597 SDValue ShiftLeftHi1 = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi, N2: One);
2598 SDValue ShiftLeftHi =
2599 DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: ShiftLeftHi1, N2: GRLenMinus1Shamt);
2600 SDValue LoTrue = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShiftRightLo, N2: ShiftLeftHi);
2601 SDValue HiTrue = DAG.getNode(Opcode: ShiftRightOp, DL, VT, N1: Hi, N2: Shamt);
2602 SDValue LoFalse = DAG.getNode(Opcode: ShiftRightOp, DL, VT, N1: Hi, N2: ShamtMinusGRLen);
2603 SDValue HiFalse =
2604 IsSRA ? DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: Hi, N2: GRLenMinus1) : Zero;
2605
2606 SDValue CC = DAG.getSetCC(DL, VT, LHS: ShamtMinusGRLen, RHS: Zero, Cond: ISD::SETLT);
2607
2608 Lo = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: LoTrue, N3: LoFalse);
2609 Hi = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: HiTrue, N3: HiFalse);
2610
2611 SDValue Parts[2] = {Lo, Hi};
2612 return DAG.getMergeValues(Ops: Parts, dl: DL);
2613}
2614
2615// Returns the opcode of the target-specific SDNode that implements the 32-bit
2616// form of the given Opcode.
2617static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
2618 switch (Opcode) {
2619 default:
2620 llvm_unreachable("Unexpected opcode");
2621 case ISD::UDIV:
2622 return LoongArchISD::DIV_WU;
2623 case ISD::UREM:
2624 return LoongArchISD::MOD_WU;
2625 case ISD::SHL:
2626 return LoongArchISD::SLL_W;
2627 case ISD::SRA:
2628 return LoongArchISD::SRA_W;
2629 case ISD::SRL:
2630 return LoongArchISD::SRL_W;
2631 case ISD::ROTL:
2632 case ISD::ROTR:
2633 return LoongArchISD::ROTR_W;
2634 case ISD::CTTZ:
2635 return LoongArchISD::CTZ_W;
2636 case ISD::CTLZ:
2637 return LoongArchISD::CLZ_W;
2638 }
2639}
2640
2641// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
2642// node. Because i8/i16/i32 are not legal types for LA64, these operations
2643// would otherwise be promoted to i64, making it difficult to select the
2644// SLL_W/.../*_W instructions later on, because the fact that the operation
2645// was originally of type i8/i16/i32 is lost.
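// For example, an illegal i32 (srl x, y) on LA64 is rewritten roughly as
// (trunc i32 (LoongArchISD::SRL_W (any_extend i64 x), (any_extend i64 y))).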
2646static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
2647 unsigned ExtOpc = ISD::ANY_EXTEND) {
2648 SDLoc DL(N);
2649 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(Opcode: N->getOpcode());
2650 SDValue NewOp0, NewRes;
2651
2652 switch (NumOp) {
2653 default:
2654 llvm_unreachable("Unexpected NumOp");
2655 case 1: {
2656 NewOp0 = DAG.getNode(Opcode: ExtOpc, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0));
2657 NewRes = DAG.getNode(Opcode: WOpcode, DL, VT: MVT::i64, Operand: NewOp0);
2658 break;
2659 }
2660 case 2: {
2661 NewOp0 = DAG.getNode(Opcode: ExtOpc, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0));
2662 SDValue NewOp1 = DAG.getNode(Opcode: ExtOpc, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1));
2663 if (N->getOpcode() == ISD::ROTL) {
2664 SDValue TmpOp = DAG.getConstant(Val: 32, DL, VT: MVT::i64);
2665 NewOp1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: MVT::i64, N1: TmpOp, N2: NewOp1);
2666 }
2667 NewRes = DAG.getNode(Opcode: WOpcode, DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1);
2668 break;
2669 }
2670  // TODO: Handle more NumOp values.
2671 }
2672
2673 // ReplaceNodeResults requires we maintain the same type for the return
2674 // value.
2675 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: NewRes);
2676}
2677
2678// Converts the given 32-bit operation to an i64 operation with sign-extension
2679// semantics, in order to reduce the number of sign-extension instructions.
2680static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
2681 SDLoc DL(N);
2682 SDValue NewOp0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0));
2683 SDValue NewOp1 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1));
2684 SDValue NewWOp = DAG.getNode(Opcode: N->getOpcode(), DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1);
2685 SDValue NewRes = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: MVT::i64, N1: NewWOp,
2686 N2: DAG.getValueType(MVT::i32));
2687 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: NewRes);
2688}
2689
2690// Helper function that emits an error message for intrinsics with or without a
2691// chain, and pushes an UNDEF (plus the chain, if present) as the results.
2692static void emitErrorAndReplaceIntrinsicResults(
2693 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
2694 StringRef ErrorMsg, bool WithChain = true) {
2695 DAG.getContext()->emitError(ErrorStr: N->getOperationName(G: 0) + ": " + ErrorMsg + ".");
2696 Results.push_back(Elt: DAG.getUNDEF(VT: N->getValueType(ResNo: 0)));
2697 if (!WithChain)
2698 return;
2699 Results.push_back(Elt: N->getOperand(Num: 0));
2700}
2701
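// Replace the results of an (x)vpickve2gr.* intrinsic: verify that the lane
// index fits in N bits, emit a VPICK_SEXT_ELT/VPICK_ZEXT_ELT node producing
// GRLenVT and truncate back to the node's original result type.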
2702template <unsigned N>
2703static void
2704replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
2705 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
2706 unsigned ResOp) {
2707 const StringRef ErrorMsgOOR = "argument out of range";
2708 unsigned Imm = Node->getConstantOperandVal(Num: 2);
2709 if (!isUInt<N>(Imm)) {
2710 emitErrorAndReplaceIntrinsicResults(N: Node, Results, DAG, ErrorMsg: ErrorMsgOOR,
2711 /*WithChain=*/false);
2712 return;
2713 }
2714 SDLoc DL(Node);
2715 SDValue Vec = Node->getOperand(Num: 1);
2716
2717 SDValue PickElt =
2718 DAG.getNode(Opcode: ResOp, DL, VT: Subtarget.getGRLenVT(), N1: Vec,
2719 N2: DAG.getConstant(Val: Imm, DL, VT: Subtarget.getGRLenVT()),
2720 N3: DAG.getValueType(Vec.getValueType().getVectorElementType()));
2721 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: Node->getValueType(ResNo: 0),
2722 Operand: PickElt.getValue(R: 0)));
2723}
2724
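// Replace the results of an (x)bz/(x)bnz intrinsic: emit the corresponding
// VALL_*/VANY_* node producing GRLenVT and truncate back to the node's
// original result type.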
2725static void replaceVecCondBranchResults(SDNode *N,
2726 SmallVectorImpl<SDValue> &Results,
2727 SelectionDAG &DAG,
2728 const LoongArchSubtarget &Subtarget,
2729 unsigned ResOp) {
2730 SDLoc DL(N);
2731 SDValue Vec = N->getOperand(Num: 1);
2732
2733 SDValue CB = DAG.getNode(Opcode: ResOp, DL, VT: Subtarget.getGRLenVT(), Operand: Vec);
2734 Results.push_back(
2735 Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: CB.getValue(R: 0)));
2736}
2737
2738static void
2739replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
2740 SelectionDAG &DAG,
2741 const LoongArchSubtarget &Subtarget) {
2742 switch (N->getConstantOperandVal(Num: 0)) {
2743 default:
2744 llvm_unreachable("Unexpected Intrinsic.");
2745 case Intrinsic::loongarch_lsx_vpickve2gr_b:
2746 replaceVPICKVE2GRResults<4>(Node: N, Results, DAG, Subtarget,
2747 ResOp: LoongArchISD::VPICK_SEXT_ELT);
2748 break;
2749 case Intrinsic::loongarch_lsx_vpickve2gr_h:
2750 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
2751 replaceVPICKVE2GRResults<3>(Node: N, Results, DAG, Subtarget,
2752 ResOp: LoongArchISD::VPICK_SEXT_ELT);
2753 break;
2754 case Intrinsic::loongarch_lsx_vpickve2gr_w:
2755 replaceVPICKVE2GRResults<2>(Node: N, Results, DAG, Subtarget,
2756 ResOp: LoongArchISD::VPICK_SEXT_ELT);
2757 break;
2758 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
2759 replaceVPICKVE2GRResults<4>(Node: N, Results, DAG, Subtarget,
2760 ResOp: LoongArchISD::VPICK_ZEXT_ELT);
2761 break;
2762 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
2763 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
2764 replaceVPICKVE2GRResults<3>(Node: N, Results, DAG, Subtarget,
2765 ResOp: LoongArchISD::VPICK_ZEXT_ELT);
2766 break;
2767 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
2768 replaceVPICKVE2GRResults<2>(Node: N, Results, DAG, Subtarget,
2769 ResOp: LoongArchISD::VPICK_ZEXT_ELT);
2770 break;
2771 case Intrinsic::loongarch_lsx_bz_b:
2772 case Intrinsic::loongarch_lsx_bz_h:
2773 case Intrinsic::loongarch_lsx_bz_w:
2774 case Intrinsic::loongarch_lsx_bz_d:
2775 case Intrinsic::loongarch_lasx_xbz_b:
2776 case Intrinsic::loongarch_lasx_xbz_h:
2777 case Intrinsic::loongarch_lasx_xbz_w:
2778 case Intrinsic::loongarch_lasx_xbz_d:
2779 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2780 ResOp: LoongArchISD::VALL_ZERO);
2781 break;
2782 case Intrinsic::loongarch_lsx_bz_v:
2783 case Intrinsic::loongarch_lasx_xbz_v:
2784 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2785 ResOp: LoongArchISD::VANY_ZERO);
2786 break;
2787 case Intrinsic::loongarch_lsx_bnz_b:
2788 case Intrinsic::loongarch_lsx_bnz_h:
2789 case Intrinsic::loongarch_lsx_bnz_w:
2790 case Intrinsic::loongarch_lsx_bnz_d:
2791 case Intrinsic::loongarch_lasx_xbnz_b:
2792 case Intrinsic::loongarch_lasx_xbnz_h:
2793 case Intrinsic::loongarch_lasx_xbnz_w:
2794 case Intrinsic::loongarch_lasx_xbnz_d:
2795 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2796 ResOp: LoongArchISD::VALL_NONZERO);
2797 break;
2798 case Intrinsic::loongarch_lsx_bnz_v:
2799 case Intrinsic::loongarch_lasx_xbnz_v:
2800 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2801 ResOp: LoongArchISD::VANY_NONZERO);
2802 break;
2803 }
2804}
2805
2806void LoongArchTargetLowering::ReplaceNodeResults(
2807 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
2808 SDLoc DL(N);
2809 EVT VT = N->getValueType(ResNo: 0);
2810 switch (N->getOpcode()) {
2811 default:
2812 llvm_unreachable("Don't know how to legalize this operation");
2813 case ISD::ADD:
2814 case ISD::SUB:
2815 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2816 "Unexpected custom legalisation");
2817 Results.push_back(Elt: customLegalizeToWOpWithSExt(N, DAG));
2818 break;
2819 case ISD::UDIV:
2820 case ISD::UREM:
2821 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2822 "Unexpected custom legalisation");
2823 Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 2, ExtOpc: ISD::SIGN_EXTEND));
2824 break;
2825 case ISD::SHL:
2826 case ISD::SRA:
2827 case ISD::SRL:
2828 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2829 "Unexpected custom legalisation");
2830 if (N->getOperand(Num: 1).getOpcode() != ISD::Constant) {
2831 Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 2));
2832 break;
2833 }
2834 break;
2835 case ISD::ROTL:
2836 case ISD::ROTR:
2837 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2838 "Unexpected custom legalisation");
2839 Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 2));
2840 break;
2841 case ISD::FP_TO_SINT: {
2842 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2843 "Unexpected custom legalisation");
2844 SDValue Src = N->getOperand(Num: 0);
2845 EVT FVT = EVT::getFloatingPointVT(BitWidth: N->getValueSizeInBits(ResNo: 0));
2846 if (getTypeAction(Context&: *DAG.getContext(), VT: Src.getValueType()) !=
2847 TargetLowering::TypeSoftenFloat) {
2848 SDValue Dst = DAG.getNode(Opcode: LoongArchISD::FTINT, DL, VT: FVT, Operand: Src);
2849 Results.push_back(Elt: DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Dst));
2850 return;
2851 }
2852 // If the FP type needs to be softened, emit a library call using the 'si'
2853 // version. If we left it to default legalization we'd end up with 'di'.
2854 RTLIB::Libcall LC;
2855 LC = RTLIB::getFPTOSINT(OpVT: Src.getValueType(), RetVT: VT);
2856 MakeLibCallOptions CallOptions;
2857 EVT OpVT = Src.getValueType();
2858 CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT: VT, Value: true);
2859 SDValue Chain = SDValue();
2860 SDValue Result;
2861 std::tie(args&: Result, args&: Chain) =
2862 makeLibCall(DAG, LC, RetVT: VT, Ops: Src, CallOptions, dl: DL, Chain);
2863 Results.push_back(Elt: Result);
2864 break;
2865 }
2866 case ISD::BITCAST: {
2867 SDValue Src = N->getOperand(Num: 0);
2868 EVT SrcVT = Src.getValueType();
2869 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
2870 Subtarget.hasBasicF()) {
2871 SDValue Dst =
2872 DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Src);
2873 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Dst));
2874 }
2875 break;
2876 }
2877 case ISD::FP_TO_UINT: {
2878 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2879 "Unexpected custom legalisation");
2880 auto &TLI = DAG.getTargetLoweringInfo();
2881 SDValue Tmp1, Tmp2;
2882 TLI.expandFP_TO_UINT(N, Result&: Tmp1, Chain&: Tmp2, DAG);
2883 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Tmp1));
2884 break;
2885 }
2886 case ISD::BSWAP: {
2887 SDValue Src = N->getOperand(Num: 0);
2888 assert((VT == MVT::i16 || VT == MVT::i32) &&
2889 "Unexpected custom legalization");
2890 MVT GRLenVT = Subtarget.getGRLenVT();
2891 SDValue NewSrc = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: GRLenVT, Operand: Src);
2892 SDValue Tmp;
2893 switch (VT.getSizeInBits()) {
2894 default:
2895 llvm_unreachable("Unexpected operand width");
2896 case 16:
2897 Tmp = DAG.getNode(Opcode: LoongArchISD::REVB_2H, DL, VT: GRLenVT, Operand: NewSrc);
2898 break;
2899 case 32:
2900 // Only LA64 will get here due to the size mismatch between VT and
2901 // GRLenVT; the LA32 lowering is defined directly in LoongArchInstrInfo.
2902 Tmp = DAG.getNode(Opcode: LoongArchISD::REVB_2W, DL, VT: GRLenVT, Operand: NewSrc);
2903 break;
2904 }
2905 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Tmp));
2906 break;
2907 }
2908 case ISD::BITREVERSE: {
2909 SDValue Src = N->getOperand(Num: 0);
2910 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
2911 "Unexpected custom legalization");
2912 MVT GRLenVT = Subtarget.getGRLenVT();
2913 SDValue NewSrc = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: GRLenVT, Operand: Src);
2914 SDValue Tmp;
2915 switch (VT.getSizeInBits()) {
2916 default:
2917 llvm_unreachable("Unexpected operand width");
2918 case 8:
2919 Tmp = DAG.getNode(Opcode: LoongArchISD::BITREV_4B, DL, VT: GRLenVT, Operand: NewSrc);
2920 break;
2921 case 32:
2922 Tmp = DAG.getNode(Opcode: LoongArchISD::BITREV_W, DL, VT: GRLenVT, Operand: NewSrc);
2923 break;
2924 }
2925 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Tmp));
2926 break;
2927 }
2928 case ISD::CTLZ:
2929 case ISD::CTTZ: {
2930 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2931 "Unexpected custom legalisation");
2932 Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 1));
2933 break;
2934 }
2935 case ISD::INTRINSIC_W_CHAIN: {
2936 SDValue Chain = N->getOperand(Num: 0);
2937 SDValue Op2 = N->getOperand(Num: 2);
2938 MVT GRLenVT = Subtarget.getGRLenVT();
2939 const StringRef ErrorMsgOOR = "argument out of range";
2940 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2941 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2942
2943 switch (N->getConstantOperandVal(Num: 1)) {
2944 default:
2945 llvm_unreachable("Unexpected Intrinsic.");
2946 case Intrinsic::loongarch_movfcsr2gr: {
2947 if (!Subtarget.hasBasicF()) {
2948 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgReqF);
2949 return;
2950 }
2951 unsigned Imm = Op2->getAsZExtVal();
2952 if (!isUInt<2>(x: Imm)) {
2953 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR);
2954 return;
2955 }
2956 SDValue MOVFCSR2GRResults = DAG.getNode(
2957 Opcode: LoongArchISD::MOVFCSR2GR, DL: SDLoc(N), ResultTys: {MVT::i64, MVT::Other},
2958 Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
2959 Results.push_back(
2960 Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: MOVFCSR2GRResults.getValue(R: 0)));
2961 Results.push_back(Elt: MOVFCSR2GRResults.getValue(R: 1));
2962 break;
2963 }
2964#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
2965 case Intrinsic::loongarch_##NAME: { \
2966 SDValue NODE = DAG.getNode( \
2967 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
2968 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
2969 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
2970 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
2971 Results.push_back(NODE.getValue(1)); \
2972 break; \
2973 }
2974 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
2975 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
2976 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
2977 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
2978 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
2979 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
2980#undef CRC_CASE_EXT_BINARYOP
2981
2982#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
2983 case Intrinsic::loongarch_##NAME: { \
2984 SDValue NODE = DAG.getNode( \
2985 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
2986 {Chain, Op2, \
2987 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
2988 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
2989 Results.push_back(NODE.getValue(1)); \
2990 break; \
2991 }
2992 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
2993 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
2994#undef CRC_CASE_EXT_UNARYOP
2995#define CSR_CASE(ID) \
2996 case Intrinsic::loongarch_##ID: { \
2997 if (!Subtarget.is64Bit()) \
2998 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
2999 break; \
3000 }
3001 CSR_CASE(csrrd_d);
3002 CSR_CASE(csrwr_d);
3003 CSR_CASE(csrxchg_d);
3004 CSR_CASE(iocsrrd_d);
3005#undef CSR_CASE
3006 case Intrinsic::loongarch_csrrd_w: {
3007 unsigned Imm = Op2->getAsZExtVal();
3008 if (!isUInt<14>(x: Imm)) {
3009 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR);
3010 return;
3011 }
3012 SDValue CSRRDResults =
3013 DAG.getNode(Opcode: LoongArchISD::CSRRD, DL, ResultTys: {GRLenVT, MVT::Other},
3014 Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
3015 Results.push_back(
3016 Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRRDResults.getValue(R: 0)));
3017 Results.push_back(Elt: CSRRDResults.getValue(R: 1));
3018 break;
3019 }
3020 case Intrinsic::loongarch_csrwr_w: {
3021 unsigned Imm = N->getConstantOperandVal(Num: 3);
3022 if (!isUInt<14>(x: Imm)) {
3023 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR);
3024 return;
3025 }
3026 SDValue CSRWRResults =
3027 DAG.getNode(Opcode: LoongArchISD::CSRWR, DL, ResultTys: {GRLenVT, MVT::Other},
3028 Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op2),
3029 DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
3030 Results.push_back(
3031 Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRWRResults.getValue(R: 0)));
3032 Results.push_back(Elt: CSRWRResults.getValue(R: 1));
3033 break;
3034 }
3035 case Intrinsic::loongarch_csrxchg_w: {
3036 unsigned Imm = N->getConstantOperandVal(Num: 4);
3037 if (!isUInt<14>(x: Imm)) {
3038 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR);
3039 return;
3040 }
3041 SDValue CSRXCHGResults = DAG.getNode(
3042 Opcode: LoongArchISD::CSRXCHG, DL, ResultTys: {GRLenVT, MVT::Other},
3043 Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op2),
3044 DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 3)),
3045 DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
3046 Results.push_back(
3047 Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRXCHGResults.getValue(R: 0)));
3048 Results.push_back(Elt: CSRXCHGResults.getValue(R: 1));
3049 break;
3050 }
3051#define IOCSRRD_CASE(NAME, NODE) \
3052 case Intrinsic::loongarch_##NAME: { \
3053 SDValue IOCSRRDResults = \
3054 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
3055 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
3056 Results.push_back( \
3057 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
3058 Results.push_back(IOCSRRDResults.getValue(1)); \
3059 break; \
3060 }
3061 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
3062 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
3063 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
3064#undef IOCSRRD_CASE
3065 case Intrinsic::loongarch_cpucfg: {
3066 SDValue CPUCFGResults =
3067 DAG.getNode(Opcode: LoongArchISD::CPUCFG, DL, ResultTys: {GRLenVT, MVT::Other},
3068 Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op2)});
3069 Results.push_back(
3070 Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CPUCFGResults.getValue(R: 0)));
3071 Results.push_back(Elt: CPUCFGResults.getValue(R: 1));
3072 break;
3073 }
3074 case Intrinsic::loongarch_lddir_d: {
3075 if (!Subtarget.is64Bit()) {
3076 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgReqLA64);
3077 return;
3078 }
3079 break;
3080 }
3081 }
3082 break;
3083 }
3084 case ISD::READ_REGISTER: {
3085 if (Subtarget.is64Bit())
3086 DAG.getContext()->emitError(
3087 ErrorStr: "On LA64, only 64-bit registers can be read.");
3088 else
3089 DAG.getContext()->emitError(
3090 ErrorStr: "On LA32, only 32-bit registers can be read.");
3091 Results.push_back(Elt: DAG.getUNDEF(VT));
3092 Results.push_back(Elt: N->getOperand(Num: 0));
3093 break;
3094 }
3095 case ISD::INTRINSIC_WO_CHAIN: {
3096 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
3097 break;
3098 }
3099 }
3100}
3101
3102static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
3103 TargetLowering::DAGCombinerInfo &DCI,
3104 const LoongArchSubtarget &Subtarget) {
3105 if (DCI.isBeforeLegalizeOps())
3106 return SDValue();
3107
3108 SDValue FirstOperand = N->getOperand(Num: 0);
3109 SDValue SecondOperand = N->getOperand(Num: 1);
3110 unsigned FirstOperandOpc = FirstOperand.getOpcode();
3111 EVT ValTy = N->getValueType(ResNo: 0);
3112 SDLoc DL(N);
3113 uint64_t lsb, msb;
3114 unsigned SMIdx, SMLen;
3115 ConstantSDNode *CN;
3116 SDValue NewOperand;
3117 MVT GRLenVT = Subtarget.getGRLenVT();
3118
3119 // Op's second operand must be a shifted mask.
3120 if (!(CN = dyn_cast<ConstantSDNode>(Val&: SecondOperand)) ||
3121 !isShiftedMask_64(Value: CN->getZExtValue(), MaskIdx&: SMIdx, MaskLen&: SMLen))
3122 return SDValue();
3123
3124 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
3125 // Pattern match BSTRPICK.
3126 // $dst = and ((sra or srl) $src, lsb), (2**len - 1)
3127 // => BSTRPICK $dst, $src, msb, lsb
3128 // where msb = lsb + len - 1
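 // For example (illustrative only):
 //   $dst = and (srl $src, 8), 0xffff
 //   => BSTRPICK $dst, $src, 23, 8    (lsb = 8, len = 16, msb = 23)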
3129
3130 // The second operand of the shift must be an immediate.
3131 if (!(CN = dyn_cast<ConstantSDNode>(Val: FirstOperand.getOperand(i: 1))))
3132 return SDValue();
3133
3134 lsb = CN->getZExtValue();
3135
3136 // Return if the shifted mask does not start at bit 0 or the sum of its
3137 // length and lsb exceeds the word's size.
3138 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
3139 return SDValue();
3140
3141 NewOperand = FirstOperand.getOperand(i: 0);
3142 } else {
3143 // Pattern match BSTRPICK.
3144 // $dst = and $src, (2**len - 1), if len > 12
3145 // => BSTRPICK $dst, $src, msb, lsb
3146 // where lsb = 0 and msb = len - 1
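 // For example (illustrative only):
 //   $dst = and $src, 0xffff
 //   => BSTRPICK $dst, $src, 15, 0    (len = 16 > 12)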
3147
3148 // If the mask is <= 0xfff, andi can be used instead.
3149 if (CN->getZExtValue() <= 0xfff)
3150 return SDValue();
3151
3152 // Return if the msb (SMIdx + SMLen - 1) would exceed the value's bit width.
3153 if (SMIdx + SMLen > ValTy.getSizeInBits())
3154 return SDValue();
3155
3156 if (SMIdx > 0) {
3157 // Omit if the constant has more than 2 uses. This is a conservative
3158 // decision: whether it is a win depends on the HW microarchitecture.
3159 // However, it should always be better for 1 and 2 uses.
3160 if (CN->use_size() > 2)
3161 return SDValue();
3162 // Return if the constant can be composed by a single LU12I.W.
3163 if ((CN->getZExtValue() & 0xfff) == 0)
3164 return SDValue();
3165 // Return if the constant can be composed by a single ADDI with
3166 // the zero register.
3167 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
3168 return SDValue();
3169 }
3170
3171 lsb = SMIdx;
3172 NewOperand = FirstOperand;
3173 }
3174
3175 msb = lsb + SMLen - 1;
3176 SDValue NR0 = DAG.getNode(Opcode: LoongArchISD::BSTRPICK, DL, VT: ValTy, N1: NewOperand,
3177 N2: DAG.getConstant(Val: msb, DL, VT: GRLenVT),
3178 N3: DAG.getConstant(Val: lsb, DL, VT: GRLenVT));
3179 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
3180 return NR0;
3181 // Try to optimize to
3182 // bstrpick $Rd, $Rs, msb, lsb
3183 // slli $Rd, $Rd, lsb
3184 return DAG.getNode(Opcode: ISD::SHL, DL, VT: ValTy, N1: NR0,
3185 N2: DAG.getConstant(Val: lsb, DL, VT: GRLenVT));
3186}
3187
3188static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
3189 TargetLowering::DAGCombinerInfo &DCI,
3190 const LoongArchSubtarget &Subtarget) {
3191 if (DCI.isBeforeLegalizeOps())
3192 return SDValue();
3193
3194 // $dst = srl (and $src, Mask), Shamt
3195 // =>
3196 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
3197 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
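 // For example (illustrative only):
 //   $dst = srl (and $src, 0xff00), 8
 //   => BSTRPICK $dst, $src, 15, 8    (MaskIdx = 8, MaskLen = 8, Shamt = 8)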
3198 //
3199
3200 SDValue FirstOperand = N->getOperand(Num: 0);
3201 ConstantSDNode *CN;
3202 EVT ValTy = N->getValueType(ResNo: 0);
3203 SDLoc DL(N);
3204 MVT GRLenVT = Subtarget.getGRLenVT();
3205 unsigned MaskIdx, MaskLen;
3206 uint64_t Shamt;
3207
3208 // The first operand must be an AND and the second operand of the AND must be
3209 // a shifted mask.
3210 if (FirstOperand.getOpcode() != ISD::AND ||
3211 !(CN = dyn_cast<ConstantSDNode>(Val: FirstOperand.getOperand(i: 1))) ||
3212 !isShiftedMask_64(Value: CN->getZExtValue(), MaskIdx, MaskLen))
3213 return SDValue();
3214
3215 // The second operand (shift amount) must be an immediate.
3216 if (!(CN = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1))))
3217 return SDValue();
3218
3219 Shamt = CN->getZExtValue();
3220 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
3221 return DAG.getNode(Opcode: LoongArchISD::BSTRPICK, DL, VT: ValTy,
3222 N1: FirstOperand->getOperand(Num: 0),
3223 N2: DAG.getConstant(Val: MaskIdx + MaskLen - 1, DL, VT: GRLenVT),
3224 N3: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT));
3225
3226 return SDValue();
3227}
3228
3229static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
3230 TargetLowering::DAGCombinerInfo &DCI,
3231 const LoongArchSubtarget &Subtarget) {
3232 MVT GRLenVT = Subtarget.getGRLenVT();
3233 EVT ValTy = N->getValueType(ResNo: 0);
3234 SDValue N0 = N->getOperand(Num: 0), N1 = N->getOperand(Num: 1);
3235 ConstantSDNode *CN0, *CN1;
3236 SDLoc DL(N);
3237 unsigned ValBits = ValTy.getSizeInBits();
3238 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
3239 unsigned Shamt;
3240 bool SwapAndRetried = false;
3241
3242 if (DCI.isBeforeLegalizeOps())
3243 return SDValue();
3244
3245 if (ValBits != 32 && ValBits != 64)
3246 return SDValue();
3247
3248Retry:
3249 // 1st pattern to match BSTRINS:
3250 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
3251 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
3252 // =>
3253 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
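 // For example (illustrative only), with size = 8 and lsb = 8:
 //   R = or (and X, ~0xff00), (and (shl Y, 8), 0xff00)
 //   => R = BSTRINS X, Y, 15, 8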
3254 if (N0.getOpcode() == ISD::AND &&
3255 (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) &&
3256 isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
3257 N1.getOpcode() == ISD::AND && N1.getOperand(i: 0).getOpcode() == ISD::SHL &&
3258 (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
3259 isShiftedMask_64(Value: CN1->getZExtValue(), MaskIdx&: MaskIdx1, MaskLen&: MaskLen1) &&
3260 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
3261 (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) &&
3262 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
3263 (MaskIdx0 + MaskLen0 <= ValBits)) {
3264 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
3265 return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0),
3266 N2: N1.getOperand(i: 0).getOperand(i: 0),
3267 N3: DAG.getConstant(Val: (MaskIdx0 + MaskLen0 - 1), DL, VT: GRLenVT),
3268 N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT));
3269 }
3270
3271 // 2nd pattern to match BSTRINS:
3272 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
3273 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
3274 // =>
3275 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
3276 if (N0.getOpcode() == ISD::AND &&
3277 (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) &&
3278 isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
3279 N1.getOpcode() == ISD::SHL && N1.getOperand(i: 0).getOpcode() == ISD::AND &&
3280 (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
3281 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
3282 (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) &&
3283 isShiftedMask_64(Value: CN1->getZExtValue(), MaskIdx&: MaskIdx1, MaskLen&: MaskLen1) &&
3284 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
3285 (MaskIdx0 + MaskLen0 <= ValBits)) {
3286 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
3287 return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0),
3288 N2: N1.getOperand(i: 0).getOperand(i: 0),
3289 N3: DAG.getConstant(Val: (MaskIdx0 + MaskLen0 - 1), DL, VT: GRLenVT),
3290 N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT));
3291 }
3292
3293 // 3rd pattern to match BSTRINS:
3294 // R = or (and X, mask0), (and Y, mask1)
3295 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
3296 // =>
3297 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
3298 // where msb = lsb + size - 1
3299 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
3300 (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) &&
3301 isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
3302 (MaskIdx0 + MaskLen0 <= 64) &&
3303 (CN1 = dyn_cast<ConstantSDNode>(Val: N1->getOperand(Num: 1))) &&
3304 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
3305 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
3306 return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0),
3307 N2: DAG.getNode(Opcode: ISD::SRL, DL, VT: N1->getValueType(ResNo: 0), N1,
3308 N2: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT)),
3309 N3: DAG.getConstant(Val: ValBits == 32
3310 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
3311 : (MaskIdx0 + MaskLen0 - 1),
3312 DL, VT: GRLenVT),
3313 N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT));
3314 }
3315
3316 // 4th pattern to match BSTRINS:
3317 // R = or (and X, mask), (shl Y, shamt)
3318 // where mask = (2**shamt - 1)
3319 // =>
3320 // R = BSTRINS X, Y, ValBits - 1, shamt
3321 // where ValBits = 32 or 64
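 // For example (illustrative only), with shamt = 8 on a 32-bit value:
 //   R = or (and X, 0xff), (shl Y, 8)
 //   => R = BSTRINS X, Y, 31, 8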
3322 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
3323 (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) &&
3324 isShiftedMask_64(Value: CN0->getZExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
3325 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
3326 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
3327 (MaskIdx0 + MaskLen0 <= ValBits)) {
3328 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
3329 return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0),
3330 N2: N1.getOperand(i: 0),
3331 N3: DAG.getConstant(Val: (ValBits - 1), DL, VT: GRLenVT),
3332 N4: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT));
3333 }
3334
3335 // 5th pattern to match BSTRINS:
3336 // R = or (and X, mask), const
3337 // where ~mask = (2**size - 1) << lsb, mask & const = 0
3338 // =>
3339 // R = BSTRINS X, (const >> lsb), msb, lsb
3340 // where msb = lsb + size - 1
3341 if (N0.getOpcode() == ISD::AND &&
3342 (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) &&
3343 isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
3344 (CN1 = dyn_cast<ConstantSDNode>(Val&: N1)) &&
3345 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
3346 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
3347 return DAG.getNode(
3348 Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0),
3349 N2: DAG.getConstant(Val: CN1->getSExtValue() >> MaskIdx0, DL, VT: ValTy),
3350 N3: DAG.getConstant(Val: ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
3351 : (MaskIdx0 + MaskLen0 - 1),
3352 DL, VT: GRLenVT),
3353 N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT));
3354 }
3355
3356 // 6th pattern.
3357 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
3358 // by the incoming bits are known to be zero.
3359 // =>
3360 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
3361 //
3362 // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
3363 // pattern is more common than the 1st. So we put the 1st before the 6th in
3364 // order to match as many nodes as possible.
3365 ConstantSDNode *CNMask, *CNShamt;
3366 unsigned MaskIdx, MaskLen;
3367 if (N1.getOpcode() == ISD::SHL && N1.getOperand(i: 0).getOpcode() == ISD::AND &&
3368 (CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) &&
3369 isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen) &&
3370 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
3371 CNShamt->getZExtValue() + MaskLen <= ValBits) {
3372 Shamt = CNShamt->getZExtValue();
3373 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
3374 if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) {
3375 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
3376 return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0,
3377 N2: N1.getOperand(i: 0).getOperand(i: 0),
3378 N3: DAG.getConstant(Val: Shamt + MaskLen - 1, DL, VT: GRLenVT),
3379 N4: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT));
3380 }
3381 }
3382
3383 // 7th pattern.
3384 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
3385 // overwritten by the incoming bits are known to be zero.
3386 // =>
3387 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
3388 //
3389 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
3390 // before the 7th in order to match as many nodes as possible.
3391 if (N1.getOpcode() == ISD::AND &&
3392 (CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
3393 isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen) &&
3394 N1.getOperand(i: 0).getOpcode() == ISD::SHL &&
3395 (CNShamt = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) &&
3396 CNShamt->getZExtValue() == MaskIdx) {
3397 APInt ShMask(ValBits, CNMask->getZExtValue());
3398 if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) {
3399 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
3400 return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0,
3401 N2: N1.getOperand(i: 0).getOperand(i: 0),
3402 N3: DAG.getConstant(Val: MaskIdx + MaskLen - 1, DL, VT: GRLenVT),
3403 N4: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT));
3404 }
3405 }
3406
3407 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
3408 if (!SwapAndRetried) {
3409 std::swap(a&: N0, b&: N1);
3410 SwapAndRetried = true;
3411 goto Retry;
3412 }
3413
3414 SwapAndRetried = false;
3415Retry2:
3416 // 8th pattern.
3417 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
3418 // the incoming bits are known to be zero.
3419 // =>
3420 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
3421 //
3422 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
3423 // we put it here in order to match as many nodes as possible or generate fewer
3424 // instructions.
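 // For example (illustrative only), with shifted_mask = 0xff00 (MaskIdx = 8,
 // MaskLen = 8) and bits 15..8 of b known to be zero:
 //   a = b | (c & 0xff00)
 //   => a = BSTRINS b, (c >> 8), 15, 8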
3425 if (N1.getOpcode() == ISD::AND &&
3426 (CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
3427 isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen)) {
3428 APInt ShMask(ValBits, CNMask->getZExtValue());
3429 if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) {
3430 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
3431 return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0,
3432 N2: DAG.getNode(Opcode: ISD::SRL, DL, VT: N1->getValueType(ResNo: 0),
3433 N1: N1->getOperand(Num: 0),
3434 N2: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT)),
3435 N3: DAG.getConstant(Val: MaskIdx + MaskLen - 1, DL, VT: GRLenVT),
3436 N4: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT));
3437 }
3438 }
3439 // Swap N0/N1 and retry.
3440 if (!SwapAndRetried) {
3441 std::swap(a&: N0, b&: N1);
3442 SwapAndRetried = true;
3443 goto Retry2;
3444 }
3445
3446 return SDValue();
3447}
3448
3449static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
3450 ExtType = ISD::NON_EXTLOAD;
3451
3452 switch (V.getNode()->getOpcode()) {
3453 case ISD::LOAD: {
3454 LoadSDNode *LoadNode = cast<LoadSDNode>(Val: V.getNode());
3455 if ((LoadNode->getMemoryVT() == MVT::i8) ||
3456 (LoadNode->getMemoryVT() == MVT::i16)) {
3457 ExtType = LoadNode->getExtensionType();
3458 return true;
3459 }
3460 return false;
3461 }
3462 case ISD::AssertSext: {
3463 VTSDNode *TypeNode = cast<VTSDNode>(Val: V.getNode()->getOperand(Num: 1));
3464 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
3465 ExtType = ISD::SEXTLOAD;
3466 return true;
3467 }
3468 return false;
3469 }
3470 case ISD::AssertZext: {
3471 VTSDNode *TypeNode = cast<VTSDNode>(Val: V.getNode()->getOperand(Num: 1));
3472 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
3473 ExtType = ISD::ZEXTLOAD;
3474 return true;
3475 }
3476 return false;
3477 }
3478 default:
3479 return false;
3480 }
3481
3482 return false;
3483}
3484
3485// Eliminate redundant truncation and zero-extension nodes.
3486// * Case 1:
3487// +------------+ +------------+ +------------+
3488// | Input1 | | Input2 | | CC |
3489// +------------+ +------------+ +------------+
3490// | | |
3491// V V +----+
3492// +------------+ +------------+ |
3493// | TRUNCATE | | TRUNCATE | |
3494// +------------+ +------------+ |
3495// | | |
3496// V V |
3497// +------------+ +------------+ |
3498// | ZERO_EXT | | ZERO_EXT | |
3499// +------------+ +------------+ |
3500// | | |
3501// | +-------------+ |
3502// V V | |
3503// +----------------+ | |
3504// | AND | | |
3505// +----------------+ | |
3506// | | |
3507// +---------------+ | |
3508// | | |
3509// V V V
3510// +-------------+
3511// | CMP |
3512// +-------------+
3513// * Case 2:
3514// +------------+ +------------+ +-------------+ +------------+ +------------+
3515// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
3516// +------------+ +------------+ +-------------+ +------------+ +------------+
3517// | | | | |
3518// V | | | |
3519// +------------+ | | | |
3520// | XOR |<---------------------+ | |
3521// +------------+ | | |
3522// | | | |
3523// V V +---------------+ |
3524// +------------+ +------------+ | |
3525// | TRUNCATE | | TRUNCATE | | +-------------------------+
3526// +------------+ +------------+ | |
3527// | | | |
3528// V V | |
3529// +------------+ +------------+ | |
3530// | ZERO_EXT | | ZERO_EXT | | |
3531// +------------+ +------------+ | |
3532// | | | |
3533// V V | |
3534// +----------------+ | |
3535// | AND | | |
3536// +----------------+ | |
3537// | | |
3538// +---------------+ | |
3539// | | |
3540// V V V
3541// +-------------+
3542// | CMP |
3543// +-------------+
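//
// In both cases the comparison is rebuilt directly on the narrow inputs that
// feed the TRUNCATE nodes: the AND is recreated on those inputs and compared
// against the second one. For case 2 this relies on the identity
// (b & ~a) == 0 <=> (a & b) == b.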
3544static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
3545 TargetLowering::DAGCombinerInfo &DCI,
3546 const LoongArchSubtarget &Subtarget) {
3547 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get();
3548
3549 SDNode *AndNode = N->getOperand(Num: 0).getNode();
3550 if (AndNode->getOpcode() != ISD::AND)
3551 return SDValue();
3552
3553 SDValue AndInputValue2 = AndNode->getOperand(Num: 1);
3554 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
3555 return SDValue();
3556
3557 SDValue CmpInputValue = N->getOperand(Num: 1);
3558 SDValue AndInputValue1 = AndNode->getOperand(Num: 0);
3559 if (AndInputValue1.getOpcode() == ISD::XOR) {
3560 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3561 return SDValue();
3562 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val: AndInputValue1.getOperand(i: 1));
3563 if (!CN || CN->getSExtValue() != -1)
3564 return SDValue();
3565 CN = dyn_cast<ConstantSDNode>(Val&: CmpInputValue);
3566 if (!CN || CN->getSExtValue() != 0)
3567 return SDValue();
3568 AndInputValue1 = AndInputValue1.getOperand(i: 0);
3569 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
3570 return SDValue();
3571 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
3572 if (AndInputValue2 != CmpInputValue)
3573 return SDValue();
3574 } else {
3575 return SDValue();
3576 }
3577
3578 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(Num: 0);
3579 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
3580 return SDValue();
3581
3582 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(Num: 0);
3583 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
3584 return SDValue();
3585
3586 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(Num: 0);
3587 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(Num: 0);
3588 ISD::LoadExtType ExtType1;
3589 ISD::LoadExtType ExtType2;
3590
3591 if (!checkValueWidth(V: TruncInputValue1, ExtType&: ExtType1) ||
3592 !checkValueWidth(V: TruncInputValue2, ExtType&: ExtType2))
3593 return SDValue();
3594
3595 if (TruncInputValue1->getValueType(ResNo: 0) != TruncInputValue2->getValueType(ResNo: 0) ||
3596 AndNode->getValueType(ResNo: 0) != TruncInputValue1->getValueType(ResNo: 0))
3597 return SDValue();
3598
3599 if ((ExtType2 != ISD::ZEXTLOAD) &&
3600 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
3601 return SDValue();
3602
3603 // These truncation and zero-extension nodes are not necessary; remove them.
3604 SDValue NewAnd = DAG.getNode(Opcode: ISD::AND, DL: SDLoc(N), VT: AndNode->getValueType(ResNo: 0),
3605 N1: TruncInputValue1, N2: TruncInputValue2);
3606 SDValue NewSetCC =
3607 DAG.getSetCC(DL: SDLoc(N), VT: N->getValueType(ResNo: 0), LHS: NewAnd, RHS: TruncInputValue2, Cond: CC);
3608 DAG.ReplaceAllUsesWith(From: N, To: NewSetCC.getNode());
3609 return SDValue(N, 0);
3610}
3611
3612// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
3613static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
3614 TargetLowering::DAGCombinerInfo &DCI,
3615 const LoongArchSubtarget &Subtarget) {
3616 if (DCI.isBeforeLegalizeOps())
3617 return SDValue();
3618
3619 SDValue Src = N->getOperand(Num: 0);
3620 if (Src.getOpcode() != LoongArchISD::REVB_2W)
3621 return SDValue();
3622
3623 return DAG.getNode(Opcode: LoongArchISD::BITREV_4B, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
3624 Operand: Src.getOperand(i: 0));
3625}
3626
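// Validate the immediate intrinsic operand ImmOp against an N-bit signed or
// unsigned range and rematerialize it as a GRLenVT constant; emit a diagnostic
// and return UNDEF when it is out of range.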
3627template <unsigned N>
3628static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
3629 SelectionDAG &DAG,
3630 const LoongArchSubtarget &Subtarget,
3631 bool IsSigned = false) {
3632 SDLoc DL(Node);
3633 auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: ImmOp));
3634 // Check the ImmArg.
3635 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3636 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3637 DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) +
3638 ": argument out of range.");
3639 return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: Subtarget.getGRLenVT());
3640 }
3641 return DAG.getConstant(Val: CImm->getZExtValue(), DL, VT: Subtarget.getGRLenVT());
3642}
3643
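// Validate the immediate intrinsic operand ImmOp against an N-bit range and
// splat it across the result vector type as a constant; emit a diagnostic and
// return UNDEF when it is out of range.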
3644template <unsigned N>
3645static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
3646 SelectionDAG &DAG, bool IsSigned = false) {
3647 SDLoc DL(Node);
3648 EVT ResTy = Node->getValueType(ResNo: 0);
3649 auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: ImmOp));
3650
3651 // Check the ImmArg.
3652 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3653 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3654 DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) +
3655 ": argument out of range.");
3656 return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy);
3657 }
3658 return DAG.getConstant(
3659 Val: APInt(ResTy.getScalarType().getSizeInBits(),
3660 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
3661 DL, VT: ResTy);
3662}
3663
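// Mask each element of the shift-amount vector (operand 2) down to the element
// bit width, i.e. compute Vec & (EltBits - 1), matching the modulo behaviour
// of the vector shift instructions.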
3664static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
3665 SDLoc DL(Node);
3666 EVT ResTy = Node->getValueType(ResNo: 0);
3667 SDValue Vec = Node->getOperand(Num: 2);
3668 SDValue Mask = DAG.getConstant(Val: Vec.getScalarValueSizeInBits() - 1, DL, VT: ResTy);
3669 return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Vec, N2: Mask);
3670}
3671
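// Lower a bit-clear style intrinsic without an immediate: clear bit
// (Op2 % EltBits) in every element of Op1, i.e.
// Op1 & ~(1 << (Op2 & (EltBits - 1))).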
3672static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
3673 SDLoc DL(Node);
3674 EVT ResTy = Node->getValueType(ResNo: 0);
3675 SDValue One = DAG.getConstant(Val: 1, DL, VT: ResTy);
3676 SDValue Bit =
3677 DAG.getNode(Opcode: ISD::SHL, DL, VT: ResTy, N1: One, N2: truncateVecElts(Node, DAG));
3678
3679 return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Node->getOperand(Num: 1),
3680 N2: DAG.getNOT(DL, Val: Bit, VT: ResTy));
3681}
3682
3683template <unsigned N>
3684static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
3685 SDLoc DL(Node);
3686 EVT ResTy = Node->getValueType(ResNo: 0);
3687 auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2));
3688 // Check the unsigned ImmArg.
3689 if (!isUInt<N>(CImm->getZExtValue())) {
3690 DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) +
3691 ": argument out of range.");
3692 return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy);
3693 }
3694
3695 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3696 SDValue Mask = DAG.getConstant(Val: ~BitImm, DL, VT: ResTy);
3697
3698 return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Node->getOperand(Num: 1), N2: Mask);
3699}
3700
3701template <unsigned N>
3702static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
3703 SDLoc DL(Node);
3704 EVT ResTy = Node->getValueType(ResNo: 0);
3705 auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2));
3706 // Check the unsigned ImmArg.
3707 if (!isUInt<N>(CImm->getZExtValue())) {
3708 DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) +
3709 ": argument out of range.");
3710 return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy);
3711 }
3712
3713 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3714 SDValue BitImm = DAG.getConstant(Val: Imm, DL, VT: ResTy);
3715 return DAG.getNode(Opcode: ISD::OR, DL, VT: ResTy, N1: Node->getOperand(Num: 1), N2: BitImm);
3716}
3717
3718template <unsigned N>
3719static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
3720 SDLoc DL(Node);
3721 EVT ResTy = Node->getValueType(ResNo: 0);
3722 auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2));
3723 // Check the unsigned ImmArg.
3724 if (!isUInt<N>(CImm->getZExtValue())) {
3725 DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) +
3726 ": argument out of range.");
3727 return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy);
3728 }
3729
3730 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3731 SDValue BitImm = DAG.getConstant(Val: Imm, DL, VT: ResTy);
3732 return DAG.getNode(Opcode: ISD::XOR, DL, VT: ResTy, N1: Node->getOperand(Num: 1), N2: BitImm);
3733}
3734
3735static SDValue
3736performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
3737 TargetLowering::DAGCombinerInfo &DCI,
3738 const LoongArchSubtarget &Subtarget) {
3739 SDLoc DL(N);
3740 switch (N->getConstantOperandVal(Num: 0)) {
3741 default:
3742 break;
3743 case Intrinsic::loongarch_lsx_vadd_b:
3744 case Intrinsic::loongarch_lsx_vadd_h:
3745 case Intrinsic::loongarch_lsx_vadd_w:
3746 case Intrinsic::loongarch_lsx_vadd_d:
3747 case Intrinsic::loongarch_lasx_xvadd_b:
3748 case Intrinsic::loongarch_lasx_xvadd_h:
3749 case Intrinsic::loongarch_lasx_xvadd_w:
3750 case Intrinsic::loongarch_lasx_xvadd_d:
3751 return DAG.getNode(Opcode: ISD::ADD, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
3752 N2: N->getOperand(Num: 2));
3753 case Intrinsic::loongarch_lsx_vaddi_bu:
3754 case Intrinsic::loongarch_lsx_vaddi_hu:
3755 case Intrinsic::loongarch_lsx_vaddi_wu:
3756 case Intrinsic::loongarch_lsx_vaddi_du:
3757 case Intrinsic::loongarch_lasx_xvaddi_bu:
3758 case Intrinsic::loongarch_lasx_xvaddi_hu:
3759 case Intrinsic::loongarch_lasx_xvaddi_wu:
3760 case Intrinsic::loongarch_lasx_xvaddi_du:
3761 return DAG.getNode(Opcode: ISD::ADD, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
3762 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
3763 case Intrinsic::loongarch_lsx_vsub_b:
3764 case Intrinsic::loongarch_lsx_vsub_h:
3765 case Intrinsic::loongarch_lsx_vsub_w:
3766 case Intrinsic::loongarch_lsx_vsub_d:
3767 case Intrinsic::loongarch_lasx_xvsub_b:
3768 case Intrinsic::loongarch_lasx_xvsub_h:
3769 case Intrinsic::loongarch_lasx_xvsub_w:
3770 case Intrinsic::loongarch_lasx_xvsub_d:
3771 return DAG.getNode(Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
3772 N2: N->getOperand(Num: 2));
3773 case Intrinsic::loongarch_lsx_vsubi_bu:
3774 case Intrinsic::loongarch_lsx_vsubi_hu:
3775 case Intrinsic::loongarch_lsx_vsubi_wu:
3776 case Intrinsic::loongarch_lsx_vsubi_du:
3777 case Intrinsic::loongarch_lasx_xvsubi_bu:
3778 case Intrinsic::loongarch_lasx_xvsubi_hu:
3779 case Intrinsic::loongarch_lasx_xvsubi_wu:
3780 case Intrinsic::loongarch_lasx_xvsubi_du:
3781 return DAG.getNode(Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
3782 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
3783 case Intrinsic::loongarch_lsx_vneg_b:
3784 case Intrinsic::loongarch_lsx_vneg_h:
3785 case Intrinsic::loongarch_lsx_vneg_w:
3786 case Intrinsic::loongarch_lsx_vneg_d:
3787 case Intrinsic::loongarch_lasx_xvneg_b:
3788 case Intrinsic::loongarch_lasx_xvneg_h:
3789 case Intrinsic::loongarch_lasx_xvneg_w:
3790 case Intrinsic::loongarch_lasx_xvneg_d:
3791 return DAG.getNode(
3792 Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: 0),
3793 N1: DAG.getConstant(
3794 Val: APInt(N->getValueType(ResNo: 0).getScalarType().getSizeInBits(), 0,
3795 /*isSigned=*/true),
3796 DL: SDLoc(N), VT: N->getValueType(ResNo: 0)),
3797 N2: N->getOperand(Num: 1));
3798 case Intrinsic::loongarch_lsx_vmax_b:
3799 case Intrinsic::loongarch_lsx_vmax_h:
3800 case Intrinsic::loongarch_lsx_vmax_w:
3801 case Intrinsic::loongarch_lsx_vmax_d:
3802 case Intrinsic::loongarch_lasx_xvmax_b:
3803 case Intrinsic::loongarch_lasx_xvmax_h:
3804 case Intrinsic::loongarch_lasx_xvmax_w:
3805 case Intrinsic::loongarch_lasx_xvmax_d:
3806 return DAG.getNode(Opcode: ISD::SMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
3807 N2: N->getOperand(Num: 2));
3808 case Intrinsic::loongarch_lsx_vmax_bu:
3809 case Intrinsic::loongarch_lsx_vmax_hu:
3810 case Intrinsic::loongarch_lsx_vmax_wu:
3811 case Intrinsic::loongarch_lsx_vmax_du:
3812 case Intrinsic::loongarch_lasx_xvmax_bu:
3813 case Intrinsic::loongarch_lasx_xvmax_hu:
3814 case Intrinsic::loongarch_lasx_xvmax_wu:
3815 case Intrinsic::loongarch_lasx_xvmax_du:
3816 return DAG.getNode(Opcode: ISD::UMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
3817 N2: N->getOperand(Num: 2));
3818 case Intrinsic::loongarch_lsx_vmaxi_b:
3819 case Intrinsic::loongarch_lsx_vmaxi_h:
3820 case Intrinsic::loongarch_lsx_vmaxi_w:
3821 case Intrinsic::loongarch_lsx_vmaxi_d:
3822 case Intrinsic::loongarch_lasx_xvmaxi_b:
3823 case Intrinsic::loongarch_lasx_xvmaxi_h:
3824 case Intrinsic::loongarch_lasx_xvmaxi_w:
3825 case Intrinsic::loongarch_lasx_xvmaxi_d:
3826 return DAG.getNode(Opcode: ISD::SMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
3827 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG, /*IsSigned=*/true));
3828 case Intrinsic::loongarch_lsx_vmaxi_bu:
3829 case Intrinsic::loongarch_lsx_vmaxi_hu:
3830 case Intrinsic::loongarch_lsx_vmaxi_wu:
3831 case Intrinsic::loongarch_lsx_vmaxi_du:
3832 case Intrinsic::loongarch_lasx_xvmaxi_bu:
3833 case Intrinsic::loongarch_lasx_xvmaxi_hu:
3834 case Intrinsic::loongarch_lasx_xvmaxi_wu:
3835 case Intrinsic::loongarch_lasx_xvmaxi_du:
3836 return DAG.getNode(Opcode: ISD::UMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
3837 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
3838 case Intrinsic::loongarch_lsx_vmin_b:
3839 case Intrinsic::loongarch_lsx_vmin_h:
3840 case Intrinsic::loongarch_lsx_vmin_w:
3841 case Intrinsic::loongarch_lsx_vmin_d:
3842 case Intrinsic::loongarch_lasx_xvmin_b:
3843 case Intrinsic::loongarch_lasx_xvmin_h:
3844 case Intrinsic::loongarch_lasx_xvmin_w:
3845 case Intrinsic::loongarch_lasx_xvmin_d:
3846 return DAG.getNode(Opcode: ISD::SMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
3847 N2: N->getOperand(Num: 2));
3848 case Intrinsic::loongarch_lsx_vmin_bu:
3849 case Intrinsic::loongarch_lsx_vmin_hu:
3850 case Intrinsic::loongarch_lsx_vmin_wu:
3851 case Intrinsic::loongarch_lsx_vmin_du:
3852 case Intrinsic::loongarch_lasx_xvmin_bu:
3853 case Intrinsic::loongarch_lasx_xvmin_hu:
3854 case Intrinsic::loongarch_lasx_xvmin_wu:
3855 case Intrinsic::loongarch_lasx_xvmin_du:
3856 return DAG.getNode(Opcode: ISD::UMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
3857 N2: N->getOperand(Num: 2));
3858 case Intrinsic::loongarch_lsx_vmini_b:
3859 case Intrinsic::loongarch_lsx_vmini_h:
3860 case Intrinsic::loongarch_lsx_vmini_w:
3861 case Intrinsic::loongarch_lsx_vmini_d:
3862 case Intrinsic::loongarch_lasx_xvmini_b:
3863 case Intrinsic::loongarch_lasx_xvmini_h:
3864 case Intrinsic::loongarch_lasx_xvmini_w:
3865 case Intrinsic::loongarch_lasx_xvmini_d:
3866 return DAG.getNode(Opcode: ISD::SMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
3867 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG, /*IsSigned=*/true));
3868 case Intrinsic::loongarch_lsx_vmini_bu:
3869 case Intrinsic::loongarch_lsx_vmini_hu:
3870 case Intrinsic::loongarch_lsx_vmini_wu:
3871 case Intrinsic::loongarch_lsx_vmini_du:
3872 case Intrinsic::loongarch_lasx_xvmini_bu:
3873 case Intrinsic::loongarch_lasx_xvmini_hu:
3874 case Intrinsic::loongarch_lasx_xvmini_wu:
3875 case Intrinsic::loongarch_lasx_xvmini_du:
3876 return DAG.getNode(Opcode: ISD::UMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
3877 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
3878 case Intrinsic::loongarch_lsx_vmul_b:
3879 case Intrinsic::loongarch_lsx_vmul_h:
3880 case Intrinsic::loongarch_lsx_vmul_w:
3881 case Intrinsic::loongarch_lsx_vmul_d:
3882 case Intrinsic::loongarch_lasx_xvmul_b:
3883 case Intrinsic::loongarch_lasx_xvmul_h:
3884 case Intrinsic::loongarch_lasx_xvmul_w:
3885 case Intrinsic::loongarch_lasx_xvmul_d:
3886 return DAG.getNode(Opcode: ISD::MUL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
3887 N2: N->getOperand(Num: 2));
3888 case Intrinsic::loongarch_lsx_vmadd_b:
3889 case Intrinsic::loongarch_lsx_vmadd_h:
3890 case Intrinsic::loongarch_lsx_vmadd_w:
3891 case Intrinsic::loongarch_lsx_vmadd_d:
3892 case Intrinsic::loongarch_lasx_xvmadd_b:
3893 case Intrinsic::loongarch_lasx_xvmadd_h:
3894 case Intrinsic::loongarch_lasx_xvmadd_w:
3895 case Intrinsic::loongarch_lasx_xvmadd_d: {
3896 EVT ResTy = N->getValueType(ResNo: 0);
3897 return DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 1),
3898 N2: DAG.getNode(Opcode: ISD::MUL, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 2),
3899 N2: N->getOperand(Num: 3)));
3900 }
3901 case Intrinsic::loongarch_lsx_vmsub_b:
3902 case Intrinsic::loongarch_lsx_vmsub_h:
3903 case Intrinsic::loongarch_lsx_vmsub_w:
3904 case Intrinsic::loongarch_lsx_vmsub_d:
3905 case Intrinsic::loongarch_lasx_xvmsub_b:
3906 case Intrinsic::loongarch_lasx_xvmsub_h:
3907 case Intrinsic::loongarch_lasx_xvmsub_w:
3908 case Intrinsic::loongarch_lasx_xvmsub_d: {
3909 EVT ResTy = N->getValueType(ResNo: 0);
3910 return DAG.getNode(Opcode: ISD::SUB, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 1),
3911 N2: DAG.getNode(Opcode: ISD::MUL, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 2),
3912 N2: N->getOperand(Num: 3)));
3913 }
3914 case Intrinsic::loongarch_lsx_vdiv_b:
3915 case Intrinsic::loongarch_lsx_vdiv_h:
3916 case Intrinsic::loongarch_lsx_vdiv_w:
3917 case Intrinsic::loongarch_lsx_vdiv_d:
3918 case Intrinsic::loongarch_lasx_xvdiv_b:
3919 case Intrinsic::loongarch_lasx_xvdiv_h:
3920 case Intrinsic::loongarch_lasx_xvdiv_w:
3921 case Intrinsic::loongarch_lasx_xvdiv_d:
3922 return DAG.getNode(Opcode: ISD::SDIV, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
3923 N2: N->getOperand(Num: 2));
3924 case Intrinsic::loongarch_lsx_vdiv_bu:
3925 case Intrinsic::loongarch_lsx_vdiv_hu:
3926 case Intrinsic::loongarch_lsx_vdiv_wu:
3927 case Intrinsic::loongarch_lsx_vdiv_du:
3928 case Intrinsic::loongarch_lasx_xvdiv_bu:
3929 case Intrinsic::loongarch_lasx_xvdiv_hu:
3930 case Intrinsic::loongarch_lasx_xvdiv_wu:
3931 case Intrinsic::loongarch_lasx_xvdiv_du:
3932 return DAG.getNode(Opcode: ISD::UDIV, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
3933 N2: N->getOperand(Num: 2));
3934 case Intrinsic::loongarch_lsx_vmod_b:
3935 case Intrinsic::loongarch_lsx_vmod_h:
3936 case Intrinsic::loongarch_lsx_vmod_w:
3937 case Intrinsic::loongarch_lsx_vmod_d:
3938 case Intrinsic::loongarch_lasx_xvmod_b:
3939 case Intrinsic::loongarch_lasx_xvmod_h:
3940 case Intrinsic::loongarch_lasx_xvmod_w:
3941 case Intrinsic::loongarch_lasx_xvmod_d:
3942 return DAG.getNode(Opcode: ISD::SREM, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
3943 N2: N->getOperand(Num: 2));
3944 case Intrinsic::loongarch_lsx_vmod_bu:
3945 case Intrinsic::loongarch_lsx_vmod_hu:
3946 case Intrinsic::loongarch_lsx_vmod_wu:
3947 case Intrinsic::loongarch_lsx_vmod_du:
3948 case Intrinsic::loongarch_lasx_xvmod_bu:
3949 case Intrinsic::loongarch_lasx_xvmod_hu:
3950 case Intrinsic::loongarch_lasx_xvmod_wu:
3951 case Intrinsic::loongarch_lasx_xvmod_du:
3952 return DAG.getNode(Opcode: ISD::UREM, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
3953 N2: N->getOperand(Num: 2));
3954 case Intrinsic::loongarch_lsx_vand_v:
3955 case Intrinsic::loongarch_lasx_xvand_v:
3956 return DAG.getNode(Opcode: ISD::AND, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
3957 N2: N->getOperand(Num: 2));
3958 case Intrinsic::loongarch_lsx_vor_v:
3959 case Intrinsic::loongarch_lasx_xvor_v:
3960 return DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
3961 N2: N->getOperand(Num: 2));
3962 case Intrinsic::loongarch_lsx_vxor_v:
3963 case Intrinsic::loongarch_lasx_xvxor_v:
3964 return DAG.getNode(Opcode: ISD::XOR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
3965 N2: N->getOperand(Num: 2));
3966 case Intrinsic::loongarch_lsx_vnor_v:
3967 case Intrinsic::loongarch_lasx_xvnor_v: {
3968 SDValue Res = DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
3969 N2: N->getOperand(Num: 2));
3970 return DAG.getNOT(DL, Val: Res, VT: Res->getValueType(ResNo: 0));
3971 }
3972 case Intrinsic::loongarch_lsx_vandi_b:
3973 case Intrinsic::loongarch_lasx_xvandi_b:
3974 return DAG.getNode(Opcode: ISD::AND, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
3975 N2: lowerVectorSplatImm<8>(Node: N, ImmOp: 2, DAG));
3976 case Intrinsic::loongarch_lsx_vori_b:
3977 case Intrinsic::loongarch_lasx_xvori_b:
3978 return DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
3979 N2: lowerVectorSplatImm<8>(Node: N, ImmOp: 2, DAG));
3980 case Intrinsic::loongarch_lsx_vxori_b:
3981 case Intrinsic::loongarch_lasx_xvxori_b:
3982 return DAG.getNode(Opcode: ISD::XOR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
3983 N2: lowerVectorSplatImm<8>(Node: N, ImmOp: 2, DAG));
3984 case Intrinsic::loongarch_lsx_vsll_b:
3985 case Intrinsic::loongarch_lsx_vsll_h:
3986 case Intrinsic::loongarch_lsx_vsll_w:
3987 case Intrinsic::loongarch_lsx_vsll_d:
3988 case Intrinsic::loongarch_lasx_xvsll_b:
3989 case Intrinsic::loongarch_lasx_xvsll_h:
3990 case Intrinsic::loongarch_lasx_xvsll_w:
3991 case Intrinsic::loongarch_lasx_xvsll_d:
3992 return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
3993 N2: truncateVecElts(Node: N, DAG));
3994 case Intrinsic::loongarch_lsx_vslli_b:
3995 case Intrinsic::loongarch_lasx_xvslli_b:
3996 return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
3997 N2: lowerVectorSplatImm<3>(Node: N, ImmOp: 2, DAG));
3998 case Intrinsic::loongarch_lsx_vslli_h:
3999 case Intrinsic::loongarch_lasx_xvslli_h:
4000 return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
4001 N2: lowerVectorSplatImm<4>(Node: N, ImmOp: 2, DAG));
4002 case Intrinsic::loongarch_lsx_vslli_w:
4003 case Intrinsic::loongarch_lasx_xvslli_w:
4004 return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
4005 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
4006 case Intrinsic::loongarch_lsx_vslli_d:
4007 case Intrinsic::loongarch_lasx_xvslli_d:
4008 return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
4009 N2: lowerVectorSplatImm<6>(Node: N, ImmOp: 2, DAG));
4010 case Intrinsic::loongarch_lsx_vsrl_b:
4011 case Intrinsic::loongarch_lsx_vsrl_h:
4012 case Intrinsic::loongarch_lsx_vsrl_w:
4013 case Intrinsic::loongarch_lsx_vsrl_d:
4014 case Intrinsic::loongarch_lasx_xvsrl_b:
4015 case Intrinsic::loongarch_lasx_xvsrl_h:
4016 case Intrinsic::loongarch_lasx_xvsrl_w:
4017 case Intrinsic::loongarch_lasx_xvsrl_d:
4018 return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
4019 N2: truncateVecElts(Node: N, DAG));
4020 case Intrinsic::loongarch_lsx_vsrli_b:
4021 case Intrinsic::loongarch_lasx_xvsrli_b:
4022 return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
4023 N2: lowerVectorSplatImm<3>(Node: N, ImmOp: 2, DAG));
4024 case Intrinsic::loongarch_lsx_vsrli_h:
4025 case Intrinsic::loongarch_lasx_xvsrli_h:
4026 return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
4027 N2: lowerVectorSplatImm<4>(Node: N, ImmOp: 2, DAG));
4028 case Intrinsic::loongarch_lsx_vsrli_w:
4029 case Intrinsic::loongarch_lasx_xvsrli_w:
4030 return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
4031 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
4032 case Intrinsic::loongarch_lsx_vsrli_d:
4033 case Intrinsic::loongarch_lasx_xvsrli_d:
4034 return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
4035 N2: lowerVectorSplatImm<6>(Node: N, ImmOp: 2, DAG));
4036 case Intrinsic::loongarch_lsx_vsra_b:
4037 case Intrinsic::loongarch_lsx_vsra_h:
4038 case Intrinsic::loongarch_lsx_vsra_w:
4039 case Intrinsic::loongarch_lsx_vsra_d:
4040 case Intrinsic::loongarch_lasx_xvsra_b:
4041 case Intrinsic::loongarch_lasx_xvsra_h:
4042 case Intrinsic::loongarch_lasx_xvsra_w:
4043 case Intrinsic::loongarch_lasx_xvsra_d:
4044 return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
4045 N2: truncateVecElts(Node: N, DAG));
4046 case Intrinsic::loongarch_lsx_vsrai_b:
4047 case Intrinsic::loongarch_lasx_xvsrai_b:
4048 return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
4049 N2: lowerVectorSplatImm<3>(Node: N, ImmOp: 2, DAG));
4050 case Intrinsic::loongarch_lsx_vsrai_h:
4051 case Intrinsic::loongarch_lasx_xvsrai_h:
4052 return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
4053 N2: lowerVectorSplatImm<4>(Node: N, ImmOp: 2, DAG));
4054 case Intrinsic::loongarch_lsx_vsrai_w:
4055 case Intrinsic::loongarch_lasx_xvsrai_w:
4056 return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
4057 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
4058 case Intrinsic::loongarch_lsx_vsrai_d:
4059 case Intrinsic::loongarch_lasx_xvsrai_d:
4060 return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
4061 N2: lowerVectorSplatImm<6>(Node: N, ImmOp: 2, DAG));
4062 case Intrinsic::loongarch_lsx_vclz_b:
4063 case Intrinsic::loongarch_lsx_vclz_h:
4064 case Intrinsic::loongarch_lsx_vclz_w:
4065 case Intrinsic::loongarch_lsx_vclz_d:
4066 case Intrinsic::loongarch_lasx_xvclz_b:
4067 case Intrinsic::loongarch_lasx_xvclz_h:
4068 case Intrinsic::loongarch_lasx_xvclz_w:
4069 case Intrinsic::loongarch_lasx_xvclz_d:
4070 return DAG.getNode(Opcode: ISD::CTLZ, DL, VT: N->getValueType(ResNo: 0), Operand: N->getOperand(Num: 1));
4071 case Intrinsic::loongarch_lsx_vpcnt_b:
4072 case Intrinsic::loongarch_lsx_vpcnt_h:
4073 case Intrinsic::loongarch_lsx_vpcnt_w:
4074 case Intrinsic::loongarch_lsx_vpcnt_d:
4075 case Intrinsic::loongarch_lasx_xvpcnt_b:
4076 case Intrinsic::loongarch_lasx_xvpcnt_h:
4077 case Intrinsic::loongarch_lasx_xvpcnt_w:
4078 case Intrinsic::loongarch_lasx_xvpcnt_d:
4079 return DAG.getNode(Opcode: ISD::CTPOP, DL, VT: N->getValueType(ResNo: 0), Operand: N->getOperand(Num: 1));
4080 case Intrinsic::loongarch_lsx_vbitclr_b:
4081 case Intrinsic::loongarch_lsx_vbitclr_h:
4082 case Intrinsic::loongarch_lsx_vbitclr_w:
4083 case Intrinsic::loongarch_lsx_vbitclr_d:
4084 case Intrinsic::loongarch_lasx_xvbitclr_b:
4085 case Intrinsic::loongarch_lasx_xvbitclr_h:
4086 case Intrinsic::loongarch_lasx_xvbitclr_w:
4087 case Intrinsic::loongarch_lasx_xvbitclr_d:
4088 return lowerVectorBitClear(Node: N, DAG);
4089 case Intrinsic::loongarch_lsx_vbitclri_b:
4090 case Intrinsic::loongarch_lasx_xvbitclri_b:
4091 return lowerVectorBitClearImm<3>(Node: N, DAG);
4092 case Intrinsic::loongarch_lsx_vbitclri_h:
4093 case Intrinsic::loongarch_lasx_xvbitclri_h:
4094 return lowerVectorBitClearImm<4>(Node: N, DAG);
4095 case Intrinsic::loongarch_lsx_vbitclri_w:
4096 case Intrinsic::loongarch_lasx_xvbitclri_w:
4097 return lowerVectorBitClearImm<5>(Node: N, DAG);
4098 case Intrinsic::loongarch_lsx_vbitclri_d:
4099 case Intrinsic::loongarch_lasx_xvbitclri_d:
4100 return lowerVectorBitClearImm<6>(Node: N, DAG);
4101 case Intrinsic::loongarch_lsx_vbitset_b:
4102 case Intrinsic::loongarch_lsx_vbitset_h:
4103 case Intrinsic::loongarch_lsx_vbitset_w:
4104 case Intrinsic::loongarch_lsx_vbitset_d:
4105 case Intrinsic::loongarch_lasx_xvbitset_b:
4106 case Intrinsic::loongarch_lasx_xvbitset_h:
4107 case Intrinsic::loongarch_lasx_xvbitset_w:
4108 case Intrinsic::loongarch_lasx_xvbitset_d: {
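// vbitset sets one bit per element: dst = src | (1 << elt), using the
// per-element shift amounts produced by truncateVecElts.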
4109 EVT VecTy = N->getValueType(ResNo: 0);
4110 SDValue One = DAG.getConstant(Val: 1, DL, VT: VecTy);
4111 return DAG.getNode(
4112 Opcode: ISD::OR, DL, VT: VecTy, N1: N->getOperand(Num: 1),
4113 N2: DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: One, N2: truncateVecElts(Node: N, DAG)));
4114 }
4115 case Intrinsic::loongarch_lsx_vbitseti_b:
4116 case Intrinsic::loongarch_lasx_xvbitseti_b:
4117 return lowerVectorBitSetImm<3>(Node: N, DAG);
4118 case Intrinsic::loongarch_lsx_vbitseti_h:
4119 case Intrinsic::loongarch_lasx_xvbitseti_h:
4120 return lowerVectorBitSetImm<4>(Node: N, DAG);
4121 case Intrinsic::loongarch_lsx_vbitseti_w:
4122 case Intrinsic::loongarch_lasx_xvbitseti_w:
4123 return lowerVectorBitSetImm<5>(Node: N, DAG);
4124 case Intrinsic::loongarch_lsx_vbitseti_d:
4125 case Intrinsic::loongarch_lasx_xvbitseti_d:
4126 return lowerVectorBitSetImm<6>(Node: N, DAG);
4127 case Intrinsic::loongarch_lsx_vbitrev_b:
4128 case Intrinsic::loongarch_lsx_vbitrev_h:
4129 case Intrinsic::loongarch_lsx_vbitrev_w:
4130 case Intrinsic::loongarch_lsx_vbitrev_d:
4131 case Intrinsic::loongarch_lasx_xvbitrev_b:
4132 case Intrinsic::loongarch_lasx_xvbitrev_h:
4133 case Intrinsic::loongarch_lasx_xvbitrev_w:
4134 case Intrinsic::loongarch_lasx_xvbitrev_d: {
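// vbitrev flips one bit per element: dst = src ^ (1 << elt), using the
// per-element shift amounts produced by truncateVecElts.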
4135 EVT VecTy = N->getValueType(ResNo: 0);
4136 SDValue One = DAG.getConstant(Val: 1, DL, VT: VecTy);
4137 return DAG.getNode(
4138 Opcode: ISD::XOR, DL, VT: VecTy, N1: N->getOperand(Num: 1),
4139 N2: DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: One, N2: truncateVecElts(Node: N, DAG)));
4140 }
4141 case Intrinsic::loongarch_lsx_vbitrevi_b:
4142 case Intrinsic::loongarch_lasx_xvbitrevi_b:
4143 return lowerVectorBitRevImm<3>(Node: N, DAG);
4144 case Intrinsic::loongarch_lsx_vbitrevi_h:
4145 case Intrinsic::loongarch_lasx_xvbitrevi_h:
4146 return lowerVectorBitRevImm<4>(Node: N, DAG);
4147 case Intrinsic::loongarch_lsx_vbitrevi_w:
4148 case Intrinsic::loongarch_lasx_xvbitrevi_w:
4149 return lowerVectorBitRevImm<5>(Node: N, DAG);
4150 case Intrinsic::loongarch_lsx_vbitrevi_d:
4151 case Intrinsic::loongarch_lasx_xvbitrevi_d:
4152 return lowerVectorBitRevImm<6>(Node: N, DAG);
4153 case Intrinsic::loongarch_lsx_vfadd_s:
4154 case Intrinsic::loongarch_lsx_vfadd_d:
4155 case Intrinsic::loongarch_lasx_xvfadd_s:
4156 case Intrinsic::loongarch_lasx_xvfadd_d:
4157 return DAG.getNode(Opcode: ISD::FADD, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
4158 N2: N->getOperand(Num: 2));
4159 case Intrinsic::loongarch_lsx_vfsub_s:
4160 case Intrinsic::loongarch_lsx_vfsub_d:
4161 case Intrinsic::loongarch_lasx_xvfsub_s:
4162 case Intrinsic::loongarch_lasx_xvfsub_d:
4163 return DAG.getNode(Opcode: ISD::FSUB, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
4164 N2: N->getOperand(Num: 2));
4165 case Intrinsic::loongarch_lsx_vfmul_s:
4166 case Intrinsic::loongarch_lsx_vfmul_d:
4167 case Intrinsic::loongarch_lasx_xvfmul_s:
4168 case Intrinsic::loongarch_lasx_xvfmul_d:
4169 return DAG.getNode(Opcode: ISD::FMUL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
4170 N2: N->getOperand(Num: 2));
4171 case Intrinsic::loongarch_lsx_vfdiv_s:
4172 case Intrinsic::loongarch_lsx_vfdiv_d:
4173 case Intrinsic::loongarch_lasx_xvfdiv_s:
4174 case Intrinsic::loongarch_lasx_xvfdiv_d:
4175 return DAG.getNode(Opcode: ISD::FDIV, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
4176 N2: N->getOperand(Num: 2));
4177 case Intrinsic::loongarch_lsx_vfmadd_s:
4178 case Intrinsic::loongarch_lsx_vfmadd_d:
4179 case Intrinsic::loongarch_lasx_xvfmadd_s:
4180 case Intrinsic::loongarch_lasx_xvfmadd_d:
4181 return DAG.getNode(Opcode: ISD::FMA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
4182 N2: N->getOperand(Num: 2), N3: N->getOperand(Num: 3));
4183 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
4184 return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
4185 N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2),
4186 N3: legalizeIntrinsicImmArg<4>(Node: N, ImmOp: 3, DAG, Subtarget));
4187 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
4188 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
4189 return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
4190 N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2),
4191 N3: legalizeIntrinsicImmArg<3>(Node: N, ImmOp: 3, DAG, Subtarget));
4192 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
4193 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
4194 return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
4195 N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2),
4196 N3: legalizeIntrinsicImmArg<2>(Node: N, ImmOp: 3, DAG, Subtarget));
4197 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
4198 return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
4199 N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2),
4200 N3: legalizeIntrinsicImmArg<1>(Node: N, ImmOp: 3, DAG, Subtarget));
4201 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
4202 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
4203 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
4204 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
4205 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
4206 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
4207 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
4208 case Intrinsic::loongarch_lasx_xvreplgr2vr_d: {
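// vreplgr2vr broadcasts a GPR value to every vector lane; lower it as a
// BUILD_VECTOR whose operands all repeat the scalar.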
4209 EVT ResTy = N->getValueType(ResNo: 0);
4210 SmallVector<SDValue> Ops(ResTy.getVectorNumElements(), N->getOperand(Num: 1));
4211 return DAG.getBuildVector(VT: ResTy, DL, Ops);
4212 }
4213 case Intrinsic::loongarch_lsx_vreplve_b:
4214 case Intrinsic::loongarch_lsx_vreplve_h:
4215 case Intrinsic::loongarch_lsx_vreplve_w:
4216 case Intrinsic::loongarch_lsx_vreplve_d:
4217 case Intrinsic::loongarch_lasx_xvreplve_b:
4218 case Intrinsic::loongarch_lasx_xvreplve_h:
4219 case Intrinsic::loongarch_lasx_xvreplve_w:
4220 case Intrinsic::loongarch_lasx_xvreplve_d:
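// vreplve replicates the element selected by a GPR index; the index operand
// is any-extended to GRLenVT before forming the VREPLVE node.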
4221 return DAG.getNode(Opcode: LoongArchISD::VREPLVE, DL, VT: N->getValueType(ResNo: 0),
4222 N1: N->getOperand(Num: 1),
4223 N2: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getGRLenVT(),
4224 Operand: N->getOperand(Num: 2)));
4225 }
4226 return SDValue();
4227}
4228
4229SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
4230 DAGCombinerInfo &DCI) const {
4231 SelectionDAG &DAG = DCI.DAG;
4232 switch (N->getOpcode()) {
4233 default:
4234 break;
4235 case ISD::AND:
4236 return performANDCombine(N, DAG, DCI, Subtarget);
4237 case ISD::OR:
4238 return performORCombine(N, DAG, DCI, Subtarget);
4239 case ISD::SETCC:
4240 return performSETCCCombine(N, DAG, DCI, Subtarget);
4241 case ISD::SRL:
4242 return performSRLCombine(N, DAG, DCI, Subtarget);
4243 case LoongArchISD::BITREV_W:
4244 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
4245 case ISD::INTRINSIC_WO_CHAIN:
4246 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
4247 }
4248 return SDValue();
4249}
4250
4251static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
4252 MachineBasicBlock *MBB) {
4253 if (!ZeroDivCheck)
4254 return MBB;
4255
4256 // Build instructions:
4257 // MBB:
4258 // div(or mod) $dst, $dividend, $divisor
4259 // bnez $divisor, SinkMBB
4260 // BreakMBB:
4261 // break 7 // BRK_DIVZERO
4262 // SinkMBB:
4263 // fallthrough
4264 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
4265 MachineFunction::iterator It = ++MBB->getIterator();
4266 MachineFunction *MF = MBB->getParent();
4267 auto BreakMBB = MF->CreateMachineBasicBlock(BB: LLVM_BB);
4268 auto SinkMBB = MF->CreateMachineBasicBlock(BB: LLVM_BB);
4269 MF->insert(MBBI: It, MBB: BreakMBB);
4270 MF->insert(MBBI: It, MBB: SinkMBB);
4271
4272 // Transfer the remainder of MBB and its successor edges to SinkMBB.
4273 SinkMBB->splice(Where: SinkMBB->end(), Other: MBB, From: std::next(x: MI.getIterator()), To: MBB->end());
4274 SinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB);
4275
4276 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
4277 DebugLoc DL = MI.getDebugLoc();
4278 MachineOperand &Divisor = MI.getOperand(i: 2);
4279 Register DivisorReg = Divisor.getReg();
4280
4281 // MBB:
4282 BuildMI(BB: MBB, MIMD: DL, MCID: TII.get(Opcode: LoongArch::BNEZ))
4283 .addReg(RegNo: DivisorReg, flags: getKillRegState(B: Divisor.isKill()))
4284 .addMBB(MBB: SinkMBB);
4285 MBB->addSuccessor(Succ: BreakMBB);
4286 MBB->addSuccessor(Succ: SinkMBB);
4287
4288 // BreakMBB:
4289 // See the Linux header file arch/loongarch/include/uapi/asm/break.h for the
4290 // definition of BRK_DIVZERO.
4291 BuildMI(BB: BreakMBB, MIMD: DL, MCID: TII.get(Opcode: LoongArch::BREAK)).addImm(Val: 7 /*BRK_DIVZERO*/);
4292 BreakMBB->addSuccessor(Succ: SinkMBB);
4293
4294 // Clear Divisor's kill flag.
4295 Divisor.setIsKill(false);
4296
4297 return SinkMBB;
4298}
4299
4300static MachineBasicBlock *
4301emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
4302 const LoongArchSubtarget &Subtarget) {
4303 unsigned CondOpc;
4304 switch (MI.getOpcode()) {
4305 default:
4306 llvm_unreachable("Unexpected opcode");
4307 case LoongArch::PseudoVBZ:
4308 CondOpc = LoongArch::VSETEQZ_V;
4309 break;
4310 case LoongArch::PseudoVBZ_B:
4311 CondOpc = LoongArch::VSETANYEQZ_B;
4312 break;
4313 case LoongArch::PseudoVBZ_H:
4314 CondOpc = LoongArch::VSETANYEQZ_H;
4315 break;
4316 case LoongArch::PseudoVBZ_W:
4317 CondOpc = LoongArch::VSETANYEQZ_W;
4318 break;
4319 case LoongArch::PseudoVBZ_D:
4320 CondOpc = LoongArch::VSETANYEQZ_D;
4321 break;
4322 case LoongArch::PseudoVBNZ:
4323 CondOpc = LoongArch::VSETNEZ_V;
4324 break;
4325 case LoongArch::PseudoVBNZ_B:
4326 CondOpc = LoongArch::VSETALLNEZ_B;
4327 break;
4328 case LoongArch::PseudoVBNZ_H:
4329 CondOpc = LoongArch::VSETALLNEZ_H;
4330 break;
4331 case LoongArch::PseudoVBNZ_W:
4332 CondOpc = LoongArch::VSETALLNEZ_W;
4333 break;
4334 case LoongArch::PseudoVBNZ_D:
4335 CondOpc = LoongArch::VSETALLNEZ_D;
4336 break;
4337 case LoongArch::PseudoXVBZ:
4338 CondOpc = LoongArch::XVSETEQZ_V;
4339 break;
4340 case LoongArch::PseudoXVBZ_B:
4341 CondOpc = LoongArch::XVSETANYEQZ_B;
4342 break;
4343 case LoongArch::PseudoXVBZ_H:
4344 CondOpc = LoongArch::XVSETANYEQZ_H;
4345 break;
4346 case LoongArch::PseudoXVBZ_W:
4347 CondOpc = LoongArch::XVSETANYEQZ_W;
4348 break;
4349 case LoongArch::PseudoXVBZ_D:
4350 CondOpc = LoongArch::XVSETANYEQZ_D;
4351 break;
4352 case LoongArch::PseudoXVBNZ:
4353 CondOpc = LoongArch::XVSETNEZ_V;
4354 break;
4355 case LoongArch::PseudoXVBNZ_B:
4356 CondOpc = LoongArch::XVSETALLNEZ_B;
4357 break;
4358 case LoongArch::PseudoXVBNZ_H:
4359 CondOpc = LoongArch::XVSETALLNEZ_H;
4360 break;
4361 case LoongArch::PseudoXVBNZ_W:
4362 CondOpc = LoongArch::XVSETALLNEZ_W;
4363 break;
4364 case LoongArch::PseudoXVBNZ_D:
4365 CondOpc = LoongArch::XVSETALLNEZ_D;
4366 break;
4367 }
4368
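// Expand the pseudo: a vector set-condition instruction writes a
// condition-flag register, BCNEZ branches on it, and two blocks materialize
// 0 (FalseBB) or 1 (TrueBB), which a PHI in SinkBB merges into the result.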
4369 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4370 const BasicBlock *LLVM_BB = BB->getBasicBlock();
4371 DebugLoc DL = MI.getDebugLoc();
4372 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
4373 MachineFunction::iterator It = ++BB->getIterator();
4374
4375 MachineFunction *F = BB->getParent();
4376 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
4377 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
4378 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
4379
4380 F->insert(MBBI: It, MBB: FalseBB);
4381 F->insert(MBBI: It, MBB: TrueBB);
4382 F->insert(MBBI: It, MBB: SinkBB);
4383
4384 // Transfer the remainder of BB and its successor edges to SinkBB.
4385 SinkBB->splice(Where: SinkBB->end(), Other: BB, From: std::next(x: MI.getIterator()), To: BB->end());
4386 SinkBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
4387
4388 // Insert the vector set-condition instruction into BB.
4389 Register FCC = MRI.createVirtualRegister(RegClass: &LoongArch::CFRRegClass);
4390 BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: CondOpc), DestReg: FCC).addReg(RegNo: MI.getOperand(i: 1).getReg());
4391
4392 // Insert branch.
4393 BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::BCNEZ)).addReg(RegNo: FCC).addMBB(MBB: TrueBB);
4394 BB->addSuccessor(Succ: FalseBB);
4395 BB->addSuccessor(Succ: TrueBB);
4396
4397 // FalseBB.
4398 Register RD1 = MRI.createVirtualRegister(RegClass: &LoongArch::GPRRegClass);
4399 BuildMI(BB: FalseBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::ADDI_W), DestReg: RD1)
4400 .addReg(RegNo: LoongArch::R0)
4401 .addImm(Val: 0);
4402 BuildMI(BB: FalseBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::PseudoBR)).addMBB(MBB: SinkBB);
4403 FalseBB->addSuccessor(Succ: SinkBB);
4404
4405 // TrueBB.
4406 Register RD2 = MRI.createVirtualRegister(RegClass: &LoongArch::GPRRegClass);
4407 BuildMI(BB: TrueBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::ADDI_W), DestReg: RD2)
4408 .addReg(RegNo: LoongArch::R0)
4409 .addImm(Val: 1);
4410 TrueBB->addSuccessor(Succ: SinkBB);
4411
4412 // SinkBB: merge the results.
4413 BuildMI(BB&: *SinkBB, I: SinkBB->begin(), MIMD: DL, MCID: TII->get(Opcode: LoongArch::PHI),
4414 DestReg: MI.getOperand(i: 0).getReg())
4415 .addReg(RegNo: RD1)
4416 .addMBB(MBB: FalseBB)
4417 .addReg(RegNo: RD2)
4418 .addMBB(MBB: TrueBB);
4419
4420 // The pseudo instruction is gone now.
4421 MI.eraseFromParent();
4422 return SinkBB;
4423}
4424
4425static MachineBasicBlock *
4426emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
4427 const LoongArchSubtarget &Subtarget) {
4428 unsigned InsOp;
4429 unsigned HalfSize;
4430 switch (MI.getOpcode()) {
4431 default:
4432 llvm_unreachable("Unexpected opcode");
4433 case LoongArch::PseudoXVINSGR2VR_B:
4434 HalfSize = 16;
4435 InsOp = LoongArch::VINSGR2VR_B;
4436 break;
4437 case LoongArch::PseudoXVINSGR2VR_H:
4438 HalfSize = 8;
4439 InsOp = LoongArch::VINSGR2VR_H;
4440 break;
4441 }
4442 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4443 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
4444 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
4445 DebugLoc DL = MI.getDebugLoc();
4446 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
4447 // XDst = vector_insert XSrc, Elt, Idx
4448 Register XDst = MI.getOperand(i: 0).getReg();
4449 Register XSrc = MI.getOperand(i: 1).getReg();
4450 Register Elt = MI.getOperand(i: 2).getReg();
4451 unsigned Idx = MI.getOperand(i: 3).getImm();
4452
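// A 256-bit insert is implemented with a 128-bit VINSGR2VR: when Idx lies in
// the high half, the halves are first rearranged with XVPERMI_Q so that the
// insert happens in the low 128 bits, and the result is then merged back
// into the original vector with another XVPERMI_Q.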
4453 Register ScratchReg1 = XSrc;
4454 if (Idx >= HalfSize) {
4455 ScratchReg1 = MRI.createVirtualRegister(RegClass: RC);
4456 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::XVPERMI_Q), DestReg: ScratchReg1)
4457 .addReg(RegNo: XSrc)
4458 .addReg(RegNo: XSrc)
4459 .addImm(Val: 1);
4460 }
4461
4462 Register ScratchSubReg1 = MRI.createVirtualRegister(RegClass: SubRC);
4463 Register ScratchSubReg2 = MRI.createVirtualRegister(RegClass: SubRC);
4464 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::COPY), DestReg: ScratchSubReg1)
4465 .addReg(RegNo: ScratchReg1, flags: 0, SubReg: LoongArch::sub_128);
4466 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: InsOp), DestReg: ScratchSubReg2)
4467 .addReg(RegNo: ScratchSubReg1)
4468 .addReg(RegNo: Elt)
4469 .addImm(Val: Idx >= HalfSize ? Idx - HalfSize : Idx);
4470
4471 Register ScratchReg2 = XDst;
4472 if (Idx >= HalfSize)
4473 ScratchReg2 = MRI.createVirtualRegister(RegClass: RC);
4474
4475 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::SUBREG_TO_REG), DestReg: ScratchReg2)
4476 .addImm(Val: 0)
4477 .addReg(RegNo: ScratchSubReg2)
4478 .addImm(Val: LoongArch::sub_128);
4479
4480 if (Idx >= HalfSize)
4481 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::XVPERMI_Q), DestReg: XDst)
4482 .addReg(RegNo: XSrc)
4483 .addReg(RegNo: ScratchReg2)
4484 .addImm(Val: 2);
4485
4486 MI.eraseFromParent();
4487 return BB;
4488}
4489
4490MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
4491 MachineInstr &MI, MachineBasicBlock *BB) const {
4492 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4493 DebugLoc DL = MI.getDebugLoc();
4494
4495 switch (MI.getOpcode()) {
4496 default:
4497 llvm_unreachable("Unexpected instr type to insert");
4498 case LoongArch::DIV_W:
4499 case LoongArch::DIV_WU:
4500 case LoongArch::MOD_W:
4501 case LoongArch::MOD_WU:
4502 case LoongArch::DIV_D:
4503 case LoongArch::DIV_DU:
4504 case LoongArch::MOD_D:
4505 case LoongArch::MOD_DU:
4506 return insertDivByZeroTrap(MI, MBB: BB);
4508 case LoongArch::WRFCSR: {
4509 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::MOVGR2FCSR),
4510 DestReg: LoongArch::FCSR0 + MI.getOperand(i: 0).getImm())
4511 .addReg(RegNo: MI.getOperand(i: 1).getReg());
4512 MI.eraseFromParent();
4513 return BB;
4514 }
4515 case LoongArch::RDFCSR: {
4516 MachineInstr *ReadFCSR =
4517 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::MOVFCSR2GR),
4518 DestReg: MI.getOperand(i: 0).getReg())
4519 .addReg(RegNo: LoongArch::FCSR0 + MI.getOperand(i: 1).getImm());
4520 ReadFCSR->getOperand(i: 1).setIsUndef();
4521 MI.eraseFromParent();
4522 return BB;
4523 }
4524 case LoongArch::PseudoVBZ:
4525 case LoongArch::PseudoVBZ_B:
4526 case LoongArch::PseudoVBZ_H:
4527 case LoongArch::PseudoVBZ_W:
4528 case LoongArch::PseudoVBZ_D:
4529 case LoongArch::PseudoVBNZ:
4530 case LoongArch::PseudoVBNZ_B:
4531 case LoongArch::PseudoVBNZ_H:
4532 case LoongArch::PseudoVBNZ_W:
4533 case LoongArch::PseudoVBNZ_D:
4534 case LoongArch::PseudoXVBZ:
4535 case LoongArch::PseudoXVBZ_B:
4536 case LoongArch::PseudoXVBZ_H:
4537 case LoongArch::PseudoXVBZ_W:
4538 case LoongArch::PseudoXVBZ_D:
4539 case LoongArch::PseudoXVBNZ:
4540 case LoongArch::PseudoXVBNZ_B:
4541 case LoongArch::PseudoXVBNZ_H:
4542 case LoongArch::PseudoXVBNZ_W:
4543 case LoongArch::PseudoXVBNZ_D:
4544 return emitVecCondBranchPseudo(MI, BB, Subtarget);
4545 case LoongArch::PseudoXVINSGR2VR_B:
4546 case LoongArch::PseudoXVINSGR2VR_H:
4547 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
4548 }
4549}
4550
4551bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
4552 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
4553 unsigned *Fast) const {
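// Misaligned accesses are permitted only when the subtarget provides the UAL
// (unaligned access) feature.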
4554 if (!Subtarget.hasUAL())
4555 return false;
4556
4557 // TODO: set reasonable speed number.
4558 if (Fast)
4559 *Fast = 1;
4560 return true;
4561}
4562
4563const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
4564 switch ((LoongArchISD::NodeType)Opcode) {
4565 case LoongArchISD::FIRST_NUMBER:
4566 break;
4567
4568#define NODE_NAME_CASE(node) \
4569 case LoongArchISD::node: \
4570 return "LoongArchISD::" #node;
4571
4572 // TODO: Add more target-dependent nodes later.
4573 NODE_NAME_CASE(CALL)
4574 NODE_NAME_CASE(CALL_MEDIUM)
4575 NODE_NAME_CASE(CALL_LARGE)
4576 NODE_NAME_CASE(RET)
4577 NODE_NAME_CASE(TAIL)
4578 NODE_NAME_CASE(TAIL_MEDIUM)
4579 NODE_NAME_CASE(TAIL_LARGE)
4580 NODE_NAME_CASE(SLL_W)
4581 NODE_NAME_CASE(SRA_W)
4582 NODE_NAME_CASE(SRL_W)
4583 NODE_NAME_CASE(BSTRINS)
4584 NODE_NAME_CASE(BSTRPICK)
4585 NODE_NAME_CASE(MOVGR2FR_W_LA64)
4586 NODE_NAME_CASE(MOVFR2GR_S_LA64)
4587 NODE_NAME_CASE(FTINT)
4588 NODE_NAME_CASE(REVB_2H)
4589 NODE_NAME_CASE(REVB_2W)
4590 NODE_NAME_CASE(BITREV_4B)
4591 NODE_NAME_CASE(BITREV_W)
4592 NODE_NAME_CASE(ROTR_W)
4593 NODE_NAME_CASE(ROTL_W)
4594 NODE_NAME_CASE(DIV_WU)
4595 NODE_NAME_CASE(MOD_WU)
4596 NODE_NAME_CASE(CLZ_W)
4597 NODE_NAME_CASE(CTZ_W)
4598 NODE_NAME_CASE(DBAR)
4599 NODE_NAME_CASE(IBAR)
4600 NODE_NAME_CASE(BREAK)
4601 NODE_NAME_CASE(SYSCALL)
4602 NODE_NAME_CASE(CRC_W_B_W)
4603 NODE_NAME_CASE(CRC_W_H_W)
4604 NODE_NAME_CASE(CRC_W_W_W)
4605 NODE_NAME_CASE(CRC_W_D_W)
4606 NODE_NAME_CASE(CRCC_W_B_W)
4607 NODE_NAME_CASE(CRCC_W_H_W)
4608 NODE_NAME_CASE(CRCC_W_W_W)
4609 NODE_NAME_CASE(CRCC_W_D_W)
4610 NODE_NAME_CASE(CSRRD)
4611 NODE_NAME_CASE(CSRWR)
4612 NODE_NAME_CASE(CSRXCHG)
4613 NODE_NAME_CASE(IOCSRRD_B)
4614 NODE_NAME_CASE(IOCSRRD_H)
4615 NODE_NAME_CASE(IOCSRRD_W)
4616 NODE_NAME_CASE(IOCSRRD_D)
4617 NODE_NAME_CASE(IOCSRWR_B)
4618 NODE_NAME_CASE(IOCSRWR_H)
4619 NODE_NAME_CASE(IOCSRWR_W)
4620 NODE_NAME_CASE(IOCSRWR_D)
4621 NODE_NAME_CASE(CPUCFG)
4622 NODE_NAME_CASE(MOVGR2FCSR)
4623 NODE_NAME_CASE(MOVFCSR2GR)
4624 NODE_NAME_CASE(CACOP_D)
4625 NODE_NAME_CASE(CACOP_W)
4626 NODE_NAME_CASE(VSHUF)
4627 NODE_NAME_CASE(VPICKEV)
4628 NODE_NAME_CASE(VPICKOD)
4629 NODE_NAME_CASE(VPACKEV)
4630 NODE_NAME_CASE(VPACKOD)
4631 NODE_NAME_CASE(VILVL)
4632 NODE_NAME_CASE(VILVH)
4633 NODE_NAME_CASE(VSHUF4I)
4634 NODE_NAME_CASE(VREPLVEI)
4635 NODE_NAME_CASE(XVPERMI)
4636 NODE_NAME_CASE(VPICK_SEXT_ELT)
4637 NODE_NAME_CASE(VPICK_ZEXT_ELT)
4638 NODE_NAME_CASE(VREPLVE)
4639 NODE_NAME_CASE(VALL_ZERO)
4640 NODE_NAME_CASE(VANY_ZERO)
4641 NODE_NAME_CASE(VALL_NONZERO)
4642 NODE_NAME_CASE(VANY_NONZERO)
4643 }
4644#undef NODE_NAME_CASE
4645 return nullptr;
4646}
4647
4648//===----------------------------------------------------------------------===//
4649// Calling Convention Implementation
4650//===----------------------------------------------------------------------===//
4651
4652 // Eight general-purpose registers a0-a7 are used for passing integer arguments,
4653// with a0-a1 reused to return values. Generally, the GPRs are used to pass
4654// fixed-point arguments, and floating-point arguments when no FPR is available
4655// or with soft float ABI.
4656const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
4657 LoongArch::R7, LoongArch::R8, LoongArch::R9,
4658 LoongArch::R10, LoongArch::R11};
4659 // Eight floating-point registers fa0-fa7 are used for passing floating-point
4660// arguments, and fa0-fa1 are also used to return values.
4661const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
4662 LoongArch::F3, LoongArch::F4, LoongArch::F5,
4663 LoongArch::F6, LoongArch::F7};
4664// FPR32 and FPR64 alias each other.
4665const MCPhysReg ArgFPR64s[] = {
4666 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
4667 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
4668
4669const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
4670 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
4671 LoongArch::VR6, LoongArch::VR7};
4672
4673const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
4674 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
4675 LoongArch::XR6, LoongArch::XR7};
4676
4677// Pass a 2*GRLen argument that has been split into two GRLen values through
4678// registers or the stack as necessary.
4679static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
4680 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
4681 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
4682 ISD::ArgFlagsTy ArgFlags2) {
4683 unsigned GRLenInBytes = GRLen / 8;
4684 if (Register Reg = State.AllocateReg(Regs: ArgGPRs)) {
4685 // At least one half can be passed via register.
4686 State.addLoc(V: CCValAssign::getReg(ValNo: VA1.getValNo(), ValVT: VA1.getValVT(), RegNo: Reg,
4687 LocVT: VA1.getLocVT(), HTP: CCValAssign::Full));
4688 } else {
4689 // Both halves must be passed on the stack, with proper alignment.
4690 Align StackAlign =
4691 std::max(a: Align(GRLenInBytes), b: ArgFlags1.getNonZeroOrigAlign());
4692 State.addLoc(
4693 V: CCValAssign::getMem(ValNo: VA1.getValNo(), ValVT: VA1.getValVT(),
4694 Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: StackAlign),
4695 LocVT: VA1.getLocVT(), HTP: CCValAssign::Full));
4696 State.addLoc(V: CCValAssign::getMem(
4697 ValNo: ValNo2, ValVT: ValVT2, Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: Align(GRLenInBytes)),
4698 LocVT: LocVT2, HTP: CCValAssign::Full));
4699 return false;
4700 }
4701 if (Register Reg = State.AllocateReg(Regs: ArgGPRs)) {
4702 // The second half can also be passed via register.
4703 State.addLoc(
4704 V: CCValAssign::getReg(ValNo: ValNo2, ValVT: ValVT2, RegNo: Reg, LocVT: LocVT2, HTP: CCValAssign::Full));
4705 } else {
4706 // The second half is passed via the stack, without additional alignment.
4707 State.addLoc(V: CCValAssign::getMem(
4708 ValNo: ValNo2, ValVT: ValVT2, Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: Align(GRLenInBytes)),
4709 LocVT: LocVT2, HTP: CCValAssign::Full));
4710 }
4711 return false;
4712}
4713
4714// Implements the LoongArch calling convention. Returns true upon failure.
4715static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
4716 unsigned ValNo, MVT ValVT,
4717 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
4718 CCState &State, bool IsFixed, bool IsRet,
4719 Type *OrigTy) {
4720 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
4721 assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
4722 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
4723 MVT LocVT = ValVT;
4724
4725 // Any return value split into more than two values can't be returned
4726 // directly.
4727 if (IsRet && ValNo > 1)
4728 return true;
4729
4730 // Use a GPR for a floating-point value if it is a variadic argument or if no FPR is available.
4731 bool UseGPRForFloat = true;
4732
4733 switch (ABI) {
4734 default:
4735 llvm_unreachable("Unexpected ABI");
4737 case LoongArchABI::ABI_ILP32F:
4738 case LoongArchABI::ABI_LP64F:
4739 case LoongArchABI::ABI_ILP32D:
4740 case LoongArchABI::ABI_LP64D:
4741 UseGPRForFloat = !IsFixed;
4742 break;
4743 case LoongArchABI::ABI_ILP32S:
4744 case LoongArchABI::ABI_LP64S:
4745 break;
4746 }
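// For the soft-float ABIs (ILP32S/LP64S) floating-point values are always
// passed in GPRs, so UseGPRForFloat is left as true.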
4747
4748 // FPR32 and FPR64 alias each other.
4749 if (State.getFirstUnallocated(Regs: ArgFPR32s) == std::size(ArgFPR32s))
4750 UseGPRForFloat = true;
4751
4752 if (UseGPRForFloat && ValVT == MVT::f32) {
4753 LocVT = GRLenVT;
4754 LocInfo = CCValAssign::BCvt;
4755 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
4756 LocVT = MVT::i64;
4757 LocInfo = CCValAssign::BCvt;
4758 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
4759 // TODO: Handle passing f64 on LA32 with D feature.
4760 report_fatal_error(reason: "Passing f64 with GPR on LA32 is undefined");
4761 }
4762
4763 // If this is a variadic argument, the LoongArch calling convention requires
4764 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
4765 // byte alignment. An aligned register should be used regardless of whether
4766 // the original argument was split during legalisation or not. The argument
4767 // will not be passed by registers if the original type is larger than
4768 // 2*GRLen, so the register alignment rule does not apply.
4769 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
4770 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
4771 DL.getTypeAllocSize(Ty: OrigTy) == TwoGRLenInBytes) {
4772 unsigned RegIdx = State.getFirstUnallocated(Regs: ArgGPRs);
4773 // Skip 'odd' register if necessary.
4774 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
4775 State.AllocateReg(Regs: ArgGPRs);
4776 }
4777
4778 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
4779 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
4780 State.getPendingArgFlags();
4781
4782 assert(PendingLocs.size() == PendingArgFlags.size() &&
4783 "PendingLocs and PendingArgFlags out of sync");
4784
4785 // Split arguments might be passed indirectly, so keep track of the pending
4786 // values.
4787 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
4788 LocVT = GRLenVT;
4789 LocInfo = CCValAssign::Indirect;
4790 PendingLocs.push_back(
4791 Elt: CCValAssign::getPending(ValNo, ValVT, LocVT, HTP: LocInfo));
4792 PendingArgFlags.push_back(Elt: ArgFlags);
4793 if (!ArgFlags.isSplitEnd()) {
4794 return false;
4795 }
4796 }
4797
4798 // If the split argument only had two elements, it should be passed directly
4799 // in registers or on the stack.
4800 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
4801 PendingLocs.size() <= 2) {
4802 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
4803 // Apply the normal calling convention rules to the first half of the
4804 // split argument.
4805 CCValAssign VA = PendingLocs[0];
4806 ISD::ArgFlagsTy AF = PendingArgFlags[0];
4807 PendingLocs.clear();
4808 PendingArgFlags.clear();
4809 return CC_LoongArchAssign2GRLen(GRLen, State, VA1: VA, ArgFlags1: AF, ValNo2: ValNo, ValVT2: ValVT, LocVT2: LocVT,
4810 ArgFlags2: ArgFlags);
4811 }
4812
4813 // Allocate to a register if possible, or else a stack slot.
4814 Register Reg;
4815 unsigned StoreSizeBytes = GRLen / 8;
4816 Align StackAlign = Align(GRLen / 8);
4817
4818 if (ValVT == MVT::f32 && !UseGPRForFloat)
4819 Reg = State.AllocateReg(Regs: ArgFPR32s);
4820 else if (ValVT == MVT::f64 && !UseGPRForFloat)
4821 Reg = State.AllocateReg(Regs: ArgFPR64s);
4822 else if (ValVT.is128BitVector())
4823 Reg = State.AllocateReg(Regs: ArgVRs);
4824 else if (ValVT.is256BitVector())
4825 Reg = State.AllocateReg(Regs: ArgXRs);
4826 else
4827 Reg = State.AllocateReg(Regs: ArgGPRs);
4828
4829 unsigned StackOffset =
4830 Reg ? 0 : State.AllocateStack(Size: StoreSizeBytes, Alignment: StackAlign);
4831
4832 // If we reach this point and PendingLocs is non-empty, we must be at the
4833 // end of a split argument that must be passed indirectly.
4834 if (!PendingLocs.empty()) {
4835 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
4836 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
4837 for (auto &It : PendingLocs) {
4838 if (Reg)
4839 It.convertToReg(RegNo: Reg);
4840 else
4841 It.convertToMem(Offset: StackOffset);
4842 State.addLoc(V: It);
4843 }
4844 PendingLocs.clear();
4845 PendingArgFlags.clear();
4846 return false;
4847 }
4848 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
4849 "Expected a GRLenVT at this stage");
4850
4851 if (Reg) {
4852 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo));
4853 return false;
4854 }
4855
4856 // When a floating-point value is passed on the stack, no bit-cast is needed.
4857 if (ValVT.isFloatingPoint()) {
4858 LocVT = ValVT;
4859 LocInfo = CCValAssign::Full;
4860 }
4861
4862 State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo));
4863 return false;
4864}
4865
4866void LoongArchTargetLowering::analyzeInputArgs(
4867 MachineFunction &MF, CCState &CCInfo,
4868 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
4869 LoongArchCCAssignFn Fn) const {
4870 FunctionType *FType = MF.getFunction().getFunctionType();
4871 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4872 MVT ArgVT = Ins[i].VT;
4873 Type *ArgTy = nullptr;
4874 if (IsRet)
4875 ArgTy = FType->getReturnType();
4876 else if (Ins[i].isOrigArg())
4877 ArgTy = FType->getParamType(i: Ins[i].getOrigArgIndex());
4878 LoongArchABI::ABI ABI =
4879 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
4880 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
4881 CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
4882 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
4883 << '\n');
4884 llvm_unreachable("");
4885 }
4886 }
4887}
4888
4889void LoongArchTargetLowering::analyzeOutputArgs(
4890 MachineFunction &MF, CCState &CCInfo,
4891 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
4892 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
4893 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
4894 MVT ArgVT = Outs[i].VT;
4895 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
4896 LoongArchABI::ABI ABI =
4897 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
4898 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
4899 CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
4900 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
4901 << "\n");
4902 llvm_unreachable("");
4903 }
4904 }
4905}
4906
4907// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
4908// values.
4909static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
4910 const CCValAssign &VA, const SDLoc &DL) {
4911 switch (VA.getLocInfo()) {
4912 default:
4913 llvm_unreachable("Unexpected CCValAssign::LocInfo");
4914 case CCValAssign::Full:
4915 case CCValAssign::Indirect:
4916 break;
4917 case CCValAssign::BCvt:
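// An f32 passed in an i64 GPR (LA64) cannot be recovered with a plain
// bitcast because the widths differ; use the dedicated MOVGR2FR_W_LA64 node.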
4918 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
4919 Val = DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64, DL, VT: MVT::f32, Operand: Val);
4920 else
4921 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: VA.getValVT(), Operand: Val);
4922 break;
4923 }
4924 return Val;
4925}
4926
4927static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
4928 const CCValAssign &VA, const SDLoc &DL,
4929 const ISD::InputArg &In,
4930 const LoongArchTargetLowering &TLI) {
4931 MachineFunction &MF = DAG.getMachineFunction();
4932 MachineRegisterInfo &RegInfo = MF.getRegInfo();
4933 EVT LocVT = VA.getLocVT();
4934 SDValue Val;
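// Mark the physical argument register live-in and copy it into a fresh
// virtual register of the matching register class.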
4935 const TargetRegisterClass *RC = TLI.getRegClassFor(VT: LocVT.getSimpleVT());
4936 Register VReg = RegInfo.createVirtualRegister(RegClass: RC);
4937 RegInfo.addLiveIn(Reg: VA.getLocReg(), vreg: VReg);
4938 Val = DAG.getCopyFromReg(Chain, dl: DL, Reg: VReg, VT: LocVT);
4939
4940 // If input is sign extended from 32 bits, note it for the OptW pass.
4941 if (In.isOrigArg()) {
4942 Argument *OrigArg = MF.getFunction().getArg(i: In.getOrigArgIndex());
4943 if (OrigArg->getType()->isIntegerTy()) {
4944 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
4945 // An input zero extended from i31 can also be considered sign extended.
4946 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
4947 (BitWidth < 32 && In.Flags.isZExt())) {
4948 LoongArchMachineFunctionInfo *LAFI =
4949 MF.getInfo<LoongArchMachineFunctionInfo>();
4950 LAFI->addSExt32Register(Reg: VReg);
4951 }
4952 }
4953 }
4954
4955 return convertLocVTToValVT(DAG, Val, VA, DL);
4956}
4957
4958// The caller is responsible for loading the full value if the argument is
4959// passed with CCValAssign::Indirect.
4960static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
4961 const CCValAssign &VA, const SDLoc &DL) {
4962 MachineFunction &MF = DAG.getMachineFunction();
4963 MachineFrameInfo &MFI = MF.getFrameInfo();
4964 EVT ValVT = VA.getValVT();
4965 int FI = MFI.CreateFixedObject(Size: ValVT.getStoreSize(), SPOffset: VA.getLocMemOffset(),
4966 /*IsImmutable=*/true);
4967 SDValue FIN = DAG.getFrameIndex(
4968 FI, VT: MVT::getIntegerVT(BitWidth: DAG.getDataLayout().getPointerSizeInBits(AS: 0)));
4969
4970 ISD::LoadExtType ExtType;
4971 switch (VA.getLocInfo()) {
4972 default:
4973 llvm_unreachable("Unexpected CCValAssign::LocInfo");
4974 case CCValAssign::Full:
4975 case CCValAssign::Indirect:
4976 case CCValAssign::BCvt:
4977 ExtType = ISD::NON_EXTLOAD;
4978 break;
4979 }
4980 return DAG.getExtLoad(
4981 ExtType, dl: DL, VT: VA.getLocVT(), Chain, Ptr: FIN,
4982 PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI), MemVT: ValVT);
4983}
4984
4985static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
4986 const CCValAssign &VA, const SDLoc &DL) {
4987 EVT LocVT = VA.getLocVT();
4988
4989 switch (VA.getLocInfo()) {
4990 default:
4991 llvm_unreachable("Unexpected CCValAssign::LocInfo");
4992 case CCValAssign::Full:
4993 break;
4994 case CCValAssign::BCvt:
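// Moving an f32 value into an i64 GPR uses MOVFR2GR_S_LA64, since a direct
// bitcast between types of different widths is not possible.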
4995 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
4996 Val = DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Val);
4997 else
4998 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: LocVT, Operand: Val);
4999 break;
5000 }
5001 return Val;
5002}
5003
5004static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
5005 CCValAssign::LocInfo LocInfo,
5006 ISD::ArgFlagsTy ArgFlags, CCState &State) {
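// The GHC calling convention uses only a fixed set of registers (s0-s8,
// fs0-fs7) and never passes arguments on the stack, so exhausting the
// register lists is a fatal error.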
5007 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
5008 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
5009 // s0 s1 s2 s3 s4 s5 s6 s7 s8
5010 static const MCPhysReg GPRList[] = {
5011 LoongArch::R23, LoongArch::R24, LoongArch::R25,
5012 LoongArch::R26, LoongArch::R27, LoongArch::R28,
5013 LoongArch::R29, LoongArch::R30, LoongArch::R31};
5014 if (unsigned Reg = State.AllocateReg(Regs: GPRList)) {
5015 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo));
5016 return false;
5017 }
5018 }
5019
5020 if (LocVT == MVT::f32) {
5021 // Pass in STG registers: F1, F2, F3, F4
5022 // fs0,fs1,fs2,fs3
5023 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
5024 LoongArch::F26, LoongArch::F27};
5025 if (unsigned Reg = State.AllocateReg(Regs: FPR32List)) {
5026 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo));
5027 return false;
5028 }
5029 }
5030
5031 if (LocVT == MVT::f64) {
5032 // Pass in STG registers: D1, D2, D3, D4
5033 // fs4,fs5,fs6,fs7
5034 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
5035 LoongArch::F30_64, LoongArch::F31_64};
5036 if (unsigned Reg = State.AllocateReg(Regs: FPR64List)) {
5037 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo));
5038 return false;
5039 }
5040 }
5041
5042 report_fatal_error(reason: "No registers left in GHC calling convention");
5043 return true;
5044}
5045
5046// Transform physical registers into virtual registers.
5047SDValue LoongArchTargetLowering::LowerFormalArguments(
5048 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
5049 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
5050 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5051
5052 MachineFunction &MF = DAG.getMachineFunction();
5053
5054 switch (CallConv) {
5055 default:
5056 llvm_unreachable("Unsupported calling convention");
5057 case CallingConv::C:
5058 case CallingConv::Fast:
5059 break;
5060 case CallingConv::GHC:
5061 if (!MF.getSubtarget().hasFeature(Feature: LoongArch::FeatureBasicF) ||
5062 !MF.getSubtarget().hasFeature(Feature: LoongArch::FeatureBasicD))
5063 report_fatal_error(
5064 reason: "GHC calling convention requires the F and D extensions");
5065 }
5066
5067 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
5068 MVT GRLenVT = Subtarget.getGRLenVT();
5069 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
5070 // Used with varargs to accumulate store chains.
5071 std::vector<SDValue> OutChains;
5072
5073 // Assign locations to all of the incoming arguments.
5074 SmallVector<CCValAssign> ArgLocs;
5075 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5076
5077 if (CallConv == CallingConv::GHC)
5078 CCInfo.AnalyzeFormalArguments(Ins, Fn: CC_LoongArch_GHC);
5079 else
5080 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, Fn: CC_LoongArch);
5081
5082 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
5083 CCValAssign &VA = ArgLocs[i];
5084 SDValue ArgValue;
5085 if (VA.isRegLoc())
5086 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, In: Ins[i], TLI: *this);
5087 else
5088 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
5089 if (VA.getLocInfo() == CCValAssign::Indirect) {
5090 // If the original argument was split and passed by reference, we need to
5091 // load all parts of it here (using the same address).
5092 InVals.push_back(Elt: DAG.getLoad(VT: VA.getValVT(), dl: DL, Chain, Ptr: ArgValue,
5093 PtrInfo: MachinePointerInfo()));
5094 unsigned ArgIndex = Ins[i].OrigArgIndex;
5095 unsigned ArgPartOffset = Ins[i].PartOffset;
5096 assert(ArgPartOffset == 0);
5097 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
5098 CCValAssign &PartVA = ArgLocs[i + 1];
5099 unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
5100 SDValue Offset = DAG.getIntPtrConstant(Val: PartOffset, DL);
5101 SDValue Address = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: ArgValue, N2: Offset);
5102 InVals.push_back(Elt: DAG.getLoad(VT: PartVA.getValVT(), dl: DL, Chain, Ptr: Address,
5103 PtrInfo: MachinePointerInfo()));
5104 ++i;
5105 }
5106 continue;
5107 }
5108 InVals.push_back(Elt: ArgValue);
5109 }
5110
5111 if (IsVarArg) {
5112 ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
5113 unsigned Idx = CCInfo.getFirstUnallocated(Regs: ArgRegs);
5114 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
5115 MachineFrameInfo &MFI = MF.getFrameInfo();
5116 MachineRegisterInfo &RegInfo = MF.getRegInfo();
5117 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
5118
5119 // Offset of the first variable argument from stack pointer, and size of
5120 // the vararg save area. For now, the varargs save area is either zero or
5121 // large enough to hold a0-a7.
5122 int VaArgOffset, VarArgsSaveSize;
5123
5124 // If all registers are allocated, then all varargs must be passed on the
5125 // stack and we don't need to save any argregs.
5126 if (ArgRegs.size() == Idx) {
5127 VaArgOffset = CCInfo.getStackSize();
5128 VarArgsSaveSize = 0;
5129 } else {
5130 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
5131 VaArgOffset = -VarArgsSaveSize;
5132 }
5133
5134 // Record the frame index of the first variable argument,
5135 // which is needed by the VASTART lowering.
5136 int FI = MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset, IsImmutable: true);
5137 LoongArchFI->setVarArgsFrameIndex(FI);
5138
5139 // If saving an odd number of registers then create an extra stack slot to
5140 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
5141 // offsets to even-numbered registers remain 2*GRLen-aligned.
5142 if (Idx % 2) {
5143 MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset - (int)GRLenInBytes,
5144 IsImmutable: true);
5145 VarArgsSaveSize += GRLenInBytes;
5146 }
5147
5148 // Copy the integer registers that may have been used for passing varargs
5149 // to the vararg save area.
5150 for (unsigned I = Idx; I < ArgRegs.size();
5151 ++I, VaArgOffset += GRLenInBytes) {
5152 const Register Reg = RegInfo.createVirtualRegister(RegClass: RC);
5153 RegInfo.addLiveIn(Reg: ArgRegs[I], vreg: Reg);
5154 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl: DL, Reg, VT: GRLenVT);
5155 FI = MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset, IsImmutable: true);
5156 SDValue PtrOff = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout()));
5157 SDValue Store = DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: PtrOff,
5158 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI));
5159 cast<StoreSDNode>(Val: Store.getNode())
5160 ->getMemOperand()
5161 ->setValue((Value *)nullptr);
5162 OutChains.push_back(x: Store);
5163 }
5164 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
5165 }
5166
5167 // All stores are grouped in one node so that the sizes of Ins and InVals
5168 // stay matched. This only happens for vararg functions.
5169 if (!OutChains.empty()) {
5170 OutChains.push_back(x: Chain);
5171 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
5172 }
5173
5174 return Chain;
5175}
5176
5177bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
5178 return CI->isTailCall();
5179}
5180
5181 // Check whether the return value is used only as the return value, since
5182 // otherwise we can't perform a tail call.
5183bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
5184 SDValue &Chain) const {
5185 if (N->getNumValues() != 1)
5186 return false;
5187 if (!N->hasNUsesOfValue(NUses: 1, Value: 0))
5188 return false;
5189
5190 SDNode *Copy = *N->use_begin();
5191 if (Copy->getOpcode() != ISD::CopyToReg)
5192 return false;
5193
5194 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
5195 // isn't safe to perform a tail call.
5196 if (Copy->getGluedNode())
5197 return false;
5198
5199 // The copy must be used by a LoongArchISD::RET, and nothing else.
5200 bool HasRet = false;
5201 for (SDNode *Node : Copy->uses()) {
5202 if (Node->getOpcode() != LoongArchISD::RET)
5203 return false;
5204 HasRet = true;
5205 }
5206
5207 if (!HasRet)
5208 return false;
5209
5210 Chain = Copy->getOperand(Num: 0);
5211 return true;
5212}
5213
5214// Check whether the call is eligible for tail call optimization.
5215bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
5216 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
5217 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
5218
5219 auto CalleeCC = CLI.CallConv;
5220 auto &Outs = CLI.Outs;
5221 auto &Caller = MF.getFunction();
5222 auto CallerCC = Caller.getCallingConv();
5223
5224 // Do not tail call opt if the stack is used to pass parameters.
5225 if (CCInfo.getStackSize() != 0)
5226 return false;
5227
5228 // Do not tail call opt if any parameters need to be passed indirectly.
5229 for (auto &VA : ArgLocs)
5230 if (VA.getLocInfo() == CCValAssign::Indirect)
5231 return false;
5232
5233 // Do not tail call opt if either caller or callee uses struct return
5234 // semantics.
5235 auto IsCallerStructRet = Caller.hasStructRetAttr();
5236 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
5237 if (IsCallerStructRet || IsCalleeStructRet)
5238 return false;
5239
5240 // Do not tail call opt if either the callee or caller has a byval argument.
5241 for (auto &Arg : Outs)
5242 if (Arg.Flags.isByVal())
5243 return false;
5244
5245 // The callee has to preserve all registers the caller needs to preserve.
5246 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
5247 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
5248 if (CalleeCC != CallerCC) {
5249 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
5250 if (!TRI->regmaskSubsetEqual(mask0: CallerPreserved, mask1: CalleePreserved))
5251 return false;
5252 }
5253 return true;
5254}
5255
5256static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
5257 return DAG.getDataLayout().getPrefTypeAlign(
5258 Ty: VT.getTypeForEVT(Context&: *DAG.getContext()));
5259}
5260
5261// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
5262// and output parameter nodes.
5263SDValue
5264LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
5265 SmallVectorImpl<SDValue> &InVals) const {
5266 SelectionDAG &DAG = CLI.DAG;
5267 SDLoc &DL = CLI.DL;
5268 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5269 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5270 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5271 SDValue Chain = CLI.Chain;
5272 SDValue Callee = CLI.Callee;
5273 CallingConv::ID CallConv = CLI.CallConv;
5274 bool IsVarArg = CLI.IsVarArg;
5275 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
5276 MVT GRLenVT = Subtarget.getGRLenVT();
5277 bool &IsTailCall = CLI.IsTailCall;
5278
5279 MachineFunction &MF = DAG.getMachineFunction();
5280
5281 // Analyze the operands of the call, assigning locations to each operand.
5282 SmallVector<CCValAssign> ArgLocs;
5283 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5284
5285 if (CallConv == CallingConv::GHC)
5286 ArgCCInfo.AnalyzeCallOperands(Outs, Fn: CC_LoongArch_GHC);
5287 else
5288 analyzeOutputArgs(MF, CCInfo&: ArgCCInfo, Outs, /*IsRet=*/false, CLI: &CLI, Fn: CC_LoongArch);
5289
5290 // Check if it's really possible to do a tail call.
5291 if (IsTailCall)
5292 IsTailCall = isEligibleForTailCallOptimization(CCInfo&: ArgCCInfo, CLI, MF, ArgLocs);
5293
5294 if (IsTailCall)
5295 ++NumTailCalls;
5296 else if (CLI.CB && CLI.CB->isMustTailCall())
5297 report_fatal_error(reason: "failed to perform tail call elimination on a call "
5298 "site marked musttail");
5299
5300 // Get a count of how many bytes are to be pushed on the stack.
5301 unsigned NumBytes = ArgCCInfo.getStackSize();
5302
5303 // Create local copies for byval args.
5304 SmallVector<SDValue> ByValArgs;
5305 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5306 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5307 if (!Flags.isByVal())
5308 continue;
5309
5310 SDValue Arg = OutVals[i];
5311 unsigned Size = Flags.getByValSize();
5312 Align Alignment = Flags.getNonZeroByValAlign();
5313
5314 int FI =
5315 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/isSpillSlot: false);
5316 SDValue FIPtr = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout()));
5317 SDValue SizeNode = DAG.getConstant(Val: Size, DL, VT: GRLenVT);
5318
5319 Chain = DAG.getMemcpy(Chain, dl: DL, Dst: FIPtr, Src: Arg, Size: SizeNode, Alignment,
5320 /*IsVolatile=*/isVol: false,
5321 /*AlwaysInline=*/false, /*CI=*/nullptr, OverrideTailCall: std::nullopt,
5322 DstPtrInfo: MachinePointerInfo(), SrcPtrInfo: MachinePointerInfo());
5323 ByValArgs.push_back(Elt: FIPtr);
5324 }
5325
5326 if (!IsTailCall)
5327 Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: 0, DL: CLI.DL);
5328
5329 // Copy argument values to their designated locations.
5330 SmallVector<std::pair<Register, SDValue>> RegsToPass;
5331 SmallVector<SDValue> MemOpChains;
5332 SDValue StackPtr;
5333 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
5334 CCValAssign &VA = ArgLocs[i];
5335 SDValue ArgValue = OutVals[i];
5336 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5337
5338 // Promote the value if needed.
5339 // For now, only handle fully promoted and indirect arguments.
5340 if (VA.getLocInfo() == CCValAssign::Indirect) {
5341 // Store the argument in a stack slot and pass its address.
5342 Align StackAlign =
5343 std::max(a: getPrefTypeAlign(VT: Outs[i].ArgVT, DAG),
5344 b: getPrefTypeAlign(VT: ArgValue.getValueType(), DAG));
5345 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
5346 // If the original argument was split and passed by reference, we need to
5347 // store the required parts of it here (and pass just one address).
5348 unsigned ArgIndex = Outs[i].OrigArgIndex;
5349 unsigned ArgPartOffset = Outs[i].PartOffset;
5350 assert(ArgPartOffset == 0);
5351 // Calculate the total size to store. It is only known after walking the
5352 // remaining parts of the split argument, so collect the parts and their
5353 // offsets in the loop below.
5354 SmallVector<std::pair<SDValue, SDValue>> Parts;
5355 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
5356 SDValue PartValue = OutVals[i + 1];
5357 unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
5358 SDValue Offset = DAG.getIntPtrConstant(Val: PartOffset, DL);
5359 EVT PartVT = PartValue.getValueType();
5360
5361 StoredSize += PartVT.getStoreSize();
5362 StackAlign = std::max(a: StackAlign, b: getPrefTypeAlign(VT: PartVT, DAG));
5363 Parts.push_back(Elt: std::make_pair(x&: PartValue, y&: Offset));
5364 ++i;
5365 }
5366 SDValue SpillSlot = DAG.CreateStackTemporary(Bytes: StoredSize, Alignment: StackAlign);
5367 int FI = cast<FrameIndexSDNode>(Val&: SpillSlot)->getIndex();
5368 MemOpChains.push_back(
5369 Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: SpillSlot,
5370 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)));
5371 for (const auto &Part : Parts) {
5372 SDValue PartValue = Part.first;
5373 SDValue PartOffset = Part.second;
5374 SDValue Address =
5375 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: SpillSlot, N2: PartOffset);
5376 MemOpChains.push_back(
5377 Elt: DAG.getStore(Chain, dl: DL, Val: PartValue, Ptr: Address,
5378 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)));
5379 }
5380 ArgValue = SpillSlot;
5381 } else {
5382 ArgValue = convertValVTToLocVT(DAG, Val: ArgValue, VA, DL);
5383 }
5384
5385 // Use local copy if it is a byval arg.
5386 if (Flags.isByVal())
5387 ArgValue = ByValArgs[j++];
5388
5389 if (VA.isRegLoc()) {
5390 // Queue up the argument copies and emit them at the end.
5391 RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: ArgValue));
5392 } else {
5393 assert(VA.isMemLoc() && "Argument not register or memory");
5394 assert(!IsTailCall && "Tail call not allowed if stack is used "
5395 "for passing parameters");
5396
5397 // Work out the address of the stack slot.
5398 if (!StackPtr.getNode())
5399 StackPtr = DAG.getCopyFromReg(Chain, dl: DL, Reg: LoongArch::R3, VT: PtrVT);
5400 SDValue Address =
5401 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr,
5402 N2: DAG.getIntPtrConstant(Val: VA.getLocMemOffset(), DL));
5403
5404 // Emit the store.
5405 MemOpChains.push_back(
5406 Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: Address, PtrInfo: MachinePointerInfo()));
5407 }
5408 }
5409
5410 // Join the stores, which are independent of one another.
5411 if (!MemOpChains.empty())
5412 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: MemOpChains);
5413
5414 SDValue Glue;
5415
5416 // Build a sequence of copy-to-reg nodes, chained and glued together.
5417 for (auto &Reg : RegsToPass) {
5418 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: Reg.first, N: Reg.second, Glue);
5419 Glue = Chain.getValue(R: 1);
5420 }
5421
5422 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
5423 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
5424 // split it, and so the direct call can be matched by PseudoCALL.
5425 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Val&: Callee)) {
5426 const GlobalValue *GV = S->getGlobal();
5427 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
5428 ? LoongArchII::MO_CALL
5429 : LoongArchII::MO_CALL_PLT;
5430 Callee = DAG.getTargetGlobalAddress(GV: S->getGlobal(), DL, VT: PtrVT, offset: 0, TargetFlags: OpFlags);
5431 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Val&: Callee)) {
5432 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV: nullptr)
5433 ? LoongArchII::MO_CALL
5434 : LoongArchII::MO_CALL_PLT;
5435 Callee = DAG.getTargetExternalSymbol(Sym: S->getSymbol(), VT: PtrVT, TargetFlags: OpFlags);
5436 }
5437
5438 // The first call operand is the chain and the second is the target address.
5439 SmallVector<SDValue> Ops;
5440 Ops.push_back(Elt: Chain);
5441 Ops.push_back(Elt: Callee);
5442
5443 // Add argument registers to the end of the list so that they are
5444 // known live into the call.
5445 for (auto &Reg : RegsToPass)
5446 Ops.push_back(Elt: DAG.getRegister(Reg: Reg.first, VT: Reg.second.getValueType()));
5447
5448 if (!IsTailCall) {
5449 // Add a register mask operand representing the call-preserved registers.
5450 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5451 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
5452 assert(Mask && "Missing call preserved mask for calling convention");
5453 Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask));
5454 }
5455
5456 // Glue the call to the argument copies, if any.
5457 if (Glue.getNode())
5458 Ops.push_back(Elt: Glue);
5459
5460 // Emit the call.
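  // The code model decides which call pseudo is emitted below: CALL/TAIL can
  // reach their target with a single direct call, while the MEDIUM and LARGE
  // variants are pseudos that later expand into longer sequences
  // (illustratively, materializing a PC-relative address and then jumping
  // through it).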
5461 SDVTList NodeTys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue);
5462 unsigned Op;
5463 switch (DAG.getTarget().getCodeModel()) {
5464 default:
5465 report_fatal_error(reason: "Unsupported code model");
5466 case CodeModel::Small:
5467 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
5468 break;
5469 case CodeModel::Medium:
5470 assert(Subtarget.is64Bit() && "Medium code model requires LA64");
5471 Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
5472 break;
5473 case CodeModel::Large:
5474 assert(Subtarget.is64Bit() && "Large code model requires LA64");
5475 Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
5476 break;
5477 }
5478
5479 if (IsTailCall) {
5480 MF.getFrameInfo().setHasTailCall();
5481 SDValue Ret = DAG.getNode(Opcode: Op, DL, VTList: NodeTys, Ops);
5482 DAG.addNoMergeSiteInfo(Node: Ret.getNode(), NoMerge: CLI.NoMerge);
5483 return Ret;
5484 }
5485
5486 Chain = DAG.getNode(Opcode: Op, DL, VTList: NodeTys, Ops);
5487 DAG.addNoMergeSiteInfo(Node: Chain.getNode(), NoMerge: CLI.NoMerge);
5488 Glue = Chain.getValue(R: 1);
5489
5490 // Mark the end of the call, which is glued to the call itself.
5491 Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytes, Size2: 0, Glue, DL);
5492 Glue = Chain.getValue(R: 1);
5493
5494 // Assign locations to each value returned by this call.
5495 SmallVector<CCValAssign> RVLocs;
5496 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
5497 analyzeInputArgs(MF, CCInfo&: RetCCInfo, Ins, /*IsRet=*/true, Fn: CC_LoongArch);
5498
5499 // Copy all of the result registers out of their specified physreg.
5500 for (auto &VA : RVLocs) {
5501 // Copy the value out.
5502 SDValue RetValue =
5503 DAG.getCopyFromReg(Chain, dl: DL, Reg: VA.getLocReg(), VT: VA.getLocVT(), Glue);
5504 // Glue the RetValue to the end of the call sequence.
5505 Chain = RetValue.getValue(R: 1);
5506 Glue = RetValue.getValue(R: 2);
5507
5508 RetValue = convertLocVTToValVT(DAG, Val: RetValue, VA, DL);
5509
5510 InVals.push_back(Elt: RetValue);
5511 }
5512
5513 return Chain;
5514}
5515
5516bool LoongArchTargetLowering::CanLowerReturn(
5517 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
5518 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
5519 SmallVector<CCValAssign> RVLocs;
5520 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
5521
5522 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5523 LoongArchABI::ABI ABI =
5524 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
5525 if (CC_LoongArch(DL: MF.getDataLayout(), ABI, ValNo: i, ValVT: Outs[i].VT, LocInfo: CCValAssign::Full,
5526 ArgFlags: Outs[i].Flags, State&: CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
5527 OrigTy: nullptr))
5528 return false;
5529 }
5530 return true;
5531}
5532
5533SDValue LoongArchTargetLowering::LowerReturn(
5534 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
5535 const SmallVectorImpl<ISD::OutputArg> &Outs,
5536 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
5537 SelectionDAG &DAG) const {
5538 // Stores the assignment of the return value to a location.
5539 SmallVector<CCValAssign> RVLocs;
5540
5541 // Info about the registers and stack slot.
5542 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
5543 *DAG.getContext());
5544
5545 analyzeOutputArgs(MF&: DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
5546 CLI: nullptr, Fn: CC_LoongArch);
5547 if (CallConv == CallingConv::GHC && !RVLocs.empty())
5548 report_fatal_error(reason: "GHC functions return void only");
5549 SDValue Glue;
5550 SmallVector<SDValue, 4> RetOps(1, Chain);
5551
5552 // Copy the result values into the output registers.
5553 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
5554 CCValAssign &VA = RVLocs[i];
5555 assert(VA.isRegLoc() && "Can only return in registers!");
5556
5557 // Handle a 'normal' return.
5558 SDValue Val = convertValVTToLocVT(DAG, Val: OutVals[i], VA, DL);
5559 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: VA.getLocReg(), N: Val, Glue);
5560
5561 // Guarantee that all emitted copies are stuck together.
5562 Glue = Chain.getValue(R: 1);
5563 RetOps.push_back(Elt: DAG.getRegister(Reg: VA.getLocReg(), VT: VA.getLocVT()));
5564 }
5565
5566 RetOps[0] = Chain; // Update chain.
5567
5568 // Add the glue node if we have it.
5569 if (Glue.getNode())
5570 RetOps.push_back(Elt: Glue);
5571
5572 return DAG.getNode(Opcode: LoongArchISD::RET, DL, VT: MVT::Other, Ops: RetOps);
5573}
5574
5575bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
5576 bool ForCodeSize) const {
5577 // TODO: Maybe need more checks here after vector extension is supported.
5578 if (VT == MVT::f32 && !Subtarget.hasBasicF())
5579 return false;
5580 if (VT == MVT::f64 && !Subtarget.hasBasicD())
5581 return false;
5582 return (Imm.isZero() || Imm.isExactlyValue(V: +1.0));
5583}
5584
5585bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const {
5586 return true;
5587}
5588
5589bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const {
5590 return true;
5591}
5592
5593bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
5594 const Instruction *I) const {
5595 if (!Subtarget.is64Bit())
5596 return isa<LoadInst>(Val: I) || isa<StoreInst>(Val: I);
5597
5598 if (isa<LoadInst>(Val: I))
5599 return true;
5600
5601 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
5602 // require fences because we can use amswap_db.[w/d].
5603 if (isa<StoreInst>(Val: I)) {
5604 unsigned Size = I->getOperand(i: 0)->getType()->getIntegerBitWidth();
5605 return (Size == 8 || Size == 16);
5606 }
5607
5608 return false;
5609}
5610
5611EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
5612 LLVMContext &Context,
5613 EVT VT) const {
5614 if (!VT.isVector())
5615 return getPointerTy(DL);
5616 return VT.changeVectorElementTypeToInteger();
5617}
5618
5619bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
5620 // TODO: Support vectors.
5621 return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Val: Y);
5622}
5623
5624bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
5625 const CallInst &I,
5626 MachineFunction &MF,
5627 unsigned Intrinsic) const {
5628 switch (Intrinsic) {
5629 default:
5630 return false;
5631 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
5632 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
5633 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
5634 case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
5635 Info.opc = ISD::INTRINSIC_W_CHAIN;
5636 Info.memVT = MVT::i32;
5637 Info.ptrVal = I.getArgOperand(i: 0);
5638 Info.offset = 0;
5639 Info.align = Align(4);
5640 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
5641 MachineMemOperand::MOVolatile;
5642 return true;
5643 // TODO: Add more Intrinsics later.
5644 }
5645}
5646
5647TargetLowering::AtomicExpansionKind
5648LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
5649 // TODO: Add more AtomicRMWInst that need to be extended.
5650
5651 // Since floating-point operations require a non-trivial set of data
5652 // operations, use CmpXChg to expand.
5653 if (AI->isFloatingPointOperation() ||
5654 AI->getOperation() == AtomicRMWInst::UIncWrap ||
5655 AI->getOperation() == AtomicRMWInst::UDecWrap)
5656 return AtomicExpansionKind::CmpXChg;
5657
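  // Sub-word (i8/i16) operations are expanded to the masked intrinsics because
  // the LL/SC instructions only operate on naturally aligned 32/64-bit words.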
5658 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
5659 if (Size == 8 || Size == 16)
5660 return AtomicExpansionKind::MaskedIntrinsic;
5661 return AtomicExpansionKind::None;
5662}
5663
5664static Intrinsic::ID
5665getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
5666 AtomicRMWInst::BinOp BinOp) {
5667 if (GRLen == 64) {
5668 switch (BinOp) {
5669 default:
5670 llvm_unreachable("Unexpected AtomicRMW BinOp");
5671 case AtomicRMWInst::Xchg:
5672 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
5673 case AtomicRMWInst::Add:
5674 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
5675 case AtomicRMWInst::Sub:
5676 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
5677 case AtomicRMWInst::Nand:
5678 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
5679 case AtomicRMWInst::UMax:
5680 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
5681 case AtomicRMWInst::UMin:
5682 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
5683 case AtomicRMWInst::Max:
5684 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
5685 case AtomicRMWInst::Min:
5686 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
5687 // TODO: support other AtomicRMWInst.
5688 }
5689 }
5690
5691 if (GRLen == 32) {
5692 switch (BinOp) {
5693 default:
5694 llvm_unreachable("Unexpected AtomicRMW BinOp");
5695 case AtomicRMWInst::Xchg:
5696 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
5697 case AtomicRMWInst::Add:
5698 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
5699 case AtomicRMWInst::Sub:
5700 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
5701 case AtomicRMWInst::Nand:
5702 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
5703 // TODO: support other AtomicRMWInst.
5704 }
5705 }
5706
5707 llvm_unreachable("Unexpected GRLen\n");
5708}
5709
5710TargetLowering::AtomicExpansionKind
5711LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
5712 AtomicCmpXchgInst *CI) const {
5713 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
5714 if (Size == 8 || Size == 16)
5715 return AtomicExpansionKind::MaskedIntrinsic;
5716 return AtomicExpansionKind::None;
5717}
5718
5719Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
5720 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
5721 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
5722 AtomicOrdering FailOrd = CI->getFailureOrdering();
5723 Value *FailureOrdering =
5724 Builder.getIntN(N: Subtarget.getGRLen(), C: static_cast<uint64_t>(FailOrd));
5725
5726 // TODO: Support cmpxchg on LA32.
5727 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
5728 CmpVal = Builder.CreateSExt(V: CmpVal, DestTy: Builder.getInt64Ty());
5729 NewVal = Builder.CreateSExt(V: NewVal, DestTy: Builder.getInt64Ty());
5730 Mask = Builder.CreateSExt(V: Mask, DestTy: Builder.getInt64Ty());
5731 Type *Tys[] = {AlignedAddr->getType()};
5732 Function *MaskedCmpXchg =
5733 Intrinsic::getDeclaration(M: CI->getModule(), id: CmpXchgIntrID, Tys);
5734 Value *Result = Builder.CreateCall(
5735 Callee: MaskedCmpXchg, Args: {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
5736 Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty());
5737 return Result;
5738}
5739
5740Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
5741 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
5742 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
5743 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
5744 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
5745 // mask, as this produces better code than the LL/SC loop emitted by
5746 // int_loongarch_masked_atomicrmw_xchg.
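  // For example (illustrative): an i8 `xchg` with 0 on a naturally aligned
  // word clears only the selected lanes via `and AlignedAddr, ~Mask`, and an
  // `xchg` with -1 sets them via `or AlignedAddr, Mask`.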
5747 if (AI->getOperation() == AtomicRMWInst::Xchg &&
5748 isa<ConstantInt>(Val: AI->getValOperand())) {
5749 ConstantInt *CVal = cast<ConstantInt>(Val: AI->getValOperand());
5750 if (CVal->isZero())
5751 return Builder.CreateAtomicRMW(Op: AtomicRMWInst::And, Ptr: AlignedAddr,
5752 Val: Builder.CreateNot(V: Mask, Name: "Inv_Mask"),
5753 Align: AI->getAlign(), Ordering: Ord);
5754 if (CVal->isMinusOne())
5755 return Builder.CreateAtomicRMW(Op: AtomicRMWInst::Or, Ptr: AlignedAddr, Val: Mask,
5756 Align: AI->getAlign(), Ordering: Ord);
5757 }
5758
5759 unsigned GRLen = Subtarget.getGRLen();
5760 Value *Ordering =
5761 Builder.getIntN(N: GRLen, C: static_cast<uint64_t>(AI->getOrdering()));
5762 Type *Tys[] = {AlignedAddr->getType()};
5763 Function *LlwOpScwLoop = Intrinsic::getDeclaration(
5764 M: AI->getModule(),
5765 id: getIntrinsicForMaskedAtomicRMWBinOp(GRLen, BinOp: AI->getOperation()), Tys);
5766
5767 if (GRLen == 64) {
5768 Incr = Builder.CreateSExt(V: Incr, DestTy: Builder.getInt64Ty());
5769 Mask = Builder.CreateSExt(V: Mask, DestTy: Builder.getInt64Ty());
5770 ShiftAmt = Builder.CreateSExt(V: ShiftAmt, DestTy: Builder.getInt64Ty());
5771 }
5772
5773 Value *Result;
5774
5775 // Must pass the shift amount needed to sign extend the loaded value prior
5776 // to performing a signed comparison for min/max. ShiftAmt is the number of
5777 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
5778 // is the number of bits to left+right shift the value in order to
5779 // sign-extend.
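  // Illustrative example, assuming GRLen is 64: an i8 field at ShiftAmt == 16
  // has ValWidth == 8, so SextShamt == 64 - 8 - 16 == 40; shifting left and
  // then arithmetically right by 40 sign-extends the field in place.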
5780 if (AI->getOperation() == AtomicRMWInst::Min ||
5781 AI->getOperation() == AtomicRMWInst::Max) {
5782 const DataLayout &DL = AI->getDataLayout();
5783 unsigned ValWidth =
5784 DL.getTypeStoreSizeInBits(Ty: AI->getValOperand()->getType());
5785 Value *SextShamt =
5786 Builder.CreateSub(LHS: Builder.getIntN(N: GRLen, C: GRLen - ValWidth), RHS: ShiftAmt);
5787 Result = Builder.CreateCall(Callee: LlwOpScwLoop,
5788 Args: {AlignedAddr, Incr, Mask, SextShamt, Ordering});
5789 } else {
5790 Result =
5791 Builder.CreateCall(Callee: LlwOpScwLoop, Args: {AlignedAddr, Incr, Mask, Ordering});
5792 }
5793
5794 if (GRLen == 64)
5795 Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty());
5796 return Result;
5797}
5798
5799bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
5800 const MachineFunction &MF, EVT VT) const {
5801 VT = VT.getScalarType();
5802
5803 if (!VT.isSimple())
5804 return false;
5805
5806 switch (VT.getSimpleVT().SimpleTy) {
5807 case MVT::f32:
5808 case MVT::f64:
5809 return true;
5810 default:
5811 break;
5812 }
5813
5814 return false;
5815}
5816
5817Register LoongArchTargetLowering::getExceptionPointerRegister(
5818 const Constant *PersonalityFn) const {
5819 return LoongArch::R4;
5820}
5821
5822Register LoongArchTargetLowering::getExceptionSelectorRegister(
5823 const Constant *PersonalityFn) const {
5824 return LoongArch::R5;
5825}
5826
5827//===----------------------------------------------------------------------===//
5828// LoongArch Inline Assembly Support
5829//===----------------------------------------------------------------------===//
5830
5831LoongArchTargetLowering::ConstraintType
5832LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
5833 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
5834 //
5835 // 'f': A floating-point register (if available).
5836 // 'k': A memory operand whose address is formed by a base register and
5837 // (optionally scaled) index register.
5838 // 'l': A signed 16-bit constant.
5839 // 'm': A memory operand whose address is formed by a base register and
5840 // offset that is suitable for use in instructions with the same
5841 // addressing mode as st.w and ld.w.
5842 // 'I': A signed 12-bit constant (for arithmetic instructions).
5843 // 'J': Integer zero.
5844 // 'K': An unsigned 12-bit constant (for logic instructions).
5845 // "ZB": An address that is held in a general-purpose register. The offset is
5846 // zero.
5847 // "ZC": A memory operand whose address is formed by a base register and
5848 // offset that is suitable for use in instructions with the same
5849 // addressing mode as ll.w and sc.w.
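  // A hypothetical use from C, purely for illustration:
  //   asm volatile("addi.w %0, %1, %2" : "=r"(res) : "r"(a), "I"(100));
  // where 'I' restricts the immediate operand to a signed 12-bit constant.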
5850 if (Constraint.size() == 1) {
5851 switch (Constraint[0]) {
5852 default:
5853 break;
5854 case 'f':
5855 return C_RegisterClass;
5856 case 'l':
5857 case 'I':
5858 case 'J':
5859 case 'K':
5860 return C_Immediate;
5861 case 'k':
5862 return C_Memory;
5863 }
5864 }
5865
5866 if (Constraint == "ZC" || Constraint == "ZB")
5867 return C_Memory;
5868
5869 // 'm' is handled here.
5870 return TargetLowering::getConstraintType(Constraint);
5871}
5872
5873InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
5874 StringRef ConstraintCode) const {
5875 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
5876 .Case(S: "k", Value: InlineAsm::ConstraintCode::k)
5877 .Case(S: "ZB", Value: InlineAsm::ConstraintCode::ZB)
5878 .Case(S: "ZC", Value: InlineAsm::ConstraintCode::ZC)
5879 .Default(Value: TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
5880}
5881
5882std::pair<unsigned, const TargetRegisterClass *>
5883LoongArchTargetLowering::getRegForInlineAsmConstraint(
5884 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
5885 // First, see if this is a constraint that directly corresponds to a LoongArch
5886 // register class.
5887 if (Constraint.size() == 1) {
5888 switch (Constraint[0]) {
5889 case 'r':
5890 // TODO: Support fixed vectors up to GRLen?
5891 if (VT.isVector())
5892 break;
5893 return std::make_pair(x: 0U, y: &LoongArch::GPRRegClass);
5894 case 'f':
5895 if (Subtarget.hasBasicF() && VT == MVT::f32)
5896 return std::make_pair(x: 0U, y: &LoongArch::FPR32RegClass);
5897 if (Subtarget.hasBasicD() && VT == MVT::f64)
5898 return std::make_pair(x: 0U, y: &LoongArch::FPR64RegClass);
5899 if (Subtarget.hasExtLSX() &&
5900 TRI->isTypeLegalForClass(RC: LoongArch::LSX128RegClass, T: VT))
5901 return std::make_pair(x: 0U, y: &LoongArch::LSX128RegClass);
5902 if (Subtarget.hasExtLASX() &&
5903 TRI->isTypeLegalForClass(RC: LoongArch::LASX256RegClass, T: VT))
5904 return std::make_pair(x: 0U, y: &LoongArch::LASX256RegClass);
5905 break;
5906 default:
5907 break;
5908 }
5909 }
5910
5911 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
5912 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
5913 // constraints while the official register name is prefixed with a '$'. So we
5914 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
5915 // before it is parsed. And TargetLowering::getRegForInlineAsmConstraint is
5916 // case insensitive, so no need to convert the constraint to upper case here.
5917 //
5918 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
5919 // decode the usage of register name aliases into their official names. And
5920 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
5921 // official register names.
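  // For example (illustrative): "{$f0}" is rewritten to "{f0}" here, and with
  // the D extension available it is mapped to F0_64 below so that the same
  // physical register can hold both f32 and f64 operands.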
5922 if (Constraint.starts_with(Prefix: "{$r") || Constraint.starts_with(Prefix: "{$f") ||
5923 Constraint.starts_with(Prefix: "{$vr") || Constraint.starts_with(Prefix: "{$xr")) {
5924 bool IsFP = Constraint[2] == 'f';
5925 std::pair<StringRef, StringRef> Temp = Constraint.split(Separator: '$');
5926 std::pair<unsigned, const TargetRegisterClass *> R;
5927 R = TargetLowering::getRegForInlineAsmConstraint(
5928 TRI, Constraint: join_items(Separator: "", Items&: Temp.first, Items&: Temp.second), VT);
5929 // Match those names to the widest floating point register type available.
5930 if (IsFP) {
5931 unsigned RegNo = R.first;
5932 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
5933 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
5934 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
5935 return std::make_pair(x&: DReg, y: &LoongArch::FPR64RegClass);
5936 }
5937 }
5938 }
5939 return R;
5940 }
5941
5942 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
5943}
5944
5945void LoongArchTargetLowering::LowerAsmOperandForConstraint(
5946 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
5947 SelectionDAG &DAG) const {
5948 // Currently only support length 1 constraints.
5949 if (Constraint.size() == 1) {
5950 switch (Constraint[0]) {
5951 case 'l':
5952 // Validate & create a 16-bit signed immediate operand.
5953 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) {
5954 uint64_t CVal = C->getSExtValue();
5955 if (isInt<16>(x: CVal))
5956 Ops.push_back(
5957 x: DAG.getTargetConstant(Val: CVal, DL: SDLoc(Op), VT: Subtarget.getGRLenVT()));
5958 }
5959 return;
5960 case 'I':
5961 // Validate & create a 12-bit signed immediate operand.
5962 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) {
5963 uint64_t CVal = C->getSExtValue();
5964 if (isInt<12>(x: CVal))
5965 Ops.push_back(
5966 x: DAG.getTargetConstant(Val: CVal, DL: SDLoc(Op), VT: Subtarget.getGRLenVT()));
5967 }
5968 return;
5969 case 'J':
5970 // Validate & create an integer zero operand.
5971 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op))
5972 if (C->getZExtValue() == 0)
5973 Ops.push_back(
5974 x: DAG.getTargetConstant(Val: 0, DL: SDLoc(Op), VT: Subtarget.getGRLenVT()));
5975 return;
5976 case 'K':
5977 // Validate & create a 12-bit unsigned immediate operand.
5978 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) {
5979 uint64_t CVal = C->getZExtValue();
5980 if (isUInt<12>(x: CVal))
5981 Ops.push_back(
5982 x: DAG.getTargetConstant(Val: CVal, DL: SDLoc(Op), VT: Subtarget.getGRLenVT()));
5983 }
5984 return;
5985 default:
5986 break;
5987 }
5988 }
5989 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
5990}
5991
5992#define GET_REGISTER_MATCHER
5993#include "LoongArchGenAsmMatcher.inc"
5994
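// Resolve a register name, e.g. (illustrative) "$r21", for the named-register
// paths such as llvm.read_register/llvm.write_register; only registers that
// the subtarget reserves may be requested.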
5995Register
5996LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
5997 const MachineFunction &MF) const {
5998 std::pair<StringRef, StringRef> Name = StringRef(RegName).split(Separator: '$');
5999 std::string NewRegName = Name.second.str();
6000 Register Reg = MatchRegisterAltName(Name: NewRegName);
6001 if (Reg == LoongArch::NoRegister)
6002 Reg = MatchRegisterName(Name: NewRegName);
6003 if (Reg == LoongArch::NoRegister)
6004 report_fatal_error(
6005 reason: Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
6006 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
6007 if (!ReservedRegs.test(Idx: Reg))
6008 report_fatal_error(reason: Twine("Trying to obtain non-reserved register \"" +
6009 StringRef(RegName) + "\"."));
6010 return Reg;
6011}
6012
6013bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
6014 EVT VT, SDValue C) const {
6015 // TODO: Support vectors.
6016 if (!VT.isScalarInteger())
6017 return false;
6018
6019 // Omit the optimization if the data size exceeds GRLen.
6020 if (VT.getSizeInBits() > Subtarget.getGRLen())
6021 return false;
6022
6023 if (auto *ConstNode = dyn_cast<ConstantSDNode>(Val: C.getNode())) {
6024 const APInt &Imm = ConstNode->getAPIntValue();
6025 // Break MUL into (SLLI + ADD/SUB) or ALSL.
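    // For instance (illustrative): x * 15 => (x << 4) - x, and x * 5 can use
    // a single ALSL, i.e. (x << 2) + x.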
6026 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
6027 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
6028 return true;
6029 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
6030 if (ConstNode->hasOneUse() &&
6031 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
6032 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
6033 return true;
6034 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
6035 // in which the immediate has two set bits. Or break (MUL x, imm)
6036 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
6037 // equals (1 << s0) - (1 << s1).
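    // For instance (illustrative): x * 8256 => (x << 13) + (x << 6), and
    // x * 8128 => (x << 13) - (x << 6).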
6038 if (ConstNode->hasOneUse() && !(Imm.sge(RHS: -2048) && Imm.sle(RHS: 4095))) {
6039 unsigned Shifts = Imm.countr_zero();
6040 // Reject immediates which can be composed via a single LU12I.W.
6041 if (Shifts >= 12)
6042 return false;
6043 // Reject multiplications that can be optimized to
6044 // (SLLI (ALSL x, x, 1/2/3/4), s).
6045 APInt ImmPop = Imm.ashr(ShiftAmt: Shifts);
6046 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
6047 return false;
6048 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
6049 // since it needs one more instruction than the other 3 cases.
6050 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
6051 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
6052 (ImmSmall - Imm).isPowerOf2())
6053 return true;
6054 }
6055 }
6056
6057 return false;
6058}
6059
6060bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
6061 const AddrMode &AM,
6062 Type *Ty, unsigned AS,
6063 Instruction *I) const {
6064 // LoongArch has four basic addressing modes:
6065 // 1. reg
6066 // 2. reg + 12-bit signed offset
6067 // 3. reg + 14-bit signed offset left-shifted by 2
6068 // 4. reg1 + reg2
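  // Illustratively, these correspond to accesses such as ld.d rd, rj, 0;
  // ld.d rd, rj, si12; ldptr.d rd, rj, si14 (offset scaled by 4); and
  // ldx.d rd, rj, rk.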
6069 // TODO: Add more checks after the vector extension is supported.
6070
6071 // No global is ever allowed as a base.
6072 if (AM.BaseGV)
6073 return false;
6074
6075 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
6076 // with `UAL` feature.
6077 if (!isInt<12>(x: AM.BaseOffs) &&
6078 !(isShiftedInt<14, 2>(x: AM.BaseOffs) && Subtarget.hasUAL()))
6079 return false;
6080
6081 switch (AM.Scale) {
6082 case 0:
6083 // "r+i" or just "i", depending on HasBaseReg.
6084 break;
6085 case 1:
6086 // "r+r+i" is not allowed.
6087 if (AM.HasBaseReg && AM.BaseOffs)
6088 return false;
6089 // Otherwise we have "r+r" or "r+i".
6090 break;
6091 case 2:
6092 // "2*r+r" or "2*r+i" is not allowed.
6093 if (AM.HasBaseReg || AM.BaseOffs)
6094 return false;
6095 // Allow "2*r" as "r+r".
6096 break;
6097 default:
6098 return false;
6099 }
6100
6101 return true;
6102}
6103
6104bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
6105 return isInt<12>(x: Imm);
6106}
6107
6108bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {
6109 return isInt<12>(x: Imm);
6110}
6111
6112bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
6113 // Zexts are free if they can be combined with a load.
6114 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
6115 // poorly with type legalization of compares preferring sext.
6116 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
6117 EVT MemVT = LD->getMemoryVT();
6118 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
6119 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
6120 LD->getExtensionType() == ISD::ZEXTLOAD))
6121 return true;
6122 }
6123
6124 return TargetLowering::isZExtFree(Val, VT2);
6125}
6126
6127bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,
6128 EVT DstVT) const {
6129 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
6130}
6131
6132bool LoongArchTargetLowering::signExtendConstant(const ConstantInt *CI) const {
6133 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(Bitwidth: 32);
6134}
6135
6136bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
6137 // TODO: Support vectors.
6138 if (Y.getValueType().isVector())
6139 return false;
6140
6141 return !isa<ConstantSDNode>(Val: Y);
6142}
6143
6144ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
6145 // TODO: LAMCAS will use amcas{_DB,}.[bhwd] which does not require extension.
6146 return ISD::SIGN_EXTEND;
6147}
6148
6149bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall(
6150 EVT Type, bool IsSigned) const {
6151 if (Subtarget.is64Bit() && Type == MVT::i32)
6152 return true;
6153
6154 return IsSigned;
6155}
6156
6157bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
6158 // Return false to suppress the unnecessary extensions if the LibCall
6159 // argument or return value is a float narrower than GRLen on a soft FP ABI.
6160 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
6161 Type.getSizeInBits() < Subtarget.getGRLen()))
6162 return false;
6163 return true;
6164}
6165