//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that LoongArch uses to lower LLVM code into
// a selection DAG.
//
//===----------------------------------------------------------------------===//

#include "LoongArchISelLowering.h"
#include "LoongArch.h"
#include "LoongArchMachineFunctionInfo.h"
#include "LoongArchRegisterInfo.h"
#include "LoongArchSubtarget.h"
#include "MCTargetDesc/LoongArchBaseInfo.h"
#include "MCTargetDesc/LoongArchMCTargetDesc.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/RuntimeLibcallUtil.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsLoongArch.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include <llvm/Analysis/VectorUtils.h>

using namespace llvm;

#define DEBUG_TYPE "loongarch-isel-lowering"

STATISTIC(NumTailCalls, "Number of tail calls");

static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
                                  cl::desc("Trap on integer division by zero."),
                                  cl::init(false));

LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
                                                 const LoongArchSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  MVT GRLenVT = Subtarget.getGRLenVT();

  // Set up the register classes.

  addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
  if (Subtarget.hasBasicF())
    addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
  if (Subtarget.hasBasicD())
    addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);

  static const MVT::SimpleValueType LSXVTs[] = {
      MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
  static const MVT::SimpleValueType LASXVTs[] = {
      MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};

  if (Subtarget.hasExtLSX())
    for (MVT VT : LSXVTs)
      addRegisterClass(VT, &LoongArch::LSX128RegClass);

  if (Subtarget.hasExtLASX())
    for (MVT VT : LASXVTs)
      addRegisterClass(VT, &LoongArch::LASX256RegClass);

  // Set operations for LA32 and LA64.

  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
                   MVT::i1, Promote);

  setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom);
  setOperationAction(ISD::ROTL, GRLenVT, Expand);
  setOperationAction(ISD::CTPOP, GRLenVT, Expand);

  setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
                      ISD::JumpTable, ISD::GlobalTLSAddress},
                     GRLenVT, Custom);

  setOperationAction(ISD::EH_DWARF_CFA, GRLenVT, Custom);

  setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
  setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);

  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setOperationAction(ISD::PREFETCH, MVT::Other, Custom);

  // BITREV/REVB requires the 32S feature.
  if (STI.has32S()) {
    // Expand bitreverse.i16 with native-width bitrev and shift for now, until
    // we know which of sll and revb.2h is faster.
    setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
    setOperationAction(ISD::BITREVERSE, GRLenVT, Legal);

    // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
    // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
    // and i32 could still be byte-swapped relatively cheaply.
    setOperationAction(ISD::BSWAP, MVT::i16, Custom);
  } else {
    setOperationAction(ISD::BSWAP, GRLenVT, Expand);
    setOperationAction(ISD::CTTZ, GRLenVT, Expand);
    setOperationAction(ISD::CTLZ, GRLenVT, Expand);
    setOperationAction(ISD::ROTR, GRLenVT, Expand);
    setOperationAction(ISD::SELECT, GRLenVT, Custom);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  }

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, GRLenVT, Expand);
  setOperationAction(ISD::SELECT_CC, GRLenVT, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand);

  setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom);
  setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand);

  // Set operations for LA64 only.

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::ADD, MVT::i32, Custom);
    setOperationAction(ISD::SUB, MVT::i32, Custom);
    setOperationAction(ISD::SHL, MVT::i32, Custom);
    setOperationAction(ISD::SRA, MVT::i32, Custom);
    setOperationAction(ISD::SRL, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);
    setOperationAction(ISD::ROTR, MVT::i32, Custom);
    setOperationAction(ISD::ROTL, MVT::i32, Custom);
    setOperationAction(ISD::CTTZ, MVT::i32, Custom);
    setOperationAction(ISD::CTLZ, MVT::i32, Custom);
    setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
    setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom);
    setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);

    setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
    setOperationAction(ISD::BSWAP, MVT::i32, Custom);
    setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, MVT::i32,
                       Custom);
    setOperationAction(ISD::LROUND, MVT::i32, Custom);
  }

  // Set operations for LA32 only.

  if (!Subtarget.is64Bit()) {
    setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
    setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
    if (Subtarget.hasBasicD())
      setOperationAction(ISD::BITCAST, MVT::i64, Custom);
  }

  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  static const ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
      ISD::SETGE,  ISD::SETNE,  ISD::SETGT};

  // Set operations for 'F' feature.

  if (Subtarget.hasBasicF()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
    setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
    setCondCodeAction(FPCCToExpand, MVT::f32, Expand);

    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    setOperationAction(ISD::FMA, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
    setOperationAction(ISD::IS_FPCLASS, MVT::f32, Legal);
    setOperationAction(ISD::FSIN, MVT::f32, Expand);
    setOperationAction(ISD::FCOS, MVT::f32, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
    setOperationAction(ISD::FPOW, MVT::f32, Expand);
    setOperationAction(ISD::FREM, MVT::f32, Expand);
    setOperationAction(ISD::FP16_TO_FP, MVT::f32,
                       Subtarget.isSoftFPABI() ? LibCall : Custom);
    setOperationAction(ISD::FP_TO_FP16, MVT::f32,
                       Subtarget.isSoftFPABI() ? LibCall : Custom);
    setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
    setOperationAction(ISD::FP_TO_BF16, MVT::f32,
                       Subtarget.isSoftFPABI() ? LibCall : Custom);

    if (Subtarget.is64Bit())
      setOperationAction(ISD::FRINT, MVT::f32, Legal);

    if (!Subtarget.hasBasicD()) {
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
      if (Subtarget.is64Bit()) {
        setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
        setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
      }
    }
  }

  // Set operations for 'D' feature.

  if (Subtarget.hasBasicD()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
    setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    setCondCodeAction(FPCCToExpand, MVT::f64, Expand);

    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
    setOperationAction(ISD::FMA, MVT::f64, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    setOperationAction(ISD::IS_FPCLASS, MVT::f64, Legal);
    setOperationAction(ISD::FSIN, MVT::f64, Expand);
    setOperationAction(ISD::FCOS, MVT::f64, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
    setOperationAction(ISD::FPOW, MVT::f64, Expand);
    setOperationAction(ISD::FREM, MVT::f64, Expand);
    setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
    setOperationAction(ISD::FP_TO_FP16, MVT::f64,
                       Subtarget.isSoftFPABI() ? LibCall : Custom);
    setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
    setOperationAction(ISD::FP_TO_BF16, MVT::f64,
                       Subtarget.isSoftFPABI() ? LibCall : Custom);

    if (Subtarget.is64Bit())
      setOperationAction(ISD::FRINT, MVT::f64, Legal);
  }

  // Set operations for 'LSX' feature.

  if (Subtarget.hasExtLSX()) {
    for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
      // Expand all truncating stores and extending loads.
      for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
      // By default everything must be expanded. Then we will selectively turn
      // on ones that can be effectively codegen'd.
      for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
        setOperationAction(Op, VT, Expand);
    }

    for (MVT VT : LSXVTs) {
      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);

      setOperationAction(ISD::SETCC, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    }
    for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
      setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
      setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
                         Legal);
      setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
                         VT, Legal);
      setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
      setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
      setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
      setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
      setCondCodeAction(
          {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
          Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
      setOperationAction(ISD::ABDS, VT, Legal);
      setOperationAction(ISD::ABDU, VT, Legal);
    }
    for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
      setOperationAction(ISD::BITREVERSE, VT, Custom);
    for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
      setOperationAction(ISD::BSWAP, VT, Legal);
    for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
      setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
    }
    for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
      setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
      setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
      setOperationAction(ISD::FMA, VT, Legal);
      setOperationAction(ISD::FSQRT, VT, Legal);
      setOperationAction(ISD::FNEG, VT, Legal);
      setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
                         ISD::SETUGE, ISD::SETUGT},
                        VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
    }
    setOperationAction(ISD::CTPOP, GRLenVT, Legal);
    setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
    setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
    setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
    setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);

    for (MVT VT :
         {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
          MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
      setOperationAction(ISD::TRUNCATE, VT, Custom);
    }
  }

  // Set operations for 'LASX' feature.

  if (Subtarget.hasExtLASX()) {
    for (MVT VT : LASXVTs) {
      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);

      setOperationAction(ISD::SETCC, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    }
    for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
      setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
      setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
                         Legal);
      setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
                         VT, Legal);
      setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
      setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
      setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
      setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
      setCondCodeAction(
          {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
          Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
      setOperationAction(ISD::ABDS, VT, Legal);
      setOperationAction(ISD::ABDU, VT, Legal);
    }
    for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
      setOperationAction(ISD::BITREVERSE, VT, Custom);
    for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
      setOperationAction(ISD::BSWAP, VT, Legal);
    for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
      setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
    }
    for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
      setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
      setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
      setOperationAction(ISD::FMA, VT, Legal);
      setOperationAction(ISD::FSQRT, VT, Legal);
      setOperationAction(ISD::FNEG, VT, Legal);
      setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
                         ISD::SETUGE, ISD::SETUGT},
                        VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
    }
  }

  // Set DAG combine for LA32 and LA64.

  setTargetDAGCombine(ISD::AND);
  setTargetDAGCombine(ISD::OR);
  setTargetDAGCombine(ISD::SRL);
  setTargetDAGCombine(ISD::SETCC);

  // Set DAG combine for 'LSX' feature.

  if (Subtarget.hasExtLSX()) {
    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
    setTargetDAGCombine(ISD::BITCAST);
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(LoongArch::R3);

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());

  setMinCmpXchgSizeInBits(32);

  // Function alignments.
  setMinFunctionAlignment(Align(4));
  // Set preferred alignments.
  setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
  setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
  setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());

  // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
  if (Subtarget.hasLAMCAS())
    setMinCmpXchgSizeInBits(8);

  if (Subtarget.hasSCQ()) {
    setMaxAtomicSizeInBitsSupported(128);
    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
  }
}

bool LoongArchTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  // In order to maximise the opportunity for common subexpression elimination,
  // keep a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  return false;
}

SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
                                                SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::ATOMIC_FENCE:
    return lowerATOMIC_FENCE(Op, DAG);
  case ISD::EH_DWARF_CFA:
    return lowerEH_DWARF_CFA(Op, DAG);
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN:
    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID:
    return lowerINTRINSIC_VOID(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, /*IsSRA=*/true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, /*IsSRA=*/false);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::FP_TO_SINT:
    return lowerFP_TO_SINT(Op, DAG);
  case ISD::BITCAST:
    return lowerBITCAST(Op, DAG);
  case ISD::UINT_TO_FP:
    return lowerUINT_TO_FP(Op, DAG);
  case ISD::SINT_TO_FP:
    return lowerSINT_TO_FP(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::WRITE_REGISTER:
    return lowerWRITE_REGISTER(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::BUILD_VECTOR:
    return lowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return lowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::BITREVERSE:
    return lowerBITREVERSE(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return lowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::PREFETCH:
    return lowerPREFETCH(Op, DAG);
  case ISD::SELECT:
    return lowerSELECT(Op, DAG);
  case ISD::FP_TO_FP16:
    return lowerFP_TO_FP16(Op, DAG);
  case ISD::FP16_TO_FP:
    return lowerFP16_TO_FP(Op, DAG);
  case ISD::FP_TO_BF16:
    return lowerFP_TO_BF16(Op, DAG);
  case ISD::BF16_TO_FP:
    return lowerBF16_TO_FP(Op, DAG);
  }
  return SDValue();
}

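// ISD::PREFETCH carries (chain, address, rw, locality, cache type) operands;
// operand 4 is 1 for a data prefetch and 0 for an instruction prefetch, which
// we do not support, so in the latter case only the chain is kept.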
SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
                                               SelectionDAG &DAG) const {
  unsigned IsData = Op.getConstantOperandVal(4);

  // We don't support non-data prefetch.
  // Just preserve the chain.
  if (!IsData)
    return Op.getOperand(0);

  return Op;
}

// Return true if Val is equal to (setcc LHS, RHS, CC).
// Return false if Val is the inverse of (setcc LHS, RHS, CC).
// Otherwise, return std::nullopt.
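// For example, matchSetCC(a, b, SETLT, (setcc a, b, SETGE)) returns false,
// since SETGE is the integer inverse of SETLT, and matchSetCC(a, b, SETGT,
// (setcc b, a, SETLT)) returns true after swapping the operands of Val.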
static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
                                      ISD::CondCode CC, SDValue Val) {
  assert(Val->getOpcode() == ISD::SETCC);
  SDValue LHS2 = Val.getOperand(0);
  SDValue RHS2 = Val.getOperand(1);
  ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();

  if (LHS == LHS2 && RHS == RHS2) {
    if (CC == CC2)
      return true;
    if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
      return false;
  } else if (LHS == RHS2 && RHS == LHS2) {
    CC2 = ISD::getSetCCSwappedOperands(CC2);
    if (CC == CC2)
      return true;
    if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
      return false;
  }

  return std::nullopt;
}

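// Fold (select cond, x, y) into plain bit operations when one arm is the
// constant 0 or -1, or when cond and both arms are setccs. This relies on
// booleans being ZeroOrOneBooleanContent, e.g. (select c, -1, y) becomes
// (or (neg c), y) because (neg c) is either all-ones or zero.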
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
                                    const LoongArchSubtarget &Subtarget) {
  SDValue CondV = N->getOperand(0);
  SDValue TrueV = N->getOperand(1);
  SDValue FalseV = N->getOperand(2);
  MVT VT = N->getSimpleValueType(0);
  SDLoc DL(N);

  // (select c, -1, y) -> -c | y
  if (isAllOnesConstant(TrueV)) {
    SDValue Neg = DAG.getNegative(CondV, DL, VT);
    return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
  }
  // (select c, y, -1) -> (c-1) | y
  if (isAllOnesConstant(FalseV)) {
    SDValue Neg =
        DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
    return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
  }

  // (select c, 0, y) -> (c-1) & y
  if (isNullConstant(TrueV)) {
    SDValue Neg =
        DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
    return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
  }
  // (select c, y, 0) -> -c & y
  if (isNullConstant(FalseV)) {
    SDValue Neg = DAG.getNegative(CondV, DL, VT);
    return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
  }

  // select c, ~x, x --> xor -c, x
  if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
    const APInt &TrueVal = TrueV->getAsAPIntVal();
    const APInt &FalseVal = FalseV->getAsAPIntVal();
    if (~TrueVal == FalseVal) {
      SDValue Neg = DAG.getNegative(CondV, DL, VT);
      return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
    }
  }

  // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
  // when both truev and falsev are also setcc.
  if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
      FalseV.getOpcode() == ISD::SETCC) {
    SDValue LHS = CondV.getOperand(0);
    SDValue RHS = CondV.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();

    // (select x, x, y) -> x | y
    // (select !x, x, y) -> x & y
    if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
      return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
                         DAG.getFreeze(FalseV));
    }
    // (select x, y, x) -> x & y
    // (select !x, y, x) -> x | y
    if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
      return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
                         DAG.getFreeze(TrueV), FalseV);
    }
  }

  return SDValue();
}

// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
// For now we only consider the transformation profitable if `binOp(c0, c1)`
// ends up being `0` or `-1`. In such cases we can replace `select` with `and`.
// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
// than `c0`?
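// An example of the profitable case: `(and (select cond, x, 0), c1)` becomes
// `(select cond, (and x, c1), 0)` since `binOp(c0, c1)` = `(and 0, c1)` = 0,
// and combineSelectToBinOp can then turn the new select into an AND.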
static SDValue
foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
                                const LoongArchSubtarget &Subtarget) {
  unsigned SelOpNo = 0;
  SDValue Sel = BO->getOperand(0);
  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
    SelOpNo = 1;
    Sel = BO->getOperand(1);
  }

  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
    return SDValue();

  unsigned ConstSelOpNo = 1;
  unsigned OtherSelOpNo = 2;
  if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
    ConstSelOpNo = 2;
    OtherSelOpNo = 1;
  }
  SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
  ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
  if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
    return SDValue();

  SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
  ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
  if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
    return SDValue();

  SDLoc DL(Sel);
  EVT VT = BO->getValueType(0);

  SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
  if (SelOpNo == 1)
    std::swap(NewConstOps[0], NewConstOps[1]);

  SDValue NewConstOp =
      DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
  if (!NewConstOp)
    return SDValue();

  const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
  if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
    return SDValue();

  SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
  SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
  if (SelOpNo == 1)
    std::swap(NewNonConstOps[0], NewNonConstOps[1]);
  SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);

  SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
  SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
  return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
}

// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly by branches
// in the LoongArch ISA. May adjust compares to favor compare with 0 over
// compare with 1/-1.
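// For example, on LA64 the single-bit test (seteq (and x, 0x1000), 0) cannot
// use ANDI (the mask does not fit in 12 bits), so it is rewritten as
// (setge (shl x, 51), 0): the tested bit is moved into the sign bit and then
// compared against zero.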
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
                                    ISD::CondCode &CC, SelectionDAG &DAG) {
  // If this is a single bit test that can't be handled by ANDI, shift the
  // bit to be tested to the MSB and perform a signed compare with 0.
  if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
      LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
      isa<ConstantSDNode>(LHS.getOperand(1))) {
    uint64_t Mask = LHS.getConstantOperandVal(1);
    if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
      unsigned ShAmt = 0;
      if (isPowerOf2_64(Mask)) {
        CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
        ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
      } else {
        ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
      }

      LHS = LHS.getOperand(0);
      if (ShAmt != 0)
        LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
                          DAG.getConstant(ShAmt, DL, LHS.getValueType()));
      return;
    }
  }

  if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
    int64_t C = RHSC->getSExtValue();
    switch (CC) {
    default:
      break;
    case ISD::SETGT:
      // Convert X > -1 to X >= 0.
      if (C == -1) {
        RHS = DAG.getConstant(0, DL, RHS.getValueType());
        CC = ISD::SETGE;
        return;
      }
      break;
    case ISD::SETLT:
      // Convert X < 1 to 0 >= X.
      if (C == 1) {
        RHS = LHS;
        LHS = DAG.getConstant(0, DL, RHS.getValueType());
        CC = ISD::SETGE;
        return;
      }
      break;
    }
  }

  switch (CC) {
  default:
    break;
  case ISD::SETGT:
  case ISD::SETLE:
  case ISD::SETUGT:
  case ISD::SETULE:
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
    break;
  }
}

SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(0);
  SDValue TrueV = Op.getOperand(1);
  SDValue FalseV = Op.getOperand(2);
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  MVT GRLenVT = Subtarget.getGRLenVT();

  if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
    return V;

  if (Op.hasOneUse()) {
    unsigned UseOpc = Op->user_begin()->getOpcode();
    if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
      SDNode *BinOp = *Op->user_begin();
      if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
                                                           DAG, Subtarget)) {
        DAG.ReplaceAllUsesWith(BinOp, &NewSel);
        // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
        // may return a constant node and cause crash in lowerSELECT.
        if (NewSel.getOpcode() == ISD::SELECT)
          return lowerSELECT(NewSel, DAG);
        return NewSel;
      }
    }
  }

  // If the condition is not an integer SETCC which operates on GRLenVT, we need
  // to emit a LoongArchISD::SELECT_CC comparing the condition to zero. i.e.:
  // (select condv, truev, falsev)
  // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
  if (CondV.getOpcode() != ISD::SETCC ||
      CondV.getOperand(0).getSimpleValueType() != GRLenVT) {
    SDValue Zero = DAG.getConstant(0, DL, GRLenVT);
    SDValue SetNE = DAG.getCondCode(ISD::SETNE);

    SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};

    return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
  }

  // If the CondV is the output of a SETCC node which operates on GRLenVT
  // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
  // to take advantage of the integer compare+branch instructions. i.e.: (select
  // (setcc lhs, rhs, cc), truev, falsev)
  // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
  SDValue LHS = CondV.getOperand(0);
  SDValue RHS = CondV.getOperand(1);
  ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();

  // Special case for a select of 2 constants that have a difference of 1.
  // Normally this is done by DAGCombine, but if the select is introduced by
  // type legalization or op legalization, we miss it. Restricting to SETLT
  // case for now because that is what signed saturating add/sub need.
  // FIXME: We don't need the condition to be SETLT or even a SETCC,
  // but we would probably want to swap the true/false values if the condition
  // is SETGE/SETLE to avoid an XORI.
  if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
      CCVal == ISD::SETLT) {
    const APInt &TrueVal = TrueV->getAsAPIntVal();
    const APInt &FalseVal = FalseV->getAsAPIntVal();
    if (TrueVal - 1 == FalseVal)
      return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
    if (TrueVal + 1 == FalseVal)
      return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
  }

  translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
  // 1 < x ? x : 1 -> 0 < x ? x : 1
  if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
      RHS == TrueV && LHS == FalseV) {
    LHS = DAG.getConstant(0, DL, VT);
    // 0 <u x is the same as x != 0.
    if (CCVal == ISD::SETULT) {
      std::swap(LHS, RHS);
      CCVal = ISD::SETNE;
    }
  }

  // x <s -1 ? x : -1 -> x <s 0 ? x : -1
  if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
      RHS == FalseV) {
    RHS = DAG.getConstant(0, DL, VT);
  }

  SDValue TargetCC = DAG.getCondCode(CCVal);

  if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
    // (select (setcc lhs, rhs, CC), constant, falsev)
    // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
    std::swap(TrueV, FalseV);
    TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
  }

  SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
  return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
}

SDValue
LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT OpVT = Op.getSimpleValueType();

  SDValue Vector = DAG.getUNDEF(OpVT);
  SDValue Val = Op.getOperand(0);
  SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());

  return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
}

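// Lower BITREVERSE by bit-reversing each 64-bit chunk and then, for element
// types narrower than i64, shuffling the elements back into place. A sketch of
// the steps: bitcast the source to <N x i64>, apply BITREV_8B (for byte
// vectors) or a full 64-bit bit-reverse to every chunk, and finally emit a
// VECTOR_SHUFFLE that restores the original element order for i16/i32 vectors.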
SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  EVT ResTy = Op->getValueType(0);
  SDValue Src = Op->getOperand(0);
  SDLoc DL(Op);

  EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
  unsigned int OrigEltNum = ResTy.getVectorNumElements();
  unsigned int NewEltNum = NewVT.getVectorNumElements();

  SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);

  SmallVector<SDValue, 8> Ops;
  for (unsigned int i = 0; i < NewEltNum; i++) {
    SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
                             DAG.getConstant(i, DL, MVT::i64));
    unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
                         ? (unsigned)LoongArchISD::BITREV_8B
                         : (unsigned)ISD::BITREVERSE;
    Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
  }
  SDValue Res =
      DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));

  switch (ResTy.getSimpleVT().SimpleTy) {
  default:
    return SDValue();
  case MVT::v16i8:
  case MVT::v32i8:
    return Res;
  case MVT::v8i16:
  case MVT::v16i16:
  case MVT::v4i32:
  case MVT::v8i32: {
    SmallVector<int, 32> Mask;
    for (unsigned int i = 0; i < NewEltNum; i++)
      for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
        Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
    return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
  }
  }
}

// Widen element type to get a new mask value (if possible).
// For example:
//   shufflevector <4 x i32> %a, <4 x i32> %b,
//                 <4 x i32> <i32 6, i32 7, i32 2, i32 3>
// is equivalent to:
//   shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
// which can be lowered to:
//   VPACKOD_D vr0, vr0, vr1
static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
                                SDValue V1, SDValue V2, SelectionDAG &DAG) {
  unsigned EltBits = VT.getScalarSizeInBits();

  if (EltBits > 32 || EltBits == 1)
    return SDValue();

  SmallVector<int, 8> NewMask;
  if (widenShuffleMaskElts(Mask, NewMask)) {
    MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
                                        : MVT::getIntegerVT(EltBits * 2);
    MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
    if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
      SDValue NewV1 = DAG.getBitcast(NewVT, V1);
      SDValue NewV2 = DAG.getBitcast(NewVT, V2);
      return DAG.getBitcast(
          VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
    }
  }

  return SDValue();
}

/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
/// instructions.
// The function matches elements from one of the input vectors shuffled to the
// left or right with zeroable elements 'shifted in'. It handles both the
// strictly bit-wise element shifts and the byte shift across an entire 128-bit
// lane.
// Mostly copied from X86.
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
                               unsigned ScalarSizeInBits, ArrayRef<int> Mask,
                               int MaskOffset, const APInt &Zeroable) {
  int Size = Mask.size();
  unsigned SizeInBits = Size * ScalarSizeInBits;

  auto CheckZeros = [&](int Shift, int Scale, bool Left) {
    for (int i = 0; i < Size; i += Scale)
      for (int j = 0; j < Shift; ++j)
        if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
          return false;

    return true;
  };

  auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
                                        int Step = 1) {
    for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
      if (!(Mask[i] == -1 || Mask[i] == Low))
        return false;
    return true;
  };

  auto MatchShift = [&](int Shift, int Scale, bool Left) {
    for (int i = 0; i != Size; i += Scale) {
      unsigned Pos = Left ? i + Shift : i;
      unsigned Low = Left ? i : i + Shift;
      unsigned Len = Scale - Shift;
      if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
        return -1;
    }

    int ShiftEltBits = ScalarSizeInBits * Scale;
    bool ByteShift = ShiftEltBits > 64;
    Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
                  : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
    int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);

    // Normalize the scale for byte shifts to still produce an i64 element
    // type.
    Scale = ByteShift ? Scale / 2 : Scale;

    // We need to round trip through the appropriate type for the shift.
    MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
    ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
                        : MVT::getVectorVT(ShiftSVT, Size / Scale);
    return (int)ShiftAmt;
  };

  unsigned MaxWidth = 128;
  for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
    for (int Shift = 1; Shift != Scale; ++Shift)
      for (bool Left : {true, false})
        if (CheckZeros(Shift, Scale, Left)) {
          int ShiftAmt = MatchShift(Shift, Scale, Left);
          if (0 < ShiftAmt)
            return ShiftAmt;
        }

  // no match
  return -1;
}

/// Lower VECTOR_SHUFFLE as shift (if possible).
///
/// For example:
///   %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
///                      <4 x i32> <i32 4, i32 0, i32 1, i32 2>
/// is lowered to:
///   (VBSLL_V $v0, $v0, 4)
///
///   %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
///                      <4 x i32> <i32 4, i32 0, i32 4, i32 2>
/// is lowered to:
///   (VSLLI_D $v0, $v0, 32)
static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef<int> Mask,
                                          MVT VT, SDValue V1, SDValue V2,
                                          SelectionDAG &DAG,
                                          const APInt &Zeroable) {
  int Size = Mask.size();
  assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");

  MVT ShiftVT;
  SDValue V = V1;
  unsigned Opcode;

  // Try to match shuffle against V1 shift.
  int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
                                     Mask, 0, Zeroable);

  // If V1 failed, try to match shuffle against V2 shift.
  if (ShiftAmt < 0) {
    ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
                                   Mask, Size, Zeroable);
    V = V2;
  }

  if (ShiftAmt < 0)
    return SDValue();

  assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
         "Illegal integer vector type");
  V = DAG.getBitcast(ShiftVT, V);
  V = DAG.getNode(Opcode, DL, ShiftVT, V,
                  DAG.getConstant(ShiftAmt, DL, MVT::i64));
  return DAG.getBitcast(VT, V);
}

/// Determine whether a range fits a regular pattern of values.
/// This function accounts for the possibility of jumping over the End iterator.
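/// For example, fitsRegularPattern<int>(Begin, 2, End, 0, 2) checks that every
/// second element, starting from the first one, takes the values 0, 2, 4, ...,
/// treating -1 (undef) entries as matching any expected value.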
template <typename ValType>
static bool
fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
                   unsigned CheckStride,
                   typename SmallVectorImpl<ValType>::const_iterator End,
                   ValType ExpectedIndex, unsigned ExpectedIndexStride) {
  auto &I = Begin;

  while (I != End) {
    if (*I != -1 && *I != ExpectedIndex)
      return false;
    ExpectedIndex += ExpectedIndexStride;

    // Incrementing past End is undefined behaviour so we must increment one
    // step at a time and check for End at each step.
    for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
      ; // Empty loop body.
  }
  return true;
}

/// Compute whether each element of a shuffle is zeroable.
///
/// A "zeroable" vector shuffle element is one which can be lowered to zero.
static void computeZeroableShuffleElements(ArrayRef<int> Mask, SDValue V1,
                                           SDValue V2, APInt &KnownUndef,
                                           APInt &KnownZero) {
  int Size = Mask.size();
  KnownUndef = KnownZero = APInt::getZero(Size);

  V1 = peekThroughBitcasts(V1);
  V2 = peekThroughBitcasts(V2);

  bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
  bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());

  int VectorSizeInBits = V1.getValueSizeInBits();
  int ScalarSizeInBits = VectorSizeInBits / Size;
  assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
  (void)ScalarSizeInBits;

  for (int i = 0; i < Size; ++i) {
    int M = Mask[i];
    if (M < 0) {
      KnownUndef.setBit(i);
      continue;
    }
    if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
      KnownZero.setBit(i);
      continue;
    }
  }
}

/// Test whether a shuffle mask is equivalent within each sub-lane.
///
/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
/// non-trivial to compute in the face of undef lanes. The representation is
/// suitable for use with existing 128-bit shuffles as entries from the second
/// vector have been remapped to [LaneSize, 2*LaneSize).
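/// For example, for the 256-bit type v8i32 and mask <0, 9, 2, 11, 4, 13, 6, 15>
/// the pattern repeats across the two 128-bit lanes, so \p RepeatedMask is set
/// to <0, 5, 2, 7>.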
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
                                  ArrayRef<int> Mask,
                                  SmallVectorImpl<int> &RepeatedMask) {
  auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
  RepeatedMask.assign(LaneSize, -1);
  int Size = Mask.size();
  for (int i = 0; i < Size; ++i) {
    assert(Mask[i] == -1 || Mask[i] >= 0);
    if (Mask[i] < 0)
      continue;
    if ((Mask[i] % Size) / LaneSize != i / LaneSize)
      // This entry crosses lanes, so there is no way to model this shuffle.
      return false;

    // Ok, handle the in-lane shuffles by detecting if and when they repeat.
    // Adjust second vector indices to start at LaneSize instead of Size.
    int LocalM =
        Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
    if (RepeatedMask[i % LaneSize] < 0)
      // This is the first non-undef entry in this slot of a 128-bit lane.
      RepeatedMask[i % LaneSize] = LocalM;
    else if (RepeatedMask[i % LaneSize] != LocalM)
      // Found a mismatch with the repeated mask.
      return false;
  }
  return true;
}

/// Attempts to match vector shuffle as byte rotation.
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
                                    ArrayRef<int> Mask) {

  SDValue Lo, Hi;
  SmallVector<int, 16> RepeatedMask;

  if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
    return -1;

  int NumElts = RepeatedMask.size();
  int Rotation = 0;
  int Scale = 16 / NumElts;

  for (int i = 0; i < NumElts; ++i) {
    int M = RepeatedMask[i];
    assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
           "Unexpected mask index.");
    if (M < 0)
      continue;

    // Determine where a rotated vector would have started.
    int StartIdx = i - (M % NumElts);
    if (StartIdx == 0)
      return -1;

    // If we found the tail of a vector the rotation must be the missing
    // front. If we found the head of a vector, it must be how much of the
    // head.
    int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;

    if (Rotation == 0)
      Rotation = CandidateRotation;
    else if (Rotation != CandidateRotation)
      return -1;

    // Compute which value this mask is pointing at.
    SDValue MaskV = M < NumElts ? V1 : V2;

    // Compute which of the two target values this index should be assigned
    // to. This reflects whether the high elements are remaining or the low
    // elements are remaining.
    SDValue &TargetV = StartIdx < 0 ? Hi : Lo;

    // Either set up this value if we've not encountered it before, or check
    // that it remains consistent.
    if (!TargetV)
      TargetV = MaskV;
    else if (TargetV != MaskV)
      return -1;
  }

  // Check that we successfully analyzed the mask, and normalize the results.
  assert(Rotation != 0 && "Failed to locate a viable rotation!");
  assert((Lo || Hi) && "Failed to find a rotated input vector!");
  if (!Lo)
    Lo = Hi;
  else if (!Hi)
    Hi = Lo;

  V1 = Lo;
  V2 = Hi;

  return Rotation * Scale;
}

/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
///
/// For example:
///   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
///                            <2 x i32> <i32 3, i32 0>
/// is lowered to:
///   (VBSRL_V $v1, $v1, 8)
///   (VBSLL_V $v0, $v0, 8)
///   (VOR_V $v0, $v0, $v1)
static SDValue lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL,
                                               ArrayRef<int> Mask, MVT VT,
                                               SDValue V1, SDValue V2,
                                               SelectionDAG &DAG) {

  SDValue Lo = V1, Hi = V2;
  int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
  if (ByteRotation <= 0)
    return SDValue();

  MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
  Lo = DAG.getBitcast(ByteVT, Lo);
  Hi = DAG.getBitcast(ByteVT, Hi);

  int LoByteShift = 16 - ByteRotation;
  int HiByteShift = ByteRotation;

  SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
                                DAG.getConstant(LoByteShift, DL, MVT::i64));
  SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
                                DAG.getConstant(HiByteShift, DL, MVT::i64));
  return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
}

/// Lower VECTOR_SHUFFLE as ZERO_EXTEND or ANY_EXTEND (if possible).
///
/// For example:
///   %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
///                      <4 x i32> <i32 0, i32 4, i32 1, i32 4>
///   %3 = bitcast <4 x i32> %2 to <2 x i64>
/// is lowered to:
///   (VREPLI $v1, 0)
///   (VILVL $v0, $v1, $v0)
static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL,
                                                    ArrayRef<int> Mask, MVT VT,
                                                    SDValue V1, SDValue V2,
                                                    SelectionDAG &DAG,
                                                    const APInt &Zeroable) {
  int Bits = VT.getSizeInBits();
  int EltBits = VT.getScalarSizeInBits();
  int NumElements = VT.getVectorNumElements();

  if (Zeroable.isAllOnes())
    return DAG.getConstant(0, DL, VT);

  // Define a helper function to check a particular ext-scale and lower to it if
  // valid.
  auto Lower = [&](int Scale) -> SDValue {
    SDValue InputV;
    bool AnyExt = true;
    int Offset = 0;
    for (int i = 0; i < NumElements; i++) {
      int M = Mask[i];
      if (M < 0)
        continue;
      if (i % Scale != 0) {
        // Each of the extended elements needs to be zeroable.
        if (!Zeroable[i])
          return SDValue();

        AnyExt = false;
        continue;
      }

      // Each of the base elements needs to be consecutive indices into the
      // same input vector.
      SDValue V = M < NumElements ? V1 : V2;
      M = M % NumElements;
      if (!InputV) {
        InputV = V;
        Offset = M - (i / Scale);

        // These offsets can't be handled.
        if (Offset % (NumElements / Scale))
          return SDValue();
      } else if (InputV != V)
        return SDValue();

      if (M != (Offset + (i / Scale)))
        return SDValue(); // Non-consecutive strided elements.
    }

    // If we fail to find an input, we have a zero-shuffle which should always
    // have already been handled.
    if (!InputV)
      return SDValue();

    do {
      unsigned VilVLoHi = LoongArchISD::VILVL;
      if (Offset >= (NumElements / 2)) {
        VilVLoHi = LoongArchISD::VILVH;
        Offset -= (NumElements / 2);
      }

      MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
      SDValue Ext =
          AnyExt ? DAG.getFreeze(InputV) : DAG.getConstant(0, DL, InputVT);
      InputV = DAG.getBitcast(InputVT, InputV);
      InputV = DAG.getNode(VilVLoHi, DL, InputVT, Ext, InputV);
      Scale /= 2;
      EltBits *= 2;
      NumElements /= 2;
    } while (Scale > 1);
    return DAG.getBitcast(VT, InputV);
  };

  // Each iteration, try extending the elements half as much, but into twice as
  // many elements.
  for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
       NumExtElements *= 2) {
    if (SDValue V = Lower(NumElements / NumExtElements))
      return V;
  }
  return SDValue();
}

/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
///
/// VREPLVEI performs vector broadcast based on an element specified by an
/// integer immediate, with its mask being similar to:
///   <x, x, x, ...>
/// where x is any valid index.
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above form.
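/// For example, a v4i32 mask <1, 1, 1, 1> (or <1, -1, 1, -1>) broadcasts
/// element 1 of the first operand and is lowered to (VREPLVEI_W $v0, $v0, 1).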
1347static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask,
1348 MVT VT, SDValue V1, SDValue V2,
1349 SelectionDAG &DAG) {
1350 int SplatIndex = -1;
1351 for (const auto &M : Mask) {
1352 if (M != -1) {
1353 SplatIndex = M;
1354 break;
1355 }
1356 }
1357
1358 if (SplatIndex == -1)
1359 return DAG.getUNDEF(VT);
1360
1361 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1362 if (fitsRegularPattern<int>(Begin: Mask.begin(), CheckStride: 1, End: Mask.end(), ExpectedIndex: SplatIndex, ExpectedIndexStride: 0)) {
1363 APInt Imm(64, SplatIndex);
1364 return DAG.getNode(Opcode: LoongArchISD::VREPLVEI, DL, VT, N1: V1,
1365 N2: DAG.getConstant(Val: Imm, DL, VT: MVT::i64));
1366 }
1367
1368 return SDValue();
1369}
1370
1371/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
1372///
1373/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
1374/// elements according to a <4 x i2> constant (encoded as an integer immediate).
1375///
1376/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
1377/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
1378/// When undef's appear they are treated as if they were whatever value is
1379/// necessary in order to fit the above forms.
1380///
1381/// For example:
1382/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
1383/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
1384/// i32 7, i32 6, i32 5, i32 4>
1385/// is lowered to:
1386/// (VSHUF4I_H $v0, $v1, 27)
1387/// where the 27 comes from:
1388/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
1389static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
1390 MVT VT, SDValue V1, SDValue V2,
1391 SelectionDAG &DAG) {
1392
1393 unsigned SubVecSize = 4;
1394 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1395 SubVecSize = 2;
1396
1397 int SubMask[4] = {-1, -1, -1, -1};
1398 for (unsigned i = 0; i < SubVecSize; ++i) {
1399 for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
1400 int M = Mask[j];
1401
1402 // Convert from vector index to 4-element subvector index
1403 // If an index refers to an element outside of the subvector then give up
1404 if (M != -1) {
1405 M -= 4 * (j / SubVecSize);
1406 if (M < 0 || M >= 4)
1407 return SDValue();
1408 }
1409
1410 // If the submask entry is still undef, take the current mask value.
1411 // Note that it might still be undef if the current value is also undef
1412 if (SubMask[i] == -1)
1413 SubMask[i] = M;
1414 // Check that non-undef values are the same as in the mask. If they
1415 // aren't then give up
1416 else if (M != -1 && M != SubMask[i])
1417 return SDValue();
1418 }
1419 }
1420
1421 // Calculate the immediate. Replace any remaining undefs with zero
1422 APInt Imm(64, 0);
1423 for (int i = SubVecSize - 1; i >= 0; --i) {
1424 int M = SubMask[i];
1425
1426 if (M == -1)
1427 M = 0;
1428
1429 Imm <<= 2;
1430 Imm |= M & 0x3;
1431 }
1432
1433 // Return vshuf4i.d
1434 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1435 return DAG.getNode(Opcode: LoongArchISD::VSHUF4I, DL, VT, N1: V1, N2: V2,
1436 N3: DAG.getConstant(Val: Imm, DL, VT: MVT::i64));
1437
1438 return DAG.getNode(Opcode: LoongArchISD::VSHUF4I, DL, VT, N1: V1,
1439 N2: DAG.getConstant(Val: Imm, DL, VT: MVT::i64));
1440}
1441
1442/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
1443///
1444/// VPACKEV interleaves the even elements from each vector.
1445///
1446/// It is possible to lower into VPACKEV when the mask consists of two of the
1447/// following forms interleaved:
1448/// <0, 2, 4, ...>
1449/// <n, n+2, n+4, ...>
1450/// where n is the number of elements in the vector.
1451/// For example:
1452/// <0, 0, 2, 2, 4, 4, ...>
1453/// <0, n, 2, n+2, 4, n+4, ...>
1454///
1455/// When undef's appear in the mask they are treated as if they were whatever
1456/// value is necessary in order to fit the above forms.
1457static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
1458 MVT VT, SDValue V1, SDValue V2,
1459 SelectionDAG &DAG) {
1460
1461 const auto &Begin = Mask.begin();
1462 const auto &End = Mask.end();
1463 SDValue OriV1 = V1, OriV2 = V2;
1464
1465 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 2))
1466 V1 = OriV1;
1467 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2))
1468 V1 = OriV2;
1469 else
1470 return SDValue();
1471
1472 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 2))
1473 V2 = OriV1;
1474 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2))
1475 V2 = OriV2;
1476 else
1477 return SDValue();
1478
1479 return DAG.getNode(Opcode: LoongArchISD::VPACKEV, DL, VT, N1: V2, N2: V1);
1480}
1481
1482/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
1483///
1484/// VPACKOD interleaves the odd elements from each vector.
1485///
1486/// It is possible to lower into VPACKOD when the mask consists of two of the
1487/// following forms interleaved:
1488/// <1, 3, 5, ...>
1489/// <n+1, n+3, n+5, ...>
1490/// where n is the number of elements in the vector.
1491/// For example:
1492/// <1, 1, 3, 3, 5, 5, ...>
1493/// <1, n+1, 3, n+3, 5, n+5, ...>
1494///
1495/// When undef's appear in the mask they are treated as if they were whatever
1496/// value is necessary in order to fit the above forms.
1497static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
1498 MVT VT, SDValue V1, SDValue V2,
1499 SelectionDAG &DAG) {
1500
1501 const auto &Begin = Mask.begin();
1502 const auto &End = Mask.end();
1503 SDValue OriV1 = V1, OriV2 = V2;
1504
1505 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 1, ExpectedIndexStride: 2))
1506 V1 = OriV1;
1507 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2))
1508 V1 = OriV2;
1509 else
1510 return SDValue();
1511
1512 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 1, ExpectedIndexStride: 2))
1513 V2 = OriV1;
1514 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2))
1515 V2 = OriV2;
1516 else
1517 return SDValue();
1518
1519 return DAG.getNode(Opcode: LoongArchISD::VPACKOD, DL, VT, N1: V2, N2: V1);
1520}
1521
1522/// Lower VECTOR_SHUFFLE into VILVH (if possible).
1523///
1524/// VILVH interleaves consecutive elements from the left (highest-indexed) half
1525/// of each vector.
1526///
1527/// It is possible to lower into VILVH when the mask consists of two of the
1528/// following forms interleaved:
1529/// <x, x+1, x+2, ...>
1530/// <n+x, n+x+1, n+x+2, ...>
1531/// where n is the number of elements in the vector and x is half n.
1532/// For example:
1533/// <x, x, x+1, x+1, x+2, x+2, ...>
1534/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
1535///
1536/// When undef's appear in the mask they are treated as if they were whatever
1537/// value is necessary in order to fit the above forms.
1538static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef<int> Mask,
1539 MVT VT, SDValue V1, SDValue V2,
1540 SelectionDAG &DAG) {
1541
1542 const auto &Begin = Mask.begin();
1543 const auto &End = Mask.end();
1544 unsigned HalfSize = Mask.size() / 2;
1545 SDValue OriV1 = V1, OriV2 = V2;
1546
1547 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1))
1548 V1 = OriV1;
1549 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 1))
1550 V1 = OriV2;
1551 else
1552 return SDValue();
1553
1554 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1))
1555 V2 = OriV1;
1556 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size() + HalfSize,
1557 ExpectedIndexStride: 1))
1558 V2 = OriV2;
1559 else
1560 return SDValue();
1561
1562 return DAG.getNode(Opcode: LoongArchISD::VILVH, DL, VT, N1: V2, N2: V1);
1563}
1564
1565/// Lower VECTOR_SHUFFLE into VILVL (if possible).
1566///
1567/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
1568/// of each vector.
1569///
1570/// It is possible to lower into VILVL when the mask consists of two of the
1571/// following forms interleaved:
1572/// <0, 1, 2, ...>
1573/// <n, n+1, n+2, ...>
1574/// where n is the number of elements in the vector.
1575/// For example:
1576/// <0, 0, 1, 1, 2, 2, ...>
1577/// <0, n, 1, n+1, 2, n+2, ...>
1578///
1579/// When undef's appear in the mask they are treated as if they were whatever
1580/// value is necessary in order to fit the above forms.
1581static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask,
1582 MVT VT, SDValue V1, SDValue V2,
1583 SelectionDAG &DAG) {
1584
1585 const auto &Begin = Mask.begin();
1586 const auto &End = Mask.end();
1587 SDValue OriV1 = V1, OriV2 = V2;
1588
1589 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 1))
1590 V1 = OriV1;
1591 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 1))
1592 V1 = OriV2;
1593 else
1594 return SDValue();
1595
1596 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 1))
1597 V2 = OriV1;
1598 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 1))
1599 V2 = OriV2;
1600 else
1601 return SDValue();
1602
1603 return DAG.getNode(Opcode: LoongArchISD::VILVL, DL, VT, N1: V2, N2: V1);
1604}
1605
1606/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
1607///
1608/// VPICKEV copies the even elements of each vector into the result vector.
1609///
1610/// It is possible to lower into VPICKEV when the mask consists of two of the
1611/// following forms concatenated:
1612/// <0, 2, 4, ...>
1613/// <n, n+2, n+4, ...>
1614/// where n is the number of elements in the vector.
1615/// For example:
1616/// <0, 2, 4, ..., 0, 2, 4, ...>
1617/// <0, 2, 4, ..., n, n+2, n+4, ...>
1618///
1619/// When undef's appear in the mask they are treated as if they were whatever
1620/// value is necessary in order to fit the above forms.
1621static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
1622 MVT VT, SDValue V1, SDValue V2,
1623 SelectionDAG &DAG) {
1624
1625 const auto &Begin = Mask.begin();
1626 const auto &Mid = Mask.begin() + Mask.size() / 2;
1627 const auto &End = Mask.end();
1628 SDValue OriV1 = V1, OriV2 = V2;
1629
1630 if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: 0, ExpectedIndexStride: 2))
1631 V1 = OriV1;
1632 else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2))
1633 V1 = OriV2;
1634 else
1635 return SDValue();
1636
1637 if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: 0, ExpectedIndexStride: 2))
1638 V2 = OriV1;
1639 else if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2))
1640 V2 = OriV2;
1641
1642 else
1643 return SDValue();
1644
1645 return DAG.getNode(Opcode: LoongArchISD::VPICKEV, DL, VT, N1: V2, N2: V1);
1646}
1647
1648/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
1649///
1650/// VPICKOD copies the odd elements of each vector into the result vector.
1651///
1652/// It is possible to lower into VPICKOD when the mask consists of two of the
1653/// following forms concatenated:
1654/// <1, 3, 5, ...>
1655/// <n+1, n+3, n+5, ...>
1656/// where n is the number of elements in the vector.
1657/// For example:
1658/// <1, 3, 5, ..., 1, 3, 5, ...>
1659/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
1660///
1661/// When undef's appear in the mask they are treated as if they were whatever
1662/// value is necessary in order to fit the above forms.
1663static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
1664 MVT VT, SDValue V1, SDValue V2,
1665 SelectionDAG &DAG) {
1666
1667 const auto &Begin = Mask.begin();
1668 const auto &Mid = Mask.begin() + Mask.size() / 2;
1669 const auto &End = Mask.end();
1670 SDValue OriV1 = V1, OriV2 = V2;
1671
1672 if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: 1, ExpectedIndexStride: 2))
1673 V1 = OriV1;
1674 else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2))
1675 V1 = OriV2;
1676 else
1677 return SDValue();
1678
1679 if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: 1, ExpectedIndexStride: 2))
1680 V2 = OriV1;
1681 else if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2))
1682 V2 = OriV2;
1683 else
1684 return SDValue();
1685
1686 return DAG.getNode(Opcode: LoongArchISD::VPICKOD, DL, VT, N1: V2, N2: V1);
1687}
1688
1689/// Lower VECTOR_SHUFFLE into VSHUF.
1690///
1691/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
1692/// adding it as an operand to the resulting VSHUF.
1693static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
1694 MVT VT, SDValue V1, SDValue V2,
1695 SelectionDAG &DAG) {
1696
1697 SmallVector<SDValue, 16> Ops;
1698 for (auto M : Mask)
1699 Ops.push_back(Elt: DAG.getConstant(Val: M, DL, VT: MVT::i64));
1700
1701 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
1702 SDValue MaskVec = DAG.getBuildVector(VT: MaskVecTy, DL, Ops);
1703
1704 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
1705 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
1706 // VSHUF concatenates the vectors in a bitwise fashion:
1707 // <0b00, 0b01> + <0b10, 0b11> ->
1708 // 0b0100 + 0b1110 -> 0b01001110
1709 // <0b10, 0b11, 0b00, 0b01>
1710 // We must therefore swap the operands to get the correct result.
1711 return DAG.getNode(Opcode: LoongArchISD::VSHUF, DL, VT, N1: MaskVec, N2: V2, N3: V1);
1712}
1713
1714/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
1715///
1716/// This routine breaks down the specific type of 128-bit shuffle and
1717/// dispatches to the lowering routines accordingly.
1718static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1719 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1720 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
1721 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
1722 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
1723 "Vector type is unsupported for lsx!");
1724 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
1725 "Two operands have different types!");
1726 assert(VT.getVectorNumElements() == Mask.size() &&
1727 "Unexpected mask size for shuffle!");
1728 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
1729
1730 APInt KnownUndef, KnownZero;
1731 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
1732 APInt Zeroable = KnownUndef | KnownZero;
1733
1734 SDValue Result;
1735 // TODO: Add more comparison patterns.
1736 if (V2.isUndef()) {
1737 if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG)))
1738 return Result;
1739 if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
1740 return Result;
1741
1742 // TODO: The commented-out assignment below may be enabled in the future to
1743 // better match the pattern for instruction selection.
1744 /* V2 = V1; */
1745 }
1746
1747 // For better performance, it is recommended not to change the order of
1748 // these pattern comparisons.
1749 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
1750 return Result;
1751 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
1752 return Result;
1753 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
1754 return Result;
1755 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
1756 return Result;
1757 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
1758 return Result;
1759 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
1760 return Result;
1761 if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
1762 (Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
1763 return Result;
1764 if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
1765 Zeroable)))
1766 return Result;
1767 if ((Result =
1768 lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Zeroable)))
1769 return Result;
1770 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG)))
1771 return Result;
1772 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
1773 return NewShuffle;
1774 if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
1775 return Result;
1776 return SDValue();
1777}
1778
1779/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
1780///
1781 /// It is an XVREPLVEI when the mask is:
1782 /// <x, x, x, ..., x+n, x+n, x+n, ...>
1783 /// where x is repeated n times and n is half the length of the vector.
1784///
1785/// When undef's appear in the mask they are treated as if they were whatever
1786/// value is necessary in order to fit the above form.
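///
/// For example (illustrative), the single-input v8i32 mask:
///   <1, 1, 1, 1, 5, 5, 5, 5>
/// fits this form with x = 1 and n = 4.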
1787static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL,
1788 ArrayRef<int> Mask, MVT VT,
1789 SDValue V1, SDValue V2,
1790 SelectionDAG &DAG) {
1791 int SplatIndex = -1;
1792 for (const auto &M : Mask) {
1793 if (M != -1) {
1794 SplatIndex = M;
1795 break;
1796 }
1797 }
1798
1799 if (SplatIndex == -1)
1800 return DAG.getUNDEF(VT);
1801
1802 const auto &Begin = Mask.begin();
1803 const auto &End = Mask.end();
1804 unsigned HalfSize = Mask.size() / 2;
1805
1806 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1807 if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: End - HalfSize, ExpectedIndex: SplatIndex, ExpectedIndexStride: 0) &&
1808 fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 1, End, ExpectedIndex: SplatIndex + HalfSize,
1809 ExpectedIndexStride: 0)) {
1810 APInt Imm(64, SplatIndex);
1811 return DAG.getNode(Opcode: LoongArchISD::VREPLVEI, DL, VT, N1: V1,
1812 N2: DAG.getConstant(Val: Imm, DL, VT: MVT::i64));
1813 }
1814
1815 return SDValue();
1816}
1817
1818/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
1819static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
1820 MVT VT, SDValue V1, SDValue V2,
1821 SelectionDAG &DAG) {
1822 // When the mask size is less than or equal to 4, lower-cost instructions
1823 // may be used.
1824 if (Mask.size() <= 4)
1825 return SDValue();
1826 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG);
1827}
1828
1829/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
1830static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
1831 MVT VT, SDValue V1, SDValue V2,
1832 SelectionDAG &DAG) {
1833 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
1834}
1835
1836/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
1837static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
1838 MVT VT, SDValue V1, SDValue V2,
1839 SelectionDAG &DAG) {
1840 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
1841}
1842
1843/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
1844static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
1845 MVT VT, SDValue V1, SDValue V2,
1846 SelectionDAG &DAG) {
1847
1848 const auto &Begin = Mask.begin();
1849 const auto &End = Mask.end();
1850 unsigned HalfSize = Mask.size() / 2;
1851 unsigned LeftSize = HalfSize / 2;
1852 SDValue OriV1 = V1, OriV2 = V2;
1853
1854 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize, ExpectedIndex: HalfSize - LeftSize,
1855 ExpectedIndexStride: 1) &&
1856 fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize + LeftSize, ExpectedIndexStride: 1))
1857 V1 = OriV1;
1858 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize,
1859 ExpectedIndex: Mask.size() + HalfSize - LeftSize, ExpectedIndexStride: 1) &&
1860 fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End,
1861 ExpectedIndex: Mask.size() + HalfSize + LeftSize, ExpectedIndexStride: 1))
1862 V1 = OriV2;
1863 else
1864 return SDValue();
1865
1866 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize, ExpectedIndex: HalfSize - LeftSize,
1867 ExpectedIndexStride: 1) &&
1868 fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize + LeftSize,
1869 ExpectedIndexStride: 1))
1870 V2 = OriV1;
1871 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize,
1872 ExpectedIndex: Mask.size() + HalfSize - LeftSize, ExpectedIndexStride: 1) &&
1873 fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End,
1874 ExpectedIndex: Mask.size() + HalfSize + LeftSize, ExpectedIndexStride: 1))
1875 V2 = OriV2;
1876 else
1877 return SDValue();
1878
1879 return DAG.getNode(Opcode: LoongArchISD::VILVH, DL, VT, N1: V2, N2: V1);
1880}
1881
1882/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
1883static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
1884 MVT VT, SDValue V1, SDValue V2,
1885 SelectionDAG &DAG) {
1886
1887 const auto &Begin = Mask.begin();
1888 const auto &End = Mask.end();
1889 unsigned HalfSize = Mask.size() / 2;
1890 SDValue OriV1 = V1, OriV2 = V2;
1891
1892 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize, ExpectedIndex: 0, ExpectedIndexStride: 1) &&
1893 fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1))
1894 V1 = OriV1;
1895 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize, ExpectedIndex: Mask.size(), ExpectedIndexStride: 1) &&
1896 fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End,
1897 ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 1))
1898 V1 = OriV2;
1899 else
1900 return SDValue();
1901
1902 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize, ExpectedIndex: 0, ExpectedIndexStride: 1) &&
1903 fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1))
1904 V2 = OriV1;
1905 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize, ExpectedIndex: Mask.size(),
1906 ExpectedIndexStride: 1) &&
1907 fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End,
1908 ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 1))
1909 V2 = OriV2;
1910 else
1911 return SDValue();
1912
1913 return DAG.getNode(Opcode: LoongArchISD::VILVL, DL, VT, N1: V2, N2: V1);
1914}
1915
1916/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
1917static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
1918 MVT VT, SDValue V1, SDValue V2,
1919 SelectionDAG &DAG) {
1920
1921 const auto &Begin = Mask.begin();
1922 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
1923 const auto &Mid = Mask.begin() + Mask.size() / 2;
1924 const auto &RightMid = Mask.end() - Mask.size() / 4;
1925 const auto &End = Mask.end();
1926 unsigned HalfSize = Mask.size() / 2;
1927 SDValue OriV1 = V1, OriV2 = V2;
1928
1929 if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: 0, ExpectedIndexStride: 2) &&
1930 fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: HalfSize, ExpectedIndexStride: 2))
1931 V1 = OriV1;
1932 else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2) &&
1933 fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 2))
1934 V1 = OriV2;
1935 else
1936 return SDValue();
1937
1938 if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: 0, ExpectedIndexStride: 2) &&
1939 fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 2))
1940 V2 = OriV1;
1941 else if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2) &&
1942 fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 2))
1943 V2 = OriV2;
1944
1945 else
1946 return SDValue();
1947
1948 return DAG.getNode(Opcode: LoongArchISD::VPICKEV, DL, VT, N1: V2, N2: V1);
1949}
1950
1951/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
1952static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
1953 MVT VT, SDValue V1, SDValue V2,
1954 SelectionDAG &DAG) {
1955
1956 const auto &Begin = Mask.begin();
1957 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
1958 const auto &Mid = Mask.begin() + Mask.size() / 2;
1959 const auto &RightMid = Mask.end() - Mask.size() / 4;
1960 const auto &End = Mask.end();
1961 unsigned HalfSize = Mask.size() / 2;
1962 SDValue OriV1 = V1, OriV2 = V2;
1963
1964 if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: 1, ExpectedIndexStride: 2) &&
1965 fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: HalfSize + 1, ExpectedIndexStride: 2))
1966 V1 = OriV1;
1967 else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2) &&
1968 fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: Mask.size() + HalfSize + 1,
1969 ExpectedIndexStride: 2))
1970 V1 = OriV2;
1971 else
1972 return SDValue();
1973
1974 if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: 1, ExpectedIndexStride: 2) &&
1975 fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: HalfSize + 1, ExpectedIndexStride: 2))
1976 V2 = OriV1;
1977 else if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2) &&
1978 fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: Mask.size() + HalfSize + 1,
1979 ExpectedIndexStride: 2))
1980 V2 = OriV2;
1981 else
1982 return SDValue();
1983
1984 return DAG.getNode(Opcode: LoongArchISD::VPICKOD, DL, VT, N1: V2, N2: V1);
1985}
1986
1987/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
1988static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
1989 MVT VT, SDValue V1, SDValue V2,
1990 SelectionDAG &DAG) {
1991
1992 int MaskSize = Mask.size();
1993 int HalfSize = Mask.size() / 2;
1994 const auto &Begin = Mask.begin();
1995 const auto &Mid = Mask.begin() + HalfSize;
1996 const auto &End = Mask.end();
1997
1998 // VECTOR_SHUFFLE concatenates the vectors:
1999 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
2000 // shuffling ->
2001 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
2002 //
2003 // XVSHUF concatenates the vectors:
2004 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
2005 // shuffling ->
2006 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
2007 SmallVector<SDValue, 8> MaskAlloc;
2008 for (auto it = Begin; it < Mid; it++) {
2009 if (*it < 0) // UNDEF
2010 MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: 0, DL, VT: MVT::i64));
2011 else if ((*it >= 0 && *it < HalfSize) ||
2012 (*it >= MaskSize && *it < MaskSize + HalfSize)) {
2013 int M = *it < HalfSize ? *it : *it - HalfSize;
2014 MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: M, DL, VT: MVT::i64));
2015 } else
2016 return SDValue();
2017 }
2018 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
2019
2020 for (auto it = Mid; it < End; it++) {
2021 if (*it < 0) // UNDEF
2022 MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: 0, DL, VT: MVT::i64));
2023 else if ((*it >= HalfSize && *it < MaskSize) ||
2024 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
2025 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
2026 MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: M, DL, VT: MVT::i64));
2027 } else
2028 return SDValue();
2029 }
2030 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
2031
2032 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2033 SDValue MaskVec = DAG.getBuildVector(VT: MaskVecTy, DL, Ops: MaskAlloc);
2034 return DAG.getNode(Opcode: LoongArchISD::VSHUF, DL, VT, N1: MaskVec, N2: V2, N3: V1);
2035}
2036
2037/// Shuffle vectors by lane to generate more optimized instructions.
2038/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
2039///
2040 /// Therefore, except for the following four cases, all other cases are regarded
2041 /// as cross-lane shuffles, for which optimization is relatively limited.
2042 ///
2043 /// - Shuffle high, low lanes of the two input vectors
2044 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
2045 /// - Shuffle low, high lanes of the two input vectors
2046 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
2047 /// - Shuffle low, low lanes of the two input vectors
2048 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
2049 /// - Shuffle high, high lanes of the two input vectors
2050 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
2051///
2052 /// The first case is the closest to the LoongArch instructions, and the other
2053 /// cases need to be converted to it before processing.
2054 ///
2055 /// This function may modify V1, V2 and Mask.
2056static void canonicalizeShuffleVectorByLane(const SDLoc &DL,
2057 MutableArrayRef<int> Mask, MVT VT,
2058 SDValue &V1, SDValue &V2,
2059 SelectionDAG &DAG) {
2060
2061 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
2062
2063 int MaskSize = Mask.size();
2064 int HalfSize = Mask.size() / 2;
2065
2066 HalfMaskType preMask = None, postMask = None;
2067
2068 if (std::all_of(first: Mask.begin(), last: Mask.begin() + HalfSize, pred: [&](int M) {
2069 return M < 0 || (M >= 0 && M < HalfSize) ||
2070 (M >= MaskSize && M < MaskSize + HalfSize);
2071 }))
2072 preMask = HighLaneTy;
2073 else if (std::all_of(first: Mask.begin(), last: Mask.begin() + HalfSize, pred: [&](int M) {
2074 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2075 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2076 }))
2077 preMask = LowLaneTy;
2078
2079 if (std::all_of(first: Mask.begin() + HalfSize, last: Mask.end(), pred: [&](int M) {
2080 return M < 0 || (M >= 0 && M < HalfSize) ||
2081 (M >= MaskSize && M < MaskSize + HalfSize);
2082 }))
2083 postMask = HighLaneTy;
2084 else if (std::all_of(first: Mask.begin() + HalfSize, last: Mask.end(), pred: [&](int M) {
2085 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2086 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2087 }))
2088 postMask = LowLaneTy;
2089
2090 // The first half of the mask is high lane type and the second half is
2091 // low lane type, which is closest to the LoongArch instructions.
2092 //
2093 // Note: In the LoongArch architecture, the high lane of the mask corresponds
2094 // to the lower 128 bits of the vector register, and the low lane of the mask
2095 // corresponds to the higher 128 bits of the vector register.
2096 if (preMask == HighLaneTy && postMask == LowLaneTy) {
2097 return;
2098 }
2099 if (preMask == LowLaneTy && postMask == HighLaneTy) {
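// The two halves of the mask are swapped relative to what the hardware
// expects: swap the two 128-bit lanes of each (non-undef) input with an
// XVPERMI and remap the mask indices onto the swapped element positions.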
2100 V1 = DAG.getBitcast(VT: MVT::v4i64, V: V1);
2101 V1 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V1,
2102 N2: DAG.getConstant(Val: 0b01001110, DL, VT: MVT::i64));
2103 V1 = DAG.getBitcast(VT, V: V1);
2104
2105 if (!V2.isUndef()) {
2106 V2 = DAG.getBitcast(VT: MVT::v4i64, V: V2);
2107 V2 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V2,
2108 N2: DAG.getConstant(Val: 0b01001110, DL, VT: MVT::i64));
2109 V2 = DAG.getBitcast(VT, V: V2);
2110 }
2111
2112 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2113 *it = *it < 0 ? *it : *it - HalfSize;
2114 }
2115 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2116 *it = *it < 0 ? *it : *it + HalfSize;
2117 }
2118 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
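// Both halves of the mask reference the upper 128-bit lane: duplicate that
// lane into both halves of each (non-undef) input and rebase the first half
// of the mask.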
2119 V1 = DAG.getBitcast(VT: MVT::v4i64, V: V1);
2120 V1 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V1,
2121 N2: DAG.getConstant(Val: 0b11101110, DL, VT: MVT::i64));
2122 V1 = DAG.getBitcast(VT, V: V1);
2123
2124 if (!V2.isUndef()) {
2125 V2 = DAG.getBitcast(VT: MVT::v4i64, V: V2);
2126 V2 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V2,
2127 N2: DAG.getConstant(Val: 0b11101110, DL, VT: MVT::i64));
2128 V2 = DAG.getBitcast(VT, V: V2);
2129 }
2130
2131 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2132 *it = *it < 0 ? *it : *it - HalfSize;
2133 }
2134 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
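// Both halves of the mask reference the lower 128-bit lane: duplicate that
// lane into both halves of each (non-undef) input and rebase the second half
// of the mask.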
2135 V1 = DAG.getBitcast(VT: MVT::v4i64, V: V1);
2136 V1 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V1,
2137 N2: DAG.getConstant(Val: 0b01000100, DL, VT: MVT::i64));
2138 V1 = DAG.getBitcast(VT, V: V1);
2139
2140 if (!V2.isUndef()) {
2141 V2 = DAG.getBitcast(VT: MVT::v4i64, V: V2);
2142 V2 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V2,
2143 N2: DAG.getConstant(Val: 0b01000100, DL, VT: MVT::i64));
2144 V2 = DAG.getBitcast(VT, V: V2);
2145 }
2146
2147 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2148 *it = *it < 0 ? *it : *it + HalfSize;
2149 }
2150 } else { // cross-lane
2151 return;
2152 }
2153}
2154
2155/// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
2156 /// Only for 256-bit vectors.
2157///
2158/// For example:
2159 /// %2 = shufflevector <4 x i64> %0, <4 x i64> poison,
2160 /// <4 x i32> <i32 0, i32 3, i32 2, i32 0>
2161 /// is lowered to:
2162/// (XVPERMI $xr2, $xr0, 78)
2163/// (XVSHUF $xr1, $xr2, $xr0)
2164/// (XVORI $xr0, $xr1, 0)
2165static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL,
2166 ArrayRef<int> Mask,
2167 MVT VT, SDValue V1,
2168 SDValue V2,
2169 SelectionDAG &DAG) {
2170 assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
2171 int Size = Mask.size();
2172 int LaneSize = Size / 2;
2173
2174 bool LaneCrossing[2] = {false, false};
2175 for (int i = 0; i < Size; ++i)
2176 if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
2177 LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
2178
2179 // If no element crosses the 128-bit lanes, this lowering does not apply.
2180 if (!LaneCrossing[0] && !LaneCrossing[1])
2181 return SDValue();
2182
2183 SmallVector<int> InLaneMask;
2184 InLaneMask.assign(in_start: Mask.begin(), in_end: Mask.end());
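// Rewrite cross-lane references to index the lane-swapped copy of V1 (the
// second operand of the shuffle created below) at the same offset within the
// destination lane.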
2185 for (int i = 0; i < Size; ++i) {
2186 int &M = InLaneMask[i];
2187 if (M < 0)
2188 continue;
2189 if (((M % Size) / LaneSize) != (i / LaneSize))
2190 M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
2191 }
2192
2193 SDValue Flipped = DAG.getBitcast(VT: MVT::v4i64, V: V1);
2194 Flipped = DAG.getVectorShuffle(VT: MVT::v4i64, dl: DL, N1: Flipped,
2195 N2: DAG.getUNDEF(VT: MVT::v4i64), Mask: {2, 3, 0, 1});
2196 Flipped = DAG.getBitcast(VT, V: Flipped);
2197 return DAG.getVectorShuffle(VT, dl: DL, N1: V1, N2: Flipped, Mask: InLaneMask);
2198}
2199
2200/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
2201///
2202/// This routine breaks down the specific type of 256-bit shuffle and
2203/// dispatches to the lowering routines accordingly.
2204static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2205 SDValue V1, SDValue V2, SelectionDAG &DAG) {
2206 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
2207 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
2208 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
2209 "Vector type is unsupported for lasx!");
2210 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
2211 "Two operands have different types!");
2212 assert(VT.getVectorNumElements() == Mask.size() &&
2213 "Unexpected mask size for shuffle!");
2214 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2215 assert(Mask.size() >= 4 && "Mask size is less than 4.");
2216
2217 // Canonicalize non-cross-lane shuffle vectors.
2218 SmallVector<int> NewMask(Mask);
2219 canonicalizeShuffleVectorByLane(DL, Mask: NewMask, VT, V1, V2, DAG);
2220
2221 APInt KnownUndef, KnownZero;
2222 computeZeroableShuffleElements(Mask: NewMask, V1, V2, KnownUndef, KnownZero);
2223 APInt Zeroable = KnownUndef | KnownZero;
2224
2225 SDValue Result;
2226 // TODO: Add more comparison patterns.
2227 if (V2.isUndef()) {
2228 if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask: NewMask, VT, V1, V2, DAG)))
2229 return Result;
2230 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask: NewMask, VT, V1, V2, DAG)))
2231 return Result;
2232 if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, Mask: NewMask, VT,
2233 V1, V2, DAG)))
2234 return Result;
2235
2236 // TODO: The commented-out assignment below may be enabled in the future to
2237 // better match the pattern for instruction selection.
2238 /* V2 = V1; */
2239 }
2240
2241 // For better performance, it is recommended not to change the order of
2242 // these pattern comparisons.
2243 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask: NewMask, VT, V1, V2, DAG)))
2244 return Result;
2245 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask: NewMask, VT, V1, V2, DAG)))
2246 return Result;
2247 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask: NewMask, VT, V1, V2, DAG)))
2248 return Result;
2249 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask: NewMask, VT, V1, V2, DAG)))
2250 return Result;
2251 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask: NewMask, VT, V1, V2, DAG)))
2252 return Result;
2253 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask: NewMask, VT, V1, V2, DAG)))
2254 return Result;
2255 if ((Result =
2256 lowerVECTOR_SHUFFLEAsShift(DL, Mask: NewMask, VT, V1, V2, DAG, Zeroable)))
2257 return Result;
2258 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask: NewMask, VT, V1, V2, DAG)))
2259 return Result;
2260 if (SDValue NewShuffle = widenShuffleMask(DL, Mask: NewMask, VT, V1, V2, DAG))
2261 return NewShuffle;
2262 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, Mask: NewMask, VT, V1, V2, DAG)))
2263 return Result;
2264
2265 return SDValue();
2266}
2267
2268SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
2269 SelectionDAG &DAG) const {
2270 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Val&: Op);
2271 ArrayRef<int> OrigMask = SVOp->getMask();
2272 SDValue V1 = Op.getOperand(i: 0);
2273 SDValue V2 = Op.getOperand(i: 1);
2274 MVT VT = Op.getSimpleValueType();
2275 int NumElements = VT.getVectorNumElements();
2276 SDLoc DL(Op);
2277
2278 bool V1IsUndef = V1.isUndef();
2279 bool V2IsUndef = V2.isUndef();
2280 if (V1IsUndef && V2IsUndef)
2281 return DAG.getUNDEF(VT);
2282
2283 // When we create a shuffle node we put the UNDEF node as the second operand,
2284 // but in some cases the first operand may be transformed to UNDEF.
2285 // In that case we should just commute the node.
2286 if (V1IsUndef)
2287 return DAG.getCommutedVectorShuffle(SV: *SVOp);
2288
2289 // Check for non-undef masks pointing at an undef vector and make the masks
2290 // undef as well. This makes it easier to match the shuffle based solely on
2291 // the mask.
2292 if (V2IsUndef &&
2293 any_of(Range&: OrigMask, P: [NumElements](int M) { return M >= NumElements; })) {
2294 SmallVector<int, 8> NewMask(OrigMask);
2295 for (int &M : NewMask)
2296 if (M >= NumElements)
2297 M = -1;
2298 return DAG.getVectorShuffle(VT, dl: DL, N1: V1, N2: V2, Mask: NewMask);
2299 }
2300
2301 // Check for illegal shuffle mask element index values.
2302 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
2303 (void)MaskUpperLimit;
2304 assert(llvm::all_of(OrigMask,
2305 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
2306 "Out of bounds shuffle index");
2307
2308 // For each vector width, delegate to a specialized lowering routine.
2309 if (VT.is128BitVector())
2310 return lower128BitShuffle(DL, Mask: OrigMask, VT, V1, V2, DAG);
2311
2312 if (VT.is256BitVector())
2313 return lower256BitShuffle(DL, Mask: OrigMask, VT, V1, V2, DAG);
2314
2315 return SDValue();
2316}
2317
2318SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
2319 SelectionDAG &DAG) const {
2320 // Custom lower to ensure the libcall return is passed in an FPR on hard
2321 // float ABIs.
2322 SDLoc DL(Op);
2323 MakeLibCallOptions CallOptions;
2324 SDValue Op0 = Op.getOperand(i: 0);
2325 SDValue Chain = SDValue();
2326 RTLIB::Libcall LC = RTLIB::getFPROUND(OpVT: Op0.getValueType(), RetVT: MVT::f16);
2327 SDValue Res;
2328 std::tie(args&: Res, args&: Chain) =
2329 makeLibCall(DAG, LC, RetVT: MVT::f32, Ops: Op0, CallOptions, dl: DL, Chain);
2330 if (Subtarget.is64Bit())
2331 return DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Res);
2332 return DAG.getBitcast(VT: MVT::i32, V: Res);
2333}
2334
2335SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
2336 SelectionDAG &DAG) const {
2337 // Custom lower to ensure the libcall argument is passed in an FPR on hard
2338 // float ABIs.
2339 SDLoc DL(Op);
2340 MakeLibCallOptions CallOptions;
2341 SDValue Op0 = Op.getOperand(i: 0);
2342 SDValue Chain = SDValue();
2343 SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64,
2344 DL, VT: MVT::f32, Operand: Op0)
2345 : DAG.getBitcast(VT: MVT::f32, V: Op0);
2346 SDValue Res;
2347 std::tie(args&: Res, args&: Chain) = makeLibCall(DAG, LC: RTLIB::FPEXT_F16_F32, RetVT: MVT::f32, Ops: Arg,
2348 CallOptions, dl: DL, Chain);
2349 return Res;
2350}
2351
2352SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
2353 SelectionDAG &DAG) const {
2354 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2355 SDLoc DL(Op);
2356 MakeLibCallOptions CallOptions;
2357 RTLIB::Libcall LC =
2358 RTLIB::getFPROUND(OpVT: Op.getOperand(i: 0).getValueType(), RetVT: MVT::bf16);
2359 SDValue Res =
2360 makeLibCall(DAG, LC, RetVT: MVT::f32, Ops: Op.getOperand(i: 0), CallOptions, dl: DL).first;
2361 if (Subtarget.is64Bit())
2362 return DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Res);
2363 return DAG.getBitcast(VT: MVT::i32, V: Res);
2364}
2365
2366SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
2367 SelectionDAG &DAG) const {
2368 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2369 MVT VT = Op.getSimpleValueType();
2370 SDLoc DL(Op);
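// A bf16 value is extended to f32 by shifting its bits into the upper 16
// bits of the f32 bit pattern; the low 16 mantissa bits are left as zero.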
2371 Op = DAG.getNode(
2372 Opcode: ISD::SHL, DL, VT: Op.getOperand(i: 0).getValueType(), N1: Op.getOperand(i: 0),
2373 N2: DAG.getShiftAmountConstant(Val: 16, VT: Op.getOperand(i: 0).getValueType(), DL));
2374 SDValue Res = Subtarget.is64Bit() ? DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64,
2375 DL, VT: MVT::f32, Operand: Op)
2376 : DAG.getBitcast(VT: MVT::f32, V: Op);
2377 if (VT != MVT::f32)
2378 return DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT, Operand: Res);
2379 return Res;
2380}
2381
2382static bool isConstantOrUndef(const SDValue Op) {
2383 if (Op->isUndef())
2384 return true;
2385 if (isa<ConstantSDNode>(Val: Op))
2386 return true;
2387 if (isa<ConstantFPSDNode>(Val: Op))
2388 return true;
2389 return false;
2390}
2391
2392static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
2393 for (unsigned i = 0; i < Op->getNumOperands(); ++i)
2394 if (isConstantOrUndef(Op: Op->getOperand(Num: i)))
2395 return true;
2396 return false;
2397}
2398
2399// Lower BUILD_VECTOR as broadcast load (if possible).
2400// For example:
2401// %a = load i8, ptr %ptr
2402// %b = build_vector %a, %a, %a, %a
2403 // is lowered to:
2404// (VLDREPL_B $a0, 0)
2405static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
2406 const SDLoc &DL,
2407 SelectionDAG &DAG) {
2408 MVT VT = BVOp->getSimpleValueType(ResNo: 0);
2409 int NumOps = BVOp->getNumOperands();
2410
2411 assert((VT.is128BitVector() || VT.is256BitVector()) &&
2412 "Unsupported vector type for broadcast.");
2413
2414 SDValue IdentitySrc;
2415 bool IsIdentity = true;
2416
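// Check that every operand is the same load; only then can the build_vector
// be replaced by a single element-replicating load.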
2417 for (int i = 0; i != NumOps; i++) {
2418 SDValue Op = BVOp->getOperand(Num: i);
2419 if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
2420 IsIdentity = false;
2421 break;
2422 }
2423 IdentitySrc = BVOp->getOperand(Num: 0);
2424 }
2425
2426 // Make sure that this load is valid and only has one user.
2427 if (!IdentitySrc || !BVOp->isOnlyUserOf(N: IdentitySrc.getNode()))
2428 return SDValue();
2429
2430 if (IsIdentity) {
2431 auto *LN = cast<LoadSDNode>(Val&: IdentitySrc);
2432 SDVTList Tys =
2433 LN->isIndexed()
2434 ? DAG.getVTList(VT1: VT, VT2: LN->getBasePtr().getValueType(), VT3: MVT::Other)
2435 : DAG.getVTList(VT1: VT, VT2: MVT::Other);
2436 SDValue Ops[] = {LN->getChain(), LN->getBasePtr(), LN->getOffset()};
2437 SDValue BCast = DAG.getNode(Opcode: LoongArchISD::VLDREPL, DL, VTList: Tys, Ops);
2438 DAG.ReplaceAllUsesOfValueWith(From: SDValue(LN, 1), To: BCast.getValue(R: 1));
2439 return BCast;
2440 }
2441 return SDValue();
2442}
2443
2444SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
2445 SelectionDAG &DAG) const {
2446 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Val&: Op);
2447 EVT ResTy = Op->getValueType(ResNo: 0);
2448 SDLoc DL(Op);
2449 APInt SplatValue, SplatUndef;
2450 unsigned SplatBitSize;
2451 bool HasAnyUndefs;
2452 bool Is128Vec = ResTy.is128BitVector();
2453 bool Is256Vec = ResTy.is256BitVector();
2454
2455 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
2456 (!Subtarget.hasExtLASX() || !Is256Vec))
2457 return SDValue();
2458
2459 if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(BVOp: Node, DL, DAG))
2460 return Result;
2461
2462 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
2463 /*MinSplatBits=*/8) &&
2464 SplatBitSize <= 64) {
2465 // We can only cope with 8, 16, 32, or 64-bit elements.
2466 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
2467 SplatBitSize != 64)
2468 return SDValue();
2469
2470 EVT ViaVecTy;
2471
2472 switch (SplatBitSize) {
2473 default:
2474 return SDValue();
2475 case 8:
2476 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
2477 break;
2478 case 16:
2479 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
2480 break;
2481 case 32:
2482 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
2483 break;
2484 case 64:
2485 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
2486 break;
2487 }
2488
2489 // SelectionDAG::getConstant will promote SplatValue appropriately.
2490 SDValue Result = DAG.getConstant(Val: SplatValue, DL, VT: ViaVecTy);
2491
2492 // Bitcast to the type we originally wanted.
2493 if (ViaVecTy != ResTy)
2494 Result = DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(Node), VT: ResTy, Operand: Result);
2495
2496 return Result;
2497 }
2498
2499 if (DAG.isSplatValue(V: Op, /*AllowUndefs=*/false))
2500 return Op;
2501
2502 if (!isConstantOrUndefBUILD_VECTOR(Op: Node)) {
2503 // Use INSERT_VECTOR_ELT operations rather than expand to stores.
2504 // The resulting code is the same length as the expansion, but it doesn't
2505 // use memory operations.
2506 EVT ResTy = Node->getValueType(ResNo: 0);
2507
2508 assert(ResTy.isVector());
2509
2510 unsigned NumElts = ResTy.getVectorNumElements();
2511 SDValue Vector = DAG.getUNDEF(VT: ResTy);
2512 for (unsigned i = 0; i < NumElts; ++i) {
2513 Vector = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: ResTy, N1: Vector,
2514 N2: Node->getOperand(Num: i),
2515 N3: DAG.getConstant(Val: i, DL, VT: Subtarget.getGRLenVT()));
2516 }
2517 return Vector;
2518 }
2519
2520 return SDValue();
2521}
2522
2523SDValue
2524LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
2525 SelectionDAG &DAG) const {
2526 EVT VecTy = Op->getOperand(Num: 0)->getValueType(ResNo: 0);
2527 SDValue Idx = Op->getOperand(Num: 1);
2528 EVT EltTy = VecTy.getVectorElementType();
2529 unsigned NumElts = VecTy.getVectorNumElements();
2530
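// Keep the node only when the index is a constant and the element type or
// position allows a direct extract: 32/64-bit elements, or an element in the
// lower half of the vector.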
2531 if (isa<ConstantSDNode>(Val: Idx) &&
2532 (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
2533 EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2))
2534 return Op;
2535
2536 return SDValue();
2537}
2538
2539SDValue
2540LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
2541 SelectionDAG &DAG) const {
2542 if (isa<ConstantSDNode>(Val: Op->getOperand(Num: 2)))
2543 return Op;
2544 return SDValue();
2545}
2546
2547SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
2548 SelectionDAG &DAG) const {
2549 SDLoc DL(Op);
2550 SyncScope::ID FenceSSID =
2551 static_cast<SyncScope::ID>(Op.getConstantOperandVal(i: 2));
2552
2553 // singlethread fences only synchronize with signal handlers on the same
2554 // thread and thus only need to preserve instruction order, not actually
2555 // enforce memory ordering.
2556 if (FenceSSID == SyncScope::SingleThread)
2557 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
2558 return DAG.getNode(Opcode: ISD::MEMBARRIER, DL, VT: MVT::Other, Operand: Op.getOperand(i: 0));
2559
2560 return Op;
2561}
2562
2563SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
2564 SelectionDAG &DAG) const {
2565
2566 if (Subtarget.is64Bit() && Op.getOperand(i: 2).getValueType() == MVT::i32) {
2567 DAG.getContext()->emitError(
2568 ErrorStr: "On LA64, only 64-bit registers can be written.");
2569 return Op.getOperand(i: 0);
2570 }
2571
2572 if (!Subtarget.is64Bit() && Op.getOperand(i: 2).getValueType() == MVT::i64) {
2573 DAG.getContext()->emitError(
2574 ErrorStr: "On LA32, only 32-bit registers can be written.");
2575 return Op.getOperand(i: 0);
2576 }
2577
2578 return Op;
2579}
2580
2581SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
2582 SelectionDAG &DAG) const {
2583 if (!isa<ConstantSDNode>(Val: Op.getOperand(i: 0))) {
2584 DAG.getContext()->emitError(ErrorStr: "argument to '__builtin_frame_address' must "
2585 "be a constant integer");
2586 return SDValue();
2587 }
2588
2589 MachineFunction &MF = DAG.getMachineFunction();
2590 MF.getFrameInfo().setFrameAddressIsTaken(true);
2591 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
2592 EVT VT = Op.getValueType();
2593 SDLoc DL(Op);
2594 SDValue FrameAddr = DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: DL, Reg: FrameReg, VT);
2595 unsigned Depth = Op.getConstantOperandVal(i: 0);
2596 int GRLenInBytes = Subtarget.getGRLen() / 8;
2597
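// Walk up the frame chain: each frame is assumed to save its caller's frame
// pointer at -(2 * GRLenInBytes) from its own frame pointer.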
2598 while (Depth--) {
2599 int Offset = -(GRLenInBytes * 2);
2600 SDValue Ptr = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: FrameAddr,
2601 N2: DAG.getSignedConstant(Val: Offset, DL, VT));
2602 FrameAddr =
2603 DAG.getLoad(VT, dl: DL, Chain: DAG.getEntryNode(), Ptr, PtrInfo: MachinePointerInfo());
2604 }
2605 return FrameAddr;
2606}
2607
2608SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
2609 SelectionDAG &DAG) const {
2610 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
2611 return SDValue();
2612
2613 // Currently only support lowering return address for current frame.
2614 if (Op.getConstantOperandVal(i: 0) != 0) {
2615 DAG.getContext()->emitError(
2616 ErrorStr: "return address can only be determined for the current frame");
2617 return SDValue();
2618 }
2619
2620 MachineFunction &MF = DAG.getMachineFunction();
2621 MF.getFrameInfo().setReturnAddressIsTaken(true);
2622 MVT GRLenVT = Subtarget.getGRLenVT();
2623
2624 // Return the value of the return address register, marking it an implicit
2625 // live-in.
2626 Register Reg = MF.addLiveIn(PReg: Subtarget.getRegisterInfo()->getRARegister(),
2627 RC: getRegClassFor(VT: GRLenVT));
2628 return DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: SDLoc(Op), Reg, VT: GRLenVT);
2629}
2630
2631SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
2632 SelectionDAG &DAG) const {
2633 MachineFunction &MF = DAG.getMachineFunction();
2634 auto Size = Subtarget.getGRLen() / 8;
2635 auto FI = MF.getFrameInfo().CreateFixedObject(Size, SPOffset: 0, IsImmutable: false);
2636 return DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout()));
2637}
2638
2639SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
2640 SelectionDAG &DAG) const {
2641 MachineFunction &MF = DAG.getMachineFunction();
2642 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
2643
2644 SDLoc DL(Op);
2645 SDValue FI = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(),
2646 VT: getPointerTy(DL: MF.getDataLayout()));
2647
2648 // vastart just stores the address of the VarArgsFrameIndex slot into the
2649 // memory location argument.
2650 const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue();
2651 return DAG.getStore(Chain: Op.getOperand(i: 0), dl: DL, Val: FI, Ptr: Op.getOperand(i: 1),
2652 PtrInfo: MachinePointerInfo(SV));
2653}
2654
2655SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
2656 SelectionDAG &DAG) const {
2657 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
2658 !Subtarget.hasBasicD() && "unexpected target features");
2659
2660 SDLoc DL(Op);
2661 SDValue Op0 = Op.getOperand(i: 0);
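// If the operand is already known to be a small non-negative value that fits
// in 32 bits, keep the node as-is; no libcall is needed.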
2662 if (Op0->getOpcode() == ISD::AND) {
2663 auto *C = dyn_cast<ConstantSDNode>(Val: Op0.getOperand(i: 1));
2664 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
2665 return Op;
2666 }
2667
2668 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
2669 Op0.getConstantOperandVal(i: 1) < UINT64_C(0X1F) &&
2670 Op0.getConstantOperandVal(i: 2) == UINT64_C(0))
2671 return Op;
2672
2673 if (Op0.getOpcode() == ISD::AssertZext &&
2674 dyn_cast<VTSDNode>(Val: Op0.getOperand(i: 1))->getVT().bitsLT(VT: MVT::i32))
2675 return Op;
2676
2677 EVT OpVT = Op0.getValueType();
2678 EVT RetVT = Op.getValueType();
2679 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
2680 MakeLibCallOptions CallOptions;
2681 CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT, Value: true);
2682 SDValue Chain = SDValue();
2683 SDValue Result;
2684 std::tie(args&: Result, args&: Chain) =
2685 makeLibCall(DAG, LC, RetVT: Op.getValueType(), Ops: Op0, CallOptions, dl: DL, Chain);
2686 return Result;
2687}
2688
2689SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
2690 SelectionDAG &DAG) const {
2691 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
2692 !Subtarget.hasBasicD() && "unexpected target features");
2693
2694 SDLoc DL(Op);
2695 SDValue Op0 = Op.getOperand(i: 0);
2696
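// If the operand is known to be sign-extended from 32 bits or narrower, keep
// the node as-is; otherwise fall back to a libcall.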
2697 if ((Op0.getOpcode() == ISD::AssertSext ||
2698 Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) &&
2699 dyn_cast<VTSDNode>(Val: Op0.getOperand(i: 1))->getVT().bitsLE(VT: MVT::i32))
2700 return Op;
2701
2702 EVT OpVT = Op0.getValueType();
2703 EVT RetVT = Op.getValueType();
2704 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
2705 MakeLibCallOptions CallOptions;
2706 CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT, Value: true);
2707 SDValue Chain = SDValue();
2708 SDValue Result;
2709 std::tie(args&: Result, args&: Chain) =
2710 makeLibCall(DAG, LC, RetVT: Op.getValueType(), Ops: Op0, CallOptions, dl: DL, Chain);
2711 return Result;
2712}
2713
2714SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
2715 SelectionDAG &DAG) const {
2716
2717 SDLoc DL(Op);
2718 EVT VT = Op.getValueType();
2719 SDValue Op0 = Op.getOperand(i: 0);
2720 EVT Op0VT = Op0.getValueType();
2721
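// i32 -> f32 bitcasts on LA64 go through a GPR-to-FPR move of the
// any-extended value; f64 bitcasts on LA32 are built from the two i32 halves.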
2722 if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
2723 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
2724 SDValue NewOp0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op0);
2725 return DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64, DL, VT: MVT::f32, Operand: NewOp0);
2726 }
2727 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
2728 SDValue Lo, Hi;
2729 std::tie(args&: Lo, args&: Hi) = DAG.SplitScalar(N: Op0, DL, LoVT: MVT::i32, HiVT: MVT::i32);
2730 return DAG.getNode(Opcode: LoongArchISD::BUILD_PAIR_F64, DL, VT: MVT::f64, N1: Lo, N2: Hi);
2731 }
2732 return Op;
2733}
2734
2735SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
2736 SelectionDAG &DAG) const {
2737
2738 SDLoc DL(Op);
2739 SDValue Op0 = Op.getOperand(i: 0);
2740
2741 if (Op0.getValueType() == MVT::f16)
2742 Op0 = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: MVT::f32, Operand: Op0);
2743
2744 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
2745 !Subtarget.hasBasicD()) {
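// Without basic D, a 64-bit result is produced by a single-precision FTINT
// followed by a move of the FPR bits into a GPR.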
2746 SDValue Dst = DAG.getNode(Opcode: LoongArchISD::FTINT, DL, VT: MVT::f32, Operand: Op0);
2747 return DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Dst);
2748 }
2749
2750 EVT FPTy = EVT::getFloatingPointVT(BitWidth: Op.getValueSizeInBits());
2751 SDValue Trunc = DAG.getNode(Opcode: LoongArchISD::FTINT, DL, VT: FPTy, Operand: Op0);
2752 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: Op.getValueType(), Operand: Trunc);
2753}
2754
2755static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
2756 SelectionDAG &DAG, unsigned Flags) {
2757 return DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: Flags);
2758}
2759
2760static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
2761 SelectionDAG &DAG, unsigned Flags) {
2762 return DAG.getTargetBlockAddress(BA: N->getBlockAddress(), VT: Ty, Offset: N->getOffset(),
2763 TargetFlags: Flags);
2764}
2765
2766static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
2767 SelectionDAG &DAG, unsigned Flags) {
2768 return DAG.getTargetConstantPool(C: N->getConstVal(), VT: Ty, Align: N->getAlign(),
2769 Offset: N->getOffset(), TargetFlags: Flags);
2770}
2771
2772static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
2773 SelectionDAG &DAG, unsigned Flags) {
2774 return DAG.getTargetJumpTable(JTI: N->getIndex(), VT: Ty, TargetFlags: Flags);
2775}
2776
2777template <class NodeTy>
2778SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
2779 CodeModel::Model M,
2780 bool IsLocal) const {
2781 SDLoc DL(N);
2782 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
2783 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
2784 SDValue Load;
2785
2786 switch (M) {
2787 default:
2788 report_fatal_error(reason: "Unsupported code model");
2789
2790 case CodeModel::Large: {
2791 assert(Subtarget.is64Bit() && "Large code model requires LA64");
2792
2793 // This is not actually used, but is necessary for successfully matching
2794 // the PseudoLA_*_LARGE nodes.
2795 SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty);
2796 if (IsLocal) {
2797 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
2798 // eventually becomes the desired 5-insn code sequence.
2799 Load = SDValue(DAG.getMachineNode(Opcode: LoongArch::PseudoLA_PCREL_LARGE, dl: DL, VT: Ty,
2800 Op1: Tmp, Op2: Addr),
2801 0);
2802 } else {
2803 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
2804 // eventually becomes the desired 5-insn code sequence.
2805 Load = SDValue(
2806 DAG.getMachineNode(Opcode: LoongArch::PseudoLA_GOT_LARGE, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr),
2807 0);
2808 }
2809 break;
2810 }
2811
2812 case CodeModel::Small:
2813 case CodeModel::Medium:
2814 if (IsLocal) {
2815 // This generates the pattern (PseudoLA_PCREL sym), which expands to
2816 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
2817 Load = SDValue(
2818 DAG.getMachineNode(Opcode: LoongArch::PseudoLA_PCREL, dl: DL, VT: Ty, Op1: Addr), 0);
2819 } else {
2820 // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
2821 // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
2822 Load =
2823 SDValue(DAG.getMachineNode(Opcode: LoongArch::PseudoLA_GOT, dl: DL, VT: Ty, Op1: Addr), 0);
2824 }
2825 }
2826
2827 if (!IsLocal) {
2828 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
2829 MachineFunction &MF = DAG.getMachineFunction();
2830 MachineMemOperand *MemOp = MF.getMachineMemOperand(
2831 PtrInfo: MachinePointerInfo::getGOT(MF),
2832 f: MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
2833 MachineMemOperand::MOInvariant,
2834 MemTy: LLT(Ty.getSimpleVT()), base_alignment: Align(Ty.getFixedSizeInBits() / 8));
2835 DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Load.getNode()), NewMemRefs: {MemOp});
2836 }
2837
2838 return Load;
2839}
2840
2841SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
2842 SelectionDAG &DAG) const {
2843 return getAddr(N: cast<BlockAddressSDNode>(Val&: Op), DAG,
2844 M: DAG.getTarget().getCodeModel());
2845}
2846
2847SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
2848 SelectionDAG &DAG) const {
2849 return getAddr(N: cast<JumpTableSDNode>(Val&: Op), DAG,
2850 M: DAG.getTarget().getCodeModel());
2851}
2852
2853SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
2854 SelectionDAG &DAG) const {
2855 return getAddr(N: cast<ConstantPoolSDNode>(Val&: Op), DAG,
2856 M: DAG.getTarget().getCodeModel());
2857}
2858
2859SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
2860 SelectionDAG &DAG) const {
2861 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Val&: Op);
2862 assert(N->getOffset() == 0 && "unexpected offset in global node");
2863 auto CM = DAG.getTarget().getCodeModel();
2864 const GlobalValue *GV = N->getGlobal();
2865
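// A dso-local global variable may carry an explicit code model attribute,
// which overrides the module's code model for this access.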
2866 if (GV->isDSOLocal() && isa<GlobalVariable>(Val: GV)) {
2867 if (auto GCM = dyn_cast<GlobalVariable>(Val: GV)->getCodeModel())
2868 CM = *GCM;
2869 }
2870
2871 return getAddr(N, DAG, M: CM, IsLocal: GV->isDSOLocal());
2872}
2873
2874SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
2875 SelectionDAG &DAG,
2876 unsigned Opc, bool UseGOT,
2877 bool Large) const {
2878 SDLoc DL(N);
2879 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
2880 MVT GRLenVT = Subtarget.getGRLenVT();
2881
2882 // This is not actually used, but is necessary for successfully matching the
2883 // PseudoLA_*_LARGE nodes.
2884 SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty);
2885 SDValue Addr = DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: 0);
2886
2887 // Only IE needs an extra argument for large code model.
2888 SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
2889 ? SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), 0)
2890 : SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Addr), 0);
2891
2892 // For LE under the normal/medium code models, the thread-pointer add is
2893 // emitted during pseudo-instruction expansion.
2894 if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
2895 return Offset;
2896
2897 if (UseGOT) {
2898 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
2899 MachineFunction &MF = DAG.getMachineFunction();
2900 MachineMemOperand *MemOp = MF.getMachineMemOperand(
2901 PtrInfo: MachinePointerInfo::getGOT(MF),
2902 f: MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
2903 MachineMemOperand::MOInvariant,
2904 MemTy: LLT(Ty.getSimpleVT()), base_alignment: Align(Ty.getFixedSizeInBits() / 8));
2905 DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Offset.getNode()), NewMemRefs: {MemOp});
2906 }
2907
2908 // Add the thread pointer.
2909 return DAG.getNode(Opcode: ISD::ADD, DL, VT: Ty, N1: Offset,
2910 N2: DAG.getRegister(Reg: LoongArch::R2, VT: GRLenVT));
2911}
2912
2913SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
2914 SelectionDAG &DAG,
2915 unsigned Opc,
2916 bool Large) const {
2917 SDLoc DL(N);
2918 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
2919 IntegerType *CallTy = Type::getIntNTy(C&: *DAG.getContext(), N: Ty.getSizeInBits());
2920
2921 // This is not actually used, but is necessary for successfully matching the
2922 // PseudoLA_*_LARGE nodes.
2923 SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty);
2924
2925 // Use a PC-relative addressing mode to access the dynamic GOT address.
2926 SDValue Addr = DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: 0);
2927 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), 0)
2928 : SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Addr), 0);
2929
2930 // Prepare the argument list for the call.
2931 ArgListTy Args;
2932 ArgListEntry Entry;
2933 Entry.Node = Load;
2934 Entry.Ty = CallTy;
2935 Args.push_back(x: Entry);
2936
2937 // Set up the call to __tls_get_addr.
2938 TargetLowering::CallLoweringInfo CLI(DAG);
2939 CLI.setDebugLoc(DL)
2940 .setChain(DAG.getEntryNode())
2941 .setLibCallee(CC: CallingConv::C, ResultType: CallTy,
2942 Target: DAG.getExternalSymbol(Sym: "__tls_get_addr", VT: Ty),
2943 ArgsList: std::move(Args));
2944
2945 return LowerCallTo(CLI).first;
2946}
2947
2948SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
2949 SelectionDAG &DAG, unsigned Opc,
2950 bool Large) const {
2951 SDLoc DL(N);
2952 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
2953 const GlobalValue *GV = N->getGlobal();
2954
2955 // This is not actually used, but is necessary for successfully matching the
2956 // PseudoLA_*_LARGE nodes.
2957 SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty);
2958
2959 // Use a PC-relative addressing mode to access the global dynamic GOT address.
2960 // This generates the pattern (PseudoLA_TLS_DESC{,_LARGE} sym).
2961 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: 0);
2962 return Large ? SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), 0)
2963 : SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Addr), 0);
2964}
2965
2966SDValue
2967LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
2968 SelectionDAG &DAG) const {
2969 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
2970 CallingConv::GHC)
2971 report_fatal_error(reason: "In GHC calling convention TLS is not supported");
2972
2973 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
2974 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
2975
2976 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Val&: Op);
2977 assert(N->getOffset() == 0 && "unexpected offset in global node");
2978
2979 if (DAG.getTarget().useEmulatedTLS())
2980 reportFatalUsageError(reason: "the emulated TLS is prohibited");
2981
2982 bool IsDesc = DAG.getTarget().useTLSDESC();
2983
2984 switch (getTargetMachine().getTLSModel(GV: N->getGlobal())) {
2985 case TLSModel::GeneralDynamic:
2986 // In this model, application code calls the dynamic linker function
2987 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
2988 // runtime.
2989 if (!IsDesc)
2990 return getDynamicTLSAddr(N, DAG,
2991 Opc: Large ? LoongArch::PseudoLA_TLS_GD_LARGE
2992 : LoongArch::PseudoLA_TLS_GD,
2993 Large);
2994 break;
2995 case TLSModel::LocalDynamic:
2996 // Same as GeneralDynamic, except for assembly modifiers and relocation
2997 // records.
2998 if (!IsDesc)
2999 return getDynamicTLSAddr(N, DAG,
3000 Opc: Large ? LoongArch::PseudoLA_TLS_LD_LARGE
3001 : LoongArch::PseudoLA_TLS_LD,
3002 Large);
3003 break;
3004 case TLSModel::InitialExec:
3005 // This model uses the GOT to resolve TLS offsets.
3006 return getStaticTLSAddr(N, DAG,
3007 Opc: Large ? LoongArch::PseudoLA_TLS_IE_LARGE
3008 : LoongArch::PseudoLA_TLS_IE,
3009 /*UseGOT=*/true, Large);
3010 case TLSModel::LocalExec:
3011 // This model is used when statically linking, since the TLS offsets are
3012 // resolved at link time.
3013 //
3014 // This node doesn't need an extra argument for the large code model.
3015 return getStaticTLSAddr(N, DAG, Opc: LoongArch::PseudoLA_TLS_LE,
3016 /*UseGOT=*/false, Large);
3017 }
3018
3019 return getTLSDescAddr(N, DAG,
3020 Opc: Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
3021 : LoongArch::PseudoLA_TLS_DESC,
3022 Large);
3023}
3024
3025template <unsigned N>
3026static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
3027 SelectionDAG &DAG, bool IsSigned = false) {
3028 auto *CImm = cast<ConstantSDNode>(Val: Op->getOperand(Num: ImmOp));
3029 // Check the ImmArg.
3030 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3031 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3032 DAG.getContext()->emitError(ErrorStr: Op->getOperationName(G: 0) +
3033 ": argument out of range.");
3034 return DAG.getNode(Opcode: ISD::UNDEF, DL: SDLoc(Op), VT: Op.getValueType());
3035 }
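// An empty SDValue means the immediate is in range and the intrinsic needs no
// custom handling here.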
3036 return SDValue();
3037}
3038
3039SDValue
3040LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
3041 SelectionDAG &DAG) const {
3042 switch (Op.getConstantOperandVal(i: 0)) {
3043 default:
3044 return SDValue(); // Don't custom lower most intrinsics.
3045 case Intrinsic::thread_pointer: {
3046 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
3047 return DAG.getRegister(Reg: LoongArch::R2, VT: PtrVT);
3048 }
3049 case Intrinsic::loongarch_lsx_vpickve2gr_d:
3050 case Intrinsic::loongarch_lsx_vpickve2gr_du:
3051 case Intrinsic::loongarch_lsx_vreplvei_d:
3052 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
3053 return checkIntrinsicImmArg<1>(Op, ImmOp: 2, DAG);
3054 case Intrinsic::loongarch_lsx_vreplvei_w:
3055 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
3056 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
3057 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
3058 case Intrinsic::loongarch_lasx_xvpickve_d:
3059 case Intrinsic::loongarch_lasx_xvpickve_d_f:
3060 return checkIntrinsicImmArg<2>(Op, ImmOp: 2, DAG);
3061 case Intrinsic::loongarch_lasx_xvinsve0_d:
3062 return checkIntrinsicImmArg<2>(Op, ImmOp: 3, DAG);
3063 case Intrinsic::loongarch_lsx_vsat_b:
3064 case Intrinsic::loongarch_lsx_vsat_bu:
3065 case Intrinsic::loongarch_lsx_vrotri_b:
3066 case Intrinsic::loongarch_lsx_vsllwil_h_b:
3067 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
3068 case Intrinsic::loongarch_lsx_vsrlri_b:
3069 case Intrinsic::loongarch_lsx_vsrari_b:
3070 case Intrinsic::loongarch_lsx_vreplvei_h:
3071 case Intrinsic::loongarch_lasx_xvsat_b:
3072 case Intrinsic::loongarch_lasx_xvsat_bu:
3073 case Intrinsic::loongarch_lasx_xvrotri_b:
3074 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
3075 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
3076 case Intrinsic::loongarch_lasx_xvsrlri_b:
3077 case Intrinsic::loongarch_lasx_xvsrari_b:
3078 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
3079 case Intrinsic::loongarch_lasx_xvpickve_w:
3080 case Intrinsic::loongarch_lasx_xvpickve_w_f:
3081 return checkIntrinsicImmArg<3>(Op, ImmOp: 2, DAG);
3082 case Intrinsic::loongarch_lasx_xvinsve0_w:
3083 return checkIntrinsicImmArg<3>(Op, ImmOp: 3, DAG);
3084 case Intrinsic::loongarch_lsx_vsat_h:
3085 case Intrinsic::loongarch_lsx_vsat_hu:
3086 case Intrinsic::loongarch_lsx_vrotri_h:
3087 case Intrinsic::loongarch_lsx_vsllwil_w_h:
3088 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
3089 case Intrinsic::loongarch_lsx_vsrlri_h:
3090 case Intrinsic::loongarch_lsx_vsrari_h:
3091 case Intrinsic::loongarch_lsx_vreplvei_b:
3092 case Intrinsic::loongarch_lasx_xvsat_h:
3093 case Intrinsic::loongarch_lasx_xvsat_hu:
3094 case Intrinsic::loongarch_lasx_xvrotri_h:
3095 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
3096 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
3097 case Intrinsic::loongarch_lasx_xvsrlri_h:
3098 case Intrinsic::loongarch_lasx_xvsrari_h:
3099 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
3100 return checkIntrinsicImmArg<4>(Op, ImmOp: 2, DAG);
3101 case Intrinsic::loongarch_lsx_vsrlni_b_h:
3102 case Intrinsic::loongarch_lsx_vsrani_b_h:
3103 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
3104 case Intrinsic::loongarch_lsx_vsrarni_b_h:
3105 case Intrinsic::loongarch_lsx_vssrlni_b_h:
3106 case Intrinsic::loongarch_lsx_vssrani_b_h:
3107 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
3108 case Intrinsic::loongarch_lsx_vssrani_bu_h:
3109 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
3110 case Intrinsic::loongarch_lsx_vssrarni_b_h:
3111 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
3112 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
3113 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
3114 case Intrinsic::loongarch_lasx_xvsrani_b_h:
3115 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
3116 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
3117 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
3118 case Intrinsic::loongarch_lasx_xvssrani_b_h:
3119 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
3120 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
3121 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
3122 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
3123 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
3124 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
3125 return checkIntrinsicImmArg<4>(Op, ImmOp: 3, DAG);
3126 case Intrinsic::loongarch_lsx_vsat_w:
3127 case Intrinsic::loongarch_lsx_vsat_wu:
3128 case Intrinsic::loongarch_lsx_vrotri_w:
3129 case Intrinsic::loongarch_lsx_vsllwil_d_w:
3130 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
3131 case Intrinsic::loongarch_lsx_vsrlri_w:
3132 case Intrinsic::loongarch_lsx_vsrari_w:
3133 case Intrinsic::loongarch_lsx_vslei_bu:
3134 case Intrinsic::loongarch_lsx_vslei_hu:
3135 case Intrinsic::loongarch_lsx_vslei_wu:
3136 case Intrinsic::loongarch_lsx_vslei_du:
3137 case Intrinsic::loongarch_lsx_vslti_bu:
3138 case Intrinsic::loongarch_lsx_vslti_hu:
3139 case Intrinsic::loongarch_lsx_vslti_wu:
3140 case Intrinsic::loongarch_lsx_vslti_du:
3141 case Intrinsic::loongarch_lsx_vbsll_v:
3142 case Intrinsic::loongarch_lsx_vbsrl_v:
3143 case Intrinsic::loongarch_lasx_xvsat_w:
3144 case Intrinsic::loongarch_lasx_xvsat_wu:
3145 case Intrinsic::loongarch_lasx_xvrotri_w:
3146 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
3147 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
3148 case Intrinsic::loongarch_lasx_xvsrlri_w:
3149 case Intrinsic::loongarch_lasx_xvsrari_w:
3150 case Intrinsic::loongarch_lasx_xvslei_bu:
3151 case Intrinsic::loongarch_lasx_xvslei_hu:
3152 case Intrinsic::loongarch_lasx_xvslei_wu:
3153 case Intrinsic::loongarch_lasx_xvslei_du:
3154 case Intrinsic::loongarch_lasx_xvslti_bu:
3155 case Intrinsic::loongarch_lasx_xvslti_hu:
3156 case Intrinsic::loongarch_lasx_xvslti_wu:
3157 case Intrinsic::loongarch_lasx_xvslti_du:
3158 case Intrinsic::loongarch_lasx_xvbsll_v:
3159 case Intrinsic::loongarch_lasx_xvbsrl_v:
3160 return checkIntrinsicImmArg<5>(Op, ImmOp: 2, DAG);
3161 case Intrinsic::loongarch_lsx_vseqi_b:
3162 case Intrinsic::loongarch_lsx_vseqi_h:
3163 case Intrinsic::loongarch_lsx_vseqi_w:
3164 case Intrinsic::loongarch_lsx_vseqi_d:
3165 case Intrinsic::loongarch_lsx_vslei_b:
3166 case Intrinsic::loongarch_lsx_vslei_h:
3167 case Intrinsic::loongarch_lsx_vslei_w:
3168 case Intrinsic::loongarch_lsx_vslei_d:
3169 case Intrinsic::loongarch_lsx_vslti_b:
3170 case Intrinsic::loongarch_lsx_vslti_h:
3171 case Intrinsic::loongarch_lsx_vslti_w:
3172 case Intrinsic::loongarch_lsx_vslti_d:
3173 case Intrinsic::loongarch_lasx_xvseqi_b:
3174 case Intrinsic::loongarch_lasx_xvseqi_h:
3175 case Intrinsic::loongarch_lasx_xvseqi_w:
3176 case Intrinsic::loongarch_lasx_xvseqi_d:
3177 case Intrinsic::loongarch_lasx_xvslei_b:
3178 case Intrinsic::loongarch_lasx_xvslei_h:
3179 case Intrinsic::loongarch_lasx_xvslei_w:
3180 case Intrinsic::loongarch_lasx_xvslei_d:
3181 case Intrinsic::loongarch_lasx_xvslti_b:
3182 case Intrinsic::loongarch_lasx_xvslti_h:
3183 case Intrinsic::loongarch_lasx_xvslti_w:
3184 case Intrinsic::loongarch_lasx_xvslti_d:
3185 return checkIntrinsicImmArg<5>(Op, ImmOp: 2, DAG, /*IsSigned=*/true);
3186 case Intrinsic::loongarch_lsx_vsrlni_h_w:
3187 case Intrinsic::loongarch_lsx_vsrani_h_w:
3188 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
3189 case Intrinsic::loongarch_lsx_vsrarni_h_w:
3190 case Intrinsic::loongarch_lsx_vssrlni_h_w:
3191 case Intrinsic::loongarch_lsx_vssrani_h_w:
3192 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
3193 case Intrinsic::loongarch_lsx_vssrani_hu_w:
3194 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
3195 case Intrinsic::loongarch_lsx_vssrarni_h_w:
3196 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
3197 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
3198 case Intrinsic::loongarch_lsx_vfrstpi_b:
3199 case Intrinsic::loongarch_lsx_vfrstpi_h:
3200 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
3201 case Intrinsic::loongarch_lasx_xvsrani_h_w:
3202 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
3203 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
3204 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
3205 case Intrinsic::loongarch_lasx_xvssrani_h_w:
3206 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
3207 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
3208 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
3209 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
3210 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
3211 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
3212 case Intrinsic::loongarch_lasx_xvfrstpi_b:
3213 case Intrinsic::loongarch_lasx_xvfrstpi_h:
3214 return checkIntrinsicImmArg<5>(Op, ImmOp: 3, DAG);
3215 case Intrinsic::loongarch_lsx_vsat_d:
3216 case Intrinsic::loongarch_lsx_vsat_du:
3217 case Intrinsic::loongarch_lsx_vrotri_d:
3218 case Intrinsic::loongarch_lsx_vsrlri_d:
3219 case Intrinsic::loongarch_lsx_vsrari_d:
3220 case Intrinsic::loongarch_lasx_xvsat_d:
3221 case Intrinsic::loongarch_lasx_xvsat_du:
3222 case Intrinsic::loongarch_lasx_xvrotri_d:
3223 case Intrinsic::loongarch_lasx_xvsrlri_d:
3224 case Intrinsic::loongarch_lasx_xvsrari_d:
3225 return checkIntrinsicImmArg<6>(Op, ImmOp: 2, DAG);
3226 case Intrinsic::loongarch_lsx_vsrlni_w_d:
3227 case Intrinsic::loongarch_lsx_vsrani_w_d:
3228 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
3229 case Intrinsic::loongarch_lsx_vsrarni_w_d:
3230 case Intrinsic::loongarch_lsx_vssrlni_w_d:
3231 case Intrinsic::loongarch_lsx_vssrani_w_d:
3232 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
3233 case Intrinsic::loongarch_lsx_vssrani_wu_d:
3234 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
3235 case Intrinsic::loongarch_lsx_vssrarni_w_d:
3236 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
3237 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
3238 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
3239 case Intrinsic::loongarch_lasx_xvsrani_w_d:
3240 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
3241 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
3242 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
3243 case Intrinsic::loongarch_lasx_xvssrani_w_d:
3244 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
3245 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
3246 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
3247 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
3248 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
3249 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
3250 return checkIntrinsicImmArg<6>(Op, ImmOp: 3, DAG);
3251 case Intrinsic::loongarch_lsx_vsrlni_d_q:
3252 case Intrinsic::loongarch_lsx_vsrani_d_q:
3253 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
3254 case Intrinsic::loongarch_lsx_vsrarni_d_q:
3255 case Intrinsic::loongarch_lsx_vssrlni_d_q:
3256 case Intrinsic::loongarch_lsx_vssrani_d_q:
3257 case Intrinsic::loongarch_lsx_vssrlni_du_q:
3258 case Intrinsic::loongarch_lsx_vssrani_du_q:
3259 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
3260 case Intrinsic::loongarch_lsx_vssrarni_d_q:
3261 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
3262 case Intrinsic::loongarch_lsx_vssrarni_du_q:
3263 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
3264 case Intrinsic::loongarch_lasx_xvsrani_d_q:
3265 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
3266 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
3267 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
3268 case Intrinsic::loongarch_lasx_xvssrani_d_q:
3269 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
3270 case Intrinsic::loongarch_lasx_xvssrani_du_q:
3271 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
3272 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
3273 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
3274 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
3275 return checkIntrinsicImmArg<7>(Op, ImmOp: 3, DAG);
3276 case Intrinsic::loongarch_lsx_vnori_b:
3277 case Intrinsic::loongarch_lsx_vshuf4i_b:
3278 case Intrinsic::loongarch_lsx_vshuf4i_h:
3279 case Intrinsic::loongarch_lsx_vshuf4i_w:
3280 case Intrinsic::loongarch_lasx_xvnori_b:
3281 case Intrinsic::loongarch_lasx_xvshuf4i_b:
3282 case Intrinsic::loongarch_lasx_xvshuf4i_h:
3283 case Intrinsic::loongarch_lasx_xvshuf4i_w:
3284 case Intrinsic::loongarch_lasx_xvpermi_d:
3285 return checkIntrinsicImmArg<8>(Op, ImmOp: 2, DAG);
3286 case Intrinsic::loongarch_lsx_vshuf4i_d:
3287 case Intrinsic::loongarch_lsx_vpermi_w:
3288 case Intrinsic::loongarch_lsx_vbitseli_b:
3289 case Intrinsic::loongarch_lsx_vextrins_b:
3290 case Intrinsic::loongarch_lsx_vextrins_h:
3291 case Intrinsic::loongarch_lsx_vextrins_w:
3292 case Intrinsic::loongarch_lsx_vextrins_d:
3293 case Intrinsic::loongarch_lasx_xvshuf4i_d:
3294 case Intrinsic::loongarch_lasx_xvpermi_w:
3295 case Intrinsic::loongarch_lasx_xvpermi_q:
3296 case Intrinsic::loongarch_lasx_xvbitseli_b:
3297 case Intrinsic::loongarch_lasx_xvextrins_b:
3298 case Intrinsic::loongarch_lasx_xvextrins_h:
3299 case Intrinsic::loongarch_lasx_xvextrins_w:
3300 case Intrinsic::loongarch_lasx_xvextrins_d:
3301 return checkIntrinsicImmArg<8>(Op, ImmOp: 3, DAG);
3302 case Intrinsic::loongarch_lsx_vrepli_b:
3303 case Intrinsic::loongarch_lsx_vrepli_h:
3304 case Intrinsic::loongarch_lsx_vrepli_w:
3305 case Intrinsic::loongarch_lsx_vrepli_d:
3306 case Intrinsic::loongarch_lasx_xvrepli_b:
3307 case Intrinsic::loongarch_lasx_xvrepli_h:
3308 case Intrinsic::loongarch_lasx_xvrepli_w:
3309 case Intrinsic::loongarch_lasx_xvrepli_d:
3310 return checkIntrinsicImmArg<10>(Op, ImmOp: 1, DAG, /*IsSigned=*/true);
3311 case Intrinsic::loongarch_lsx_vldi:
3312 case Intrinsic::loongarch_lasx_xvldi:
3313 return checkIntrinsicImmArg<13>(Op, ImmOp: 1, DAG, /*IsSigned=*/true);
3314 }
3315}
3316
3317// Helper function that emits an error message for intrinsics with a chain and
3318// returns the merge values of an UNDEF and the chain.
3319static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
3320 StringRef ErrorMsg,
3321 SelectionDAG &DAG) {
3322 DAG.getContext()->emitError(ErrorStr: Op->getOperationName(G: 0) + ": " + ErrorMsg + ".");
3323 return DAG.getMergeValues(Ops: {DAG.getUNDEF(VT: Op.getValueType()), Op.getOperand(i: 0)},
3324 dl: SDLoc(Op));
3325}
3326
3327SDValue
3328LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
3329 SelectionDAG &DAG) const {
3330 SDLoc DL(Op);
3331 MVT GRLenVT = Subtarget.getGRLenVT();
3332 EVT VT = Op.getValueType();
3333 SDValue Chain = Op.getOperand(i: 0);
3334 const StringRef ErrorMsgOOR = "argument out of range";
3335 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
3336 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
3337
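// Validate immediate operands up front so that an out-of-range value produces
// a diagnostic (and UNDEF results) instead of an instruction-selection
// failure.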
3338 switch (Op.getConstantOperandVal(i: 1)) {
3339 default:
3340 return Op;
3341 case Intrinsic::loongarch_crc_w_b_w:
3342 case Intrinsic::loongarch_crc_w_h_w:
3343 case Intrinsic::loongarch_crc_w_w_w:
3344 case Intrinsic::loongarch_crc_w_d_w:
3345 case Intrinsic::loongarch_crcc_w_b_w:
3346 case Intrinsic::loongarch_crcc_w_h_w:
3347 case Intrinsic::loongarch_crcc_w_w_w:
3348 case Intrinsic::loongarch_crcc_w_d_w:
3349 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG);
3350 case Intrinsic::loongarch_csrrd_w:
3351 case Intrinsic::loongarch_csrrd_d: {
3352 unsigned Imm = Op.getConstantOperandVal(i: 2);
3353 return !isUInt<14>(x: Imm)
3354 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
3355 : DAG.getNode(Opcode: LoongArchISD::CSRRD, DL, ResultTys: {GRLenVT, MVT::Other},
3356 Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
3357 }
3358 case Intrinsic::loongarch_csrwr_w:
3359 case Intrinsic::loongarch_csrwr_d: {
3360 unsigned Imm = Op.getConstantOperandVal(i: 3);
3361 return !isUInt<14>(x: Imm)
3362 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
3363 : DAG.getNode(Opcode: LoongArchISD::CSRWR, DL, ResultTys: {GRLenVT, MVT::Other},
3364 Ops: {Chain, Op.getOperand(i: 2),
3365 DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
3366 }
3367 case Intrinsic::loongarch_csrxchg_w:
3368 case Intrinsic::loongarch_csrxchg_d: {
3369 unsigned Imm = Op.getConstantOperandVal(i: 4);
3370 return !isUInt<14>(x: Imm)
3371 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
3372 : DAG.getNode(Opcode: LoongArchISD::CSRXCHG, DL, ResultTys: {GRLenVT, MVT::Other},
3373 Ops: {Chain, Op.getOperand(i: 2), Op.getOperand(i: 3),
3374 DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
3375 }
3376 case Intrinsic::loongarch_iocsrrd_d: {
3377 return DAG.getNode(
3378 Opcode: LoongArchISD::IOCSRRD_D, DL, ResultTys: {GRLenVT, MVT::Other},
3379 Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op.getOperand(i: 2))});
3380 }
3381#define IOCSRRD_CASE(NAME, NODE) \
3382 case Intrinsic::loongarch_##NAME: { \
3383 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
3384 {Chain, Op.getOperand(2)}); \
3385 }
3386 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
3387 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
3388 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
3389#undef IOCSRRD_CASE
3390 case Intrinsic::loongarch_cpucfg: {
3391 return DAG.getNode(Opcode: LoongArchISD::CPUCFG, DL, ResultTys: {GRLenVT, MVT::Other},
3392 Ops: {Chain, Op.getOperand(i: 2)});
3393 }
3394 case Intrinsic::loongarch_lddir_d: {
3395 unsigned Imm = Op.getConstantOperandVal(i: 3);
3396 return !isUInt<8>(x: Imm)
3397 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
3398 : Op;
3399 }
3400 case Intrinsic::loongarch_movfcsr2gr: {
3401 if (!Subtarget.hasBasicF())
3402 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgReqF, DAG);
3403 unsigned Imm = Op.getConstantOperandVal(i: 2);
3404 return !isUInt<2>(x: Imm)
3405 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
3406 : DAG.getNode(Opcode: LoongArchISD::MOVFCSR2GR, DL, ResultTys: {VT, MVT::Other},
3407 Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
3408 }
3409 case Intrinsic::loongarch_lsx_vld:
3410 case Intrinsic::loongarch_lsx_vldrepl_b:
3411 case Intrinsic::loongarch_lasx_xvld:
3412 case Intrinsic::loongarch_lasx_xvldrepl_b:
3413 return !isInt<12>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue())
3414 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
3415 : SDValue();
3416 case Intrinsic::loongarch_lsx_vldrepl_h:
3417 case Intrinsic::loongarch_lasx_xvldrepl_h:
3418 return !isShiftedInt<11, 1>(
3419 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue())
3420 ? emitIntrinsicWithChainErrorMessage(
3421 Op, ErrorMsg: "argument out of range or not a multiple of 2", DAG)
3422 : SDValue();
3423 case Intrinsic::loongarch_lsx_vldrepl_w:
3424 case Intrinsic::loongarch_lasx_xvldrepl_w:
3425 return !isShiftedInt<10, 2>(
3426 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue())
3427 ? emitIntrinsicWithChainErrorMessage(
3428 Op, ErrorMsg: "argument out of range or not a multiple of 4", DAG)
3429 : SDValue();
3430 case Intrinsic::loongarch_lsx_vldrepl_d:
3431 case Intrinsic::loongarch_lasx_xvldrepl_d:
3432 return !isShiftedInt<9, 3>(
3433 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue())
3434 ? emitIntrinsicWithChainErrorMessage(
3435 Op, ErrorMsg: "argument out of range or not a multiple of 8", DAG)
3436 : SDValue();
3437 }
3438}
3439
3440// Helper function that emits an error message for intrinsics with a void
3441// return value and returns the chain.
3442static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
3443 SelectionDAG &DAG) {
3444
3445 DAG.getContext()->emitError(ErrorStr: Op->getOperationName(G: 0) + ": " + ErrorMsg + ".");
3446 return Op.getOperand(i: 0);
3447}
3448
3449SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
3450 SelectionDAG &DAG) const {
3451 SDLoc DL(Op);
3452 MVT GRLenVT = Subtarget.getGRLenVT();
3453 SDValue Chain = Op.getOperand(i: 0);
3454 uint64_t IntrinsicEnum = Op.getConstantOperandVal(i: 1);
3455 SDValue Op2 = Op.getOperand(i: 2);
3456 const StringRef ErrorMsgOOR = "argument out of range";
3457 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
3458 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
3459 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
3460
3461 switch (IntrinsicEnum) {
3462 default:
3463 // TODO: Add more Intrinsics.
3464 return SDValue();
3465 case Intrinsic::loongarch_cacop_d:
3466 case Intrinsic::loongarch_cacop_w: {
3467 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
3468 return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG);
3469 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
3470 return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA32, DAG);
3471 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
3472 unsigned Imm1 = Op2->getAsZExtVal();
3473 int Imm2 = cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue();
3474 if (!isUInt<5>(x: Imm1) || !isInt<12>(x: Imm2))
3475 return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG);
3476 return Op;
3477 }
3478 case Intrinsic::loongarch_dbar: {
3479 unsigned Imm = Op2->getAsZExtVal();
3480 return !isUInt<15>(x: Imm)
3481 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
3482 : DAG.getNode(Opcode: LoongArchISD::DBAR, DL, VT: MVT::Other, N1: Chain,
3483 N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT));
3484 }
3485 case Intrinsic::loongarch_ibar: {
3486 unsigned Imm = Op2->getAsZExtVal();
3487 return !isUInt<15>(x: Imm)
3488 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
3489 : DAG.getNode(Opcode: LoongArchISD::IBAR, DL, VT: MVT::Other, N1: Chain,
3490 N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT));
3491 }
3492 case Intrinsic::loongarch_break: {
3493 unsigned Imm = Op2->getAsZExtVal();
3494 return !isUInt<15>(x: Imm)
3495 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
3496 : DAG.getNode(Opcode: LoongArchISD::BREAK, DL, VT: MVT::Other, N1: Chain,
3497 N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT));
3498 }
3499 case Intrinsic::loongarch_movgr2fcsr: {
3500 if (!Subtarget.hasBasicF())
3501 return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqF, DAG);
3502 unsigned Imm = Op2->getAsZExtVal();
3503 return !isUInt<2>(x: Imm)
3504 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
3505 : DAG.getNode(Opcode: LoongArchISD::MOVGR2FCSR, DL, VT: MVT::Other, N1: Chain,
3506 N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT),
3507 N3: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: GRLenVT,
3508 Operand: Op.getOperand(i: 3)));
3509 }
3510 case Intrinsic::loongarch_syscall: {
3511 unsigned Imm = Op2->getAsZExtVal();
3512 return !isUInt<15>(x: Imm)
3513 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
3514 : DAG.getNode(Opcode: LoongArchISD::SYSCALL, DL, VT: MVT::Other, N1: Chain,
3515 N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT));
3516 }
3517#define IOCSRWR_CASE(NAME, NODE) \
3518 case Intrinsic::loongarch_##NAME: { \
3519 SDValue Op3 = Op.getOperand(3); \
3520 return Subtarget.is64Bit() \
3521 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
3522 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
3523 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
3524 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
3525 Op3); \
3526 }
3527 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
3528 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
3529 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
3530#undef IOCSRWR_CASE
3531 case Intrinsic::loongarch_iocsrwr_d: {
3532 return !Subtarget.is64Bit()
3533 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG)
3534 : DAG.getNode(Opcode: LoongArchISD::IOCSRWR_D, DL, VT: MVT::Other, N1: Chain,
3535 N2: Op2,
3536 N3: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64,
3537 Operand: Op.getOperand(i: 3)));
3538 }
3539#define ASRT_LE_GT_CASE(NAME) \
3540 case Intrinsic::loongarch_##NAME: { \
3541 return !Subtarget.is64Bit() \
3542 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
3543 : Op; \
3544 }
3545 ASRT_LE_GT_CASE(asrtle_d)
3546 ASRT_LE_GT_CASE(asrtgt_d)
3547#undef ASRT_LE_GT_CASE
3548 case Intrinsic::loongarch_ldpte_d: {
3549 unsigned Imm = Op.getConstantOperandVal(i: 3);
3550 return !Subtarget.is64Bit()
3551 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG)
3552 : !isUInt<8>(x: Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
3553 : Op;
3554 }
3555 case Intrinsic::loongarch_lsx_vst:
3556 case Intrinsic::loongarch_lasx_xvst:
3557 return !isInt<12>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue())
3558 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
3559 : SDValue();
3560 case Intrinsic::loongarch_lasx_xvstelm_b:
3561 return (!isInt<8>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
3562 !isUInt<5>(x: Op.getConstantOperandVal(i: 5)))
3563 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
3564 : SDValue();
3565 case Intrinsic::loongarch_lsx_vstelm_b:
3566 return (!isInt<8>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
3567 !isUInt<4>(x: Op.getConstantOperandVal(i: 5)))
3568 ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG)
3569 : SDValue();
3570 case Intrinsic::loongarch_lasx_xvstelm_h:
3571 return (!isShiftedInt<8, 1>(
3572 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
3573 !isUInt<4>(x: Op.getConstantOperandVal(i: 5)))
3574 ? emitIntrinsicErrorMessage(
3575 Op, ErrorMsg: "argument out of range or not a multiple of 2", DAG)
3576 : SDValue();
3577 case Intrinsic::loongarch_lsx_vstelm_h:
3578 return (!isShiftedInt<8, 1>(
3579 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
3580 !isUInt<3>(x: Op.getConstantOperandVal(i: 5)))
3581 ? emitIntrinsicErrorMessage(
3582 Op, ErrorMsg: "argument out of range or not a multiple of 2", DAG)
3583 : SDValue();
3584 case Intrinsic::loongarch_lasx_xvstelm_w:
3585 return (!isShiftedInt<8, 2>(
3586 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
3587 !isUInt<3>(x: Op.getConstantOperandVal(i: 5)))
3588 ? emitIntrinsicErrorMessage(
3589 Op, ErrorMsg: "argument out of range or not a multiple of 4", DAG)
3590 : SDValue();
3591 case Intrinsic::loongarch_lsx_vstelm_w:
3592 return (!isShiftedInt<8, 2>(
3593 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
3594 !isUInt<2>(x: Op.getConstantOperandVal(i: 5)))
3595 ? emitIntrinsicErrorMessage(
3596 Op, ErrorMsg: "argument out of range or not a multiple of 4", DAG)
3597 : SDValue();
3598 case Intrinsic::loongarch_lasx_xvstelm_d:
3599 return (!isShiftedInt<8, 3>(
3600 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
3601 !isUInt<2>(x: Op.getConstantOperandVal(i: 5)))
3602 ? emitIntrinsicErrorMessage(
3603 Op, ErrorMsg: "argument out of range or not a multiple of 8", DAG)
3604 : SDValue();
3605 case Intrinsic::loongarch_lsx_vstelm_d:
3606 return (!isShiftedInt<8, 3>(
3607 x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) ||
3608 !isUInt<1>(x: Op.getConstantOperandVal(i: 5)))
3609 ? emitIntrinsicErrorMessage(
3610 Op, ErrorMsg: "argument out of range or not a multiple of 8", DAG)
3611 : SDValue();
3612 }
3613}
3614
3615SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
3616 SelectionDAG &DAG) const {
3617 SDLoc DL(Op);
3618 SDValue Lo = Op.getOperand(i: 0);
3619 SDValue Hi = Op.getOperand(i: 1);
3620 SDValue Shamt = Op.getOperand(i: 2);
3621 EVT VT = Lo.getValueType();
3622
3623 // if Shamt-GRLen < 0: // Shamt < GRLen
3624 // Lo = Lo << Shamt
3625 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
3626 // else:
3627 // Hi = Lo << (Shamt-GRLen)
3628 // Lo = 0
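// For example, on LA32 (GRLen == 32) with Shamt == 40 the 'else' branch
// applies: Hi = Lo << 8 and Lo = 0.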
3629
3630 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
3631 SDValue One = DAG.getConstant(Val: 1, DL, VT);
3632 SDValue MinusGRLen =
3633 DAG.getSignedConstant(Val: -(int)Subtarget.getGRLen(), DL, VT);
3634 SDValue GRLenMinus1 = DAG.getConstant(Val: Subtarget.getGRLen() - 1, DL, VT);
3635 SDValue ShamtMinusGRLen = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shamt, N2: MinusGRLen);
3636 SDValue GRLenMinus1Shamt = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Shamt, N2: GRLenMinus1);
3637
3638 SDValue LoTrue = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Lo, N2: Shamt);
3639 SDValue ShiftRight1Lo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo, N2: One);
3640 SDValue ShiftRightLo =
3641 DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: ShiftRight1Lo, N2: GRLenMinus1Shamt);
3642 SDValue ShiftLeftHi = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi, N2: Shamt);
3643 SDValue HiTrue = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShiftLeftHi, N2: ShiftRightLo);
3644 SDValue HiFalse = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Lo, N2: ShamtMinusGRLen);
3645
3646 SDValue CC = DAG.getSetCC(DL, VT, LHS: ShamtMinusGRLen, RHS: Zero, Cond: ISD::SETLT);
3647
3648 Lo = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: LoTrue, N3: Zero);
3649 Hi = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: HiTrue, N3: HiFalse);
3650
3651 SDValue Parts[2] = {Lo, Hi};
3652 return DAG.getMergeValues(Ops: Parts, dl: DL);
3653}
3654
3655SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
3656 SelectionDAG &DAG,
3657 bool IsSRA) const {
3658 SDLoc DL(Op);
3659 SDValue Lo = Op.getOperand(i: 0);
3660 SDValue Hi = Op.getOperand(i: 1);
3661 SDValue Shamt = Op.getOperand(i: 2);
3662 EVT VT = Lo.getValueType();
3663
3664 // SRA expansion:
3665 // if Shamt-GRLen < 0: // Shamt < GRLen
3666 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
3667 // Hi = Hi >>s Shamt
3668 // else:
3669 // Lo = Hi >>s (Shamt-GRLen);
3670 // Hi = Hi >>s (GRLen-1)
3671 //
3672 // SRL expansion:
3673 // if Shamt-GRLen < 0: // Shamt < GRLen
3674 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
3675 // Hi = Hi >>u Shamt
3676 // else:
3677 // Lo = Hi >>u (Shamt-GRLen);
3678 // Hi = 0;
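// For example, on LA32 with Shamt == 40: SRL yields Lo = Hi >>u 8, Hi = 0;
// SRA yields Lo = Hi >>s 8, Hi = Hi >>s 31.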
3679
3680 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
3681
3682 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
3683 SDValue One = DAG.getConstant(Val: 1, DL, VT);
3684 SDValue MinusGRLen =
3685 DAG.getSignedConstant(Val: -(int)Subtarget.getGRLen(), DL, VT);
3686 SDValue GRLenMinus1 = DAG.getConstant(Val: Subtarget.getGRLen() - 1, DL, VT);
3687 SDValue ShamtMinusGRLen = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shamt, N2: MinusGRLen);
3688 SDValue GRLenMinus1Shamt = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Shamt, N2: GRLenMinus1);
3689
3690 SDValue ShiftRightLo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo, N2: Shamt);
3691 SDValue ShiftLeftHi1 = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi, N2: One);
3692 SDValue ShiftLeftHi =
3693 DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: ShiftLeftHi1, N2: GRLenMinus1Shamt);
3694 SDValue LoTrue = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShiftRightLo, N2: ShiftLeftHi);
3695 SDValue HiTrue = DAG.getNode(Opcode: ShiftRightOp, DL, VT, N1: Hi, N2: Shamt);
3696 SDValue LoFalse = DAG.getNode(Opcode: ShiftRightOp, DL, VT, N1: Hi, N2: ShamtMinusGRLen);
3697 SDValue HiFalse =
3698 IsSRA ? DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: Hi, N2: GRLenMinus1) : Zero;
3699
3700 SDValue CC = DAG.getSetCC(DL, VT, LHS: ShamtMinusGRLen, RHS: Zero, Cond: ISD::SETLT);
3701
3702 Lo = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: LoTrue, N3: LoFalse);
3703 Hi = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: HiTrue, N3: HiFalse);
3704
3705 SDValue Parts[2] = {Lo, Hi};
3706 return DAG.getMergeValues(Ops: Parts, dl: DL);
3707}
3708
3709// Returns the opcode of the target-specific SDNode that implements the 32-bit
3710// form of the given Opcode.
3711static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
3712 switch (Opcode) {
3713 default:
3714 llvm_unreachable("Unexpected opcode");
3715 case ISD::SDIV:
3716 return LoongArchISD::DIV_W;
3717 case ISD::UDIV:
3718 return LoongArchISD::DIV_WU;
3719 case ISD::SREM:
3720 return LoongArchISD::MOD_W;
3721 case ISD::UREM:
3722 return LoongArchISD::MOD_WU;
3723 case ISD::SHL:
3724 return LoongArchISD::SLL_W;
3725 case ISD::SRA:
3726 return LoongArchISD::SRA_W;
3727 case ISD::SRL:
3728 return LoongArchISD::SRL_W;
3729 case ISD::ROTL:
3730 case ISD::ROTR:
3731 return LoongArchISD::ROTR_W;
3732 case ISD::CTTZ:
3733 return LoongArchISD::CTZ_W;
3734 case ISD::CTLZ:
3735 return LoongArchISD::CLZ_W;
3736 }
3737}
3738
3739// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
3740// node. Because i8/i16/i32 aren't legal types for LA64, these operations would
3741// otherwise be promoted to i64, making it difficult to select the
3742// SLL_W/.../*W nodes later, because the fact that the operation was originally
3743// of type i8/i16/i32 is lost.
3744static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
3745 unsigned ExtOpc = ISD::ANY_EXTEND) {
3746 SDLoc DL(N);
3747 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(Opcode: N->getOpcode());
3748 SDValue NewOp0, NewRes;
3749
3750 switch (NumOp) {
3751 default:
3752 llvm_unreachable("Unexpected NumOp");
3753 case 1: {
3754 NewOp0 = DAG.getNode(Opcode: ExtOpc, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0));
3755 NewRes = DAG.getNode(Opcode: WOpcode, DL, VT: MVT::i64, Operand: NewOp0);
3756 break;
3757 }
3758 case 2: {
3759 NewOp0 = DAG.getNode(Opcode: ExtOpc, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0));
3760 SDValue NewOp1 = DAG.getNode(Opcode: ExtOpc, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1));
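// Rewrite a 32-bit rotate-left as a rotate-right: rotl(x, amt) == rotr(x,
// 32 - amt), so both ROTL and ROTR can use ROTR_W.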
3761 if (N->getOpcode() == ISD::ROTL) {
3762 SDValue TmpOp = DAG.getConstant(Val: 32, DL, VT: MVT::i64);
3763 NewOp1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: MVT::i64, N1: TmpOp, N2: NewOp1);
3764 }
3765 NewRes = DAG.getNode(Opcode: WOpcode, DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1);
3766 break;
3767 }
3768 // TODO: Handle more NumOp values.
3769 }
3770
3771 // ReplaceNodeResults requires we maintain the same type for the return
3772 // value.
3773 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: NewRes);
3774}
3775
3776// Converts the given 32-bit operation to an i64 operation with sign-extension
3777// semantics, in order to reduce the number of sign-extension instructions.
3778static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
3779 SDLoc DL(N);
3780 SDValue NewOp0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0));
3781 SDValue NewOp1 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1));
3782 SDValue NewWOp = DAG.getNode(Opcode: N->getOpcode(), DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1);
3783 SDValue NewRes = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: MVT::i64, N1: NewWOp,
3784 N2: DAG.getValueType(MVT::i32));
3785 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: NewRes);
3786}
3787
3788// Helper function that emits an error message for intrinsics with or without a
3789// chain, and returns an UNDEF (plus the chain, when present) as the results.
3790static void emitErrorAndReplaceIntrinsicResults(
3791 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
3792 StringRef ErrorMsg, bool WithChain = true) {
3793 DAG.getContext()->emitError(ErrorStr: N->getOperationName(G: 0) + ": " + ErrorMsg + ".");
3794 Results.push_back(Elt: DAG.getUNDEF(VT: N->getValueType(ResNo: 0)));
3795 if (!WithChain)
3796 return;
3797 Results.push_back(Elt: N->getOperand(Num: 0));
3798}
3799
3800template <unsigned N>
3801static void
3802replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
3803 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
3804 unsigned ResOp) {
3805 const StringRef ErrorMsgOOR = "argument out of range";
3806 unsigned Imm = Node->getConstantOperandVal(Num: 2);
3807 if (!isUInt<N>(Imm)) {
3808 emitErrorAndReplaceIntrinsicResults(N: Node, Results, DAG, ErrorMsg: ErrorMsgOOR,
3809 /*WithChain=*/false);
3810 return;
3811 }
3812 SDLoc DL(Node);
3813 SDValue Vec = Node->getOperand(Num: 1);
3814
3815 SDValue PickElt =
3816 DAG.getNode(Opcode: ResOp, DL, VT: Subtarget.getGRLenVT(), N1: Vec,
3817 N2: DAG.getConstant(Val: Imm, DL, VT: Subtarget.getGRLenVT()),
3818 N3: DAG.getValueType(Vec.getValueType().getVectorElementType()));
3819 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: Node->getValueType(ResNo: 0),
3820 Operand: PickElt.getValue(R: 0)));
3821}
3822
3823static void replaceVecCondBranchResults(SDNode *N,
3824 SmallVectorImpl<SDValue> &Results,
3825 SelectionDAG &DAG,
3826 const LoongArchSubtarget &Subtarget,
3827 unsigned ResOp) {
3828 SDLoc DL(N);
3829 SDValue Vec = N->getOperand(Num: 1);
3830
3831 SDValue CB = DAG.getNode(Opcode: ResOp, DL, VT: Subtarget.getGRLenVT(), Operand: Vec);
3832 Results.push_back(
3833 Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: CB.getValue(R: 0)));
3834}
3835
3836static void
3837replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
3838 SelectionDAG &DAG,
3839 const LoongArchSubtarget &Subtarget) {
3840 switch (N->getConstantOperandVal(Num: 0)) {
3841 default:
3842 llvm_unreachable("Unexpected Intrinsic.");
3843 case Intrinsic::loongarch_lsx_vpickve2gr_b:
3844 replaceVPICKVE2GRResults<4>(Node: N, Results, DAG, Subtarget,
3845 ResOp: LoongArchISD::VPICK_SEXT_ELT);
3846 break;
3847 case Intrinsic::loongarch_lsx_vpickve2gr_h:
3848 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
3849 replaceVPICKVE2GRResults<3>(Node: N, Results, DAG, Subtarget,
3850 ResOp: LoongArchISD::VPICK_SEXT_ELT);
3851 break;
3852 case Intrinsic::loongarch_lsx_vpickve2gr_w:
3853 replaceVPICKVE2GRResults<2>(Node: N, Results, DAG, Subtarget,
3854 ResOp: LoongArchISD::VPICK_SEXT_ELT);
3855 break;
3856 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
3857 replaceVPICKVE2GRResults<4>(Node: N, Results, DAG, Subtarget,
3858 ResOp: LoongArchISD::VPICK_ZEXT_ELT);
3859 break;
3860 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
3861 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
3862 replaceVPICKVE2GRResults<3>(Node: N, Results, DAG, Subtarget,
3863 ResOp: LoongArchISD::VPICK_ZEXT_ELT);
3864 break;
3865 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
3866 replaceVPICKVE2GRResults<2>(Node: N, Results, DAG, Subtarget,
3867 ResOp: LoongArchISD::VPICK_ZEXT_ELT);
3868 break;
3869 case Intrinsic::loongarch_lsx_bz_b:
3870 case Intrinsic::loongarch_lsx_bz_h:
3871 case Intrinsic::loongarch_lsx_bz_w:
3872 case Intrinsic::loongarch_lsx_bz_d:
3873 case Intrinsic::loongarch_lasx_xbz_b:
3874 case Intrinsic::loongarch_lasx_xbz_h:
3875 case Intrinsic::loongarch_lasx_xbz_w:
3876 case Intrinsic::loongarch_lasx_xbz_d:
3877 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
3878 ResOp: LoongArchISD::VALL_ZERO);
3879 break;
3880 case Intrinsic::loongarch_lsx_bz_v:
3881 case Intrinsic::loongarch_lasx_xbz_v:
3882 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
3883 ResOp: LoongArchISD::VANY_ZERO);
3884 break;
3885 case Intrinsic::loongarch_lsx_bnz_b:
3886 case Intrinsic::loongarch_lsx_bnz_h:
3887 case Intrinsic::loongarch_lsx_bnz_w:
3888 case Intrinsic::loongarch_lsx_bnz_d:
3889 case Intrinsic::loongarch_lasx_xbnz_b:
3890 case Intrinsic::loongarch_lasx_xbnz_h:
3891 case Intrinsic::loongarch_lasx_xbnz_w:
3892 case Intrinsic::loongarch_lasx_xbnz_d:
3893 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
3894 ResOp: LoongArchISD::VALL_NONZERO);
3895 break;
3896 case Intrinsic::loongarch_lsx_bnz_v:
3897 case Intrinsic::loongarch_lasx_xbnz_v:
3898 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
3899 ResOp: LoongArchISD::VANY_NONZERO);
3900 break;
3901 }
3902}
3903
3904static void replaceCMP_XCHG_128Results(SDNode *N,
3905 SmallVectorImpl<SDValue> &Results,
3906 SelectionDAG &DAG) {
3907 assert(N->getValueType(0) == MVT::i128 &&
3908 "AtomicCmpSwap on types less than 128 should be legal");
3909 MachineMemOperand *MemOp = cast<MemSDNode>(Val: N)->getMemOperand();
3910
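// Select the pseudo from the merged memory ordering: any ordering with
// acquire semantics uses the Acquire variant.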
3911 unsigned Opcode;
3912 switch (MemOp->getMergedOrdering()) {
3913 case AtomicOrdering::Acquire:
3914 case AtomicOrdering::AcquireRelease:
3915 case AtomicOrdering::SequentiallyConsistent:
3916 Opcode = LoongArch::PseudoCmpXchg128Acquire;
3917 break;
3918 case AtomicOrdering::Monotonic:
3919 case AtomicOrdering::Release:
3920 Opcode = LoongArch::PseudoCmpXchg128;
3921 break;
3922 default:
3923 llvm_unreachable("Unexpected ordering!");
3924 }
3925
3926 SDLoc DL(N);
3927 auto CmpVal = DAG.SplitScalar(N: N->getOperand(Num: 2), DL, LoVT: MVT::i64, HiVT: MVT::i64);
3928 auto NewVal = DAG.SplitScalar(N: N->getOperand(Num: 3), DL, LoVT: MVT::i64, HiVT: MVT::i64);
3929 SDValue Ops[] = {N->getOperand(Num: 1), CmpVal.first, CmpVal.second,
3930 NewVal.first, NewVal.second, N->getOperand(Num: 0)};
3931
3932 SDNode *CmpSwap = DAG.getMachineNode(
3933 Opcode, dl: SDLoc(N), VTs: DAG.getVTList(VT1: MVT::i64, VT2: MVT::i64, VT3: MVT::i64, VT4: MVT::Other),
3934 Ops);
3935 DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: CmpSwap), NewMemRefs: {MemOp});
3936 Results.push_back(Elt: DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: MVT::i128,
3937 N1: SDValue(CmpSwap, 0), N2: SDValue(CmpSwap, 1)));
3938 Results.push_back(Elt: SDValue(CmpSwap, 3));
3939}
3940
3941void LoongArchTargetLowering::ReplaceNodeResults(
3942 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
3943 SDLoc DL(N);
3944 EVT VT = N->getValueType(ResNo: 0);
3945 switch (N->getOpcode()) {
3946 default:
3947 llvm_unreachable("Don't know how to legalize this operation");
3948 case ISD::ADD:
3949 case ISD::SUB:
3950 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
3951 "Unexpected custom legalisation");
3952 Results.push_back(Elt: customLegalizeToWOpWithSExt(N, DAG));
3953 break;
3954 case ISD::SDIV:
3955 case ISD::UDIV:
3956 case ISD::SREM:
3957 case ISD::UREM:
3958 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
3959 "Unexpected custom legalisation");
3960 Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 2,
3961 ExtOpc: Subtarget.hasDiv32() && VT == MVT::i32
3962 ? ISD::ANY_EXTEND
3963 : ISD::SIGN_EXTEND));
3964 break;
3965 case ISD::SHL:
3966 case ISD::SRA:
3967 case ISD::SRL:
3968 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
3969 "Unexpected custom legalisation");
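// Only variable shift amounts are converted to the *_W form here; constant
// amounts are already handled correctly by the default i64 promotion and the
// existing patterns.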
3970 if (N->getOperand(Num: 1).getOpcode() != ISD::Constant) {
3971 Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 2));
3972 break;
3973 }
3974 break;
3975 case ISD::ROTL:
3976 case ISD::ROTR:
3977 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
3978 "Unexpected custom legalisation");
3979 Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 2));
3980 break;
3981 case ISD::FP_TO_SINT: {
3982 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
3983 "Unexpected custom legalisation");
3984 SDValue Src = N->getOperand(Num: 0);
3985 EVT FVT = EVT::getFloatingPointVT(BitWidth: N->getValueSizeInBits(ResNo: 0));
3986 if (getTypeAction(Context&: *DAG.getContext(), VT: Src.getValueType()) !=
3987 TargetLowering::TypeSoftenFloat) {
3988 if (!isTypeLegal(VT: Src.getValueType()))
3989 return;
3990 if (Src.getValueType() == MVT::f16)
3991 Src = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: MVT::f32, Operand: Src);
3992 SDValue Dst = DAG.getNode(Opcode: LoongArchISD::FTINT, DL, VT: FVT, Operand: Src);
3993 Results.push_back(Elt: DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Dst));
3994 return;
3995 }
3996 // If the FP type needs to be softened, emit a library call using the 'si'
3997 // version. If we left it to default legalization we'd end up with 'di'.
3998 RTLIB::Libcall LC;
3999 LC = RTLIB::getFPTOSINT(OpVT: Src.getValueType(), RetVT: VT);
4000 MakeLibCallOptions CallOptions;
4001 EVT OpVT = Src.getValueType();
4002 CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT: VT, Value: true);
4003 SDValue Chain = SDValue();
4004 SDValue Result;
4005 std::tie(args&: Result, args&: Chain) =
4006 makeLibCall(DAG, LC, RetVT: VT, Ops: Src, CallOptions, dl: DL, Chain);
4007 Results.push_back(Elt: Result);
4008 break;
4009 }
4010 case ISD::BITCAST: {
4011 SDValue Src = N->getOperand(Num: 0);
4012 EVT SrcVT = Src.getValueType();
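// Two custom cases: bitcasting f32 to i32 on LA64 goes through a 64-bit GPR
// move plus truncate, and bitcasting f64 to i64 on LA32 splits the double
// into two i32 halves reassembled with BUILD_PAIR.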
4013 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
4014 Subtarget.hasBasicF()) {
4015 SDValue Dst =
4016 DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Src);
4017 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Dst));
4018 } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
4019 SDValue NewReg = DAG.getNode(Opcode: LoongArchISD::SPLIT_PAIR_F64, DL,
4020 VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32), N: Src);
4021 SDValue RetReg = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: MVT::i64,
4022 N1: NewReg.getValue(R: 0), N2: NewReg.getValue(R: 1));
4023 Results.push_back(Elt: RetReg);
4024 }
4025 break;
4026 }
4027 case ISD::FP_TO_UINT: {
4028 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4029 "Unexpected custom legalisation");
4030 auto &TLI = DAG.getTargetLoweringInfo();
4031 SDValue Tmp1, Tmp2;
4032 TLI.expandFP_TO_UINT(N, Result&: Tmp1, Chain&: Tmp2, DAG);
4033 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Tmp1));
4034 break;
4035 }
4036 case ISD::BSWAP: {
4037 SDValue Src = N->getOperand(Num: 0);
4038 assert((VT == MVT::i16 || VT == MVT::i32) &&
4039 "Unexpected custom legalization");
4040 MVT GRLenVT = Subtarget.getGRLenVT();
4041 SDValue NewSrc = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: GRLenVT, Operand: Src);
4042 SDValue Tmp;
4043 switch (VT.getSizeInBits()) {
4044 default:
4045 llvm_unreachable("Unexpected operand width");
4046 case 16:
4047 Tmp = DAG.getNode(Opcode: LoongArchISD::REVB_2H, DL, VT: GRLenVT, Operand: NewSrc);
4048 break;
4049 case 32:
4050 // Only LA64 will get here, due to the size mismatch between VT and
4051 // GRLenVT; the LA32 lowering is defined directly in LoongArchInstrInfo.
4052 Tmp = DAG.getNode(Opcode: LoongArchISD::REVB_2W, DL, VT: GRLenVT, Operand: NewSrc);
4053 break;
4054 }
4055 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Tmp));
4056 break;
4057 }
4058 case ISD::BITREVERSE: {
4059 SDValue Src = N->getOperand(Num: 0);
4060 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
4061 "Unexpected custom legalization");
4062 MVT GRLenVT = Subtarget.getGRLenVT();
4063 SDValue NewSrc = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: GRLenVT, Operand: Src);
4064 SDValue Tmp;
4065 switch (VT.getSizeInBits()) {
4066 default:
4067 llvm_unreachable("Unexpected operand width");
4068 case 8:
4069 Tmp = DAG.getNode(Opcode: LoongArchISD::BITREV_4B, DL, VT: GRLenVT, Operand: NewSrc);
4070 break;
4071 case 32:
4072 Tmp = DAG.getNode(Opcode: LoongArchISD::BITREV_W, DL, VT: GRLenVT, Operand: NewSrc);
4073 break;
4074 }
4075 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Tmp));
4076 break;
4077 }
4078 case ISD::CTLZ:
4079 case ISD::CTTZ: {
4080 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4081 "Unexpected custom legalisation");
4082 Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 1));
4083 break;
4084 }
4085 case ISD::INTRINSIC_W_CHAIN: {
4086 SDValue Chain = N->getOperand(Num: 0);
4087 SDValue Op2 = N->getOperand(Num: 2);
4088 MVT GRLenVT = Subtarget.getGRLenVT();
4089 const StringRef ErrorMsgOOR = "argument out of range";
4090 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4091 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4092
4093 switch (N->getConstantOperandVal(Num: 1)) {
4094 default:
4095 llvm_unreachable("Unexpected Intrinsic.");
4096 case Intrinsic::loongarch_movfcsr2gr: {
4097 if (!Subtarget.hasBasicF()) {
4098 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgReqF);
4099 return;
4100 }
4101 unsigned Imm = Op2->getAsZExtVal();
4102 if (!isUInt<2>(x: Imm)) {
4103 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR);
4104 return;
4105 }
4106 SDValue MOVFCSR2GRResults = DAG.getNode(
4107 Opcode: LoongArchISD::MOVFCSR2GR, DL: SDLoc(N), ResultTys: {MVT::i64, MVT::Other},
4108 Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
4109 Results.push_back(
4110 Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: MOVFCSR2GRResults.getValue(R: 0)));
4111 Results.push_back(Elt: MOVFCSR2GRResults.getValue(R: 1));
4112 break;
4113 }
4114#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
4115 case Intrinsic::loongarch_##NAME: { \
4116 SDValue NODE = DAG.getNode( \
4117 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4118 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4119 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
4120 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
4121 Results.push_back(NODE.getValue(1)); \
4122 break; \
4123 }
4124 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
4125 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
4126 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
4127 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
4128 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
4129 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
4130#undef CRC_CASE_EXT_BINARYOP
4131
4132#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
4133 case Intrinsic::loongarch_##NAME: { \
4134 SDValue NODE = DAG.getNode( \
4135 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4136 {Chain, Op2, \
4137 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
4138 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
4139 Results.push_back(NODE.getValue(1)); \
4140 break; \
4141 }
4142 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
4143 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
4144#undef CRC_CASE_EXT_UNARYOP
4145#define CSR_CASE(ID) \
4146 case Intrinsic::loongarch_##ID: { \
4147 if (!Subtarget.is64Bit()) \
4148 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
4149 break; \
4150 }
4151 CSR_CASE(csrrd_d);
4152 CSR_CASE(csrwr_d);
4153 CSR_CASE(csrxchg_d);
4154 CSR_CASE(iocsrrd_d);
4155#undef CSR_CASE
4156 case Intrinsic::loongarch_csrrd_w: {
4157 unsigned Imm = Op2->getAsZExtVal();
4158 if (!isUInt<14>(x: Imm)) {
4159 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR);
4160 return;
4161 }
4162 SDValue CSRRDResults =
4163 DAG.getNode(Opcode: LoongArchISD::CSRRD, DL, ResultTys: {GRLenVT, MVT::Other},
4164 Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
4165 Results.push_back(
4166 Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRRDResults.getValue(R: 0)));
4167 Results.push_back(Elt: CSRRDResults.getValue(R: 1));
4168 break;
4169 }
4170 case Intrinsic::loongarch_csrwr_w: {
4171 unsigned Imm = N->getConstantOperandVal(Num: 3);
4172 if (!isUInt<14>(x: Imm)) {
4173 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR);
4174 return;
4175 }
4176 SDValue CSRWRResults =
4177 DAG.getNode(Opcode: LoongArchISD::CSRWR, DL, ResultTys: {GRLenVT, MVT::Other},
4178 Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op2),
4179 DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
4180 Results.push_back(
4181 Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRWRResults.getValue(R: 0)));
4182 Results.push_back(Elt: CSRWRResults.getValue(R: 1));
4183 break;
4184 }
4185 case Intrinsic::loongarch_csrxchg_w: {
4186 unsigned Imm = N->getConstantOperandVal(Num: 4);
4187 if (!isUInt<14>(x: Imm)) {
4188 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR);
4189 return;
4190 }
4191 SDValue CSRXCHGResults = DAG.getNode(
4192 Opcode: LoongArchISD::CSRXCHG, DL, ResultTys: {GRLenVT, MVT::Other},
4193 Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op2),
4194 DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 3)),
4195 DAG.getConstant(Val: Imm, DL, VT: GRLenVT)});
4196 Results.push_back(
4197 Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRXCHGResults.getValue(R: 0)));
4198 Results.push_back(Elt: CSRXCHGResults.getValue(R: 1));
4199 break;
4200 }
4201#define IOCSRRD_CASE(NAME, NODE) \
4202 case Intrinsic::loongarch_##NAME: { \
4203 SDValue IOCSRRDResults = \
4204 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4205 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
4206 Results.push_back( \
4207 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
4208 Results.push_back(IOCSRRDResults.getValue(1)); \
4209 break; \
4210 }
4211 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4212 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4213 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4214#undef IOCSRRD_CASE
4215 case Intrinsic::loongarch_cpucfg: {
4216 SDValue CPUCFGResults =
4217 DAG.getNode(Opcode: LoongArchISD::CPUCFG, DL, ResultTys: {GRLenVT, MVT::Other},
4218 Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op2)});
4219 Results.push_back(
4220 Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CPUCFGResults.getValue(R: 0)));
4221 Results.push_back(Elt: CPUCFGResults.getValue(R: 1));
4222 break;
4223 }
4224 case Intrinsic::loongarch_lddir_d: {
4225 if (!Subtarget.is64Bit()) {
4226 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgReqLA64);
4227 return;
4228 }
4229 break;
4230 }
4231 }
4232 break;
4233 }
4234 case ISD::READ_REGISTER: {
4235 if (Subtarget.is64Bit())
4236 DAG.getContext()->emitError(
4237 ErrorStr: "On LA64, only 64-bit registers can be read.");
4238 else
4239 DAG.getContext()->emitError(
4240 ErrorStr: "On LA32, only 32-bit registers can be read.");
4241 Results.push_back(Elt: DAG.getUNDEF(VT));
4242 Results.push_back(Elt: N->getOperand(Num: 0));
4243 break;
4244 }
4245 case ISD::INTRINSIC_WO_CHAIN: {
4246 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
4247 break;
4248 }
4249 case ISD::LROUND: {
4250 SDValue Op0 = N->getOperand(Num: 0);
4251 EVT OpVT = Op0.getValueType();
4252 RTLIB::Libcall LC =
4253 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
4254 MakeLibCallOptions CallOptions;
4255 CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT: MVT::i64, Value: true);
4256 SDValue Result = makeLibCall(DAG, LC, RetVT: MVT::i64, Ops: Op0, CallOptions, dl: DL).first;
4257 Result = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Result);
4258 Results.push_back(Elt: Result);
4259 break;
4260 }
4261 case ISD::ATOMIC_CMP_SWAP: {
4262 replaceCMP_XCHG_128Results(N, Results, DAG);
4263 break;
4264 }
4265 case ISD::TRUNCATE: {
4266 MVT VT = N->getSimpleValueType(ResNo: 0);
4267 if (getTypeAction(Context&: *DAG.getContext(), VT) != TypeWidenVector)
4268 return;
4269
4270 MVT WidenVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT).getSimpleVT();
4271 SDValue In = N->getOperand(Num: 0);
4272 EVT InVT = In.getValueType();
4273 EVT InEltVT = InVT.getVectorElementType();
4274 EVT EltVT = VT.getVectorElementType();
4275 unsigned MinElts = VT.getVectorNumElements();
4276 unsigned WidenNumElts = WidenVT.getVectorNumElements();
4277 unsigned InBits = InVT.getSizeInBits();
4278
4279 if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
4280 if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
4281 int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
4282 SmallVector<int, 16> TruncMask(WidenNumElts, -1);
4283 for (unsigned I = 0; I < MinElts; ++I)
4284 TruncMask[I] = Scale * I;
4285
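        // Widen the input to a full 128-bit vector (the extra lanes are undef)
        // so that the bitcast and shuffle below operate on legal types.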
4286 unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
4287 MVT SVT = In.getSimpleValueType().getScalarType();
4288 MVT VT = MVT::getVectorVT(VT: SVT, NumElements: WidenNumElts);
4289 SDValue WidenIn =
4290 DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT, N1: DAG.getUNDEF(VT), N2: In,
4291 N3: DAG.getVectorIdxConstant(Val: 0, DL));
4292 assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
4293 "Illegal vector type in truncation");
4294 WidenIn = DAG.getBitcast(VT: WidenVT, V: WidenIn);
4295 Results.push_back(
4296 Elt: DAG.getVectorShuffle(VT: WidenVT, dl: DL, N1: WidenIn, N2: WidenIn, Mask: TruncMask));
4297 return;
4298 }
4299 }
4300
4301 break;
4302 }
4303 }
4304}
4305
4306static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
4307 TargetLowering::DAGCombinerInfo &DCI,
4308 const LoongArchSubtarget &Subtarget) {
4309 if (DCI.isBeforeLegalizeOps())
4310 return SDValue();
4311
4312 SDValue FirstOperand = N->getOperand(Num: 0);
4313 SDValue SecondOperand = N->getOperand(Num: 1);
4314 unsigned FirstOperandOpc = FirstOperand.getOpcode();
4315 EVT ValTy = N->getValueType(ResNo: 0);
4316 SDLoc DL(N);
4317 uint64_t lsb, msb;
4318 unsigned SMIdx, SMLen;
4319 ConstantSDNode *CN;
4320 SDValue NewOperand;
4321 MVT GRLenVT = Subtarget.getGRLenVT();
4322
4323 // BSTRPICK requires the 32S feature.
4324 if (!Subtarget.has32S())
4325 return SDValue();
4326
4327 // Op's second operand must be a shifted mask.
4328 if (!(CN = dyn_cast<ConstantSDNode>(Val&: SecondOperand)) ||
4329 !isShiftedMask_64(Value: CN->getZExtValue(), MaskIdx&: SMIdx, MaskLen&: SMLen))
4330 return SDValue();
4331
4332 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
4333 // Pattern match BSTRPICK.
4334 // $dst = and ((sra or srl) $src, lsb), (2**len - 1)
4335 // => BSTRPICK $dst, $src, msb, lsb
4336 // where msb = lsb + len - 1
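    // Illustrative example (constants chosen arbitrarily):
    //   $dst = and (srl $src, 8), 0xff
    //     => BSTRPICK $dst, $src, 15, 8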
4337
4338 // The second operand of the shift must be an immediate.
4339 if (!(CN = dyn_cast<ConstantSDNode>(Val: FirstOperand.getOperand(i: 1))))
4340 return SDValue();
4341
4342 lsb = CN->getZExtValue();
4343
4344 // Return if the shifted mask does not start at bit 0 or the sum of its
4345 // length and lsb exceeds the word's size.
4346 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
4347 return SDValue();
4348
4349 NewOperand = FirstOperand.getOperand(i: 0);
4350 } else {
4351 // Pattern match BSTRPICK.
4352 // $dst = and $src, (2**len - 1), if len > 12
4353 // => BSTRPICK $dst, $src, msb, lsb
4354 // where lsb = 0 and msb = len - 1
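    // Illustrative example (constants chosen arbitrarily):
    //   $dst = and $src, 0xffff          (len = 16 > 12)
    //     => BSTRPICK $dst, $src, 15, 0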
4355
4356 // If the mask is <= 0xfff, andi can be used instead.
4357 if (CN->getZExtValue() <= 0xfff)
4358 return SDValue();
4359
4360 // Return if the MSB of the mask exceeds the value's bit width.
4361 if (SMIdx + SMLen > ValTy.getSizeInBits())
4362 return SDValue();
4363
4364 if (SMIdx > 0) {
4365 // Omit if the constant has more than 2 uses. This is a conservative
4366 // decision: whether it is a win depends on the HW microarchitecture.
4367 // However, it should always be better for 1 and 2 uses.
4368 if (CN->use_size() > 2)
4369 return SDValue();
4370 // Return if the constant can be composed by a single LU12I.W.
4371 if ((CN->getZExtValue() & 0xfff) == 0)
4372 return SDValue();
4373 // Return if the constant can be composed by a single ADDI with
4374 // the zero register.
4375 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
4376 return SDValue();
4377 }
4378
4379 lsb = SMIdx;
4380 NewOperand = FirstOperand;
4381 }
4382
4383 msb = lsb + SMLen - 1;
4384 SDValue NR0 = DAG.getNode(Opcode: LoongArchISD::BSTRPICK, DL, VT: ValTy, N1: NewOperand,
4385 N2: DAG.getConstant(Val: msb, DL, VT: GRLenVT),
4386 N3: DAG.getConstant(Val: lsb, DL, VT: GRLenVT));
4387 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
4388 return NR0;
4389 // Try to optimize to
4390 // bstrpick $Rd, $Rs, msb, lsb
4391 // slli $Rd, $Rd, lsb
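  // Illustrative example (constants chosen arbitrarily):
  //   $dst = and $src, 0xff00
  //     => bstrpick $dst, $src, 15, 8
  //        slli     $dst, $dst, 8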
4392 return DAG.getNode(Opcode: ISD::SHL, DL, VT: ValTy, N1: NR0,
4393 N2: DAG.getConstant(Val: lsb, DL, VT: GRLenVT));
4394}
4395
4396static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
4397 TargetLowering::DAGCombinerInfo &DCI,
4398 const LoongArchSubtarget &Subtarget) {
4399 // BSTRPICK requires the 32S feature.
4400 if (!Subtarget.has32S())
4401 return SDValue();
4402
4403 if (DCI.isBeforeLegalizeOps())
4404 return SDValue();
4405
4406 // $dst = srl (and $src, Mask), Shamt
4407 // =>
4408 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
4409 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
4410 //
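  // Illustrative example (constants chosen arbitrarily):
  //   $dst = srl (and $src, 0xff0), 4
  //     => BSTRPICK $dst, $src, 11, 4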
4411
4412 SDValue FirstOperand = N->getOperand(Num: 0);
4413 ConstantSDNode *CN;
4414 EVT ValTy = N->getValueType(ResNo: 0);
4415 SDLoc DL(N);
4416 MVT GRLenVT = Subtarget.getGRLenVT();
4417 unsigned MaskIdx, MaskLen;
4418 uint64_t Shamt;
4419
4420 // The first operand must be an AND and the second operand of the AND must be
4421 // a shifted mask.
4422 if (FirstOperand.getOpcode() != ISD::AND ||
4423 !(CN = dyn_cast<ConstantSDNode>(Val: FirstOperand.getOperand(i: 1))) ||
4424 !isShiftedMask_64(Value: CN->getZExtValue(), MaskIdx, MaskLen))
4425 return SDValue();
4426
4427 // The second operand (shift amount) must be an immediate.
4428 if (!(CN = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1))))
4429 return SDValue();
4430
4431 Shamt = CN->getZExtValue();
4432 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
4433 return DAG.getNode(Opcode: LoongArchISD::BSTRPICK, DL, VT: ValTy,
4434 N1: FirstOperand->getOperand(Num: 0),
4435 N2: DAG.getConstant(Val: MaskIdx + MaskLen - 1, DL, VT: GRLenVT),
4436 N3: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT));
4437
4438 return SDValue();
4439}
4440
4441// Helper to peek through bitops/trunc/setcc to determine size of source vector.
4442// Allows BITCASTCombine to determine what size vector generated a <X x i1>.
4443static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
4444 unsigned Depth) {
4445 // Limit recursion.
4446 if (Depth >= SelectionDAG::MaxRecursionDepth)
4447 return false;
4448 switch (Src.getOpcode()) {
4449 case ISD::SETCC:
4450 case ISD::TRUNCATE:
4451 return Src.getOperand(i: 0).getValueSizeInBits() == Size;
4452 case ISD::FREEZE:
4453 return checkBitcastSrcVectorSize(Src: Src.getOperand(i: 0), Size, Depth: Depth + 1);
4454 case ISD::AND:
4455 case ISD::XOR:
4456 case ISD::OR:
4457 return checkBitcastSrcVectorSize(Src: Src.getOperand(i: 0), Size, Depth: Depth + 1) &&
4458 checkBitcastSrcVectorSize(Src: Src.getOperand(i: 1), Size, Depth: Depth + 1);
4459 case ISD::SELECT:
4460 case ISD::VSELECT:
4461 return Src.getOperand(i: 0).getScalarValueSizeInBits() == 1 &&
4462 checkBitcastSrcVectorSize(Src: Src.getOperand(i: 1), Size, Depth: Depth + 1) &&
4463 checkBitcastSrcVectorSize(Src: Src.getOperand(i: 2), Size, Depth: Depth + 1);
4464 case ISD::BUILD_VECTOR:
4465 return ISD::isBuildVectorAllZeros(N: Src.getNode()) ||
4466 ISD::isBuildVectorAllOnes(N: Src.getNode());
4467 }
4468 return false;
4469}
4470
4471// Helper to push sign extension of vXi1 SETCC result through bitops.
4472static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT,
4473 SDValue Src, const SDLoc &DL) {
4474 switch (Src.getOpcode()) {
4475 case ISD::SETCC:
4476 case ISD::FREEZE:
4477 case ISD::TRUNCATE:
4478 case ISD::BUILD_VECTOR:
4479 return DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: SExtVT, Operand: Src);
4480 case ISD::AND:
4481 case ISD::XOR:
4482 case ISD::OR:
4483 return DAG.getNode(
4484 Opcode: Src.getOpcode(), DL, VT: SExtVT,
4485 N1: signExtendBitcastSrcVector(DAG, SExtVT, Src: Src.getOperand(i: 0), DL),
4486 N2: signExtendBitcastSrcVector(DAG, SExtVT, Src: Src.getOperand(i: 1), DL));
4487 case ISD::SELECT:
4488 case ISD::VSELECT:
4489 return DAG.getSelect(
4490 DL, VT: SExtVT, Cond: Src.getOperand(i: 0),
4491 LHS: signExtendBitcastSrcVector(DAG, SExtVT, Src: Src.getOperand(i: 1), DL),
4492 RHS: signExtendBitcastSrcVector(DAG, SExtVT, Src: Src.getOperand(i: 2), DL));
4493 }
4494 llvm_unreachable("Unexpected node type for vXi1 sign extension");
4495}
4496
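// Combine a bitcast of a vXi1 vector (typically produced by a vector SETCC)
// into a scalar integer mask computed with the [X]VMSK* family of nodes.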
4497static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
4498 TargetLowering::DAGCombinerInfo &DCI,
4499 const LoongArchSubtarget &Subtarget) {
4500 SDLoc DL(N);
4501 EVT VT = N->getValueType(ResNo: 0);
4502 SDValue Src = N->getOperand(Num: 0);
4503 EVT SrcVT = Src.getValueType();
4504
4505 if (!DCI.isBeforeLegalizeOps())
4506 return SDValue();
4507
4508 if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
4509 return SDValue();
4510
4511 unsigned Opc = ISD::DELETED_NODE;
4512 // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
4513 if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
4514 bool UseLASX;
4515 EVT CmpVT = Src.getOperand(i: 0).getValueType();
4516 EVT EltVT = CmpVT.getVectorElementType();
4517
4518 if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() <= 128)
4519 UseLASX = false;
4520 else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
4521 CmpVT.getSizeInBits() <= 256)
4522 UseLASX = true;
4523 else
4524 return SDValue();
4525
4526 SDValue SrcN1 = Src.getOperand(i: 1);
4527 switch (cast<CondCodeSDNode>(Val: Src.getOperand(i: 2))->get()) {
4528 default:
4529 break;
4530 case ISD::SETEQ:
4531 // x == 0 => not (vmsknez.b x)
4532 if (ISD::isBuildVectorAllZeros(N: SrcN1.getNode()) && EltVT == MVT::i8)
4533 Opc = UseLASX ? LoongArchISD::XVMSKEQZ : LoongArchISD::VMSKEQZ;
4534 break;
4535 case ISD::SETGT:
4536 // x > -1 => vmskgez.b x
4537 if (ISD::isBuildVectorAllOnes(N: SrcN1.getNode()) && EltVT == MVT::i8)
4538 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
4539 break;
4540 case ISD::SETGE:
4541 // x >= 0 => vmskgez.b x
4542 if (ISD::isBuildVectorAllZeros(N: SrcN1.getNode()) && EltVT == MVT::i8)
4543 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
4544 break;
4545 case ISD::SETLT:
4546 // x < 0 => vmskltz.{b,h,w,d} x
4547 if (ISD::isBuildVectorAllZeros(N: SrcN1.getNode()) &&
4548 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
4549 EltVT == MVT::i64))
4550 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
4551 break;
4552 case ISD::SETLE:
4553 // x <= -1 => vmskltz.{b,h,w,d} x
4554 if (ISD::isBuildVectorAllOnes(N: SrcN1.getNode()) &&
4555 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
4556 EltVT == MVT::i64))
4557 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
4558 break;
4559 case ISD::SETNE:
4560 // x != 0 => vmsknez.b x
4561 if (ISD::isBuildVectorAllZeros(N: SrcN1.getNode()) && EltVT == MVT::i8)
4562 Opc = UseLASX ? LoongArchISD::XVMSKNEZ : LoongArchISD::VMSKNEZ;
4563 break;
4564 }
4565 }
4566
4567 // Otherwise, materialize the mask by sign-extending the vXi1 source and
4567 // using [X]VMSKLTZ.
4568 if (Opc == ISD::DELETED_NODE) {
4569 MVT SExtVT;
4570 bool UseLASX = false;
4571 bool PropagateSExt = false;
4572 switch (SrcVT.getSimpleVT().SimpleTy) {
4573 default:
4574 return SDValue();
4575 case MVT::v2i1:
4576 SExtVT = MVT::v2i64;
4577 break;
4578 case MVT::v4i1:
4579 SExtVT = MVT::v4i32;
4580 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, Size: 256, Depth: 0)) {
4581 SExtVT = MVT::v4i64;
4582 UseLASX = true;
4583 PropagateSExt = true;
4584 }
4585 break;
4586 case MVT::v8i1:
4587 SExtVT = MVT::v8i16;
4588 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, Size: 256, Depth: 0)) {
4589 SExtVT = MVT::v8i32;
4590 UseLASX = true;
4591 PropagateSExt = true;
4592 }
4593 break;
4594 case MVT::v16i1:
4595 SExtVT = MVT::v16i8;
4596 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, Size: 256, Depth: 0)) {
4597 SExtVT = MVT::v16i16;
4598 UseLASX = true;
4599 PropagateSExt = true;
4600 }
4601 break;
4602 case MVT::v32i1:
4603 SExtVT = MVT::v32i8;
4604 UseLASX = true;
4605 break;
4606 }
4607 if (UseLASX && !Subtarget.has32S() && !Subtarget.hasExtLASX())
4608 return SDValue();
4609 Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
4610 : DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: SExtVT, Operand: Src);
4611 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
4612 } else {
4613 Src = Src.getOperand(i: 0);
4614 }
4615
4616 SDValue V = DAG.getNode(Opcode: Opc, DL, VT: MVT::i64, Operand: Src);
4617 EVT T = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: SrcVT.getVectorNumElements());
4618 V = DAG.getZExtOrTrunc(Op: V, DL, VT: T);
4619 return DAG.getBitcast(VT, V);
4620}
4621
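// Combine OR into BSTRINS when it merges a bit field of one value into bits of
// another value that are masked off or known to be zero.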
4622static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
4623 TargetLowering::DAGCombinerInfo &DCI,
4624 const LoongArchSubtarget &Subtarget) {
4625 MVT GRLenVT = Subtarget.getGRLenVT();
4626 EVT ValTy = N->getValueType(ResNo: 0);
4627 SDValue N0 = N->getOperand(Num: 0), N1 = N->getOperand(Num: 1);
4628 ConstantSDNode *CN0, *CN1;
4629 SDLoc DL(N);
4630 unsigned ValBits = ValTy.getSizeInBits();
4631 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
4632 unsigned Shamt;
4633 bool SwapAndRetried = false;
4634
4635 // BSTRPICK requires the 32S feature.
4636 if (!Subtarget.has32S())
4637 return SDValue();
4638
4639 if (DCI.isBeforeLegalizeOps())
4640 return SDValue();
4641
4642 if (ValBits != 32 && ValBits != 64)
4643 return SDValue();
4644
4645Retry:
4646 // 1st pattern to match BSTRINS:
4647 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
4648 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
4649 // =>
4650 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
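  // Illustrative example (constants chosen arbitrarily):
  //   R = or (and X, ~0xff0), (and (shl Y, 4), 0xff0)
  //     => BSTRINS X, Y, 11, 4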
4651 if (N0.getOpcode() == ISD::AND &&
4652 (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) &&
4653 isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
4654 N1.getOpcode() == ISD::AND && N1.getOperand(i: 0).getOpcode() == ISD::SHL &&
4655 (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
4656 isShiftedMask_64(Value: CN1->getZExtValue(), MaskIdx&: MaskIdx1, MaskLen&: MaskLen1) &&
4657 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
4658 (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) &&
4659 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
4660 (MaskIdx0 + MaskLen0 <= ValBits)) {
4661 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
4662 return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0),
4663 N2: N1.getOperand(i: 0).getOperand(i: 0),
4664 N3: DAG.getConstant(Val: (MaskIdx0 + MaskLen0 - 1), DL, VT: GRLenVT),
4665 N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT));
4666 }
4667
4668 // 2nd pattern to match BSTRINS:
4669 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
4670 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
4671 // =>
4672 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
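  // Illustrative example (constants chosen arbitrarily):
  //   R = or (and X, ~0xff0), (shl (and Y, 0xff), 4)
  //     => BSTRINS X, Y, 11, 4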
4673 if (N0.getOpcode() == ISD::AND &&
4674 (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) &&
4675 isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
4676 N1.getOpcode() == ISD::SHL && N1.getOperand(i: 0).getOpcode() == ISD::AND &&
4677 (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
4678 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
4679 (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) &&
4680 isShiftedMask_64(Value: CN1->getZExtValue(), MaskIdx&: MaskIdx1, MaskLen&: MaskLen1) &&
4681 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
4682 (MaskIdx0 + MaskLen0 <= ValBits)) {
4683 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
4684 return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0),
4685 N2: N1.getOperand(i: 0).getOperand(i: 0),
4686 N3: DAG.getConstant(Val: (MaskIdx0 + MaskLen0 - 1), DL, VT: GRLenVT),
4687 N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT));
4688 }
4689
4690 // 3rd pattern to match BSTRINS:
4691 // R = or (and X, mask0), (and Y, mask1)
4692 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
4693 // =>
4694 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
4695 // where msb = lsb + size - 1
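  // Illustrative example (constants chosen arbitrarily):
  //   R = or (and X, ~0xff0), (and Y, 0xf00)
  //     => BSTRINS X, (srl (and Y, 0xf00), 4), 11, 4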
4696 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
4697 (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) &&
4698 isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
4699 (MaskIdx0 + MaskLen0 <= 64) &&
4700 (CN1 = dyn_cast<ConstantSDNode>(Val: N1->getOperand(Num: 1))) &&
4701 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
4702 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
4703 return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0),
4704 N2: DAG.getNode(Opcode: ISD::SRL, DL, VT: N1->getValueType(ResNo: 0), N1,
4705 N2: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT)),
4706 N3: DAG.getConstant(Val: ValBits == 32
4707 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
4708 : (MaskIdx0 + MaskLen0 - 1),
4709 DL, VT: GRLenVT),
4710 N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT));
4711 }
4712
4713 // 4th pattern to match BSTRINS:
4714 // R = or (and X, mask), (shl Y, shamt)
4715 // where mask = (2**shamt - 1)
4716 // =>
4717 // R = BSTRINS X, Y, ValBits - 1, shamt
4718 // where ValBits = 32 or 64
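  // Illustrative example (assuming a 64-bit value, i.e. ValBits = 64):
  //   R = or (and X, 0xf), (shl Y, 4)
  //     => BSTRINS X, Y, 63, 4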
4719 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
4720 (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) &&
4721 isShiftedMask_64(Value: CN0->getZExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
4722 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
4723 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
4724 (MaskIdx0 + MaskLen0 <= ValBits)) {
4725 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
4726 return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0),
4727 N2: N1.getOperand(i: 0),
4728 N3: DAG.getConstant(Val: (ValBits - 1), DL, VT: GRLenVT),
4729 N4: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT));
4730 }
4731
4732 // 5th pattern to match BSTRINS:
4733 // R = or (and X, mask), const
4734 // where ~mask = (2**size - 1) << lsb, mask & const = 0
4735 // =>
4736 // R = BSTRINS X, (const >> lsb), msb, lsb
4737 // where msb = lsb + size - 1
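  // Illustrative example (constants chosen arbitrarily):
  //   R = or (and X, ~0xff0), 0x120
  //     => BSTRINS X, 0x12, 11, 4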
4738 if (N0.getOpcode() == ISD::AND &&
4739 (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) &&
4740 isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) &&
4741 (CN1 = dyn_cast<ConstantSDNode>(Val&: N1)) &&
4742 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
4743 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
4744 return DAG.getNode(
4745 Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0),
4746 N2: DAG.getSignedConstant(Val: CN1->getSExtValue() >> MaskIdx0, DL, VT: ValTy),
4747 N3: DAG.getConstant(Val: ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
4748 : (MaskIdx0 + MaskLen0 - 1),
4749 DL, VT: GRLenVT),
4750 N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT));
4751 }
4752
4753 // 6th pattern.
4754 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
4755 // by the incoming bits are known to be zero.
4756 // =>
4757 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
4758 //
4759 // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
4760 // pattern is more common than the 1st. So we put the 1st before the 6th in
4761 // order to match as many nodes as possible.
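  // Illustrative example (constants chosen arbitrarily, assuming bits [11:4]
  // of b are known to be zero):
  //   a = or b, (shl (and c, 0xff), 4)
  //     => BSTRINS b, c, 11, 4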
4762 ConstantSDNode *CNMask, *CNShamt;
4763 unsigned MaskIdx, MaskLen;
4764 if (N1.getOpcode() == ISD::SHL && N1.getOperand(i: 0).getOpcode() == ISD::AND &&
4765 (CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) &&
4766 isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen) &&
4767 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
4768 CNShamt->getZExtValue() + MaskLen <= ValBits) {
4769 Shamt = CNShamt->getZExtValue();
4770 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
4771 if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) {
4772 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
4773 return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0,
4774 N2: N1.getOperand(i: 0).getOperand(i: 0),
4775 N3: DAG.getConstant(Val: Shamt + MaskLen - 1, DL, VT: GRLenVT),
4776 N4: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT));
4777 }
4778 }
4779
4780 // 7th pattern.
4781 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
4782 // overwritten by the incoming bits are known to be zero.
4783 // =>
4784 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
4785 //
4786 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
4787 // before the 7th in order to match as many nodes as possible.
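  // Illustrative example (constants chosen arbitrarily, assuming bits [11:4]
  // of b are known to be zero):
  //   a = or b, (and (shl c, 4), 0xff0)
  //     => BSTRINS b, c, 11, 4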
4788 if (N1.getOpcode() == ISD::AND &&
4789 (CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
4790 isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen) &&
4791 N1.getOperand(i: 0).getOpcode() == ISD::SHL &&
4792 (CNShamt = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) &&
4793 CNShamt->getZExtValue() == MaskIdx) {
4794 APInt ShMask(ValBits, CNMask->getZExtValue());
4795 if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) {
4796 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
4797 return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0,
4798 N2: N1.getOperand(i: 0).getOperand(i: 0),
4799 N3: DAG.getConstant(Val: MaskIdx + MaskLen - 1, DL, VT: GRLenVT),
4800 N4: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT));
4801 }
4802 }
4803
4804 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
4805 if (!SwapAndRetried) {
4806 std::swap(a&: N0, b&: N1);
4807 SwapAndRetried = true;
4808 goto Retry;
4809 }
4810
4811 SwapAndRetried = false;
4812Retry2:
4813 // 8th pattern.
4814 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
4815 // the incoming bits are known to be zero.
4816 // =>
4817 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
4818 //
4819 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
4820 // we put it here in order to match as many nodes as possible or generate fewer
4821 // instructions.
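  // Illustrative example (constants chosen arbitrarily, assuming bits [11:4]
  // of b are known to be zero):
  //   a = or b, (and c, 0xff0)
  //     => BSTRINS b, (srl c, 4), 11, 4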
4822 if (N1.getOpcode() == ISD::AND &&
4823 (CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) &&
4824 isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen)) {
4825 APInt ShMask(ValBits, CNMask->getZExtValue());
4826 if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) {
4827 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
4828 return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0,
4829 N2: DAG.getNode(Opcode: ISD::SRL, DL, VT: N1->getValueType(ResNo: 0),
4830 N1: N1->getOperand(Num: 0),
4831 N2: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT)),
4832 N3: DAG.getConstant(Val: MaskIdx + MaskLen - 1, DL, VT: GRLenVT),
4833 N4: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT));
4834 }
4835 }
4836 // Swap N0/N1 and retry.
4837 if (!SwapAndRetried) {
4838 std::swap(a&: N0, b&: N1);
4839 SwapAndRetried = true;
4840 goto Retry2;
4841 }
4842
4843 return SDValue();
4844}
4845
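// Return true if V is a load whose memory type is i8 or i16, or an
// AssertSext/AssertZext to i8 or i16; ExtType reports the corresponding
// extension kind (NON_EXTLOAD for a plain load).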
4846static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
4847 ExtType = ISD::NON_EXTLOAD;
4848
4849 switch (V.getNode()->getOpcode()) {
4850 case ISD::LOAD: {
4851 LoadSDNode *LoadNode = cast<LoadSDNode>(Val: V.getNode());
4852 if ((LoadNode->getMemoryVT() == MVT::i8) ||
4853 (LoadNode->getMemoryVT() == MVT::i16)) {
4854 ExtType = LoadNode->getExtensionType();
4855 return true;
4856 }
4857 return false;
4858 }
4859 case ISD::AssertSext: {
4860 VTSDNode *TypeNode = cast<VTSDNode>(Val: V.getNode()->getOperand(Num: 1));
4861 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
4862 ExtType = ISD::SEXTLOAD;
4863 return true;
4864 }
4865 return false;
4866 }
4867 case ISD::AssertZext: {
4868 VTSDNode *TypeNode = cast<VTSDNode>(Val: V.getNode()->getOperand(Num: 1));
4869 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
4870 ExtType = ISD::ZEXTLOAD;
4871 return true;
4872 }
4873 return false;
4874 }
4875 default:
4876 return false;
4877 }
4878
4879 return false;
4880}
4881
4882// Eliminate redundant truncation and zero-extension nodes.
4883// * Case 1:
4884// +------------+ +------------+ +------------+
4885// | Input1 | | Input2 | | CC |
4886// +------------+ +------------+ +------------+
4887// | | |
4888// V V +----+
4889// +------------+ +------------+ |
4890// | TRUNCATE | | TRUNCATE | |
4891// +------------+ +------------+ |
4892// | | |
4893// V V |
4894// +------------+ +------------+ |
4895// | ZERO_EXT | | ZERO_EXT | |
4896// +------------+ +------------+ |
4897// | | |
4898// | +-------------+ |
4899// V V | |
4900// +----------------+ | |
4901// | AND | | |
4902// +----------------+ | |
4903// | | |
4904// +---------------+ | |
4905// | | |
4906// V V V
4907// +-------------+
4908// | CMP |
4909// +-------------+
4910// * Case 2:
4911// +------------+ +------------+ +-------------+ +------------+ +------------+
4912// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
4913// +------------+ +------------+ +-------------+ +------------+ +------------+
4914// | | | | |
4915// V | | | |
4916// +------------+ | | | |
4917// | XOR |<---------------------+ | |
4918// +------------+ | | |
4919// | | | |
4920// V V +---------------+ |
4921// +------------+ +------------+ | |
4922// | TRUNCATE | | TRUNCATE | | +-------------------------+
4923// +------------+ +------------+ | |
4924// | | | |
4925// V V | |
4926// +------------+ +------------+ | |
4927// | ZERO_EXT | | ZERO_EXT | | |
4928// +------------+ +------------+ | |
4929// | | | |
4930// V V | |
4931// +----------------+ | |
4932// | AND | | |
4933// +----------------+ | |
4934// | | |
4935// +---------------+ | |
4936// | | |
4937// V V V
4938// +-------------+
4939// | CMP |
4940// +-------------+
4941static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
4942 TargetLowering::DAGCombinerInfo &DCI,
4943 const LoongArchSubtarget &Subtarget) {
4944 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get();
4945
4946 SDNode *AndNode = N->getOperand(Num: 0).getNode();
4947 if (AndNode->getOpcode() != ISD::AND)
4948 return SDValue();
4949
4950 SDValue AndInputValue2 = AndNode->getOperand(Num: 1);
4951 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
4952 return SDValue();
4953
4954 SDValue CmpInputValue = N->getOperand(Num: 1);
4955 SDValue AndInputValue1 = AndNode->getOperand(Num: 0);
4956 if (AndInputValue1.getOpcode() == ISD::XOR) {
4957 if (CC != ISD::SETEQ && CC != ISD::SETNE)
4958 return SDValue();
4959 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val: AndInputValue1.getOperand(i: 1));
4960 if (!CN || CN->getSExtValue() != -1)
4961 return SDValue();
4962 CN = dyn_cast<ConstantSDNode>(Val&: CmpInputValue);
4963 if (!CN || CN->getSExtValue() != 0)
4964 return SDValue();
4965 AndInputValue1 = AndInputValue1.getOperand(i: 0);
4966 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
4967 return SDValue();
4968 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
4969 if (AndInputValue2 != CmpInputValue)
4970 return SDValue();
4971 } else {
4972 return SDValue();
4973 }
4974
4975 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(Num: 0);
4976 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
4977 return SDValue();
4978
4979 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(Num: 0);
4980 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
4981 return SDValue();
4982
4983 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(Num: 0);
4984 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(Num: 0);
4985 ISD::LoadExtType ExtType1;
4986 ISD::LoadExtType ExtType2;
4987
4988 if (!checkValueWidth(V: TruncInputValue1, ExtType&: ExtType1) ||
4989 !checkValueWidth(V: TruncInputValue2, ExtType&: ExtType2))
4990 return SDValue();
4991
4992 if (TruncInputValue1->getValueType(ResNo: 0) != TruncInputValue2->getValueType(ResNo: 0) ||
4993 AndNode->getValueType(ResNo: 0) != TruncInputValue1->getValueType(ResNo: 0))
4994 return SDValue();
4995
4996 if ((ExtType2 != ISD::ZEXTLOAD) &&
4997 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
4998 return SDValue();
4999
5000 // These truncation and zero-extension nodes are not necessary; remove them.
5001 SDValue NewAnd = DAG.getNode(Opcode: ISD::AND, DL: SDLoc(N), VT: AndNode->getValueType(ResNo: 0),
5002 N1: TruncInputValue1, N2: TruncInputValue2);
5003 SDValue NewSetCC =
5004 DAG.getSetCC(DL: SDLoc(N), VT: N->getValueType(ResNo: 0), LHS: NewAnd, RHS: TruncInputValue2, Cond: CC);
5005 DAG.ReplaceAllUsesWith(From: N, To: NewSetCC.getNode());
5006 return SDValue(N, 0);
5007}
5008
5009// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
5010static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
5011 TargetLowering::DAGCombinerInfo &DCI,
5012 const LoongArchSubtarget &Subtarget) {
5013 if (DCI.isBeforeLegalizeOps())
5014 return SDValue();
5015
5016 SDValue Src = N->getOperand(Num: 0);
5017 if (Src.getOpcode() != LoongArchISD::REVB_2W)
5018 return SDValue();
5019
5020 return DAG.getNode(Opcode: LoongArchISD::BITREV_4B, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
5021 Operand: Src.getOperand(i: 0));
5022}
5023
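// Validate an intrinsic's immediate operand at position ImmOp: it must fit in
// N bits (signed if IsSigned, unsigned otherwise). On success return it as a
// GRLen-typed constant; otherwise emit a diagnostic and return UNDEF.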
5024template <unsigned N>
5025static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
5026 SelectionDAG &DAG,
5027 const LoongArchSubtarget &Subtarget,
5028 bool IsSigned = false) {
5029 SDLoc DL(Node);
5030 auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: ImmOp));
5031 // Check the ImmArg.
5032 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
5033 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
5034 DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) +
5035 ": argument out of range.");
5036 return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: Subtarget.getGRLenVT());
5037 }
5038 return DAG.getConstant(Val: CImm->getZExtValue(), DL, VT: Subtarget.getGRLenVT());
5039}
5040
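// As above, but splat the validated immediate across all lanes of the result
// vector type.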
5041template <unsigned N>
5042static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
5043 SelectionDAG &DAG, bool IsSigned = false) {
5044 SDLoc DL(Node);
5045 EVT ResTy = Node->getValueType(ResNo: 0);
5046 auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: ImmOp));
5047
5048 // Check the ImmArg.
5049 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
5050 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
5051 DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) +
5052 ": argument out of range.");
5053 return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy);
5054 }
5055 return DAG.getConstant(
5056 Val: APInt(ResTy.getScalarType().getSizeInBits(),
5057 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
5058 DL, VT: ResTy);
5059}
5060
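// Mask each lane of the vector shift-amount / bit-index operand (operand 2 of
// Node) with EltBits - 1, since only its low log2(EltBits) bits are
// significant.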
5061static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
5062 SDLoc DL(Node);
5063 EVT ResTy = Node->getValueType(ResNo: 0);
5064 SDValue Vec = Node->getOperand(Num: 2);
5065 SDValue Mask = DAG.getConstant(Val: Vec.getScalarValueSizeInBits() - 1, DL, VT: ResTy);
5066 return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Vec, N2: Mask);
5067}
5068
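// Lower [x]vbitclr.{b/h/w/d}: clear, in every lane of operand 1, the bit
// selected by the corresponding lane of operand 2,
// i.e. x & ~(1 << (y % EltBits)).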
5069static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
5070 SDLoc DL(Node);
5071 EVT ResTy = Node->getValueType(ResNo: 0);
5072 SDValue One = DAG.getConstant(Val: 1, DL, VT: ResTy);
5073 SDValue Bit =
5074 DAG.getNode(Opcode: ISD::SHL, DL, VT: ResTy, N1: One, N2: truncateVecElts(Node, DAG));
5075
5076 return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Node->getOperand(Num: 1),
5077 N2: DAG.getNOT(DL, Val: Bit, VT: ResTy));
5078}
5079
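// Lower [x]vbitclri.{b/h/w/d}: clear bit Imm in every lane of operand 1,
// i.e. x & ~(1 << Imm), after range-checking the immediate.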
5080template <unsigned N>
5081static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
5082 SDLoc DL(Node);
5083 EVT ResTy = Node->getValueType(ResNo: 0);
5084 auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2));
5085 // Check the unsigned ImmArg.
5086 if (!isUInt<N>(CImm->getZExtValue())) {
5087 DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) +
5088 ": argument out of range.");
5089 return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy);
5090 }
5091
5092 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5093 SDValue Mask = DAG.getConstant(Val: ~BitImm, DL, VT: ResTy);
5094
5095 return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Node->getOperand(Num: 1), N2: Mask);
5096}
5097
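// Lower [x]vbitseti.{b/h/w/d}: set bit Imm in every lane of operand 1,
// i.e. x | (1 << Imm), after range-checking the immediate.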
5098template <unsigned N>
5099static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
5100 SDLoc DL(Node);
5101 EVT ResTy = Node->getValueType(ResNo: 0);
5102 auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2));
5103 // Check the unsigned ImmArg.
5104 if (!isUInt<N>(CImm->getZExtValue())) {
5105 DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) +
5106 ": argument out of range.");
5107 return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy);
5108 }
5109
5110 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5111 SDValue BitImm = DAG.getConstant(Val: Imm, DL, VT: ResTy);
5112 return DAG.getNode(Opcode: ISD::OR, DL, VT: ResTy, N1: Node->getOperand(Num: 1), N2: BitImm);
5113}
5114
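// Lower [x]vbitrevi.{b/h/w/d}: flip bit Imm in every lane of operand 1,
// i.e. x ^ (1 << Imm), after range-checking the immediate.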
5115template <unsigned N>
5116static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
5117 SDLoc DL(Node);
5118 EVT ResTy = Node->getValueType(ResNo: 0);
5119 auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2));
5120 // Check the unsigned ImmArg.
5121 if (!isUInt<N>(CImm->getZExtValue())) {
5122 DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) +
5123 ": argument out of range.");
5124 return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy);
5125 }
5126
5127 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5128 SDValue BitImm = DAG.getConstant(Val: Imm, DL, VT: ResTy);
5129 return DAG.getNode(Opcode: ISD::XOR, DL, VT: ResTy, N1: Node->getOperand(Num: 1), N2: BitImm);
5130}
5131
5132static SDValue
5133performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
5134 TargetLowering::DAGCombinerInfo &DCI,
5135 const LoongArchSubtarget &Subtarget) {
5136 SDLoc DL(N);
5137 switch (N->getConstantOperandVal(Num: 0)) {
5138 default:
5139 break;
5140 case Intrinsic::loongarch_lsx_vadd_b:
5141 case Intrinsic::loongarch_lsx_vadd_h:
5142 case Intrinsic::loongarch_lsx_vadd_w:
5143 case Intrinsic::loongarch_lsx_vadd_d:
5144 case Intrinsic::loongarch_lasx_xvadd_b:
5145 case Intrinsic::loongarch_lasx_xvadd_h:
5146 case Intrinsic::loongarch_lasx_xvadd_w:
5147 case Intrinsic::loongarch_lasx_xvadd_d:
5148 return DAG.getNode(Opcode: ISD::ADD, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5149 N2: N->getOperand(Num: 2));
5150 case Intrinsic::loongarch_lsx_vaddi_bu:
5151 case Intrinsic::loongarch_lsx_vaddi_hu:
5152 case Intrinsic::loongarch_lsx_vaddi_wu:
5153 case Intrinsic::loongarch_lsx_vaddi_du:
5154 case Intrinsic::loongarch_lasx_xvaddi_bu:
5155 case Intrinsic::loongarch_lasx_xvaddi_hu:
5156 case Intrinsic::loongarch_lasx_xvaddi_wu:
5157 case Intrinsic::loongarch_lasx_xvaddi_du:
5158 return DAG.getNode(Opcode: ISD::ADD, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5159 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
5160 case Intrinsic::loongarch_lsx_vsub_b:
5161 case Intrinsic::loongarch_lsx_vsub_h:
5162 case Intrinsic::loongarch_lsx_vsub_w:
5163 case Intrinsic::loongarch_lsx_vsub_d:
5164 case Intrinsic::loongarch_lasx_xvsub_b:
5165 case Intrinsic::loongarch_lasx_xvsub_h:
5166 case Intrinsic::loongarch_lasx_xvsub_w:
5167 case Intrinsic::loongarch_lasx_xvsub_d:
5168 return DAG.getNode(Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5169 N2: N->getOperand(Num: 2));
5170 case Intrinsic::loongarch_lsx_vsubi_bu:
5171 case Intrinsic::loongarch_lsx_vsubi_hu:
5172 case Intrinsic::loongarch_lsx_vsubi_wu:
5173 case Intrinsic::loongarch_lsx_vsubi_du:
5174 case Intrinsic::loongarch_lasx_xvsubi_bu:
5175 case Intrinsic::loongarch_lasx_xvsubi_hu:
5176 case Intrinsic::loongarch_lasx_xvsubi_wu:
5177 case Intrinsic::loongarch_lasx_xvsubi_du:
5178 return DAG.getNode(Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5179 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
5180 case Intrinsic::loongarch_lsx_vneg_b:
5181 case Intrinsic::loongarch_lsx_vneg_h:
5182 case Intrinsic::loongarch_lsx_vneg_w:
5183 case Intrinsic::loongarch_lsx_vneg_d:
5184 case Intrinsic::loongarch_lasx_xvneg_b:
5185 case Intrinsic::loongarch_lasx_xvneg_h:
5186 case Intrinsic::loongarch_lasx_xvneg_w:
5187 case Intrinsic::loongarch_lasx_xvneg_d:
5188 return DAG.getNode(
5189 Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: 0),
5190 N1: DAG.getConstant(
5191 Val: APInt(N->getValueType(ResNo: 0).getScalarType().getSizeInBits(), 0,
5192 /*isSigned=*/true),
5193 DL: SDLoc(N), VT: N->getValueType(ResNo: 0)),
5194 N2: N->getOperand(Num: 1));
5195 case Intrinsic::loongarch_lsx_vmax_b:
5196 case Intrinsic::loongarch_lsx_vmax_h:
5197 case Intrinsic::loongarch_lsx_vmax_w:
5198 case Intrinsic::loongarch_lsx_vmax_d:
5199 case Intrinsic::loongarch_lasx_xvmax_b:
5200 case Intrinsic::loongarch_lasx_xvmax_h:
5201 case Intrinsic::loongarch_lasx_xvmax_w:
5202 case Intrinsic::loongarch_lasx_xvmax_d:
5203 return DAG.getNode(Opcode: ISD::SMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5204 N2: N->getOperand(Num: 2));
5205 case Intrinsic::loongarch_lsx_vmax_bu:
5206 case Intrinsic::loongarch_lsx_vmax_hu:
5207 case Intrinsic::loongarch_lsx_vmax_wu:
5208 case Intrinsic::loongarch_lsx_vmax_du:
5209 case Intrinsic::loongarch_lasx_xvmax_bu:
5210 case Intrinsic::loongarch_lasx_xvmax_hu:
5211 case Intrinsic::loongarch_lasx_xvmax_wu:
5212 case Intrinsic::loongarch_lasx_xvmax_du:
5213 return DAG.getNode(Opcode: ISD::UMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5214 N2: N->getOperand(Num: 2));
5215 case Intrinsic::loongarch_lsx_vmaxi_b:
5216 case Intrinsic::loongarch_lsx_vmaxi_h:
5217 case Intrinsic::loongarch_lsx_vmaxi_w:
5218 case Intrinsic::loongarch_lsx_vmaxi_d:
5219 case Intrinsic::loongarch_lasx_xvmaxi_b:
5220 case Intrinsic::loongarch_lasx_xvmaxi_h:
5221 case Intrinsic::loongarch_lasx_xvmaxi_w:
5222 case Intrinsic::loongarch_lasx_xvmaxi_d:
5223 return DAG.getNode(Opcode: ISD::SMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5224 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG, /*IsSigned=*/true));
5225 case Intrinsic::loongarch_lsx_vmaxi_bu:
5226 case Intrinsic::loongarch_lsx_vmaxi_hu:
5227 case Intrinsic::loongarch_lsx_vmaxi_wu:
5228 case Intrinsic::loongarch_lsx_vmaxi_du:
5229 case Intrinsic::loongarch_lasx_xvmaxi_bu:
5230 case Intrinsic::loongarch_lasx_xvmaxi_hu:
5231 case Intrinsic::loongarch_lasx_xvmaxi_wu:
5232 case Intrinsic::loongarch_lasx_xvmaxi_du:
5233 return DAG.getNode(Opcode: ISD::UMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5234 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
5235 case Intrinsic::loongarch_lsx_vmin_b:
5236 case Intrinsic::loongarch_lsx_vmin_h:
5237 case Intrinsic::loongarch_lsx_vmin_w:
5238 case Intrinsic::loongarch_lsx_vmin_d:
5239 case Intrinsic::loongarch_lasx_xvmin_b:
5240 case Intrinsic::loongarch_lasx_xvmin_h:
5241 case Intrinsic::loongarch_lasx_xvmin_w:
5242 case Intrinsic::loongarch_lasx_xvmin_d:
5243 return DAG.getNode(Opcode: ISD::SMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5244 N2: N->getOperand(Num: 2));
5245 case Intrinsic::loongarch_lsx_vmin_bu:
5246 case Intrinsic::loongarch_lsx_vmin_hu:
5247 case Intrinsic::loongarch_lsx_vmin_wu:
5248 case Intrinsic::loongarch_lsx_vmin_du:
5249 case Intrinsic::loongarch_lasx_xvmin_bu:
5250 case Intrinsic::loongarch_lasx_xvmin_hu:
5251 case Intrinsic::loongarch_lasx_xvmin_wu:
5252 case Intrinsic::loongarch_lasx_xvmin_du:
5253 return DAG.getNode(Opcode: ISD::UMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5254 N2: N->getOperand(Num: 2));
5255 case Intrinsic::loongarch_lsx_vmini_b:
5256 case Intrinsic::loongarch_lsx_vmini_h:
5257 case Intrinsic::loongarch_lsx_vmini_w:
5258 case Intrinsic::loongarch_lsx_vmini_d:
5259 case Intrinsic::loongarch_lasx_xvmini_b:
5260 case Intrinsic::loongarch_lasx_xvmini_h:
5261 case Intrinsic::loongarch_lasx_xvmini_w:
5262 case Intrinsic::loongarch_lasx_xvmini_d:
5263 return DAG.getNode(Opcode: ISD::SMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5264 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG, /*IsSigned=*/true));
5265 case Intrinsic::loongarch_lsx_vmini_bu:
5266 case Intrinsic::loongarch_lsx_vmini_hu:
5267 case Intrinsic::loongarch_lsx_vmini_wu:
5268 case Intrinsic::loongarch_lsx_vmini_du:
5269 case Intrinsic::loongarch_lasx_xvmini_bu:
5270 case Intrinsic::loongarch_lasx_xvmini_hu:
5271 case Intrinsic::loongarch_lasx_xvmini_wu:
5272 case Intrinsic::loongarch_lasx_xvmini_du:
5273 return DAG.getNode(Opcode: ISD::UMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5274 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
5275 case Intrinsic::loongarch_lsx_vmul_b:
5276 case Intrinsic::loongarch_lsx_vmul_h:
5277 case Intrinsic::loongarch_lsx_vmul_w:
5278 case Intrinsic::loongarch_lsx_vmul_d:
5279 case Intrinsic::loongarch_lasx_xvmul_b:
5280 case Intrinsic::loongarch_lasx_xvmul_h:
5281 case Intrinsic::loongarch_lasx_xvmul_w:
5282 case Intrinsic::loongarch_lasx_xvmul_d:
5283 return DAG.getNode(Opcode: ISD::MUL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5284 N2: N->getOperand(Num: 2));
5285 case Intrinsic::loongarch_lsx_vmadd_b:
5286 case Intrinsic::loongarch_lsx_vmadd_h:
5287 case Intrinsic::loongarch_lsx_vmadd_w:
5288 case Intrinsic::loongarch_lsx_vmadd_d:
5289 case Intrinsic::loongarch_lasx_xvmadd_b:
5290 case Intrinsic::loongarch_lasx_xvmadd_h:
5291 case Intrinsic::loongarch_lasx_xvmadd_w:
5292 case Intrinsic::loongarch_lasx_xvmadd_d: {
5293 EVT ResTy = N->getValueType(ResNo: 0);
5294 return DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 1),
5295 N2: DAG.getNode(Opcode: ISD::MUL, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 2),
5296 N2: N->getOperand(Num: 3)));
5297 }
5298 case Intrinsic::loongarch_lsx_vmsub_b:
5299 case Intrinsic::loongarch_lsx_vmsub_h:
5300 case Intrinsic::loongarch_lsx_vmsub_w:
5301 case Intrinsic::loongarch_lsx_vmsub_d:
5302 case Intrinsic::loongarch_lasx_xvmsub_b:
5303 case Intrinsic::loongarch_lasx_xvmsub_h:
5304 case Intrinsic::loongarch_lasx_xvmsub_w:
5305 case Intrinsic::loongarch_lasx_xvmsub_d: {
5306 EVT ResTy = N->getValueType(ResNo: 0);
5307 return DAG.getNode(Opcode: ISD::SUB, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 1),
5308 N2: DAG.getNode(Opcode: ISD::MUL, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 2),
5309 N2: N->getOperand(Num: 3)));
5310 }
5311 case Intrinsic::loongarch_lsx_vdiv_b:
5312 case Intrinsic::loongarch_lsx_vdiv_h:
5313 case Intrinsic::loongarch_lsx_vdiv_w:
5314 case Intrinsic::loongarch_lsx_vdiv_d:
5315 case Intrinsic::loongarch_lasx_xvdiv_b:
5316 case Intrinsic::loongarch_lasx_xvdiv_h:
5317 case Intrinsic::loongarch_lasx_xvdiv_w:
5318 case Intrinsic::loongarch_lasx_xvdiv_d:
5319 return DAG.getNode(Opcode: ISD::SDIV, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5320 N2: N->getOperand(Num: 2));
5321 case Intrinsic::loongarch_lsx_vdiv_bu:
5322 case Intrinsic::loongarch_lsx_vdiv_hu:
5323 case Intrinsic::loongarch_lsx_vdiv_wu:
5324 case Intrinsic::loongarch_lsx_vdiv_du:
5325 case Intrinsic::loongarch_lasx_xvdiv_bu:
5326 case Intrinsic::loongarch_lasx_xvdiv_hu:
5327 case Intrinsic::loongarch_lasx_xvdiv_wu:
5328 case Intrinsic::loongarch_lasx_xvdiv_du:
5329 return DAG.getNode(Opcode: ISD::UDIV, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5330 N2: N->getOperand(Num: 2));
5331 case Intrinsic::loongarch_lsx_vmod_b:
5332 case Intrinsic::loongarch_lsx_vmod_h:
5333 case Intrinsic::loongarch_lsx_vmod_w:
5334 case Intrinsic::loongarch_lsx_vmod_d:
5335 case Intrinsic::loongarch_lasx_xvmod_b:
5336 case Intrinsic::loongarch_lasx_xvmod_h:
5337 case Intrinsic::loongarch_lasx_xvmod_w:
5338 case Intrinsic::loongarch_lasx_xvmod_d:
5339 return DAG.getNode(Opcode: ISD::SREM, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5340 N2: N->getOperand(Num: 2));
5341 case Intrinsic::loongarch_lsx_vmod_bu:
5342 case Intrinsic::loongarch_lsx_vmod_hu:
5343 case Intrinsic::loongarch_lsx_vmod_wu:
5344 case Intrinsic::loongarch_lsx_vmod_du:
5345 case Intrinsic::loongarch_lasx_xvmod_bu:
5346 case Intrinsic::loongarch_lasx_xvmod_hu:
5347 case Intrinsic::loongarch_lasx_xvmod_wu:
5348 case Intrinsic::loongarch_lasx_xvmod_du:
5349 return DAG.getNode(Opcode: ISD::UREM, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5350 N2: N->getOperand(Num: 2));
5351 case Intrinsic::loongarch_lsx_vand_v:
5352 case Intrinsic::loongarch_lasx_xvand_v:
5353 return DAG.getNode(Opcode: ISD::AND, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5354 N2: N->getOperand(Num: 2));
5355 case Intrinsic::loongarch_lsx_vor_v:
5356 case Intrinsic::loongarch_lasx_xvor_v:
5357 return DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5358 N2: N->getOperand(Num: 2));
5359 case Intrinsic::loongarch_lsx_vxor_v:
5360 case Intrinsic::loongarch_lasx_xvxor_v:
5361 return DAG.getNode(Opcode: ISD::XOR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5362 N2: N->getOperand(Num: 2));
5363 case Intrinsic::loongarch_lsx_vnor_v:
5364 case Intrinsic::loongarch_lasx_xvnor_v: {
5365 SDValue Res = DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5366 N2: N->getOperand(Num: 2));
5367 return DAG.getNOT(DL, Val: Res, VT: Res->getValueType(ResNo: 0));
5368 }
5369 case Intrinsic::loongarch_lsx_vandi_b:
5370 case Intrinsic::loongarch_lasx_xvandi_b:
5371 return DAG.getNode(Opcode: ISD::AND, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5372 N2: lowerVectorSplatImm<8>(Node: N, ImmOp: 2, DAG));
5373 case Intrinsic::loongarch_lsx_vori_b:
5374 case Intrinsic::loongarch_lasx_xvori_b:
5375 return DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5376 N2: lowerVectorSplatImm<8>(Node: N, ImmOp: 2, DAG));
5377 case Intrinsic::loongarch_lsx_vxori_b:
5378 case Intrinsic::loongarch_lasx_xvxori_b:
5379 return DAG.getNode(Opcode: ISD::XOR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5380 N2: lowerVectorSplatImm<8>(Node: N, ImmOp: 2, DAG));
5381 case Intrinsic::loongarch_lsx_vsll_b:
5382 case Intrinsic::loongarch_lsx_vsll_h:
5383 case Intrinsic::loongarch_lsx_vsll_w:
5384 case Intrinsic::loongarch_lsx_vsll_d:
5385 case Intrinsic::loongarch_lasx_xvsll_b:
5386 case Intrinsic::loongarch_lasx_xvsll_h:
5387 case Intrinsic::loongarch_lasx_xvsll_w:
5388 case Intrinsic::loongarch_lasx_xvsll_d:
5389 return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5390 N2: truncateVecElts(Node: N, DAG));
5391 case Intrinsic::loongarch_lsx_vslli_b:
5392 case Intrinsic::loongarch_lasx_xvslli_b:
5393 return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5394 N2: lowerVectorSplatImm<3>(Node: N, ImmOp: 2, DAG));
5395 case Intrinsic::loongarch_lsx_vslli_h:
5396 case Intrinsic::loongarch_lasx_xvslli_h:
5397 return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5398 N2: lowerVectorSplatImm<4>(Node: N, ImmOp: 2, DAG));
5399 case Intrinsic::loongarch_lsx_vslli_w:
5400 case Intrinsic::loongarch_lasx_xvslli_w:
5401 return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5402 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
5403 case Intrinsic::loongarch_lsx_vslli_d:
5404 case Intrinsic::loongarch_lasx_xvslli_d:
5405 return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5406 N2: lowerVectorSplatImm<6>(Node: N, ImmOp: 2, DAG));
5407 case Intrinsic::loongarch_lsx_vsrl_b:
5408 case Intrinsic::loongarch_lsx_vsrl_h:
5409 case Intrinsic::loongarch_lsx_vsrl_w:
5410 case Intrinsic::loongarch_lsx_vsrl_d:
5411 case Intrinsic::loongarch_lasx_xvsrl_b:
5412 case Intrinsic::loongarch_lasx_xvsrl_h:
5413 case Intrinsic::loongarch_lasx_xvsrl_w:
5414 case Intrinsic::loongarch_lasx_xvsrl_d:
5415 return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5416 N2: truncateVecElts(Node: N, DAG));
5417 case Intrinsic::loongarch_lsx_vsrli_b:
5418 case Intrinsic::loongarch_lasx_xvsrli_b:
5419 return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5420 N2: lowerVectorSplatImm<3>(Node: N, ImmOp: 2, DAG));
5421 case Intrinsic::loongarch_lsx_vsrli_h:
5422 case Intrinsic::loongarch_lasx_xvsrli_h:
5423 return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5424 N2: lowerVectorSplatImm<4>(Node: N, ImmOp: 2, DAG));
5425 case Intrinsic::loongarch_lsx_vsrli_w:
5426 case Intrinsic::loongarch_lasx_xvsrli_w:
5427 return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5428 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
5429 case Intrinsic::loongarch_lsx_vsrli_d:
5430 case Intrinsic::loongarch_lasx_xvsrli_d:
5431 return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5432 N2: lowerVectorSplatImm<6>(Node: N, ImmOp: 2, DAG));
5433 case Intrinsic::loongarch_lsx_vsra_b:
5434 case Intrinsic::loongarch_lsx_vsra_h:
5435 case Intrinsic::loongarch_lsx_vsra_w:
5436 case Intrinsic::loongarch_lsx_vsra_d:
5437 case Intrinsic::loongarch_lasx_xvsra_b:
5438 case Intrinsic::loongarch_lasx_xvsra_h:
5439 case Intrinsic::loongarch_lasx_xvsra_w:
5440 case Intrinsic::loongarch_lasx_xvsra_d:
5441 return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5442 N2: truncateVecElts(Node: N, DAG));
5443 case Intrinsic::loongarch_lsx_vsrai_b:
5444 case Intrinsic::loongarch_lasx_xvsrai_b:
5445 return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5446 N2: lowerVectorSplatImm<3>(Node: N, ImmOp: 2, DAG));
5447 case Intrinsic::loongarch_lsx_vsrai_h:
5448 case Intrinsic::loongarch_lasx_xvsrai_h:
5449 return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5450 N2: lowerVectorSplatImm<4>(Node: N, ImmOp: 2, DAG));
5451 case Intrinsic::loongarch_lsx_vsrai_w:
5452 case Intrinsic::loongarch_lasx_xvsrai_w:
5453 return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5454 N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG));
5455 case Intrinsic::loongarch_lsx_vsrai_d:
5456 case Intrinsic::loongarch_lasx_xvsrai_d:
5457 return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5458 N2: lowerVectorSplatImm<6>(Node: N, ImmOp: 2, DAG));
5459 case Intrinsic::loongarch_lsx_vclz_b:
5460 case Intrinsic::loongarch_lsx_vclz_h:
5461 case Intrinsic::loongarch_lsx_vclz_w:
5462 case Intrinsic::loongarch_lsx_vclz_d:
5463 case Intrinsic::loongarch_lasx_xvclz_b:
5464 case Intrinsic::loongarch_lasx_xvclz_h:
5465 case Intrinsic::loongarch_lasx_xvclz_w:
5466 case Intrinsic::loongarch_lasx_xvclz_d:
5467 return DAG.getNode(Opcode: ISD::CTLZ, DL, VT: N->getValueType(ResNo: 0), Operand: N->getOperand(Num: 1));
5468 case Intrinsic::loongarch_lsx_vpcnt_b:
5469 case Intrinsic::loongarch_lsx_vpcnt_h:
5470 case Intrinsic::loongarch_lsx_vpcnt_w:
5471 case Intrinsic::loongarch_lsx_vpcnt_d:
5472 case Intrinsic::loongarch_lasx_xvpcnt_b:
5473 case Intrinsic::loongarch_lasx_xvpcnt_h:
5474 case Intrinsic::loongarch_lasx_xvpcnt_w:
5475 case Intrinsic::loongarch_lasx_xvpcnt_d:
5476 return DAG.getNode(Opcode: ISD::CTPOP, DL, VT: N->getValueType(ResNo: 0), Operand: N->getOperand(Num: 1));
5477 case Intrinsic::loongarch_lsx_vbitclr_b:
5478 case Intrinsic::loongarch_lsx_vbitclr_h:
5479 case Intrinsic::loongarch_lsx_vbitclr_w:
5480 case Intrinsic::loongarch_lsx_vbitclr_d:
5481 case Intrinsic::loongarch_lasx_xvbitclr_b:
5482 case Intrinsic::loongarch_lasx_xvbitclr_h:
5483 case Intrinsic::loongarch_lasx_xvbitclr_w:
5484 case Intrinsic::loongarch_lasx_xvbitclr_d:
5485 return lowerVectorBitClear(Node: N, DAG);
5486 case Intrinsic::loongarch_lsx_vbitclri_b:
5487 case Intrinsic::loongarch_lasx_xvbitclri_b:
5488 return lowerVectorBitClearImm<3>(Node: N, DAG);
5489 case Intrinsic::loongarch_lsx_vbitclri_h:
5490 case Intrinsic::loongarch_lasx_xvbitclri_h:
5491 return lowerVectorBitClearImm<4>(Node: N, DAG);
5492 case Intrinsic::loongarch_lsx_vbitclri_w:
5493 case Intrinsic::loongarch_lasx_xvbitclri_w:
5494 return lowerVectorBitClearImm<5>(Node: N, DAG);
5495 case Intrinsic::loongarch_lsx_vbitclri_d:
5496 case Intrinsic::loongarch_lasx_xvbitclri_d:
5497 return lowerVectorBitClearImm<6>(Node: N, DAG);
5498 case Intrinsic::loongarch_lsx_vbitset_b:
5499 case Intrinsic::loongarch_lsx_vbitset_h:
5500 case Intrinsic::loongarch_lsx_vbitset_w:
5501 case Intrinsic::loongarch_lsx_vbitset_d:
5502 case Intrinsic::loongarch_lasx_xvbitset_b:
5503 case Intrinsic::loongarch_lasx_xvbitset_h:
5504 case Intrinsic::loongarch_lasx_xvbitset_w:
5505 case Intrinsic::loongarch_lasx_xvbitset_d: {
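// vbitset: set the selected bit of each element by OR-ing the source with a
// splat of 1 shifted left by the per-element bit index (truncated to the
// element width).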
5506 EVT VecTy = N->getValueType(ResNo: 0);
5507 SDValue One = DAG.getConstant(Val: 1, DL, VT: VecTy);
5508 return DAG.getNode(
5509 Opcode: ISD::OR, DL, VT: VecTy, N1: N->getOperand(Num: 1),
5510 N2: DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: One, N2: truncateVecElts(Node: N, DAG)));
5511 }
5512 case Intrinsic::loongarch_lsx_vbitseti_b:
5513 case Intrinsic::loongarch_lasx_xvbitseti_b:
5514 return lowerVectorBitSetImm<3>(Node: N, DAG);
5515 case Intrinsic::loongarch_lsx_vbitseti_h:
5516 case Intrinsic::loongarch_lasx_xvbitseti_h:
5517 return lowerVectorBitSetImm<4>(Node: N, DAG);
5518 case Intrinsic::loongarch_lsx_vbitseti_w:
5519 case Intrinsic::loongarch_lasx_xvbitseti_w:
5520 return lowerVectorBitSetImm<5>(Node: N, DAG);
5521 case Intrinsic::loongarch_lsx_vbitseti_d:
5522 case Intrinsic::loongarch_lasx_xvbitseti_d:
5523 return lowerVectorBitSetImm<6>(Node: N, DAG);
5524 case Intrinsic::loongarch_lsx_vbitrev_b:
5525 case Intrinsic::loongarch_lsx_vbitrev_h:
5526 case Intrinsic::loongarch_lsx_vbitrev_w:
5527 case Intrinsic::loongarch_lsx_vbitrev_d:
5528 case Intrinsic::loongarch_lasx_xvbitrev_b:
5529 case Intrinsic::loongarch_lasx_xvbitrev_h:
5530 case Intrinsic::loongarch_lasx_xvbitrev_w:
5531 case Intrinsic::loongarch_lasx_xvbitrev_d: {
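// vbitrev: flip the selected bit of each element by XOR-ing the source with a
// splat of 1 shifted left by the per-element bit index.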
5532 EVT VecTy = N->getValueType(ResNo: 0);
5533 SDValue One = DAG.getConstant(Val: 1, DL, VT: VecTy);
5534 return DAG.getNode(
5535 Opcode: ISD::XOR, DL, VT: VecTy, N1: N->getOperand(Num: 1),
5536 N2: DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: One, N2: truncateVecElts(Node: N, DAG)));
5537 }
5538 case Intrinsic::loongarch_lsx_vbitrevi_b:
5539 case Intrinsic::loongarch_lasx_xvbitrevi_b:
5540 return lowerVectorBitRevImm<3>(Node: N, DAG);
5541 case Intrinsic::loongarch_lsx_vbitrevi_h:
5542 case Intrinsic::loongarch_lasx_xvbitrevi_h:
5543 return lowerVectorBitRevImm<4>(Node: N, DAG);
5544 case Intrinsic::loongarch_lsx_vbitrevi_w:
5545 case Intrinsic::loongarch_lasx_xvbitrevi_w:
5546 return lowerVectorBitRevImm<5>(Node: N, DAG);
5547 case Intrinsic::loongarch_lsx_vbitrevi_d:
5548 case Intrinsic::loongarch_lasx_xvbitrevi_d:
5549 return lowerVectorBitRevImm<6>(Node: N, DAG);
5550 case Intrinsic::loongarch_lsx_vfadd_s:
5551 case Intrinsic::loongarch_lsx_vfadd_d:
5552 case Intrinsic::loongarch_lasx_xvfadd_s:
5553 case Intrinsic::loongarch_lasx_xvfadd_d:
5554 return DAG.getNode(Opcode: ISD::FADD, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5555 N2: N->getOperand(Num: 2));
5556 case Intrinsic::loongarch_lsx_vfsub_s:
5557 case Intrinsic::loongarch_lsx_vfsub_d:
5558 case Intrinsic::loongarch_lasx_xvfsub_s:
5559 case Intrinsic::loongarch_lasx_xvfsub_d:
5560 return DAG.getNode(Opcode: ISD::FSUB, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5561 N2: N->getOperand(Num: 2));
5562 case Intrinsic::loongarch_lsx_vfmul_s:
5563 case Intrinsic::loongarch_lsx_vfmul_d:
5564 case Intrinsic::loongarch_lasx_xvfmul_s:
5565 case Intrinsic::loongarch_lasx_xvfmul_d:
5566 return DAG.getNode(Opcode: ISD::FMUL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5567 N2: N->getOperand(Num: 2));
5568 case Intrinsic::loongarch_lsx_vfdiv_s:
5569 case Intrinsic::loongarch_lsx_vfdiv_d:
5570 case Intrinsic::loongarch_lasx_xvfdiv_s:
5571 case Intrinsic::loongarch_lasx_xvfdiv_d:
5572 return DAG.getNode(Opcode: ISD::FDIV, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5573 N2: N->getOperand(Num: 2));
5574 case Intrinsic::loongarch_lsx_vfmadd_s:
5575 case Intrinsic::loongarch_lsx_vfmadd_d:
5576 case Intrinsic::loongarch_lasx_xvfmadd_s:
5577 case Intrinsic::loongarch_lasx_xvfmadd_d:
5578 return DAG.getNode(Opcode: ISD::FMA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1),
5579 N2: N->getOperand(Num: 2), N3: N->getOperand(Num: 3));
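// vinsgr2vr: lower to INSERT_VECTOR_ELT. The lane-index immediate is checked
// with a width matching the lane count (4 bits for 16 lanes, 3 for 8, 2 for 4,
// 1 for 2) by legalizeIntrinsicImmArg.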
5580 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
5581 return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
5582 N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2),
5583 N3: legalizeIntrinsicImmArg<4>(Node: N, ImmOp: 3, DAG, Subtarget));
5584 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
5585 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
5586 return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
5587 N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2),
5588 N3: legalizeIntrinsicImmArg<3>(Node: N, ImmOp: 3, DAG, Subtarget));
5589 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
5590 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
5591 return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
5592 N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2),
5593 N3: legalizeIntrinsicImmArg<2>(Node: N, ImmOp: 3, DAG, Subtarget));
5594 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
5595 return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
5596 N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2),
5597 N3: legalizeIntrinsicImmArg<1>(Node: N, ImmOp: 3, DAG, Subtarget));
5598 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
5599 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
5600 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
5601 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
5602 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
5603 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
5604 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
5605 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
5606 return DAG.getNode(Opcode: LoongArchISD::VREPLGR2VR, DL, VT: N->getValueType(ResNo: 0),
5607 Operand: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getGRLenVT(),
5608 Operand: N->getOperand(Num: 1)));
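// vreplve: replicate the vector element selected by a runtime GPR index; the
// index is any-extended to GRLen before being handed to VREPLVE.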
5609 case Intrinsic::loongarch_lsx_vreplve_b:
5610 case Intrinsic::loongarch_lsx_vreplve_h:
5611 case Intrinsic::loongarch_lsx_vreplve_w:
5612 case Intrinsic::loongarch_lsx_vreplve_d:
5613 case Intrinsic::loongarch_lasx_xvreplve_b:
5614 case Intrinsic::loongarch_lasx_xvreplve_h:
5615 case Intrinsic::loongarch_lasx_xvreplve_w:
5616 case Intrinsic::loongarch_lasx_xvreplve_d:
5617 return DAG.getNode(Opcode: LoongArchISD::VREPLVE, DL, VT: N->getValueType(ResNo: 0),
5618 N1: N->getOperand(Num: 1),
5619 N2: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getGRLenVT(),
5620 Operand: N->getOperand(Num: 2)));
5621 }
5622 return SDValue();
5623}
5624
5625static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG,
5626 TargetLowering::DAGCombinerInfo &DCI,
5627 const LoongArchSubtarget &Subtarget) {
5628 // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 then the
5629 // conversion is unnecessary and can be replaced with the
5630 // MOVFR2GR_S_LA64 operand.
5631 SDValue Op0 = N->getOperand(Num: 0);
5632 if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64)
5633 return Op0.getOperand(i: 0);
5634 return SDValue();
5635}
5636
5637static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG,
5638 TargetLowering::DAGCombinerInfo &DCI,
5639 const LoongArchSubtarget &Subtarget) {
5640 // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
5641 // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
5642 // operand.
5643 SDValue Op0 = N->getOperand(Num: 0);
5644 if (Op0->getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) {
5645 assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) &&
5646 "Unexpected value type!");
5647 return Op0.getOperand(i: 0);
5648 }
5649 return SDValue();
5650}
5651
5652static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG,
5653 TargetLowering::DAGCombinerInfo &DCI,
5654 const LoongArchSubtarget &Subtarget) {
5655 MVT VT = N->getSimpleValueType(ResNo: 0);
5656 unsigned NumBits = VT.getScalarSizeInBits();
5657
5658 // Simplify the inputs.
5659 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5660 APInt DemandedMask(APInt::getAllOnes(numBits: NumBits));
5661 if (TLI.SimplifyDemandedBits(Op: SDValue(N, 0), DemandedBits: DemandedMask, DCI))
5662 return SDValue(N, 0);
5663
5664 return SDValue();
5665}
5666
5667static SDValue
5668performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG,
5669 TargetLowering::DAGCombinerInfo &DCI,
5670 const LoongArchSubtarget &Subtarget) {
5671 SDValue Op0 = N->getOperand(Num: 0);
5672 SDLoc DL(N);
5673
5674 // If the input to SplitPairF64 is just BuildPairF64 then the operation is
5675 // redundant. Instead, use BuildPairF64's operands directly.
5676 if (Op0->getOpcode() == LoongArchISD::BUILD_PAIR_F64)
5677 return DCI.CombineTo(N, Res0: Op0.getOperand(i: 0), Res1: Op0.getOperand(i: 1));
5678
5679 if (Op0->isUndef()) {
5680 SDValue Lo = DAG.getUNDEF(VT: MVT::i32);
5681 SDValue Hi = DAG.getUNDEF(VT: MVT::i32);
5682 return DCI.CombineTo(N, Res0: Lo, Res1: Hi);
5683 }
5684
5685 // It's cheaper to materialise two 32-bit integers than to load a double
5686 // from the constant pool and transfer it to integer registers through the
5687 // stack.
5688 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val&: Op0)) {
5689 APInt V = C->getValueAPF().bitcastToAPInt();
5690 SDValue Lo = DAG.getConstant(Val: V.trunc(width: 32), DL, VT: MVT::i32);
5691 SDValue Hi = DAG.getConstant(Val: V.lshr(shiftAmt: 32).trunc(width: 32), DL, VT: MVT::i32);
5692 return DCI.CombineTo(N, Res0: Lo, Res1: Hi);
5693 }
5694
5695 return SDValue();
5696}
5697
5698SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
5699 DAGCombinerInfo &DCI) const {
5700 SelectionDAG &DAG = DCI.DAG;
5701 switch (N->getOpcode()) {
5702 default:
5703 break;
5704 case ISD::AND:
5705 return performANDCombine(N, DAG, DCI, Subtarget);
5706 case ISD::OR:
5707 return performORCombine(N, DAG, DCI, Subtarget);
5708 case ISD::SETCC:
5709 return performSETCCCombine(N, DAG, DCI, Subtarget);
5710 case ISD::SRL:
5711 return performSRLCombine(N, DAG, DCI, Subtarget);
5712 case ISD::BITCAST:
5713 return performBITCASTCombine(N, DAG, DCI, Subtarget);
5714 case LoongArchISD::BITREV_W:
5715 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
5716 case ISD::INTRINSIC_WO_CHAIN:
5717 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
5718 case LoongArchISD::MOVGR2FR_W_LA64:
5719 return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
5720 case LoongArchISD::MOVFR2GR_S_LA64:
5721 return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
5722 case LoongArchISD::VMSKLTZ:
5723 case LoongArchISD::XVMSKLTZ:
5724 return performVMSKLTZCombine(N, DAG, DCI, Subtarget);
5725 case LoongArchISD::SPLIT_PAIR_F64:
5726 return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
5727 }
5728 return SDValue();
5729}
5730
5731static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
5732 MachineBasicBlock *MBB) {
5733 if (!ZeroDivCheck)
5734 return MBB;
5735
5736 // Build instructions:
5737 // MBB:
5738 // div(or mod) $dst, $dividend, $divisor
5739 // bne $divisor, $zero, SinkMBB
5740 // BreakMBB:
5741 // break 7 // BRK_DIVZERO
5742 // SinkMBB:
5743 // fallthrough
5744 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
5745 MachineFunction::iterator It = ++MBB->getIterator();
5746 MachineFunction *MF = MBB->getParent();
5747 auto BreakMBB = MF->CreateMachineBasicBlock(BB: LLVM_BB);
5748 auto SinkMBB = MF->CreateMachineBasicBlock(BB: LLVM_BB);
5749 MF->insert(MBBI: It, MBB: BreakMBB);
5750 MF->insert(MBBI: It, MBB: SinkMBB);
5751
5752 // Transfer the remainder of MBB and its successor edges to SinkMBB.
5753 SinkMBB->splice(Where: SinkMBB->end(), Other: MBB, From: std::next(x: MI.getIterator()), To: MBB->end());
5754 SinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB);
5755
5756 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
5757 DebugLoc DL = MI.getDebugLoc();
5758 MachineOperand &Divisor = MI.getOperand(i: 2);
5759 Register DivisorReg = Divisor.getReg();
5760
5761 // MBB:
5762 BuildMI(BB: MBB, MIMD: DL, MCID: TII.get(Opcode: LoongArch::BNE))
5763 .addReg(RegNo: DivisorReg, flags: getKillRegState(B: Divisor.isKill()))
5764 .addReg(RegNo: LoongArch::R0)
5765 .addMBB(MBB: SinkMBB);
5766 MBB->addSuccessor(Succ: BreakMBB);
5767 MBB->addSuccessor(Succ: SinkMBB);
5768
5769 // BreakMBB:
5770 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
5771 // definition of BRK_DIVZERO.
5772 BuildMI(BB: BreakMBB, MIMD: DL, MCID: TII.get(Opcode: LoongArch::BREAK)).addImm(Val: 7 /*BRK_DIVZERO*/);
5773 BreakMBB->addSuccessor(Succ: SinkMBB);
5774
5775 // Clear Divisor's kill flag.
5776 Divisor.setIsKill(false);
5777
5778 return SinkMBB;
5779}
5780
5781static MachineBasicBlock *
5782emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
5783 const LoongArchSubtarget &Subtarget) {
5784 unsigned CondOpc;
5785 switch (MI.getOpcode()) {
5786 default:
5787 llvm_unreachable("Unexpected opcode");
5788 case LoongArch::PseudoVBZ:
5789 CondOpc = LoongArch::VSETEQZ_V;
5790 break;
5791 case LoongArch::PseudoVBZ_B:
5792 CondOpc = LoongArch::VSETANYEQZ_B;
5793 break;
5794 case LoongArch::PseudoVBZ_H:
5795 CondOpc = LoongArch::VSETANYEQZ_H;
5796 break;
5797 case LoongArch::PseudoVBZ_W:
5798 CondOpc = LoongArch::VSETANYEQZ_W;
5799 break;
5800 case LoongArch::PseudoVBZ_D:
5801 CondOpc = LoongArch::VSETANYEQZ_D;
5802 break;
5803 case LoongArch::PseudoVBNZ:
5804 CondOpc = LoongArch::VSETNEZ_V;
5805 break;
5806 case LoongArch::PseudoVBNZ_B:
5807 CondOpc = LoongArch::VSETALLNEZ_B;
5808 break;
5809 case LoongArch::PseudoVBNZ_H:
5810 CondOpc = LoongArch::VSETALLNEZ_H;
5811 break;
5812 case LoongArch::PseudoVBNZ_W:
5813 CondOpc = LoongArch::VSETALLNEZ_W;
5814 break;
5815 case LoongArch::PseudoVBNZ_D:
5816 CondOpc = LoongArch::VSETALLNEZ_D;
5817 break;
5818 case LoongArch::PseudoXVBZ:
5819 CondOpc = LoongArch::XVSETEQZ_V;
5820 break;
5821 case LoongArch::PseudoXVBZ_B:
5822 CondOpc = LoongArch::XVSETANYEQZ_B;
5823 break;
5824 case LoongArch::PseudoXVBZ_H:
5825 CondOpc = LoongArch::XVSETANYEQZ_H;
5826 break;
5827 case LoongArch::PseudoXVBZ_W:
5828 CondOpc = LoongArch::XVSETANYEQZ_W;
5829 break;
5830 case LoongArch::PseudoXVBZ_D:
5831 CondOpc = LoongArch::XVSETANYEQZ_D;
5832 break;
5833 case LoongArch::PseudoXVBNZ:
5834 CondOpc = LoongArch::XVSETNEZ_V;
5835 break;
5836 case LoongArch::PseudoXVBNZ_B:
5837 CondOpc = LoongArch::XVSETALLNEZ_B;
5838 break;
5839 case LoongArch::PseudoXVBNZ_H:
5840 CondOpc = LoongArch::XVSETALLNEZ_H;
5841 break;
5842 case LoongArch::PseudoXVBNZ_W:
5843 CondOpc = LoongArch::XVSETALLNEZ_W;
5844 break;
5845 case LoongArch::PseudoXVBNZ_D:
5846 CondOpc = LoongArch::XVSETALLNEZ_D;
5847 break;
5848 }
5849
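// Lower the pseudo to a diamond: BB evaluates the vector condition into an
// FCC register and branches to TrueBB when it holds, otherwise falling
// through to FalseBB. FalseBB materialises 0, TrueBB materialises 1, and
// SinkBB merges the two values with a PHI.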
5850 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
5851 const BasicBlock *LLVM_BB = BB->getBasicBlock();
5852 DebugLoc DL = MI.getDebugLoc();
5853 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
5854 MachineFunction::iterator It = ++BB->getIterator();
5855
5856 MachineFunction *F = BB->getParent();
5857 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
5858 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
5859 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
5860
5861 F->insert(MBBI: It, MBB: FalseBB);
5862 F->insert(MBBI: It, MBB: TrueBB);
5863 F->insert(MBBI: It, MBB: SinkBB);
5864
5865 // Transfer the remainder of MBB and its successor edges to Sink.
5866 SinkBB->splice(Where: SinkBB->end(), Other: BB, From: std::next(x: MI.getIterator()), To: BB->end());
5867 SinkBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
5868
5869 // Insert the real instruction to BB.
5870 Register FCC = MRI.createVirtualRegister(RegClass: &LoongArch::CFRRegClass);
5871 BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: CondOpc), DestReg: FCC).addReg(RegNo: MI.getOperand(i: 1).getReg());
5872
5873 // Insert branch.
5874 BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::BCNEZ)).addReg(RegNo: FCC).addMBB(MBB: TrueBB);
5875 BB->addSuccessor(Succ: FalseBB);
5876 BB->addSuccessor(Succ: TrueBB);
5877
5878 // FalseBB.
5879 Register RD1 = MRI.createVirtualRegister(RegClass: &LoongArch::GPRRegClass);
5880 BuildMI(BB: FalseBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::ADDI_W), DestReg: RD1)
5881 .addReg(RegNo: LoongArch::R0)
5882 .addImm(Val: 0);
5883 BuildMI(BB: FalseBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::PseudoBR)).addMBB(MBB: SinkBB);
5884 FalseBB->addSuccessor(Succ: SinkBB);
5885
5886 // TrueBB.
5887 Register RD2 = MRI.createVirtualRegister(RegClass: &LoongArch::GPRRegClass);
5888 BuildMI(BB: TrueBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::ADDI_W), DestReg: RD2)
5889 .addReg(RegNo: LoongArch::R0)
5890 .addImm(Val: 1);
5891 TrueBB->addSuccessor(Succ: SinkBB);
5892
5893 // SinkBB: merge the results.
5894 BuildMI(BB&: *SinkBB, I: SinkBB->begin(), MIMD: DL, MCID: TII->get(Opcode: LoongArch::PHI),
5895 DestReg: MI.getOperand(i: 0).getReg())
5896 .addReg(RegNo: RD1)
5897 .addMBB(MBB: FalseBB)
5898 .addReg(RegNo: RD2)
5899 .addMBB(MBB: TrueBB);
5900
5901 // The pseudo instruction is gone now.
5902 MI.eraseFromParent();
5903 return SinkBB;
5904}
5905
5906static MachineBasicBlock *
5907emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
5908 const LoongArchSubtarget &Subtarget) {
5909 unsigned InsOp;
5910 unsigned HalfSize;
5911 switch (MI.getOpcode()) {
5912 default:
5913 llvm_unreachable("Unexpected opcode");
5914 case LoongArch::PseudoXVINSGR2VR_B:
5915 HalfSize = 16;
5916 InsOp = LoongArch::VINSGR2VR_B;
5917 break;
5918 case LoongArch::PseudoXVINSGR2VR_H:
5919 HalfSize = 8;
5920 InsOp = LoongArch::VINSGR2VR_H;
5921 break;
5922 }
5923 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
5924 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
5925 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
5926 DebugLoc DL = MI.getDebugLoc();
5927 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
5928 // XDst = vector_insert XSrc, Elt, Idx
5929 Register XDst = MI.getOperand(i: 0).getReg();
5930 Register XSrc = MI.getOperand(i: 1).getReg();
5931 Register Elt = MI.getOperand(i: 2).getReg();
5932 unsigned Idx = MI.getOperand(i: 3).getImm();
5933
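// If the index falls in the high 128-bit half, first bring that half down to
// the low lane with XVPERMI_Q, perform the insert with the 128-bit
// VINSGR2VR, then merge the updated lane back in as the high half of the
// result; otherwise a plain 128-bit insert on the low lane suffices.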
5934 Register ScratchReg1 = XSrc;
5935 if (Idx >= HalfSize) {
5936 ScratchReg1 = MRI.createVirtualRegister(RegClass: RC);
5937 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::XVPERMI_Q), DestReg: ScratchReg1)
5938 .addReg(RegNo: XSrc)
5939 .addReg(RegNo: XSrc)
5940 .addImm(Val: 1);
5941 }
5942
5943 Register ScratchSubReg1 = MRI.createVirtualRegister(RegClass: SubRC);
5944 Register ScratchSubReg2 = MRI.createVirtualRegister(RegClass: SubRC);
5945 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::COPY), DestReg: ScratchSubReg1)
5946 .addReg(RegNo: ScratchReg1, flags: 0, SubReg: LoongArch::sub_128);
5947 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: InsOp), DestReg: ScratchSubReg2)
5948 .addReg(RegNo: ScratchSubReg1)
5949 .addReg(RegNo: Elt)
5950 .addImm(Val: Idx >= HalfSize ? Idx - HalfSize : Idx);
5951
5952 Register ScratchReg2 = XDst;
5953 if (Idx >= HalfSize)
5954 ScratchReg2 = MRI.createVirtualRegister(RegClass: RC);
5955
5956 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::SUBREG_TO_REG), DestReg: ScratchReg2)
5957 .addImm(Val: 0)
5958 .addReg(RegNo: ScratchSubReg2)
5959 .addImm(Val: LoongArch::sub_128);
5960
5961 if (Idx >= HalfSize)
5962 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::XVPERMI_Q), DestReg: XDst)
5963 .addReg(RegNo: XSrc)
5964 .addReg(RegNo: ScratchReg2)
5965 .addImm(Val: 2);
5966
5967 MI.eraseFromParent();
5968 return BB;
5969}
5970
5971static MachineBasicBlock *emitPseudoCTPOP(MachineInstr &MI,
5972 MachineBasicBlock *BB,
5973 const LoongArchSubtarget &Subtarget) {
5974 assert(Subtarget.hasExtLSX());
5975 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
5976 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
5977 DebugLoc DL = MI.getDebugLoc();
5978 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
5979 Register Dst = MI.getOperand(i: 0).getReg();
5980 Register Src = MI.getOperand(i: 1).getReg();
5981 Register ScratchReg1 = MRI.createVirtualRegister(RegClass: RC);
5982 Register ScratchReg2 = MRI.createVirtualRegister(RegClass: RC);
5983 Register ScratchReg3 = MRI.createVirtualRegister(RegClass: RC);
5984
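// Emulate scalar CTPOP with LSX: materialise a zero vector, insert the GPR
// operand into element 0, run the vector popcount on it, and move the result
// back to a GPR.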
5985 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::VLDI), DestReg: ScratchReg1).addImm(Val: 0);
5986 BuildMI(BB&: *BB, I&: MI, MIMD: DL,
5987 MCID: TII->get(Opcode: Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
5988 : LoongArch::VINSGR2VR_W),
5989 DestReg: ScratchReg2)
5990 .addReg(RegNo: ScratchReg1)
5991 .addReg(RegNo: Src)
5992 .addImm(Val: 0);
5993 BuildMI(
5994 BB&: *BB, I&: MI, MIMD: DL,
5995 MCID: TII->get(Opcode: Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
5996 DestReg: ScratchReg3)
5997 .addReg(RegNo: ScratchReg2);
5998 BuildMI(BB&: *BB, I&: MI, MIMD: DL,
5999 MCID: TII->get(Opcode: Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
6000 : LoongArch::VPICKVE2GR_W),
6001 DestReg: Dst)
6002 .addReg(RegNo: ScratchReg3)
6003 .addImm(Val: 0);
6004
6005 MI.eraseFromParent();
6006 return BB;
6007}
6008
6009static MachineBasicBlock *
6010emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB,
6011 const LoongArchSubtarget &Subtarget) {
6012 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6013 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
6014 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
6015 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6016 Register Dst = MI.getOperand(i: 0).getReg();
6017 Register Src = MI.getOperand(i: 1).getReg();
6018 DebugLoc DL = MI.getDebugLoc();
6019 unsigned EleBits = 8;
6020 unsigned NotOpc = 0;
6021 unsigned MskOpc;
6022
6023 switch (MI.getOpcode()) {
6024 default:
6025 llvm_unreachable("Unexpected opcode");
6026 case LoongArch::PseudoVMSKLTZ_B:
6027 MskOpc = LoongArch::VMSKLTZ_B;
6028 break;
6029 case LoongArch::PseudoVMSKLTZ_H:
6030 MskOpc = LoongArch::VMSKLTZ_H;
6031 EleBits = 16;
6032 break;
6033 case LoongArch::PseudoVMSKLTZ_W:
6034 MskOpc = LoongArch::VMSKLTZ_W;
6035 EleBits = 32;
6036 break;
6037 case LoongArch::PseudoVMSKLTZ_D:
6038 MskOpc = LoongArch::VMSKLTZ_D;
6039 EleBits = 64;
6040 break;
6041 case LoongArch::PseudoVMSKGEZ_B:
6042 MskOpc = LoongArch::VMSKGEZ_B;
6043 break;
6044 case LoongArch::PseudoVMSKEQZ_B:
6045 MskOpc = LoongArch::VMSKNZ_B;
6046 NotOpc = LoongArch::VNOR_V;
6047 break;
6048 case LoongArch::PseudoVMSKNEZ_B:
6049 MskOpc = LoongArch::VMSKNZ_B;
6050 break;
6051 case LoongArch::PseudoXVMSKLTZ_B:
6052 MskOpc = LoongArch::XVMSKLTZ_B;
6053 RC = &LoongArch::LASX256RegClass;
6054 break;
6055 case LoongArch::PseudoXVMSKLTZ_H:
6056 MskOpc = LoongArch::XVMSKLTZ_H;
6057 RC = &LoongArch::LASX256RegClass;
6058 EleBits = 16;
6059 break;
6060 case LoongArch::PseudoXVMSKLTZ_W:
6061 MskOpc = LoongArch::XVMSKLTZ_W;
6062 RC = &LoongArch::LASX256RegClass;
6063 EleBits = 32;
6064 break;
6065 case LoongArch::PseudoXVMSKLTZ_D:
6066 MskOpc = LoongArch::XVMSKLTZ_D;
6067 RC = &LoongArch::LASX256RegClass;
6068 EleBits = 64;
6069 break;
6070 case LoongArch::PseudoXVMSKGEZ_B:
6071 MskOpc = LoongArch::XVMSKGEZ_B;
6072 RC = &LoongArch::LASX256RegClass;
6073 break;
6074 case LoongArch::PseudoXVMSKEQZ_B:
6075 MskOpc = LoongArch::XVMSKNZ_B;
6076 NotOpc = LoongArch::XVNOR_V;
6077 RC = &LoongArch::LASX256RegClass;
6078 break;
6079 case LoongArch::PseudoXVMSKNEZ_B:
6080 MskOpc = LoongArch::XVMSKNZ_B;
6081 RC = &LoongArch::LASX256RegClass;
6082 break;
6083 }
6084
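// For LASX the mask of each 128-bit half lands in the low bits of that half;
// extract both halves with XVPICKVE2GR_WU and merge the high half into the
// low one with BSTRINS. For LSX a single VPICKVE2GR_HU of element 0 suffices.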
6085 Register Msk = MRI.createVirtualRegister(RegClass: RC);
6086 if (NotOpc) {
6087 Register Tmp = MRI.createVirtualRegister(RegClass: RC);
6088 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: MskOpc), DestReg: Tmp).addReg(RegNo: Src);
6089 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: NotOpc), DestReg: Msk)
6090 .addReg(RegNo: Tmp, flags: RegState::Kill)
6091 .addReg(RegNo: Tmp, flags: RegState::Kill);
6092 } else {
6093 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: MskOpc), DestReg: Msk).addReg(RegNo: Src);
6094 }
6095
6096 if (TRI->getRegSizeInBits(RC: *RC) > 128) {
6097 Register Lo = MRI.createVirtualRegister(RegClass: &LoongArch::GPRRegClass);
6098 Register Hi = MRI.createVirtualRegister(RegClass: &LoongArch::GPRRegClass);
6099 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::XVPICKVE2GR_WU), DestReg: Lo)
6100 .addReg(RegNo: Msk)
6101 .addImm(Val: 0);
6102 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::XVPICKVE2GR_WU), DestReg: Hi)
6103 .addReg(RegNo: Msk, flags: RegState::Kill)
6104 .addImm(Val: 4);
6105 BuildMI(BB&: *BB, I&: MI, MIMD: DL,
6106 MCID: TII->get(Opcode: Subtarget.is64Bit() ? LoongArch::BSTRINS_D
6107 : LoongArch::BSTRINS_W),
6108 DestReg: Dst)
6109 .addReg(RegNo: Lo, flags: RegState::Kill)
6110 .addReg(RegNo: Hi, flags: RegState::Kill)
6111 .addImm(Val: 256 / EleBits - 1)
6112 .addImm(Val: 128 / EleBits);
6113 } else {
6114 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::VPICKVE2GR_HU), DestReg: Dst)
6115 .addReg(RegNo: Msk, flags: RegState::Kill)
6116 .addImm(Val: 0);
6117 }
6118
6119 MI.eraseFromParent();
6120 return BB;
6121}
6122
6123static MachineBasicBlock *
6124emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB,
6125 const LoongArchSubtarget &Subtarget) {
6126 assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
6127 "Unexpected instruction");
6128
6129 MachineFunction &MF = *BB->getParent();
6130 DebugLoc DL = MI.getDebugLoc();
6131 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
6132 Register LoReg = MI.getOperand(i: 0).getReg();
6133 Register HiReg = MI.getOperand(i: 1).getReg();
6134 Register SrcReg = MI.getOperand(i: 2).getReg();
6135
6136 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: LoongArch::MOVFR2GR_S_64), DestReg: LoReg).addReg(RegNo: SrcReg);
6137 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: LoongArch::MOVFRH2GR_S), DestReg: HiReg)
6138 .addReg(RegNo: SrcReg, flags: getKillRegState(B: MI.getOperand(i: 2).isKill()));
6139 MI.eraseFromParent(); // The pseudo instruction is gone now.
6140 return BB;
6141}
6142
6143static MachineBasicBlock *
6144emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB,
6145 const LoongArchSubtarget &Subtarget) {
6146 assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
6147 "Unexpected instruction");
6148
6149 MachineFunction &MF = *BB->getParent();
6150 DebugLoc DL = MI.getDebugLoc();
6151 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
6152 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6153 Register TmpReg = MRI.createVirtualRegister(RegClass: &LoongArch::FPR64RegClass);
6154 Register DstReg = MI.getOperand(i: 0).getReg();
6155 Register LoReg = MI.getOperand(i: 1).getReg();
6156 Register HiReg = MI.getOperand(i: 2).getReg();
6157
6158 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: LoongArch::MOVGR2FR_W_64), DestReg: TmpReg)
6159 .addReg(RegNo: LoReg, flags: getKillRegState(B: MI.getOperand(i: 1).isKill()));
6160 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: LoongArch::MOVGR2FRH_W), DestReg: DstReg)
6161 .addReg(RegNo: TmpReg, flags: RegState::Kill)
6162 .addReg(RegNo: HiReg, flags: getKillRegState(B: MI.getOperand(i: 2).isKill()));
6163 MI.eraseFromParent(); // The pseudo instruction is gone now.
6164 return BB;
6165}
6166
6167static bool isSelectPseudo(MachineInstr &MI) {
6168 switch (MI.getOpcode()) {
6169 default:
6170 return false;
6171 case LoongArch::Select_GPR_Using_CC_GPR:
6172 return true;
6173 }
6174}
6175
6176static MachineBasicBlock *
6177emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB,
6178 const LoongArchSubtarget &Subtarget) {
6179 // To "insert" Select_* instructions, we actually have to insert the triangle
6180 // control-flow pattern. The incoming instructions know the destination vreg
6181 // to set, the condition code register to branch on, the true/false values to
6182 // select between, and the condcode to use to select the appropriate branch.
6183 //
6184 // We produce the following control flow:
6185 // HeadMBB
6186 // | \
6187 // | IfFalseMBB
6188 // | /
6189 // TailMBB
6190 //
6191 // When we find a sequence of selects we attempt to optimize their emission
6192 // by sharing the control flow. Currently we only handle cases where we have
6193 // multiple selects with the exact same condition (same LHS, RHS and CC).
6194 // The selects may be interleaved with other instructions if the other
6195 // instructions meet some requirements we deem safe:
6196 // - They are not pseudo instructions.
6197 // - They are debug instructions; or otherwise,
6198 // - They have no side-effects, do not access memory, and their inputs do
6199 //   not depend on the results of the select pseudo-instructions.
6200 // The TrueV/FalseV operands of the selects cannot depend on the result of
6201 // previous selects in the sequence.
6202 // These conditions could be further relaxed. See the X86 target for a
6203 // related approach and more information.
6204
6205 Register LHS = MI.getOperand(i: 1).getReg();
6206 Register RHS;
6207 if (MI.getOperand(i: 2).isReg())
6208 RHS = MI.getOperand(i: 2).getReg();
6209 auto CC = static_cast<unsigned>(MI.getOperand(i: 3).getImm());
6210
6211 SmallVector<MachineInstr *, 4> SelectDebugValues;
6212 SmallSet<Register, 4> SelectDests;
6213 SelectDests.insert(V: MI.getOperand(i: 0).getReg());
6214
6215 MachineInstr *LastSelectPseudo = &MI;
6216 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
6217 SequenceMBBI != E; ++SequenceMBBI) {
6218 if (SequenceMBBI->isDebugInstr())
6219 continue;
6220 if (isSelectPseudo(MI&: *SequenceMBBI)) {
6221 if (SequenceMBBI->getOperand(i: 1).getReg() != LHS ||
6222 !SequenceMBBI->getOperand(i: 2).isReg() ||
6223 SequenceMBBI->getOperand(i: 2).getReg() != RHS ||
6224 SequenceMBBI->getOperand(i: 3).getImm() != CC ||
6225 SelectDests.count(V: SequenceMBBI->getOperand(i: 4).getReg()) ||
6226 SelectDests.count(V: SequenceMBBI->getOperand(i: 5).getReg()))
6227 break;
6228 LastSelectPseudo = &*SequenceMBBI;
6229 SequenceMBBI->collectDebugValues(DbgValues&: SelectDebugValues);
6230 SelectDests.insert(V: SequenceMBBI->getOperand(i: 0).getReg());
6231 continue;
6232 }
6233 if (SequenceMBBI->hasUnmodeledSideEffects() ||
6234 SequenceMBBI->mayLoadOrStore() ||
6235 SequenceMBBI->usesCustomInsertionHook())
6236 break;
6237 if (llvm::any_of(Range: SequenceMBBI->operands(), P: [&](MachineOperand &MO) {
6238 return MO.isReg() && MO.isUse() && SelectDests.count(V: MO.getReg());
6239 }))
6240 break;
6241 }
6242
6243 const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
6244 const BasicBlock *LLVM_BB = BB->getBasicBlock();
6245 DebugLoc DL = MI.getDebugLoc();
6246 MachineFunction::iterator I = ++BB->getIterator();
6247
6248 MachineBasicBlock *HeadMBB = BB;
6249 MachineFunction *F = BB->getParent();
6250 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
6251 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
6252
6253 F->insert(MBBI: I, MBB: IfFalseMBB);
6254 F->insert(MBBI: I, MBB: TailMBB);
6255
6256 // Set the call frame size on entry to the new basic blocks.
6257 unsigned CallFrameSize = TII.getCallFrameSizeAt(MI&: *LastSelectPseudo);
6258 IfFalseMBB->setCallFrameSize(CallFrameSize);
6259 TailMBB->setCallFrameSize(CallFrameSize);
6260
6261 // Transfer debug instructions associated with the selects to TailMBB.
6262 for (MachineInstr *DebugInstr : SelectDebugValues) {
6263 TailMBB->push_back(MI: DebugInstr->removeFromParent());
6264 }
6265
6266 // Move all instructions after the sequence to TailMBB.
6267 TailMBB->splice(Where: TailMBB->end(), Other: HeadMBB,
6268 From: std::next(x: LastSelectPseudo->getIterator()), To: HeadMBB->end());
6269 // Update machine-CFG edges by transferring all successors of the current
6270 // block to the new block which will contain the Phi nodes for the selects.
6271 TailMBB->transferSuccessorsAndUpdatePHIs(FromMBB: HeadMBB);
6272 // Set the successors for HeadMBB.
6273 HeadMBB->addSuccessor(Succ: IfFalseMBB);
6274 HeadMBB->addSuccessor(Succ: TailMBB);
6275
6276 // Insert appropriate branch.
6277 if (MI.getOperand(i: 2).isImm())
6278 BuildMI(BB: HeadMBB, MIMD: DL, MCID: TII.get(Opcode: CC))
6279 .addReg(RegNo: LHS)
6280 .addImm(Val: MI.getOperand(i: 2).getImm())
6281 .addMBB(MBB: TailMBB);
6282 else
6283 BuildMI(BB: HeadMBB, MIMD: DL, MCID: TII.get(Opcode: CC)).addReg(RegNo: LHS).addReg(RegNo: RHS).addMBB(MBB: TailMBB);
6284
6285 // IfFalseMBB just falls through to TailMBB.
6286 IfFalseMBB->addSuccessor(Succ: TailMBB);
6287
6288 // Create PHIs for all of the select pseudo-instructions.
6289 auto SelectMBBI = MI.getIterator();
6290 auto SelectEnd = std::next(x: LastSelectPseudo->getIterator());
6291 auto InsertionPoint = TailMBB->begin();
6292 while (SelectMBBI != SelectEnd) {
6293 auto Next = std::next(x: SelectMBBI);
6294 if (isSelectPseudo(MI&: *SelectMBBI)) {
6295 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
6296 BuildMI(BB&: *TailMBB, I: InsertionPoint, MIMD: SelectMBBI->getDebugLoc(),
6297 MCID: TII.get(Opcode: LoongArch::PHI), DestReg: SelectMBBI->getOperand(i: 0).getReg())
6298 .addReg(RegNo: SelectMBBI->getOperand(i: 4).getReg())
6299 .addMBB(MBB: HeadMBB)
6300 .addReg(RegNo: SelectMBBI->getOperand(i: 5).getReg())
6301 .addMBB(MBB: IfFalseMBB);
6302 SelectMBBI->eraseFromParent();
6303 }
6304 SelectMBBI = Next;
6305 }
6306
6307 F->getProperties().resetNoPHIs();
6308 return TailMBB;
6309}
6310
6311MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
6312 MachineInstr &MI, MachineBasicBlock *BB) const {
6313 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6314 DebugLoc DL = MI.getDebugLoc();
6315
6316 switch (MI.getOpcode()) {
6317 default:
6318 llvm_unreachable("Unexpected instr type to insert");
6319 case LoongArch::DIV_W:
6320 case LoongArch::DIV_WU:
6321 case LoongArch::MOD_W:
6322 case LoongArch::MOD_WU:
6323 case LoongArch::DIV_D:
6324 case LoongArch::DIV_DU:
6325 case LoongArch::MOD_D:
6326 case LoongArch::MOD_DU:
6327 return insertDivByZeroTrap(MI, MBB: BB);
6329 case LoongArch::WRFCSR: {
6330 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::MOVGR2FCSR),
6331 DestReg: LoongArch::FCSR0 + MI.getOperand(i: 0).getImm())
6332 .addReg(RegNo: MI.getOperand(i: 1).getReg());
6333 MI.eraseFromParent();
6334 return BB;
6335 }
6336 case LoongArch::RDFCSR: {
6337 MachineInstr *ReadFCSR =
6338 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::MOVFCSR2GR),
6339 DestReg: MI.getOperand(i: 0).getReg())
6340 .addReg(RegNo: LoongArch::FCSR0 + MI.getOperand(i: 1).getImm());
6341 ReadFCSR->getOperand(i: 1).setIsUndef();
6342 MI.eraseFromParent();
6343 return BB;
6344 }
6345 case LoongArch::Select_GPR_Using_CC_GPR:
6346 return emitSelectPseudo(MI, BB, Subtarget);
6347 case LoongArch::BuildPairF64Pseudo:
6348 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
6349 case LoongArch::SplitPairF64Pseudo:
6350 return emitSplitPairF64Pseudo(MI, BB, Subtarget);
6351 case LoongArch::PseudoVBZ:
6352 case LoongArch::PseudoVBZ_B:
6353 case LoongArch::PseudoVBZ_H:
6354 case LoongArch::PseudoVBZ_W:
6355 case LoongArch::PseudoVBZ_D:
6356 case LoongArch::PseudoVBNZ:
6357 case LoongArch::PseudoVBNZ_B:
6358 case LoongArch::PseudoVBNZ_H:
6359 case LoongArch::PseudoVBNZ_W:
6360 case LoongArch::PseudoVBNZ_D:
6361 case LoongArch::PseudoXVBZ:
6362 case LoongArch::PseudoXVBZ_B:
6363 case LoongArch::PseudoXVBZ_H:
6364 case LoongArch::PseudoXVBZ_W:
6365 case LoongArch::PseudoXVBZ_D:
6366 case LoongArch::PseudoXVBNZ:
6367 case LoongArch::PseudoXVBNZ_B:
6368 case LoongArch::PseudoXVBNZ_H:
6369 case LoongArch::PseudoXVBNZ_W:
6370 case LoongArch::PseudoXVBNZ_D:
6371 return emitVecCondBranchPseudo(MI, BB, Subtarget);
6372 case LoongArch::PseudoXVINSGR2VR_B:
6373 case LoongArch::PseudoXVINSGR2VR_H:
6374 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
6375 case LoongArch::PseudoCTPOP:
6376 return emitPseudoCTPOP(MI, BB, Subtarget);
6377 case LoongArch::PseudoVMSKLTZ_B:
6378 case LoongArch::PseudoVMSKLTZ_H:
6379 case LoongArch::PseudoVMSKLTZ_W:
6380 case LoongArch::PseudoVMSKLTZ_D:
6381 case LoongArch::PseudoVMSKGEZ_B:
6382 case LoongArch::PseudoVMSKEQZ_B:
6383 case LoongArch::PseudoVMSKNEZ_B:
6384 case LoongArch::PseudoXVMSKLTZ_B:
6385 case LoongArch::PseudoXVMSKLTZ_H:
6386 case LoongArch::PseudoXVMSKLTZ_W:
6387 case LoongArch::PseudoXVMSKLTZ_D:
6388 case LoongArch::PseudoXVMSKGEZ_B:
6389 case LoongArch::PseudoXVMSKEQZ_B:
6390 case LoongArch::PseudoXVMSKNEZ_B:
6391 return emitPseudoVMSKCOND(MI, BB, Subtarget);
6392 case TargetOpcode::STATEPOINT:
6393 // STATEPOINT is a pseudo instruction which has no implicit defs/uses,
6394 // while the bl call instruction (to which the statepoint is lowered at
6395 // the end) has an implicit def. This def is early-clobber as it will be
6396 // set at the moment of the call, earlier than any use is read.
6397 // Add this implicit dead def here as a workaround.
6398 MI.addOperand(MF&: *MI.getMF(),
6399 Op: MachineOperand::CreateReg(
6400 Reg: LoongArch::R1, /*isDef*/ true,
6401 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
6402 /*isUndef*/ false, /*isEarlyClobber*/ true));
6403 if (!Subtarget.is64Bit())
6404 report_fatal_error(reason: "STATEPOINT is only supported on 64-bit targets");
6405 return emitPatchPoint(MI, MBB: BB);
6406 }
6407}
6408
6409bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
6410 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
6411 unsigned *Fast) const {
6412 if (!Subtarget.hasUAL())
6413 return false;
6414
6415 // TODO: set reasonable speed number.
6416 if (Fast)
6417 *Fast = 1;
6418 return true;
6419}
6420
6421const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
6422 switch ((LoongArchISD::NodeType)Opcode) {
6423 case LoongArchISD::FIRST_NUMBER:
6424 break;
6425
6426#define NODE_NAME_CASE(node) \
6427 case LoongArchISD::node: \
6428 return "LoongArchISD::" #node;
6429
6430 // TODO: Add more target-dependent nodes later.
6431 NODE_NAME_CASE(CALL)
6432 NODE_NAME_CASE(CALL_MEDIUM)
6433 NODE_NAME_CASE(CALL_LARGE)
6434 NODE_NAME_CASE(RET)
6435 NODE_NAME_CASE(TAIL)
6436 NODE_NAME_CASE(TAIL_MEDIUM)
6437 NODE_NAME_CASE(TAIL_LARGE)
6438 NODE_NAME_CASE(SELECT_CC)
6439 NODE_NAME_CASE(SLL_W)
6440 NODE_NAME_CASE(SRA_W)
6441 NODE_NAME_CASE(SRL_W)
6442 NODE_NAME_CASE(BSTRINS)
6443 NODE_NAME_CASE(BSTRPICK)
6444 NODE_NAME_CASE(MOVGR2FR_W_LA64)
6445 NODE_NAME_CASE(MOVFR2GR_S_LA64)
6446 NODE_NAME_CASE(FTINT)
6447 NODE_NAME_CASE(BUILD_PAIR_F64)
6448 NODE_NAME_CASE(SPLIT_PAIR_F64)
6449 NODE_NAME_CASE(REVB_2H)
6450 NODE_NAME_CASE(REVB_2W)
6451 NODE_NAME_CASE(BITREV_4B)
6452 NODE_NAME_CASE(BITREV_8B)
6453 NODE_NAME_CASE(BITREV_W)
6454 NODE_NAME_CASE(ROTR_W)
6455 NODE_NAME_CASE(ROTL_W)
6456 NODE_NAME_CASE(DIV_W)
6457 NODE_NAME_CASE(DIV_WU)
6458 NODE_NAME_CASE(MOD_W)
6459 NODE_NAME_CASE(MOD_WU)
6460 NODE_NAME_CASE(CLZ_W)
6461 NODE_NAME_CASE(CTZ_W)
6462 NODE_NAME_CASE(DBAR)
6463 NODE_NAME_CASE(IBAR)
6464 NODE_NAME_CASE(BREAK)
6465 NODE_NAME_CASE(SYSCALL)
6466 NODE_NAME_CASE(CRC_W_B_W)
6467 NODE_NAME_CASE(CRC_W_H_W)
6468 NODE_NAME_CASE(CRC_W_W_W)
6469 NODE_NAME_CASE(CRC_W_D_W)
6470 NODE_NAME_CASE(CRCC_W_B_W)
6471 NODE_NAME_CASE(CRCC_W_H_W)
6472 NODE_NAME_CASE(CRCC_W_W_W)
6473 NODE_NAME_CASE(CRCC_W_D_W)
6474 NODE_NAME_CASE(CSRRD)
6475 NODE_NAME_CASE(CSRWR)
6476 NODE_NAME_CASE(CSRXCHG)
6477 NODE_NAME_CASE(IOCSRRD_B)
6478 NODE_NAME_CASE(IOCSRRD_H)
6479 NODE_NAME_CASE(IOCSRRD_W)
6480 NODE_NAME_CASE(IOCSRRD_D)
6481 NODE_NAME_CASE(IOCSRWR_B)
6482 NODE_NAME_CASE(IOCSRWR_H)
6483 NODE_NAME_CASE(IOCSRWR_W)
6484 NODE_NAME_CASE(IOCSRWR_D)
6485 NODE_NAME_CASE(CPUCFG)
6486 NODE_NAME_CASE(MOVGR2FCSR)
6487 NODE_NAME_CASE(MOVFCSR2GR)
6488 NODE_NAME_CASE(CACOP_D)
6489 NODE_NAME_CASE(CACOP_W)
6490 NODE_NAME_CASE(VSHUF)
6491 NODE_NAME_CASE(VPICKEV)
6492 NODE_NAME_CASE(VPICKOD)
6493 NODE_NAME_CASE(VPACKEV)
6494 NODE_NAME_CASE(VPACKOD)
6495 NODE_NAME_CASE(VILVL)
6496 NODE_NAME_CASE(VILVH)
6497 NODE_NAME_CASE(VSHUF4I)
6498 NODE_NAME_CASE(VREPLVEI)
6499 NODE_NAME_CASE(VREPLGR2VR)
6500 NODE_NAME_CASE(XVPERMI)
6501 NODE_NAME_CASE(VPICK_SEXT_ELT)
6502 NODE_NAME_CASE(VPICK_ZEXT_ELT)
6503 NODE_NAME_CASE(VREPLVE)
6504 NODE_NAME_CASE(VALL_ZERO)
6505 NODE_NAME_CASE(VANY_ZERO)
6506 NODE_NAME_CASE(VALL_NONZERO)
6507 NODE_NAME_CASE(VANY_NONZERO)
6508 NODE_NAME_CASE(FRECIPE)
6509 NODE_NAME_CASE(FRSQRTE)
6510 NODE_NAME_CASE(VSLLI)
6511 NODE_NAME_CASE(VSRLI)
6512 NODE_NAME_CASE(VBSLL)
6513 NODE_NAME_CASE(VBSRL)
6514 NODE_NAME_CASE(VLDREPL)
6515 NODE_NAME_CASE(VMSKLTZ)
6516 NODE_NAME_CASE(VMSKGEZ)
6517 NODE_NAME_CASE(VMSKEQZ)
6518 NODE_NAME_CASE(VMSKNEZ)
6519 NODE_NAME_CASE(XVMSKLTZ)
6520 NODE_NAME_CASE(XVMSKGEZ)
6521 NODE_NAME_CASE(XVMSKEQZ)
6522 NODE_NAME_CASE(XVMSKNEZ)
6523 }
6524#undef NODE_NAME_CASE
6525 return nullptr;
6526}
6527
6528//===----------------------------------------------------------------------===//
6529// Calling Convention Implementation
6530//===----------------------------------------------------------------------===//
6531
6532// Eight general-purpose registers a0-a7 are used for passing integer
6533// arguments, with a0-a1 reused to return values. Generally, the GPRs are used
6534// to pass fixed-point arguments, and floating-point arguments when no FPR is
6535// available or when using a soft-float ABI.
6536const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
6537 LoongArch::R7, LoongArch::R8, LoongArch::R9,
6538 LoongArch::R10, LoongArch::R11};
6539// Eight floating-point registers fa0-fa7 are used for passing floating-point
6540// arguments, and fa0-fa1 are also used to return values.
6541const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
6542 LoongArch::F3, LoongArch::F4, LoongArch::F5,
6543 LoongArch::F6, LoongArch::F7};
6544// FPR32 and FPR64 alias each other.
6545const MCPhysReg ArgFPR64s[] = {
6546 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
6547 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
6548
6549const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
6550 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
6551 LoongArch::VR6, LoongArch::VR7};
6552
6553const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
6554 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
6555 LoongArch::XR6, LoongArch::XR7};
6556
6557// Pass a 2*GRLen argument that has been split into two GRLen values through
6558// registers or the stack as necessary.
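// For example, on LA64 an i128 arrives here as two i64 halves: if an argument
// GPR is free the first half takes it (and the second half takes the next GPR
// or a stack slot); otherwise both halves are placed on the stack with at
// least 8-byte alignment.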
6559static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
6560 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
6561 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
6562 ISD::ArgFlagsTy ArgFlags2) {
6563 unsigned GRLenInBytes = GRLen / 8;
6564 if (Register Reg = State.AllocateReg(Regs: ArgGPRs)) {
6565 // At least one half can be passed via register.
6566 State.addLoc(V: CCValAssign::getReg(ValNo: VA1.getValNo(), ValVT: VA1.getValVT(), Reg,
6567 LocVT: VA1.getLocVT(), HTP: CCValAssign::Full));
6568 } else {
6569 // Both halves must be passed on the stack, with proper alignment.
6570 Align StackAlign =
6571 std::max(a: Align(GRLenInBytes), b: ArgFlags1.getNonZeroOrigAlign());
6572 State.addLoc(
6573 V: CCValAssign::getMem(ValNo: VA1.getValNo(), ValVT: VA1.getValVT(),
6574 Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: StackAlign),
6575 LocVT: VA1.getLocVT(), HTP: CCValAssign::Full));
6576 State.addLoc(V: CCValAssign::getMem(
6577 ValNo: ValNo2, ValVT: ValVT2, Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: Align(GRLenInBytes)),
6578 LocVT: LocVT2, HTP: CCValAssign::Full));
6579 return false;
6580 }
6581 if (Register Reg = State.AllocateReg(Regs: ArgGPRs)) {
6582 // The second half can also be passed via register.
6583 State.addLoc(
6584 V: CCValAssign::getReg(ValNo: ValNo2, ValVT: ValVT2, Reg, LocVT: LocVT2, HTP: CCValAssign::Full));
6585 } else {
6586 // The second half is passed via the stack, without additional alignment.
6587 State.addLoc(V: CCValAssign::getMem(
6588 ValNo: ValNo2, ValVT: ValVT2, Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: Align(GRLenInBytes)),
6589 LocVT: LocVT2, HTP: CCValAssign::Full));
6590 }
6591 return false;
6592}
6593
6594// Implements the LoongArch calling convention. Returns true upon failure.
6595static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
6596 unsigned ValNo, MVT ValVT,
6597 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6598 CCState &State, bool IsFixed, bool IsRet,
6599 Type *OrigTy) {
6600 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
6601 assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
6602 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
6603 MVT LocVT = ValVT;
6604
6605 // Any return value split into more than two values can't be returned
6606 // directly.
6607 if (IsRet && ValNo > 1)
6608 return true;
6609
6610 // Use a GPR for a float if it's variadic, no FPR is free, or the ABI is soft-float.
6611 bool UseGPRForFloat = true;
6612
6613 switch (ABI) {
6614 default:
6615 llvm_unreachable("Unexpected ABI");
6616 break;
6617 case LoongArchABI::ABI_ILP32F:
6618 case LoongArchABI::ABI_LP64F:
6619 case LoongArchABI::ABI_ILP32D:
6620 case LoongArchABI::ABI_LP64D:
6621 UseGPRForFloat = !IsFixed;
6622 break;
6623 case LoongArchABI::ABI_ILP32S:
6624 case LoongArchABI::ABI_LP64S:
6625 break;
6626 }
6627
6628 // If this is a variadic argument, the LoongArch calling convention requires
6629 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
6630 // byte alignment. An aligned register should be used regardless of whether
6631 // the original argument was split during legalisation or not. The argument
6632 // will not be passed by registers if the original type is larger than
6633 // 2*GRLen, so the register alignment rule does not apply.
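// For example, on LA32 a variadic double whose next free GPR would be a1 is
// instead assigned the aligned pair a2/a3, with a1 skipped.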
6634 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
6635 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
6636 DL.getTypeAllocSize(Ty: OrigTy) == TwoGRLenInBytes) {
6637 unsigned RegIdx = State.getFirstUnallocated(Regs: ArgGPRs);
6638 // Skip 'odd' register if necessary.
6639 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
6640 State.AllocateReg(Regs: ArgGPRs);
6641 }
6642
6643 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
6644 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
6645 State.getPendingArgFlags();
6646
6647 assert(PendingLocs.size() == PendingArgFlags.size() &&
6648 "PendingLocs and PendingArgFlags out of sync");
6649
6650 // FPR32 and FPR64 alias each other.
6651 if (State.getFirstUnallocated(Regs: ArgFPR32s) == std::size(ArgFPR32s))
6652 UseGPRForFloat = true;
6653
6654 if (UseGPRForFloat && ValVT == MVT::f32) {
6655 LocVT = GRLenVT;
6656 LocInfo = CCValAssign::BCvt;
6657 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
6658 LocVT = MVT::i64;
6659 LocInfo = CCValAssign::BCvt;
6660 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
6661 // Handle passing f64 on LA32D with a soft float ABI or when floating point
6662 // registers are exhausted.
6663 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
6664 // Depending on available argument GPRs, f64 may be passed in a pair of
6665 // GPRs, split between a GPR and the stack, or passed completely on the
6666 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
6667 // cases.
6668 MCRegister Reg = State.AllocateReg(Regs: ArgGPRs);
6669 if (!Reg) {
6670 int64_t StackOffset = State.AllocateStack(Size: 8, Alignment: Align(8));
6671 State.addLoc(
6672 V: CCValAssign::getMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo));
6673 return false;
6674 }
6675 LocVT = MVT::i32;
6676 State.addLoc(V: CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, HTP: LocInfo));
6677 MCRegister HiReg = State.AllocateReg(Regs: ArgGPRs);
6678 if (HiReg) {
6679 State.addLoc(
6680 V: CCValAssign::getCustomReg(ValNo, ValVT, Reg: HiReg, LocVT, HTP: LocInfo));
6681 } else {
6682 int64_t StackOffset = State.AllocateStack(Size: 4, Alignment: Align(4));
6683 State.addLoc(
6684 V: CCValAssign::getCustomMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo));
6685 }
6686 return false;
6687 }
6688
6689 // Split arguments might be passed indirectly, so keep track of the pending
6690 // values.
6691 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
6692 LocVT = GRLenVT;
6693 LocInfo = CCValAssign::Indirect;
6694 PendingLocs.push_back(
6695 Elt: CCValAssign::getPending(ValNo, ValVT, LocVT, HTP: LocInfo));
6696 PendingArgFlags.push_back(Elt: ArgFlags);
6697 if (!ArgFlags.isSplitEnd()) {
6698 return false;
6699 }
6700 }
6701
6702 // If the split argument only had two elements, it should be passed directly
6703 // in registers or on the stack.
6704 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
6705 PendingLocs.size() <= 2) {
6706 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
6707 // Apply the normal calling convention rules to the first half of the
6708 // split argument.
6709 CCValAssign VA = PendingLocs[0];
6710 ISD::ArgFlagsTy AF = PendingArgFlags[0];
6711 PendingLocs.clear();
6712 PendingArgFlags.clear();
6713 return CC_LoongArchAssign2GRLen(GRLen, State, VA1: VA, ArgFlags1: AF, ValNo2: ValNo, ValVT2: ValVT, LocVT2: LocVT,
6714 ArgFlags2: ArgFlags);
6715 }
6716
6717 // Allocate to a register if possible, or else a stack slot.
6718 Register Reg;
6719 unsigned StoreSizeBytes = GRLen / 8;
6720 Align StackAlign = Align(GRLen / 8);
6721
6722 if (ValVT == MVT::f32 && !UseGPRForFloat)
6723 Reg = State.AllocateReg(Regs: ArgFPR32s);
6724 else if (ValVT == MVT::f64 && !UseGPRForFloat)
6725 Reg = State.AllocateReg(Regs: ArgFPR64s);
6726 else if (ValVT.is128BitVector())
6727 Reg = State.AllocateReg(Regs: ArgVRs);
6728 else if (ValVT.is256BitVector())
6729 Reg = State.AllocateReg(Regs: ArgXRs);
6730 else
6731 Reg = State.AllocateReg(Regs: ArgGPRs);
6732
6733 unsigned StackOffset =
6734 Reg ? 0 : State.AllocateStack(Size: StoreSizeBytes, Alignment: StackAlign);
6735
6736 // If we reach this point and PendingLocs is non-empty, we must be at the
6737 // end of a split argument that must be passed indirectly.
6738 if (!PendingLocs.empty()) {
6739 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
6740 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
6741 for (auto &It : PendingLocs) {
6742 if (Reg)
6743 It.convertToReg(Reg);
6744 else
6745 It.convertToMem(Offset: StackOffset);
6746 State.addLoc(V: It);
6747 }
6748 PendingLocs.clear();
6749 PendingArgFlags.clear();
6750 return false;
6751 }
6752 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
6753 "Expected an GRLenVT at this stage");
6754
6755 if (Reg) {
6756 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, HTP: LocInfo));
6757 return false;
6758 }
6759
6760 // When a floating-point value is passed on the stack, no bit-cast is needed.
6761 if (ValVT.isFloatingPoint()) {
6762 LocVT = ValVT;
6763 LocInfo = CCValAssign::Full;
6764 }
6765
6766 State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo));
6767 return false;
6768}
6769
6770void LoongArchTargetLowering::analyzeInputArgs(
6771 MachineFunction &MF, CCState &CCInfo,
6772 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
6773 LoongArchCCAssignFn Fn) const {
6774 FunctionType *FType = MF.getFunction().getFunctionType();
6775 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
6776 MVT ArgVT = Ins[i].VT;
6777 Type *ArgTy = nullptr;
6778 if (IsRet)
6779 ArgTy = FType->getReturnType();
6780 else if (Ins[i].isOrigArg())
6781 ArgTy = FType->getParamType(i: Ins[i].getOrigArgIndex());
6782 LoongArchABI::ABI ABI =
6783 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
6784 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
6785 CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
6786 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
6787 << '\n');
6788 llvm_unreachable("");
6789 }
6790 }
6791}
6792
6793void LoongArchTargetLowering::analyzeOutputArgs(
6794 MachineFunction &MF, CCState &CCInfo,
6795 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
6796 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
6797 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
6798 MVT ArgVT = Outs[i].VT;
6799 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
6800 LoongArchABI::ABI ABI =
6801 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
6802 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
6803 CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
6804 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
6805 << "\n");
6806 llvm_unreachable("");
6807 }
6808 }
6809}
6810
6811// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
6812// values.
6813static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
6814 const CCValAssign &VA, const SDLoc &DL) {
6815 switch (VA.getLocInfo()) {
6816 default:
6817 llvm_unreachable("Unexpected CCValAssign::LocInfo");
6818 case CCValAssign::Full:
6819 case CCValAssign::Indirect:
6820 break;
6821 case CCValAssign::BCvt:
6822 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
6823 Val = DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64, DL, VT: MVT::f32, Operand: Val);
6824 else
6825 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: VA.getValVT(), Operand: Val);
6826 break;
6827 }
6828 return Val;
6829}
6830
6831static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
6832 const CCValAssign &VA, const SDLoc &DL,
6833 const ISD::InputArg &In,
6834 const LoongArchTargetLowering &TLI) {
6835 MachineFunction &MF = DAG.getMachineFunction();
6836 MachineRegisterInfo &RegInfo = MF.getRegInfo();
6837 EVT LocVT = VA.getLocVT();
6838 SDValue Val;
6839 const TargetRegisterClass *RC = TLI.getRegClassFor(VT: LocVT.getSimpleVT());
6840 Register VReg = RegInfo.createVirtualRegister(RegClass: RC);
6841 RegInfo.addLiveIn(Reg: VA.getLocReg(), vreg: VReg);
6842 Val = DAG.getCopyFromReg(Chain, dl: DL, Reg: VReg, VT: LocVT);
6843
6844 // If input is sign extended from 32 bits, note it for the OptW pass.
6845 if (In.isOrigArg()) {
6846 Argument *OrigArg = MF.getFunction().getArg(i: In.getOrigArgIndex());
6847 if (OrigArg->getType()->isIntegerTy()) {
6848 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
6849 // An input zero extended from i31 can also be considered sign extended.
6850 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
6851 (BitWidth < 32 && In.Flags.isZExt())) {
6852 LoongArchMachineFunctionInfo *LAFI =
6853 MF.getInfo<LoongArchMachineFunctionInfo>();
6854 LAFI->addSExt32Register(Reg: VReg);
6855 }
6856 }
6857 }
6858
6859 return convertLocVTToValVT(DAG, Val, VA, DL);
6860}
6861
6862// The caller is responsible for loading the full value if the argument is
6863// passed with CCValAssign::Indirect.
6864static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
6865 const CCValAssign &VA, const SDLoc &DL) {
6866 MachineFunction &MF = DAG.getMachineFunction();
6867 MachineFrameInfo &MFI = MF.getFrameInfo();
6868 EVT ValVT = VA.getValVT();
6869 int FI = MFI.CreateFixedObject(Size: ValVT.getStoreSize(), SPOffset: VA.getLocMemOffset(),
6870 /*IsImmutable=*/true);
6871 SDValue FIN = DAG.getFrameIndex(
6872 FI, VT: MVT::getIntegerVT(BitWidth: DAG.getDataLayout().getPointerSizeInBits(AS: 0)));
6873
6874 ISD::LoadExtType ExtType;
6875 switch (VA.getLocInfo()) {
6876 default:
6877 llvm_unreachable("Unexpected CCValAssign::LocInfo");
6878 case CCValAssign::Full:
6879 case CCValAssign::Indirect:
6880 case CCValAssign::BCvt:
6881 ExtType = ISD::NON_EXTLOAD;
6882 break;
6883 }
6884 return DAG.getExtLoad(
6885 ExtType, dl: DL, VT: VA.getLocVT(), Chain, Ptr: FIN,
6886 PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI), MemVT: ValVT);
6887}
6888
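// Unpack an f64 passed split across two i32 locations on LA32 with a
// soft-float ABI: the low half is always in a GPR, while the high half may be
// in another GPR or on the stack.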
6889static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain,
6890 const CCValAssign &VA,
6891 const CCValAssign &HiVA,
6892 const SDLoc &DL) {
6893 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
6894 "Unexpected VA");
6895 MachineFunction &MF = DAG.getMachineFunction();
6896 MachineFrameInfo &MFI = MF.getFrameInfo();
6897 MachineRegisterInfo &RegInfo = MF.getRegInfo();
6898
6899 assert(VA.isRegLoc() && "Expected register VA assignment");
6900
6901 Register LoVReg = RegInfo.createVirtualRegister(RegClass: &LoongArch::GPRRegClass);
6902 RegInfo.addLiveIn(Reg: VA.getLocReg(), vreg: LoVReg);
6903 SDValue Lo = DAG.getCopyFromReg(Chain, dl: DL, Reg: LoVReg, VT: MVT::i32);
6904 SDValue Hi;
6905 if (HiVA.isMemLoc()) {
6906 // Second half of f64 is passed on the stack.
6907 int FI = MFI.CreateFixedObject(Size: 4, SPOffset: HiVA.getLocMemOffset(),
6908 /*IsImmutable=*/true);
6909 SDValue FIN = DAG.getFrameIndex(FI, VT: MVT::i32);
6910 Hi = DAG.getLoad(VT: MVT::i32, dl: DL, Chain, Ptr: FIN,
6911 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI));
6912 } else {
6913 // Second half of f64 is passed in another GPR.
6914 Register HiVReg = RegInfo.createVirtualRegister(RegClass: &LoongArch::GPRRegClass);
6915 RegInfo.addLiveIn(Reg: HiVA.getLocReg(), vreg: HiVReg);
6916 Hi = DAG.getCopyFromReg(Chain, dl: DL, Reg: HiVReg, VT: MVT::i32);
6917 }
6918 return DAG.getNode(Opcode: LoongArchISD::BUILD_PAIR_F64, DL, VT: MVT::f64, N1: Lo, N2: Hi);
6919}
6920
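// Convert Val from its value VT to the location VT required by the calling
// convention; the inverse of convertLocVTToValVT.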
6921static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
6922 const CCValAssign &VA, const SDLoc &DL) {
6923 EVT LocVT = VA.getLocVT();
6924
6925 switch (VA.getLocInfo()) {
6926 default:
6927 llvm_unreachable("Unexpected CCValAssign::LocInfo");
6928 case CCValAssign::Full:
6929 break;
6930 case CCValAssign::BCvt:
6931 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
6932 Val = DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Val);
6933 else
6934 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: LocVT, Operand: Val);
6935 break;
6936 }
6937 return Val;
6938}
6939
6940static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
6941 CCValAssign::LocInfo LocInfo,
6942 ISD::ArgFlagsTy ArgFlags, CCState &State) {
6943 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
6944 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
6945 // s0 s1 s2 s3 s4 s5 s6 s7 s8
6946 static const MCPhysReg GPRList[] = {
6947 LoongArch::R23, LoongArch::R24, LoongArch::R25,
6948 LoongArch::R26, LoongArch::R27, LoongArch::R28,
6949 LoongArch::R29, LoongArch::R30, LoongArch::R31};
6950 if (MCRegister Reg = State.AllocateReg(Regs: GPRList)) {
6951 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, HTP: LocInfo));
6952 return false;
6953 }
6954 }
6955
6956 if (LocVT == MVT::f32) {
6957 // Pass in STG registers: F1, F2, F3, F4
6958 // fs0,fs1,fs2,fs3
6959 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
6960 LoongArch::F26, LoongArch::F27};
6961 if (MCRegister Reg = State.AllocateReg(Regs: FPR32List)) {
6962 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, HTP: LocInfo));
6963 return false;
6964 }
6965 }
6966
6967 if (LocVT == MVT::f64) {
6968 // Pass in STG registers: D1, D2, D3, D4
6969 // fs4,fs5,fs6,fs7
6970 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
6971 LoongArch::F30_64, LoongArch::F31_64};
6972 if (MCRegister Reg = State.AllocateReg(Regs: FPR64List)) {
6973 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, HTP: LocInfo));
6974 return false;
6975 }
6976 }
6977
6978 report_fatal_error(reason: "No registers left in GHC calling convention");
6979 return true;
6980}
6981
6982// Transform physical registers into virtual registers.
6983SDValue LoongArchTargetLowering::LowerFormalArguments(
6984 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
6985 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
6986 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
6987
6988 MachineFunction &MF = DAG.getMachineFunction();
6989
6990 switch (CallConv) {
6991 default:
6992 llvm_unreachable("Unsupported calling convention");
6993 case CallingConv::C:
6994 case CallingConv::Fast:
6995 break;
6996 case CallingConv::GHC:
6997 if (!MF.getSubtarget().hasFeature(Feature: LoongArch::FeatureBasicF) ||
6998 !MF.getSubtarget().hasFeature(Feature: LoongArch::FeatureBasicD))
6999 report_fatal_error(
7000 reason: "GHC calling convention requires the F and D extensions");
7001 }
7002
7003 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
7004 MVT GRLenVT = Subtarget.getGRLenVT();
7005 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
7006 // Used with varargs to accumulate store chains.
7007 std::vector<SDValue> OutChains;
7008
7009 // Assign locations to all of the incoming arguments.
7010 SmallVector<CCValAssign> ArgLocs;
7011 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
7012
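// GHC functions use a dedicated register-only convention (CC_LoongArch_GHC);
// everything else goes through the standard LoongArch calling convention.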
7013 if (CallConv == CallingConv::GHC)
7014 CCInfo.AnalyzeFormalArguments(Ins, Fn: CC_LoongArch_GHC);
7015 else
7016 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, Fn: CC_LoongArch);
7017
7018 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
7019 CCValAssign &VA = ArgLocs[i];
7020 SDValue ArgValue;
7021 // Passing f64 on LA32D with a soft float ABI must be handled as a special
7022 // case.
7023 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
7024 assert(VA.needsCustom());
7025 ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, HiVA: ArgLocs[++i], DL);
7026 } else if (VA.isRegLoc())
7027 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, In: Ins[InsIdx], TLI: *this);
7028 else
7029 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
7030 if (VA.getLocInfo() == CCValAssign::Indirect) {
7031 // If the original argument was split and passed by reference, we need to
7032 // load all parts of it here (using the same address).
7033 InVals.push_back(Elt: DAG.getLoad(VT: VA.getValVT(), dl: DL, Chain, Ptr: ArgValue,
7034 PtrInfo: MachinePointerInfo()));
7035 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
7036 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
7037 assert(ArgPartOffset == 0);
7038 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
7039 CCValAssign &PartVA = ArgLocs[i + 1];
7040 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
7041 SDValue Offset = DAG.getIntPtrConstant(Val: PartOffset, DL);
7042 SDValue Address = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: ArgValue, N2: Offset);
7043 InVals.push_back(Elt: DAG.getLoad(VT: PartVA.getValVT(), dl: DL, Chain, Ptr: Address,
7044 PtrInfo: MachinePointerInfo()));
7045 ++i;
7046 ++InsIdx;
7047 }
7048 continue;
7049 }
7050 InVals.push_back(Elt: ArgValue);
7051 }
7052
7053 if (IsVarArg) {
7054 ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
7055 unsigned Idx = CCInfo.getFirstUnallocated(Regs: ArgRegs);
7056 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
7057 MachineFrameInfo &MFI = MF.getFrameInfo();
7058 MachineRegisterInfo &RegInfo = MF.getRegInfo();
7059 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
7060
7061 // Offset of the first variable argument from stack pointer, and size of
7062 // the vararg save area. For now, the varargs save area is either zero or
7063 // large enough to hold a0-a7.
7064 int VaArgOffset, VarArgsSaveSize;
7065
7066 // If all registers are allocated, then all varargs must be passed on the
7067 // stack and we don't need to save any argregs.
7068 if (ArgRegs.size() == Idx) {
7069 VaArgOffset = CCInfo.getStackSize();
7070 VarArgsSaveSize = 0;
7071 } else {
7072 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
7073 VaArgOffset = -VarArgsSaveSize;
7074 }
7075
7076 // Record the frame index of the first variable argument
7077 // which is a value necessary to VASTART.
7078 int FI = MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset, IsImmutable: true);
7079 LoongArchFI->setVarArgsFrameIndex(FI);
7080
7081 // If saving an odd number of registers then create an extra stack slot to
7082 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
7083 // offsets to even-numbered registers remain 2*GRLen-aligned.
7084 if (Idx % 2) {
7085 MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset - (int)GRLenInBytes,
7086 IsImmutable: true);
7087 VarArgsSaveSize += GRLenInBytes;
7088 }
7089
7090 // Copy the integer registers that may have been used for passing varargs
7091 // to the vararg save area.
7092 for (unsigned I = Idx; I < ArgRegs.size();
7093 ++I, VaArgOffset += GRLenInBytes) {
7094 const Register Reg = RegInfo.createVirtualRegister(RegClass: RC);
7095 RegInfo.addLiveIn(Reg: ArgRegs[I], vreg: Reg);
7096 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl: DL, Reg, VT: GRLenVT);
7097 FI = MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset, IsImmutable: true);
7098 SDValue PtrOff = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout()));
7099 SDValue Store = DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: PtrOff,
7100 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI));
7101 cast<StoreSDNode>(Val: Store.getNode())
7102 ->getMemOperand()
7103 ->setValue((Value *)nullptr);
7104 OutChains.push_back(x: Store);
7105 }
7106 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
7107 }
7108
7109 // All stores are grouped in one node to allow the matching between
7110 // the size of Ins and InVals. This only happens for vararg functions.
7111 if (!OutChains.empty()) {
7112 OutChains.push_back(x: Chain);
7113 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
7114 }
7115
7116 return Chain;
7117}
7118
7119bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
7120 return CI->isTailCall();
7121}
7122
7123 // Check that the return value is used only as a return value, as otherwise
7124 // we can't perform a tail call.
7125bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
7126 SDValue &Chain) const {
7127 if (N->getNumValues() != 1)
7128 return false;
7129 if (!N->hasNUsesOfValue(NUses: 1, Value: 0))
7130 return false;
7131
7132 SDNode *Copy = *N->user_begin();
7133 if (Copy->getOpcode() != ISD::CopyToReg)
7134 return false;
7135
7136 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
7137 // isn't safe to perform a tail call.
7138 if (Copy->getGluedNode())
7139 return false;
7140
7141 // The copy must be used by a LoongArchISD::RET, and nothing else.
7142 bool HasRet = false;
7143 for (SDNode *Node : Copy->users()) {
7144 if (Node->getOpcode() != LoongArchISD::RET)
7145 return false;
7146 HasRet = true;
7147 }
7148
7149 if (!HasRet)
7150 return false;
7151
7152 Chain = Copy->getOperand(Num: 0);
7153 return true;
7154}
7155
7156// Check whether the call is eligible for tail call optimization.
7157bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
7158 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
7159 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
7160
7161 auto CalleeCC = CLI.CallConv;
7162 auto &Outs = CLI.Outs;
7163 auto &Caller = MF.getFunction();
7164 auto CallerCC = Caller.getCallingConv();
7165
7166 // Do not tail call opt if the stack is used to pass parameters.
7167 if (CCInfo.getStackSize() != 0)
7168 return false;
7169
7170 // Do not tail call opt if any parameters need to be passed indirectly.
7171 for (auto &VA : ArgLocs)
7172 if (VA.getLocInfo() == CCValAssign::Indirect)
7173 return false;
7174
7175 // Do not tail call opt if either caller or callee uses struct return
7176 // semantics.
7177 auto IsCallerStructRet = Caller.hasStructRetAttr();
7178 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
7179 if (IsCallerStructRet || IsCalleeStructRet)
7180 return false;
7181
7182 // Do not tail call opt if either the callee or caller has a byval argument.
7183 for (auto &Arg : Outs)
7184 if (Arg.Flags.isByVal())
7185 return false;
7186
7187 // The callee has to preserve all registers the caller needs to preserve.
7188 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
7189 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
7190 if (CalleeCC != CallerCC) {
7191 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
7192 if (!TRI->regmaskSubsetEqual(mask0: CallerPreserved, mask1: CalleePreserved))
7193 return false;
7194 }
7195 return true;
7196}
7197
7198static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
7199 return DAG.getDataLayout().getPrefTypeAlign(
7200 Ty: VT.getTypeForEVT(Context&: *DAG.getContext()));
7201}
7202
7203// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
7204// and output parameter nodes.
7205SDValue
7206LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
7207 SmallVectorImpl<SDValue> &InVals) const {
7208 SelectionDAG &DAG = CLI.DAG;
7209 SDLoc &DL = CLI.DL;
7210 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
7211 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
7212 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
7213 SDValue Chain = CLI.Chain;
7214 SDValue Callee = CLI.Callee;
7215 CallingConv::ID CallConv = CLI.CallConv;
7216 bool IsVarArg = CLI.IsVarArg;
7217 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
7218 MVT GRLenVT = Subtarget.getGRLenVT();
7219 bool &IsTailCall = CLI.IsTailCall;
7220
7221 MachineFunction &MF = DAG.getMachineFunction();
7222
7223 // Analyze the operands of the call, assigning locations to each operand.
7224 SmallVector<CCValAssign> ArgLocs;
7225 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
7226
7227 if (CallConv == CallingConv::GHC)
7228 ArgCCInfo.AnalyzeCallOperands(Outs, Fn: CC_LoongArch_GHC);
7229 else
7230 analyzeOutputArgs(MF, CCInfo&: ArgCCInfo, Outs, /*IsRet=*/false, CLI: &CLI, Fn: CC_LoongArch);
7231
7232 // Check if it's really possible to do a tail call.
7233 if (IsTailCall)
7234 IsTailCall = isEligibleForTailCallOptimization(CCInfo&: ArgCCInfo, CLI, MF, ArgLocs);
7235
7236 if (IsTailCall)
7237 ++NumTailCalls;
7238 else if (CLI.CB && CLI.CB->isMustTailCall())
7239 report_fatal_error(reason: "failed to perform tail call elimination on a call "
7240 "site marked musttail");
7241
7242 // Get a count of how many bytes are to be pushed on the stack.
7243 unsigned NumBytes = ArgCCInfo.getStackSize();
7244
7245 // Create local copies for byval args.
7246 SmallVector<SDValue> ByValArgs;
7247 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
7248 ISD::ArgFlagsTy Flags = Outs[i].Flags;
7249 if (!Flags.isByVal())
7250 continue;
7251
7252 SDValue Arg = OutVals[i];
7253 unsigned Size = Flags.getByValSize();
7254 Align Alignment = Flags.getNonZeroByValAlign();
7255
7256 int FI =
7257 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/isSpillSlot: false);
7258 SDValue FIPtr = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout()));
7259 SDValue SizeNode = DAG.getConstant(Val: Size, DL, VT: GRLenVT);
7260
7261 Chain = DAG.getMemcpy(Chain, dl: DL, Dst: FIPtr, Src: Arg, Size: SizeNode, Alignment,
7262 /*IsVolatile=*/isVol: false,
7263 /*AlwaysInline=*/false, /*CI=*/nullptr, OverrideTailCall: std::nullopt,
7264 DstPtrInfo: MachinePointerInfo(), SrcPtrInfo: MachinePointerInfo());
7265 ByValArgs.push_back(Elt: FIPtr);
7266 }
7267
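// A tail call reuses the caller's stack frame, so no CALLSEQ_START/CALLSEQ_END
// markers are emitted for it.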
7268 if (!IsTailCall)
7269 Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: 0, DL: CLI.DL);
7270
7271 // Copy argument values to their designated locations.
7272 SmallVector<std::pair<Register, SDValue>> RegsToPass;
7273 SmallVector<SDValue> MemOpChains;
7274 SDValue StackPtr;
7275 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
7276 ++i, ++OutIdx) {
7277 CCValAssign &VA = ArgLocs[i];
7278 SDValue ArgValue = OutVals[OutIdx];
7279 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
7280
7281 // Handle passing f64 on LA32D with a soft float ABI as a special case.
7282 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
7283 assert(VA.isRegLoc() && "Expected register VA assignment");
7284 assert(VA.needsCustom());
7285 SDValue SplitF64 =
7286 DAG.getNode(Opcode: LoongArchISD::SPLIT_PAIR_F64, DL,
7287 VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32), N: ArgValue);
7288 SDValue Lo = SplitF64.getValue(R: 0);
7289 SDValue Hi = SplitF64.getValue(R: 1);
7290
7291 Register RegLo = VA.getLocReg();
7292 RegsToPass.push_back(Elt: std::make_pair(x&: RegLo, y&: Lo));
7293
7294 // Get the CCValAssign for the Hi part.
7295 CCValAssign &HiVA = ArgLocs[++i];
7296
7297 if (HiVA.isMemLoc()) {
7298 // Second half of f64 is passed on the stack.
7299 if (!StackPtr.getNode())
7300 StackPtr = DAG.getCopyFromReg(Chain, dl: DL, Reg: LoongArch::R3, VT: PtrVT);
7301 SDValue Address =
7302 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr,
7303 N2: DAG.getIntPtrConstant(Val: HiVA.getLocMemOffset(), DL));
7304 // Emit the store.
7305 MemOpChains.push_back(Elt: DAG.getStore(
7306 Chain, dl: DL, Val: Hi, Ptr: Address,
7307 PtrInfo: MachinePointerInfo::getStack(MF, Offset: HiVA.getLocMemOffset())));
7308 } else {
7309 // Second half of f64 is passed in another GPR.
7310 Register RegHigh = HiVA.getLocReg();
7311 RegsToPass.push_back(Elt: std::make_pair(x&: RegHigh, y&: Hi));
7312 }
7313 continue;
7314 }
7315
7316 // Promote the value if needed.
7317 // For now, only handle fully promoted and indirect arguments.
7318 if (VA.getLocInfo() == CCValAssign::Indirect) {
7319 // Store the argument in a stack slot and pass its address.
7320 Align StackAlign =
7321 std::max(a: getPrefTypeAlign(VT: Outs[OutIdx].ArgVT, DAG),
7322 b: getPrefTypeAlign(VT: ArgValue.getValueType(), DAG));
7323 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
7324 // If the original argument was split and passed by reference, we need to
7325 // store the required parts of it here (and pass just one address).
7326 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
7327 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
7328 assert(ArgPartOffset == 0);
7329 // Calculate the total size to store. We do not know it up front, so walk
7330 // the remaining parts, accumulating their sizes and the required
7331 // alignment.
7332 SmallVector<std::pair<SDValue, SDValue>> Parts;
7333 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
7334 SDValue PartValue = OutVals[OutIdx + 1];
7335 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
7336 SDValue Offset = DAG.getIntPtrConstant(Val: PartOffset, DL);
7337 EVT PartVT = PartValue.getValueType();
7338
7339 StoredSize += PartVT.getStoreSize();
7340 StackAlign = std::max(a: StackAlign, b: getPrefTypeAlign(VT: PartVT, DAG));
7341 Parts.push_back(Elt: std::make_pair(x&: PartValue, y&: Offset));
7342 ++i;
7343 ++OutIdx;
7344 }
7345 SDValue SpillSlot = DAG.CreateStackTemporary(Bytes: StoredSize, Alignment: StackAlign);
7346 int FI = cast<FrameIndexSDNode>(Val&: SpillSlot)->getIndex();
7347 MemOpChains.push_back(
7348 Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: SpillSlot,
7349 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)));
7350 for (const auto &Part : Parts) {
7351 SDValue PartValue = Part.first;
7352 SDValue PartOffset = Part.second;
7353 SDValue Address =
7354 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: SpillSlot, N2: PartOffset);
7355 MemOpChains.push_back(
7356 Elt: DAG.getStore(Chain, dl: DL, Val: PartValue, Ptr: Address,
7357 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)));
7358 }
7359 ArgValue = SpillSlot;
7360 } else {
7361 ArgValue = convertValVTToLocVT(DAG, Val: ArgValue, VA, DL);
7362 }
7363
7364 // Use local copy if it is a byval arg.
7365 if (Flags.isByVal())
7366 ArgValue = ByValArgs[j++];
7367
7368 if (VA.isRegLoc()) {
7369 // Queue up the argument copies and emit them at the end.
7370 RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: ArgValue));
7371 } else {
7372 assert(VA.isMemLoc() && "Argument not register or memory");
7373 assert(!IsTailCall && "Tail call not allowed if stack is used "
7374 "for passing parameters");
7375
7376 // Work out the address of the stack slot.
7377 if (!StackPtr.getNode())
7378 StackPtr = DAG.getCopyFromReg(Chain, dl: DL, Reg: LoongArch::R3, VT: PtrVT);
7379 SDValue Address =
7380 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr,
7381 N2: DAG.getIntPtrConstant(Val: VA.getLocMemOffset(), DL));
7382
7383 // Emit the store.
7384 MemOpChains.push_back(
7385 Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: Address, PtrInfo: MachinePointerInfo()));
7386 }
7387 }
7388
7389 // Join the stores, which are independent of one another.
7390 if (!MemOpChains.empty())
7391 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: MemOpChains);
7392
7393 SDValue Glue;
7394
7395 // Build a sequence of copy-to-reg nodes, chained and glued together.
7396 for (auto &Reg : RegsToPass) {
7397 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: Reg.first, N: Reg.second, Glue);
7398 Glue = Chain.getValue(R: 1);
7399 }
7400
7401 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
7402 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
7403 // split it, and the direct call can be matched by PseudoCALL.
7404 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Val&: Callee)) {
7405 const GlobalValue *GV = S->getGlobal();
7406 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
7407 ? LoongArchII::MO_CALL
7408 : LoongArchII::MO_CALL_PLT;
7409 Callee = DAG.getTargetGlobalAddress(GV: S->getGlobal(), DL, VT: PtrVT, offset: 0, TargetFlags: OpFlags);
7410 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Val&: Callee)) {
7411 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV: nullptr)
7412 ? LoongArchII::MO_CALL
7413 : LoongArchII::MO_CALL_PLT;
7414 Callee = DAG.getTargetExternalSymbol(Sym: S->getSymbol(), VT: PtrVT, TargetFlags: OpFlags);
7415 }
7416
7417 // The first call operand is the chain and the second is the target address.
7418 SmallVector<SDValue> Ops;
7419 Ops.push_back(Elt: Chain);
7420 Ops.push_back(Elt: Callee);
7421
7422 // Add argument registers to the end of the list so that they are
7423 // known live into the call.
7424 for (auto &Reg : RegsToPass)
7425 Ops.push_back(Elt: DAG.getRegister(Reg: Reg.first, VT: Reg.second.getValueType()));
7426
7427 if (!IsTailCall) {
7428 // Add a register mask operand representing the call-preserved registers.
7429 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
7430 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
7431 assert(Mask && "Missing call preserved mask for calling convention");
7432 Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask));
7433 }
7434
7435 // Glue the call to the argument copies, if any.
7436 if (Glue.getNode())
7437 Ops.push_back(Elt: Glue);
7438
7439 // Emit the call.
7440 SDVTList NodeTys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue);
7441 unsigned Op;
7442 switch (DAG.getTarget().getCodeModel()) {
7443 default:
7444 report_fatal_error(reason: "Unsupported code model");
7445 case CodeModel::Small:
7446 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
7447 break;
7448 case CodeModel::Medium:
7449 assert(Subtarget.is64Bit() && "Medium code model requires LA64");
7450 Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
7451 break;
7452 case CodeModel::Large:
7453 assert(Subtarget.is64Bit() && "Large code model requires LA64");
7454 Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
7455 break;
7456 }
7457
7458 if (IsTailCall) {
7459 MF.getFrameInfo().setHasTailCall();
7460 SDValue Ret = DAG.getNode(Opcode: Op, DL, VTList: NodeTys, Ops);
7461 DAG.addNoMergeSiteInfo(Node: Ret.getNode(), NoMerge: CLI.NoMerge);
7462 return Ret;
7463 }
7464
7465 Chain = DAG.getNode(Opcode: Op, DL, VTList: NodeTys, Ops);
7466 DAG.addNoMergeSiteInfo(Node: Chain.getNode(), NoMerge: CLI.NoMerge);
7467 Glue = Chain.getValue(R: 1);
7468
7469 // Mark the end of the call, which is glued to the call itself.
7470 Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytes, Size2: 0, Glue, DL);
7471 Glue = Chain.getValue(R: 1);
7472
7473 // Assign locations to each value returned by this call.
7474 SmallVector<CCValAssign> RVLocs;
7475 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
7476 analyzeInputArgs(MF, CCInfo&: RetCCInfo, Ins, /*IsRet=*/true, Fn: CC_LoongArch);
7477
7478 // Copy all of the result registers out of their specified physreg.
7479 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
7480 auto &VA = RVLocs[i];
7481 // Copy the value out.
7482 SDValue RetValue =
7483 DAG.getCopyFromReg(Chain, dl: DL, Reg: VA.getLocReg(), VT: VA.getLocVT(), Glue);
7484 // Glue the RetValue to the end of the call sequence.
7485 Chain = RetValue.getValue(R: 1);
7486 Glue = RetValue.getValue(R: 2);
7487
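// An f64 returned on LA32 with a soft-float ABI comes back in two i32
// registers; reassemble it with BUILD_PAIR_F64.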
7488 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
7489 assert(VA.needsCustom());
7490 SDValue RetValue2 = DAG.getCopyFromReg(Chain, dl: DL, Reg: RVLocs[++i].getLocReg(),
7491 VT: MVT::i32, Glue);
7492 Chain = RetValue2.getValue(R: 1);
7493 Glue = RetValue2.getValue(R: 2);
7494 RetValue = DAG.getNode(Opcode: LoongArchISD::BUILD_PAIR_F64, DL, VT: MVT::f64,
7495 N1: RetValue, N2: RetValue2);
7496 } else
7497 RetValue = convertLocVTToValVT(DAG, Val: RetValue, VA, DL);
7498
7499 InVals.push_back(Elt: RetValue);
7500 }
7501
7502 return Chain;
7503}
7504
7505bool LoongArchTargetLowering::CanLowerReturn(
7506 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
7507 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
7508 const Type *RetTy) const {
7509 SmallVector<CCValAssign> RVLocs;
7510 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
7511
7512 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
7513 LoongArchABI::ABI ABI =
7514 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
7515 if (CC_LoongArch(DL: MF.getDataLayout(), ABI, ValNo: i, ValVT: Outs[i].VT, LocInfo: CCValAssign::Full,
7516 ArgFlags: Outs[i].Flags, State&: CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
7517 OrigTy: nullptr))
7518 return false;
7519 }
7520 return true;
7521}
7522
7523SDValue LoongArchTargetLowering::LowerReturn(
7524 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
7525 const SmallVectorImpl<ISD::OutputArg> &Outs,
7526 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
7527 SelectionDAG &DAG) const {
7528 // Stores the assignment of the return value to a location.
7529 SmallVector<CCValAssign> RVLocs;
7530
7531 // Info about the registers and stack slot.
7532 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
7533 *DAG.getContext());
7534
7535 analyzeOutputArgs(MF&: DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
7536 CLI: nullptr, Fn: CC_LoongArch);
7537 if (CallConv == CallingConv::GHC && !RVLocs.empty())
7538 report_fatal_error(reason: "GHC functions return void only");
7539 SDValue Glue;
7540 SmallVector<SDValue, 4> RetOps(1, Chain);
7541
7542 // Copy the result values into the output registers.
7543 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
7544 SDValue Val = OutVals[OutIdx];
7545 CCValAssign &VA = RVLocs[i];
7546 assert(VA.isRegLoc() && "Can only return in registers!");
7547
7548 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
7549 // Handle returning f64 on LA32D with a soft float ABI.
7550 assert(VA.isRegLoc() && "Expected return via registers");
7551 assert(VA.needsCustom());
7552 SDValue SplitF64 = DAG.getNode(Opcode: LoongArchISD::SPLIT_PAIR_F64, DL,
7553 VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32), N: Val);
7554 SDValue Lo = SplitF64.getValue(R: 0);
7555 SDValue Hi = SplitF64.getValue(R: 1);
7556 Register RegLo = VA.getLocReg();
7557 Register RegHi = RVLocs[++i].getLocReg();
7558
7559 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: RegLo, N: Lo, Glue);
7560 Glue = Chain.getValue(R: 1);
7561 RetOps.push_back(Elt: DAG.getRegister(Reg: RegLo, VT: MVT::i32));
7562 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: RegHi, N: Hi, Glue);
7563 Glue = Chain.getValue(R: 1);
7564 RetOps.push_back(Elt: DAG.getRegister(Reg: RegHi, VT: MVT::i32));
7565 } else {
7566 // Handle a 'normal' return.
7567 Val = convertValVTToLocVT(DAG, Val, VA, DL);
7568 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: VA.getLocReg(), N: Val, Glue);
7569
7570 // Guarantee that all emitted copies are stuck together.
7571 Glue = Chain.getValue(R: 1);
7572 RetOps.push_back(Elt: DAG.getRegister(Reg: VA.getLocReg(), VT: VA.getLocVT()));
7573 }
7574 }
7575
7576 RetOps[0] = Chain; // Update chain.
7577
7578 // Add the glue node if we have it.
7579 if (Glue.getNode())
7580 RetOps.push_back(Elt: Glue);
7581
7582 return DAG.getNode(Opcode: LoongArchISD::RET, DL, VT: MVT::Other, Ops: RetOps);
7583}
7584
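// Return true if Imm is a floating-point immediate that can be materialized
// by a single vldi immediate (requires LSX).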
7585bool LoongArchTargetLowering::isFPImmVLDILegal(const APFloat &Imm,
7586 EVT VT) const {
7587 if (!Subtarget.hasExtLSX())
7588 return false;
7589
7590 if (VT == MVT::f32) {
7591 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
7592 return (masked == 0x3e000000 || masked == 0x40000000);
7593 }
7594
7595 if (VT == MVT::f64) {
7596 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
7597 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
7598 }
7599
7600 return false;
7601}
7602
7603bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
7604 bool ForCodeSize) const {
7605 // TODO: Maybe need more checks here after vector extension is supported.
7606 if (VT == MVT::f32 && !Subtarget.hasBasicF())
7607 return false;
7608 if (VT == MVT::f64 && !Subtarget.hasBasicD())
7609 return false;
7610 return (Imm.isZero() || Imm.isExactlyValue(V: 1.0) || isFPImmVLDILegal(Imm, VT));
7611}
7612
7613bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const {
7614 return true;
7615}
7616
7617bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const {
7618 return true;
7619}
7620
7621bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
7622 const Instruction *I) const {
7623 if (!Subtarget.is64Bit())
7624 return isa<LoadInst>(Val: I) || isa<StoreInst>(Val: I);
7625
7626 if (isa<LoadInst>(Val: I))
7627 return true;
7628
7629 // On LA64, atomic stores with an integer bit width of 32 or 64 do not
7630 // require fences because we can use amswap_db.[w/d].
7631 Type *Ty = I->getOperand(i: 0)->getType();
7632 if (isa<StoreInst>(Val: I) && Ty->isIntegerTy()) {
7633 unsigned Size = Ty->getIntegerBitWidth();
7634 return (Size == 8 || Size == 16);
7635 }
7636
7637 return false;
7638}
7639
7640EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
7641 LLVMContext &Context,
7642 EVT VT) const {
7643 if (!VT.isVector())
7644 return getPointerTy(DL);
7645 return VT.changeVectorElementTypeToInteger();
7646}
7647
7648bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
7649 // TODO: Support vectors.
7650 return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Val: Y);
7651}
7652
7653bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
7654 const CallInst &I,
7655 MachineFunction &MF,
7656 unsigned Intrinsic) const {
7657 switch (Intrinsic) {
7658 default:
7659 return false;
7660 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
7661 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
7662 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
7663 case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
7664 Info.opc = ISD::INTRINSIC_W_CHAIN;
7665 Info.memVT = MVT::i32;
7666 Info.ptrVal = I.getArgOperand(i: 0);
7667 Info.offset = 0;
7668 Info.align = Align(4);
7669 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
7670 MachineMemOperand::MOVolatile;
7671 return true;
7672 // TODO: Add more Intrinsics later.
7673 }
7674}
7675
7676 // When -mlamcas is enabled, MinCmpXchgSizeInBits is set to 8, so atomicrmw
7677 // and/or/xor operations on operands narrower than 32 bits are no longer
7678 // expanded to am{and/or/xor}[_db].w by AtomicExpandPass. To avoid a
7679 // regression, expand them manually here.
7680void LoongArchTargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
7681 AtomicRMWInst::BinOp Op = AI->getOperation();
7682
7683 assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
7684 Op == AtomicRMWInst::And) &&
7685 "Unable to expand");
7686 unsigned MinWordSize = 4;
7687
7688 IRBuilder<> Builder(AI);
7689 LLVMContext &Ctx = Builder.getContext();
7690 const DataLayout &DL = AI->getDataLayout();
7691 Type *ValueType = AI->getType();
7692 Type *WordType = Type::getIntNTy(C&: Ctx, N: MinWordSize * 8);
7693
7694 Value *Addr = AI->getPointerOperand();
7695 PointerType *PtrTy = cast<PointerType>(Val: Addr->getType());
7696 IntegerType *IntTy = DL.getIndexType(C&: Ctx, AddressSpace: PtrTy->getAddressSpace());
7697
7698 Value *AlignedAddr = Builder.CreateIntrinsic(
7699 ID: Intrinsic::ptrmask, Types: {PtrTy, IntTy},
7700 Args: {Addr, ConstantInt::get(Ty: IntTy, V: ~(uint64_t)(MinWordSize - 1))}, FMFSource: nullptr,
7701 Name: "AlignedAddr");
7702
7703 Value *AddrInt = Builder.CreatePtrToInt(V: Addr, DestTy: IntTy);
7704 Value *PtrLSB = Builder.CreateAnd(LHS: AddrInt, RHS: MinWordSize - 1, Name: "PtrLSB");
7705 Value *ShiftAmt = Builder.CreateShl(LHS: PtrLSB, RHS: 3);
7706 ShiftAmt = Builder.CreateTrunc(V: ShiftAmt, DestTy: WordType, Name: "ShiftAmt");
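// Build a mask that selects the value's bytes within the aligned word,
// shifted into position.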
7707 Value *Mask = Builder.CreateShl(
7708 LHS: ConstantInt::get(Ty: WordType,
7709 V: (1 << (DL.getTypeStoreSize(Ty: ValueType) * 8)) - 1),
7710 RHS: ShiftAmt, Name: "Mask");
7711 Value *Inv_Mask = Builder.CreateNot(V: Mask, Name: "Inv_Mask");
7712 Value *ValOperand_Shifted =
7713 Builder.CreateShl(LHS: Builder.CreateZExt(V: AI->getValOperand(), DestTy: WordType),
7714 RHS: ShiftAmt, Name: "ValOperand_Shifted");
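// For 'and', set all bits outside the field so the word-sized AND leaves them
// unchanged; for 'or' and 'xor' the zero bits outside the field are already a
// no-op.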
7715 Value *NewOperand;
7716 if (Op == AtomicRMWInst::And)
7717 NewOperand = Builder.CreateOr(LHS: ValOperand_Shifted, RHS: Inv_Mask, Name: "AndOperand");
7718 else
7719 NewOperand = ValOperand_Shifted;
7720
7721 AtomicRMWInst *NewAI =
7722 Builder.CreateAtomicRMW(Op, Ptr: AlignedAddr, Val: NewOperand, Align: Align(MinWordSize),
7723 Ordering: AI->getOrdering(), SSID: AI->getSyncScopeID());
7724
7725 Value *Shift = Builder.CreateLShr(LHS: NewAI, RHS: ShiftAmt, Name: "shifted");
7726 Value *Trunc = Builder.CreateTrunc(V: Shift, DestTy: ValueType, Name: "extracted");
7727 Value *FinalOldResult = Builder.CreateBitCast(V: Trunc, DestTy: ValueType);
7728 AI->replaceAllUsesWith(V: FinalOldResult);
7729 AI->eraseFromParent();
7730}
7731
7732TargetLowering::AtomicExpansionKind
7733LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
7734 // TODO: Add more AtomicRMWInst that needs to be extended.
7735
7736 // Since floating-point operations require a non-trivial set of data
7737 // operations, use CmpXChg to expand.
7738 if (AI->isFloatingPointOperation() ||
7739 AI->getOperation() == AtomicRMWInst::UIncWrap ||
7740 AI->getOperation() == AtomicRMWInst::UDecWrap ||
7741 AI->getOperation() == AtomicRMWInst::USubCond ||
7742 AI->getOperation() == AtomicRMWInst::USubSat)
7743 return AtomicExpansionKind::CmpXChg;
7744
7745 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
7746 (AI->getOperation() == AtomicRMWInst::Xchg ||
7747 AI->getOperation() == AtomicRMWInst::Add ||
7748 AI->getOperation() == AtomicRMWInst::Sub)) {
7749 return AtomicExpansionKind::None;
7750 }
7751
7752 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
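// With LAMCAS, sub-word and/or/xor are expanded manually (see
// emitExpandAtomicRMW); nand (of any size) and the remaining sub-word
// operations are expanded through cmpxchg.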
7753 if (Subtarget.hasLAMCAS()) {
7754 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
7755 AI->getOperation() == AtomicRMWInst::Or ||
7756 AI->getOperation() == AtomicRMWInst::Xor))
7757 return AtomicExpansionKind::Expand;
7758 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
7759 return AtomicExpansionKind::CmpXChg;
7760 }
7761
7762 if (Size == 8 || Size == 16)
7763 return AtomicExpansionKind::MaskedIntrinsic;
7764 return AtomicExpansionKind::None;
7765}
7766
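// Map an AtomicRMW binary operation to the corresponding LoongArch masked
// atomic intrinsic for the given GRLen.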
7767static Intrinsic::ID
7768getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
7769 AtomicRMWInst::BinOp BinOp) {
7770 if (GRLen == 64) {
7771 switch (BinOp) {
7772 default:
7773 llvm_unreachable("Unexpected AtomicRMW BinOp");
7774 case AtomicRMWInst::Xchg:
7775 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
7776 case AtomicRMWInst::Add:
7777 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
7778 case AtomicRMWInst::Sub:
7779 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
7780 case AtomicRMWInst::Nand:
7781 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
7782 case AtomicRMWInst::UMax:
7783 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
7784 case AtomicRMWInst::UMin:
7785 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
7786 case AtomicRMWInst::Max:
7787 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
7788 case AtomicRMWInst::Min:
7789 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
7790 // TODO: support other AtomicRMWInst.
7791 }
7792 }
7793
7794 if (GRLen == 32) {
7795 switch (BinOp) {
7796 default:
7797 llvm_unreachable("Unexpected AtomicRMW BinOp");
7798 case AtomicRMWInst::Xchg:
7799 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
7800 case AtomicRMWInst::Add:
7801 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
7802 case AtomicRMWInst::Sub:
7803 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
7804 case AtomicRMWInst::Nand:
7805 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
7806 case AtomicRMWInst::UMax:
7807 return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
7808 case AtomicRMWInst::UMin:
7809 return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
7810 case AtomicRMWInst::Max:
7811 return Intrinsic::loongarch_masked_atomicrmw_max_i32;
7812 case AtomicRMWInst::Min:
7813 return Intrinsic::loongarch_masked_atomicrmw_min_i32;
7814 // TODO: support other AtomicRMWInst.
7815 }
7816 }
7817
7818 llvm_unreachable("Unexpected GRLen\n");
7819}
7820
7821TargetLowering::AtomicExpansionKind
7822LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
7823 AtomicCmpXchgInst *CI) const {
7824
7825 if (Subtarget.hasLAMCAS())
7826 return AtomicExpansionKind::None;
7827
7828 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
7829 if (Size == 8 || Size == 16)
7830 return AtomicExpansionKind::MaskedIntrinsic;
7831 return AtomicExpansionKind::None;
7832}
7833
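// Emit a call to the masked cmpxchg intrinsic. On LA64 the compare value, new
// value and mask are sign-extended to i64 first, and the result is truncated
// back to i32.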
7834Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
7835 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
7836 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
7837 unsigned GRLen = Subtarget.getGRLen();
7838 AtomicOrdering FailOrd = CI->getFailureOrdering();
7839 Value *FailureOrdering =
7840 Builder.getIntN(N: Subtarget.getGRLen(), C: static_cast<uint64_t>(FailOrd));
7841 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
7842 if (GRLen == 64) {
7843 CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
7844 CmpVal = Builder.CreateSExt(V: CmpVal, DestTy: Builder.getInt64Ty());
7845 NewVal = Builder.CreateSExt(V: NewVal, DestTy: Builder.getInt64Ty());
7846 Mask = Builder.CreateSExt(V: Mask, DestTy: Builder.getInt64Ty());
7847 }
7848 Type *Tys[] = {AlignedAddr->getType()};
7849 Value *Result = Builder.CreateIntrinsic(
7850 ID: CmpXchgIntrID, Types: Tys, Args: {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
7851 if (GRLen == 64)
7852 Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty());
7853 return Result;
7854}
7855
7856Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
7857 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
7858 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
7859 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
7860 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
7861 // mask, as this produces better code than the LL/SC loop emitted by
7862 // int_loongarch_masked_atomicrmw_xchg.
7863 if (AI->getOperation() == AtomicRMWInst::Xchg &&
7864 isa<ConstantInt>(Val: AI->getValOperand())) {
7865 ConstantInt *CVal = cast<ConstantInt>(Val: AI->getValOperand());
7866 if (CVal->isZero())
7867 return Builder.CreateAtomicRMW(Op: AtomicRMWInst::And, Ptr: AlignedAddr,
7868 Val: Builder.CreateNot(V: Mask, Name: "Inv_Mask"),
7869 Align: AI->getAlign(), Ordering: Ord);
7870 if (CVal->isMinusOne())
7871 return Builder.CreateAtomicRMW(Op: AtomicRMWInst::Or, Ptr: AlignedAddr, Val: Mask,
7872 Align: AI->getAlign(), Ordering: Ord);
7873 }
7874
7875 unsigned GRLen = Subtarget.getGRLen();
7876 Value *Ordering =
7877 Builder.getIntN(N: GRLen, C: static_cast<uint64_t>(AI->getOrdering()));
7878 Type *Tys[] = {AlignedAddr->getType()};
7879 Function *LlwOpScwLoop = Intrinsic::getOrInsertDeclaration(
7880 M: AI->getModule(),
7881 id: getIntrinsicForMaskedAtomicRMWBinOp(GRLen, BinOp: AI->getOperation()), Tys);
7882
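// The masked atomic intrinsics operate on GRLen-wide values, so sign-extend
// the i32 operands on LA64.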
7883 if (GRLen == 64) {
7884 Incr = Builder.CreateSExt(V: Incr, DestTy: Builder.getInt64Ty());
7885 Mask = Builder.CreateSExt(V: Mask, DestTy: Builder.getInt64Ty());
7886 ShiftAmt = Builder.CreateSExt(V: ShiftAmt, DestTy: Builder.getInt64Ty());
7887 }
7888
7889 Value *Result;
7890
7891 // Must pass the shift amount needed to sign extend the loaded value prior
7892 // to performing a signed comparison for min/max. ShiftAmt is the number of
7893 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
7894 // is the number of bits to left+right shift the value in order to
7895 // sign-extend.
7896 if (AI->getOperation() == AtomicRMWInst::Min ||
7897 AI->getOperation() == AtomicRMWInst::Max) {
7898 const DataLayout &DL = AI->getDataLayout();
7899 unsigned ValWidth =
7900 DL.getTypeStoreSizeInBits(Ty: AI->getValOperand()->getType());
7901 Value *SextShamt =
7902 Builder.CreateSub(LHS: Builder.getIntN(N: GRLen, C: GRLen - ValWidth), RHS: ShiftAmt);
7903 Result = Builder.CreateCall(Callee: LlwOpScwLoop,
7904 Args: {AlignedAddr, Incr, Mask, SextShamt, Ordering});
7905 } else {
7906 Result =
7907 Builder.CreateCall(Callee: LlwOpScwLoop, Args: {AlignedAddr, Incr, Mask, Ordering});
7908 }
7909
7910 if (GRLen == 64)
7911 Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty());
7912 return Result;
7913}
7914
7915bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
7916 const MachineFunction &MF, EVT VT) const {
7917 VT = VT.getScalarType();
7918
7919 if (!VT.isSimple())
7920 return false;
7921
7922 switch (VT.getSimpleVT().SimpleTy) {
7923 case MVT::f32:
7924 case MVT::f64:
7925 return true;
7926 default:
7927 break;
7928 }
7929
7930 return false;
7931}
7932
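// The exception pointer and selector live in the first two argument registers,
// $a0 (R4) and $a1 (R5).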
7933Register LoongArchTargetLowering::getExceptionPointerRegister(
7934 const Constant *PersonalityFn) const {
7935 return LoongArch::R4;
7936}
7937
7938Register LoongArchTargetLowering::getExceptionSelectorRegister(
7939 const Constant *PersonalityFn) const {
7940 return LoongArch::R5;
7941}
7942
7943//===----------------------------------------------------------------------===//
7944// Target Optimization Hooks
7945//===----------------------------------------------------------------------===//
7946
7947static int getEstimateRefinementSteps(EVT VT,
7948 const LoongArchSubtarget &Subtarget) {
7949 // The FRECIPE feature's instructions have a relative accuracy of 2^-14.
7950 // IEEE single precision has 23 significand bits and double precision has 52.
7951 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
7952 return RefinementSteps;
7953}
7954
7955SDValue LoongArchTargetLowering::getSqrtEstimate(SDValue Operand,
7956 SelectionDAG &DAG, int Enabled,
7957 int &RefinementSteps,
7958 bool &UseOneConstNR,
7959 bool Reciprocal) const {
7960 if (Subtarget.hasFrecipe()) {
7961 SDLoc DL(Operand);
7962 EVT VT = Operand.getValueType();
7963
7964 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
7965 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
7966 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
7967 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
7968 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
7969
7970 if (RefinementSteps == ReciprocalEstimate::Unspecified)
7971 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
7972
7973 SDValue Estimate = DAG.getNode(Opcode: LoongArchISD::FRSQRTE, DL, VT, Operand);
7974 if (Reciprocal)
7975 Estimate = DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: Operand, N2: Estimate);
7976
7977 return Estimate;
7978 }
7979 }
7980
7981 return SDValue();
7982}
7983
7984SDValue LoongArchTargetLowering::getRecipEstimate(SDValue Operand,
7985 SelectionDAG &DAG,
7986 int Enabled,
7987 int &RefinementSteps) const {
7988 if (Subtarget.hasFrecipe()) {
7989 SDLoc DL(Operand);
7990 EVT VT = Operand.getValueType();
7991
7992 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
7993 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
7994 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
7995 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
7996 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
7997
7998 if (RefinementSteps == ReciprocalEstimate::Unspecified)
7999 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
8000
8001 return DAG.getNode(Opcode: LoongArchISD::FRECIPE, DL, VT, Operand);
8002 }
8003 }
8004
8005 return SDValue();
8006}
8007
8008//===----------------------------------------------------------------------===//
8009// LoongArch Inline Assembly Support
8010//===----------------------------------------------------------------------===//
8011
8012LoongArchTargetLowering::ConstraintType
8013LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
8014 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
8015 //
8016 // 'f': A floating-point register (if available).
8017 // 'k': A memory operand whose address is formed by a base register and
8018 // (optionally scaled) index register.
8019 // 'l': A signed 16-bit constant.
8020 // 'm': A memory operand whose address is formed by a base register and
8021 // offset that is suitable for use in instructions with the same
8022 // addressing mode as st.w and ld.w.
8023 // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg
8024 // instruction)
8025 // 'I': A signed 12-bit constant (for arithmetic instructions).
8026 // 'J': Integer zero.
8027 // 'K': An unsigned 12-bit constant (for logic instructions).
8028 // "ZB": An address that is held in a general-purpose register. The offset is
8029 // zero.
8030 // "ZC": A memory operand whose address is formed by a base register and
8031 // offset that is suitable for use in instructions with the same
8032 // addressing mode as ll.w and sc.w.
8033 if (Constraint.size() == 1) {
8034 switch (Constraint[0]) {
8035 default:
8036 break;
8037 case 'f':
8038 case 'q':
8039 return C_RegisterClass;
8040 case 'l':
8041 case 'I':
8042 case 'J':
8043 case 'K':
8044 return C_Immediate;
8045 case 'k':
8046 return C_Memory;
8047 }
8048 }
8049
8050 if (Constraint == "ZC" || Constraint == "ZB")
8051 return C_Memory;
8052
8053 // 'm' is handled here.
8054 return TargetLowering::getConstraintType(Constraint);
8055}
8056
8057InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
8058 StringRef ConstraintCode) const {
8059 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
8060 .Case(S: "k", Value: InlineAsm::ConstraintCode::k)
8061 .Case(S: "ZB", Value: InlineAsm::ConstraintCode::ZB)
8062 .Case(S: "ZC", Value: InlineAsm::ConstraintCode::ZC)
8063 .Default(Value: TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
8064}
8065
8066std::pair<unsigned, const TargetRegisterClass *>
8067LoongArchTargetLowering::getRegForInlineAsmConstraint(
8068 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
8069 // First, see if this is a constraint that directly corresponds to a LoongArch
8070 // register class.
8071 if (Constraint.size() == 1) {
8072 switch (Constraint[0]) {
8073 case 'r':
8074 // TODO: Support fixed vectors up to GRLen?
8075 if (VT.isVector())
8076 break;
8077 return std::make_pair(x: 0U, y: &LoongArch::GPRRegClass);
8078 case 'q':
8079 return std::make_pair(x: 0U, y: &LoongArch::GPRNoR0R1RegClass);
8080 case 'f':
8081 if (Subtarget.hasBasicF() && VT == MVT::f32)
8082 return std::make_pair(x: 0U, y: &LoongArch::FPR32RegClass);
8083 if (Subtarget.hasBasicD() && VT == MVT::f64)
8084 return std::make_pair(x: 0U, y: &LoongArch::FPR64RegClass);
8085 if (Subtarget.hasExtLSX() &&
8086 TRI->isTypeLegalForClass(RC: LoongArch::LSX128RegClass, T: VT))
8087 return std::make_pair(x: 0U, y: &LoongArch::LSX128RegClass);
8088 if (Subtarget.hasExtLASX() &&
8089 TRI->isTypeLegalForClass(RC: LoongArch::LASX256RegClass, T: VT))
8090 return std::make_pair(x: 0U, y: &LoongArch::LASX256RegClass);
8091 break;
8092 default:
8093 break;
8094 }
8095 }
8096
8097 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
8098 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
8099 // constraints while the official register name is prefixed with a '$'. So we
8100 // clip the '$' from the original constraint string (e.g. {$r0} to {r0})
8101 // before it is parsed. And TargetLowering::getRegForInlineAsmConstraint is
8102 // case insensitive, so no need to convert the constraint to upper case here.
8103 //
8104 // For now, there is no need to support ABI names (e.g. `$a0`) as clang
8105 // correctly translates register name aliases into their official names. And
8106 // AFAIK, the not-yet-upstreamed `rustc` for LoongArch will always use
8107 // official register names.
8108 if (Constraint.starts_with(Prefix: "{$r") || Constraint.starts_with(Prefix: "{$f") ||
8109 Constraint.starts_with(Prefix: "{$vr") || Constraint.starts_with(Prefix: "{$xr")) {
8110 bool IsFP = Constraint[2] == 'f';
8111 std::pair<StringRef, StringRef> Temp = Constraint.split(Separator: '$');
8112 std::pair<unsigned, const TargetRegisterClass *> R;
8113 R = TargetLowering::getRegForInlineAsmConstraint(
8114 TRI, Constraint: join_items(Separator: "", Items&: Temp.first, Items&: Temp.second), VT);
8115 // Match those names to the widest floating point register type available.
8116 if (IsFP) {
8117 unsigned RegNo = R.first;
8118 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
8119 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
8120 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
8121 return std::make_pair(x&: DReg, y: &LoongArch::FPR64RegClass);
8122 }
8123 }
8124 }
8125 return R;
8126 }
8127
8128 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
8129}
8130
8131void LoongArchTargetLowering::LowerAsmOperandForConstraint(
8132 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
8133 SelectionDAG &DAG) const {
8134 // Currently only support length 1 constraints.
8135 if (Constraint.size() == 1) {
8136 switch (Constraint[0]) {
8137 case 'l':
8138 // Validate & create a 16-bit signed immediate operand.
8139 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) {
8140 uint64_t CVal = C->getSExtValue();
8141 if (isInt<16>(x: CVal))
8142 Ops.push_back(x: DAG.getSignedTargetConstant(Val: CVal, DL: SDLoc(Op),
8143 VT: Subtarget.getGRLenVT()));
8144 }
8145 return;
8146 case 'I':
8147 // Validate & create a 12-bit signed immediate operand.
8148 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) {
8149 uint64_t CVal = C->getSExtValue();
8150 if (isInt<12>(x: CVal))
8151 Ops.push_back(x: DAG.getSignedTargetConstant(Val: CVal, DL: SDLoc(Op),
8152 VT: Subtarget.getGRLenVT()));
8153 }
8154 return;
8155 case 'J':
8156 // Validate & create an integer zero operand.
8157 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op))
8158 if (C->getZExtValue() == 0)
8159 Ops.push_back(
8160 x: DAG.getTargetConstant(Val: 0, DL: SDLoc(Op), VT: Subtarget.getGRLenVT()));
8161 return;
8162 case 'K':
8163 // Validate & create a 12-bit unsigned immediate operand.
8164 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) {
8165 uint64_t CVal = C->getZExtValue();
8166 if (isUInt<12>(x: CVal))
8167 Ops.push_back(
8168 x: DAG.getTargetConstant(Val: CVal, DL: SDLoc(Op), VT: Subtarget.getGRLenVT()));
8169 }
8170 return;
8171 default:
8172 break;
8173 }
8174 }
8175 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
8176}
8177
8178#define GET_REGISTER_MATCHER
8179#include "LoongArchGenAsmMatcher.inc"
8180
8181Register
8182LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
8183 const MachineFunction &MF) const {
8184 std::pair<StringRef, StringRef> Name = StringRef(RegName).split(Separator: '$');
8185 std::string NewRegName = Name.second.str();
8186 Register Reg = MatchRegisterAltName(Name: NewRegName);
8187 if (!Reg)
8188 Reg = MatchRegisterName(Name: NewRegName);
8189 if (!Reg)
8190 return Reg;
8191 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
8192 if (!ReservedRegs.test(Idx: Reg))
8193 report_fatal_error(reason: Twine("Trying to obtain non-reserved register \"" +
8194 StringRef(RegName) + "\"."));
8195 return Reg;
8196}
8197
8198bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
8199 EVT VT, SDValue C) const {
8200 // TODO: Support vectors.
8201 if (!VT.isScalarInteger())
8202 return false;
8203
8204 // Omit the optimization if the data size exceeds GRLen.
8205 if (VT.getSizeInBits() > Subtarget.getGRLen())
8206 return false;
8207
8208 if (auto *ConstNode = dyn_cast<ConstantSDNode>(Val: C.getNode())) {
8209 const APInt &Imm = ConstNode->getAPIntValue();
8210 // Break MUL into (SLLI + ADD/SUB) or ALSL.
8211 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
8212 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
8213 return true;
8214 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
8215 if (ConstNode->hasOneUse() &&
8216 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
8217 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
8218 return true;
8219 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)), in which the
8220 // immediate has two set bits, or break (MUL x, imm) into
8221 // (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate equals
8222 // (1 << s0) - (1 << s1).
8223 if (ConstNode->hasOneUse() && !(Imm.sge(RHS: -2048) && Imm.sle(RHS: 4095))) {
8224 unsigned Shifts = Imm.countr_zero();
8225 // Reject immediates which can be composed via a single LUI.
8226 if (Shifts >= 12)
8227 return false;
8228 // Reject multiplications that can be optimized to
8229 // (SLLI (ALSL x, x, 1/2/3/4), s).
8230 APInt ImmPop = Imm.ashr(ShiftAmt: Shifts);
8231 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
8232 return false;
8233 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
8234 // since it needs one more instruction than the other three cases.
8235 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
8236 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
8237 (ImmSmall - Imm).isPowerOf2())
8238 return true;
8239 }
8240 }
8241
8242 return false;
8243}
8244
8245bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
8246 const AddrMode &AM,
8247 Type *Ty, unsigned AS,
8248 Instruction *I) const {
8249 // LoongArch has four basic addressing modes:
8250 // 1. reg
8251 // 2. reg + 12-bit signed offset
8252 // 3. reg + 14-bit signed offset left-shifted by 2
8253 // 4. reg1 + reg2
8254 // TODO: Add more checks after supporting the vector extension.
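// For example (illustrative): `ld.w $a0, $a1, 252` uses mode 2,
// `ldptr.w $a0, $a1, 8192` uses mode 3, and `ldx.w $a0, $a1, $a2` uses mode 4.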
8255
8256 // No global is ever allowed as a base.
8257 if (AM.BaseGV)
8258 return false;
8259
8260 // Require a 12-bit signed offset, or a 14-bit signed offset left-shifted by
8261 // 2 when the `UAL` feature is available.
8262 if (!isInt<12>(x: AM.BaseOffs) &&
8263 !(isShiftedInt<14, 2>(x: AM.BaseOffs) && Subtarget.hasUAL()))
8264 return false;
8265
8266 switch (AM.Scale) {
8267 case 0:
8268 // "r+i" or just "i", depending on HasBaseReg.
8269 break;
8270 case 1:
8271 // "r+r+i" is not allowed.
8272 if (AM.HasBaseReg && AM.BaseOffs)
8273 return false;
8274 // Otherwise we have "r+r" or "r+i".
8275 break;
8276 case 2:
8277 // "2*r+r" or "2*r+i" is not allowed.
8278 if (AM.HasBaseReg || AM.BaseOffs)
8279 return false;
8280 // Allow "2*r" as "r+r".
8281 break;
8282 default:
8283 return false;
8284 }
8285
8286 return true;
8287}
8288
8289bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
8290 return isInt<12>(x: Imm);
8291}
8292
8293bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {
8294 return isInt<12>(x: Imm);
8295}
8296
8297bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
8298 // Zexts are free if they can be combined with a load.
8299 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
8300 // poorly with type legalization of compares, which prefers sext.
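// e.g. an extending i8/i16 load can be selected as ld.bu/ld.hu, so the zext
// comes for free.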
8301 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
8302 EVT MemVT = LD->getMemoryVT();
8303 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
8304 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
8305 LD->getExtensionType() == ISD::ZEXTLOAD))
8306 return true;
8307 }
8308
8309 return TargetLowering::isZExtFree(Val, VT2);
8310}
8311
8312bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,
8313 EVT DstVT) const {
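// On LA64, 32-bit operations (e.g. add.w) already produce sign-extended
// results, while zero extension needs an extra bstrpick.d.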
8314 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
8315}
8316
8317bool LoongArchTargetLowering::signExtendConstant(const ConstantInt *CI) const {
8318 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(Bitwidth: 32);
8319}
8320
8321bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
8322 // TODO: Support vectors.
8323 if (Y.getValueType().isVector())
8324 return false;
8325
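// ANDN takes two register operands, so the (~X & Y) == 0 form is only
// preferable when the mask Y is not a constant.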
8326 return !isa<ConstantSDNode>(Val: Y);
8327}
8328
8329ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
8330 // LAMCAS will use amcas[_DB].{b/h/w/d}, which do not require extension.
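// Without LAMCAS, a subword cmpxchg is expanded to a masked LL.W/SC.W loop
// that compares sign-extended values, so the expected value must be
// sign-extended as well.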
8331 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
8332}
8333
8334bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall(
8335 Type *Ty, bool IsSigned) const {
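// The LP64 calling conventions pass 32-bit integers sign-extended to 64 bits,
// even when the C type is unsigned.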
8336 if (Subtarget.is64Bit() && Ty->isIntegerTy(Bitwidth: 32))
8337 return true;
8338
8339 return IsSigned;
8340}
8341
8342bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
8343 // Return false to suppress unnecessary extensions when a libcall argument
8344 // or return value is a float narrower than GRLen on a soft FP ABI.
8345 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
8346 Type.getSizeInBits() < Subtarget.getGRLen()))
8347 return false;
8348 return true;
8349}
8350
8351// memcpy and other memory intrinsics typically try to use wider load/store
8352// operations if the source/dest is aligned and the copy size is large enough.
8353// We therefore want to align such objects passed to memory intrinsics.
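// e.g. a small alloca that is only used by llvm.memcpy may have its alignment
// raised to 8 on LA64 so that the expansion can use 64-bit accesses.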
8354bool LoongArchTargetLowering::shouldAlignPointerArgs(CallInst *CI,
8355 unsigned &MinSize,
8356 Align &PrefAlign) const {
8357 if (!isa<MemIntrinsic>(Val: CI))
8358 return false;
8359
8360 if (Subtarget.is64Bit()) {
8361 MinSize = 8;
8362 PrefAlign = Align(8);
8363 } else {
8364 MinSize = 4;
8365 PrefAlign = Align(4);
8366 }
8367
8368 return true;
8369}
8370
8371TargetLoweringBase::LegalizeTypeAction
8372LoongArchTargetLowering::getPreferredVectorAction(MVT VT) const {
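// Prefer widening: e.g. with LSX, an illegal v2i32 is widened to v4i32 rather
// than promoted to v2i64.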
8373 if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
8374 VT.getVectorElementType() != MVT::i1)
8375 return TypeWidenVector;
8376
8377 return TargetLoweringBase::getPreferredVectorAction(VT);
8378}
8379
8380bool LoongArchTargetLowering::splitValueIntoRegisterParts(
8381 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
8382 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
8383 bool IsABIRegCopy = CC.has_value();
8384 EVT ValueVT = Val.getValueType();
8385
8386 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
8387 PartVT == MVT::f32) {
8388 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
8389 // NaN, and cast to f32.
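// e.g. the half value 1.0 (bits 0x3C00) is passed as the f32 bit pattern
// 0xFFFF3C00, a quiet NaN that keeps the original payload recoverable.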
8390 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::i16, Operand: Val);
8391 Val = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i32, Operand: Val);
8392 Val = DAG.getNode(Opcode: ISD::OR, DL, VT: MVT::i32, N1: Val,
8393 N2: DAG.getConstant(Val: 0xFFFF0000, DL, VT: MVT::i32));
8394 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::f32, Operand: Val);
8395 Parts[0] = Val;
8396 return true;
8397 }
8398
8399 return false;
8400}
8401
8402SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
8403 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
8404 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
8405 bool IsABIRegCopy = CC.has_value();
8406
8407 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
8408 PartVT == MVT::f32) {
8409 SDValue Val = Parts[0];
8410
8411 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
8412 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::i32, Operand: Val);
8413 Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i16, Operand: Val);
8414 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ValueVT, Operand: Val);
8415 return Val;
8416 }
8417
8418 return SDValue();
8419}
8420
8421MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
8422 CallingConv::ID CC,
8423 EVT VT) const {
8424 // Use f32 to pass f16.
8425 if (VT == MVT::f16 && Subtarget.hasBasicF())
8426 return MVT::f32;
8427
8428 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
8429}
8430
8431unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
8432 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
8433 // Use f32 to pass f16.
8434 if (VT == MVT::f16 && Subtarget.hasBasicF())
8435 return 1;
8436
8437 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
8438}
8439
8440bool LoongArchTargetLowering::SimplifyDemandedBitsForTargetNode(
8441 SDValue Op, const APInt &OriginalDemandedBits,
8442 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
8443 unsigned Depth) const {
8444 EVT VT = Op.getValueType();
8445 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
8446 unsigned Opc = Op.getOpcode();
8447 switch (Opc) {
8448 default:
8449 break;
8450 case LoongArchISD::VMSKLTZ:
8451 case LoongArchISD::XVMSKLTZ: {
8452 SDValue Src = Op.getOperand(i: 0);
8453 MVT SrcVT = Src.getSimpleValueType();
8454 unsigned SrcBits = SrcVT.getScalarSizeInBits();
8455 unsigned NumElts = SrcVT.getVectorNumElements();
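// [X]VMSKLTZ packs one bit per source element (the element's sign bit) into
// the low NumElts bits of the result; the remaining result bits are zero.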
8456
8457 // If we don't need the sign bits at all, just return zero.
8458 if (OriginalDemandedBits.countr_zero() >= NumElts)
8459 return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: 0, DL: SDLoc(Op), VT));
8460
8461 // Only demand the source vector elements whose sign bits we need.
8462 APInt KnownUndef, KnownZero;
8463 APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(width: NumElts);
8464 if (SimplifyDemandedVectorElts(Op: Src, DemandedEltMask: DemandedElts, KnownUndef, KnownZero,
8465 TLO, Depth: Depth + 1))
8466 return true;
8467
8468 Known.Zero = KnownZero.zext(width: BitWidth);
8469 Known.Zero.setHighBits(BitWidth - NumElts);
8470
8471 // [X]VMSKLTZ only uses the MSB from each vector element.
8472 KnownBits KnownSrc;
8473 APInt DemandedSrcBits = APInt::getSignMask(BitWidth: SrcBits);
8474 if (SimplifyDemandedBits(Op: Src, DemandedBits: DemandedSrcBits, DemandedElts, Known&: KnownSrc, TLO,
8475 Depth: Depth + 1))
8476 return true;
8477
8478 if (KnownSrc.One[SrcBits - 1])
8479 Known.One.setLowBits(NumElts);
8480 else if (KnownSrc.Zero[SrcBits - 1])
8481 Known.Zero.setLowBits(NumElts);
8482
8483 // Attempt to avoid multi-use ops if we don't need anything from them.
8484 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
8485 Op: Src, DemandedBits: DemandedSrcBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
8486 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: SDLoc(Op), VT, Operand: NewSrc));
8487 return false;
8488 }
8489 }
8490
8491 return TargetLowering::SimplifyDemandedBitsForTargetNode(
8492 Op, DemandedBits: OriginalDemandedBits, DemandedElts: OriginalDemandedElts, Known, TLO, Depth);
8493}
8494