//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that LoongArch uses to lower LLVM code into
// a selection DAG.
//
//===----------------------------------------------------------------------===//
13 | |
14 | #include "LoongArchISelLowering.h" |
15 | #include "LoongArch.h" |
16 | #include "LoongArchMachineFunctionInfo.h" |
17 | #include "LoongArchRegisterInfo.h" |
18 | #include "LoongArchSubtarget.h" |
19 | #include "LoongArchTargetMachine.h" |
20 | #include "MCTargetDesc/LoongArchBaseInfo.h" |
21 | #include "MCTargetDesc/LoongArchMCTargetDesc.h" |
22 | #include "llvm/ADT/Statistic.h" |
23 | #include "llvm/ADT/StringExtras.h" |
24 | #include "llvm/CodeGen/ISDOpcodes.h" |
25 | #include "llvm/CodeGen/RuntimeLibcallUtil.h" |
26 | #include "llvm/CodeGen/SelectionDAGNodes.h" |
27 | #include "llvm/IR/IRBuilder.h" |
28 | #include "llvm/IR/IntrinsicsLoongArch.h" |
29 | #include "llvm/Support/CodeGen.h" |
30 | #include "llvm/Support/Debug.h" |
31 | #include "llvm/Support/ErrorHandling.h" |
32 | #include "llvm/Support/KnownBits.h" |
33 | #include "llvm/Support/MathExtras.h" |
34 | |
35 | using namespace llvm; |
36 | |
37 | #define DEBUG_TYPE "loongarch-isel-lowering" |
38 | |
39 | STATISTIC(NumTailCalls, "Number of tail calls" ); |
40 | |
41 | static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division" , cl::Hidden, |
42 | cl::desc("Trap on integer division by zero." ), |
43 | cl::init(Val: false)); |
44 | |
45 | LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, |
46 | const LoongArchSubtarget &STI) |
47 | : TargetLowering(TM), Subtarget(STI) { |
48 | |
49 | MVT GRLenVT = Subtarget.getGRLenVT(); |
50 | |
51 | // Set up the register classes. |
52 | |
53 | addRegisterClass(VT: GRLenVT, RC: &LoongArch::GPRRegClass); |
54 | if (Subtarget.hasBasicF()) |
55 | addRegisterClass(VT: MVT::f32, RC: &LoongArch::FPR32RegClass); |
56 | if (Subtarget.hasBasicD()) |
57 | addRegisterClass(VT: MVT::f64, RC: &LoongArch::FPR64RegClass); |
58 | |
59 | static const MVT::SimpleValueType LSXVTs[] = { |
60 | MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64}; |
61 | static const MVT::SimpleValueType LASXVTs[] = { |
62 | MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64}; |
63 | |
64 | if (Subtarget.hasExtLSX()) |
65 | for (MVT VT : LSXVTs) |
66 | addRegisterClass(VT, RC: &LoongArch::LSX128RegClass); |
67 | |
68 | if (Subtarget.hasExtLASX()) |
69 | for (MVT VT : LASXVTs) |
70 | addRegisterClass(VT, RC: &LoongArch::LASX256RegClass); |
71 | |
72 | // Set operations for LA32 and LA64. |
73 | |
74 | setLoadExtAction(ExtTypes: {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, ValVT: GRLenVT, |
75 | MemVT: MVT::i1, Action: Promote); |
76 | |
77 | setOperationAction(Op: ISD::SHL_PARTS, VT: GRLenVT, Action: Custom); |
78 | setOperationAction(Op: ISD::SRA_PARTS, VT: GRLenVT, Action: Custom); |
79 | setOperationAction(Op: ISD::SRL_PARTS, VT: GRLenVT, Action: Custom); |
80 | setOperationAction(Op: ISD::FP_TO_SINT, VT: GRLenVT, Action: Custom); |
81 | setOperationAction(Op: ISD::ROTL, VT: GRLenVT, Action: Expand); |
82 | setOperationAction(Op: ISD::CTPOP, VT: GRLenVT, Action: Expand); |
83 | |
84 | setOperationAction(Ops: {ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool, |
85 | ISD::JumpTable, ISD::GlobalTLSAddress}, |
86 | VT: GRLenVT, Action: Custom); |
87 | |
88 | setOperationAction(Op: ISD::EH_DWARF_CFA, VT: GRLenVT, Action: Custom); |
89 | |
90 | setOperationAction(Op: ISD::DYNAMIC_STACKALLOC, VT: GRLenVT, Action: Expand); |
91 | setOperationAction(Ops: {ISD::STACKSAVE, ISD::STACKRESTORE}, VT: MVT::Other, Action: Expand); |
92 | setOperationAction(Op: ISD::VASTART, VT: MVT::Other, Action: Custom); |
93 | setOperationAction(Ops: {ISD::VAARG, ISD::VACOPY, ISD::VAEND}, VT: MVT::Other, Action: Expand); |
94 | |
95 | setOperationAction(Op: ISD::DEBUGTRAP, VT: MVT::Other, Action: Legal); |
96 | setOperationAction(Op: ISD::TRAP, VT: MVT::Other, Action: Legal); |
97 | |
98 | setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::Other, Action: Custom); |
99 | setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::Other, Action: Custom); |
100 | setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::Other, Action: Custom); |
101 | |
102 | // Expand bitreverse.i16 with native-width bitrev and shift for now, before |
103 | // we get to know which of sll and revb.2h is faster. |
104 | setOperationAction(Op: ISD::BITREVERSE, VT: MVT::i8, Action: Custom); |
105 | setOperationAction(Op: ISD::BITREVERSE, VT: GRLenVT, Action: Legal); |
106 | |
107 | // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and |
108 | // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16 |
109 | // and i32 could still be byte-swapped relatively cheaply. |
110 | setOperationAction(Op: ISD::BSWAP, VT: MVT::i16, Action: Custom); |
111 | |
112 | setOperationAction(Op: ISD::BR_JT, VT: MVT::Other, Action: Expand); |
113 | setOperationAction(Op: ISD::BR_CC, VT: GRLenVT, Action: Expand); |
114 | setOperationAction(Op: ISD::SELECT_CC, VT: GRLenVT, Action: Expand); |
115 | setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::i1, Action: Expand); |
116 | setOperationAction(Ops: {ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT: GRLenVT, Action: Expand); |
117 | |
118 | setOperationAction(Op: ISD::FP_TO_UINT, VT: GRLenVT, Action: Custom); |
119 | setOperationAction(Op: ISD::UINT_TO_FP, VT: GRLenVT, Action: Expand); |
120 | |
121 | // Set operations for LA64 only. |
122 | |
123 | if (Subtarget.is64Bit()) { |
124 | setOperationAction(Op: ISD::ADD, VT: MVT::i32, Action: Custom); |
125 | setOperationAction(Op: ISD::SUB, VT: MVT::i32, Action: Custom); |
126 | setOperationAction(Op: ISD::SHL, VT: MVT::i32, Action: Custom); |
127 | setOperationAction(Op: ISD::SRA, VT: MVT::i32, Action: Custom); |
128 | setOperationAction(Op: ISD::SRL, VT: MVT::i32, Action: Custom); |
129 | setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::i32, Action: Custom); |
130 | setOperationAction(Op: ISD::BITCAST, VT: MVT::i32, Action: Custom); |
131 | setOperationAction(Op: ISD::ROTR, VT: MVT::i32, Action: Custom); |
132 | setOperationAction(Op: ISD::ROTL, VT: MVT::i32, Action: Custom); |
133 | setOperationAction(Op: ISD::CTTZ, VT: MVT::i32, Action: Custom); |
134 | setOperationAction(Op: ISD::CTLZ, VT: MVT::i32, Action: Custom); |
135 | setOperationAction(Op: ISD::EH_DWARF_CFA, VT: MVT::i32, Action: Custom); |
136 | setOperationAction(Op: ISD::READ_REGISTER, VT: MVT::i32, Action: Custom); |
137 | setOperationAction(Op: ISD::WRITE_REGISTER, VT: MVT::i32, Action: Custom); |
138 | setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::i32, Action: Custom); |
139 | setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::i32, Action: Custom); |
140 | setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::i32, Action: Custom); |
141 | |
142 | setOperationAction(Op: ISD::BITREVERSE, VT: MVT::i32, Action: Custom); |
143 | setOperationAction(Op: ISD::BSWAP, VT: MVT::i32, Action: Custom); |
144 | setOperationAction(Ops: {ISD::UDIV, ISD::UREM}, VT: MVT::i32, Action: Custom); |
145 | } |
146 | |
147 | // Set operations for LA32 only. |
148 | |
149 | if (!Subtarget.is64Bit()) { |
150 | setOperationAction(Op: ISD::READ_REGISTER, VT: MVT::i64, Action: Custom); |
151 | setOperationAction(Op: ISD::WRITE_REGISTER, VT: MVT::i64, Action: Custom); |
152 | setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::i64, Action: Custom); |
153 | setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::i64, Action: Custom); |
154 | setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::i64, Action: Custom); |
155 | } |
156 | |
157 | setOperationAction(Op: ISD::ATOMIC_FENCE, VT: MVT::Other, Action: Custom); |
158 | |
159 | static const ISD::CondCode FPCCToExpand[] = { |
160 | ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE, |
161 | ISD::SETGE, ISD::SETNE, ISD::SETGT}; |
162 | |
163 | // Set operations for 'F' feature. |
164 | |
165 | if (Subtarget.hasBasicF()) { |
166 | setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f32, MemVT: MVT::f16, Action: Expand); |
167 | setTruncStoreAction(ValVT: MVT::f32, MemVT: MVT::f16, Action: Expand); |
168 | setCondCodeAction(CCs: FPCCToExpand, VT: MVT::f32, Action: Expand); |
169 | |
170 | setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f32, Action: Expand); |
171 | setOperationAction(Op: ISD::BR_CC, VT: MVT::f32, Action: Expand); |
172 | setOperationAction(Op: ISD::FMA, VT: MVT::f32, Action: Legal); |
173 | setOperationAction(Op: ISD::FMINNUM_IEEE, VT: MVT::f32, Action: Legal); |
174 | setOperationAction(Op: ISD::FMAXNUM_IEEE, VT: MVT::f32, Action: Legal); |
175 | setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::f32, Action: Legal); |
176 | setOperationAction(Op: ISD::STRICT_FSETCC, VT: MVT::f32, Action: Legal); |
177 | setOperationAction(Op: ISD::IS_FPCLASS, VT: MVT::f32, Action: Legal); |
178 | setOperationAction(Op: ISD::FSIN, VT: MVT::f32, Action: Expand); |
179 | setOperationAction(Op: ISD::FCOS, VT: MVT::f32, Action: Expand); |
180 | setOperationAction(Op: ISD::FSINCOS, VT: MVT::f32, Action: Expand); |
181 | setOperationAction(Op: ISD::FPOW, VT: MVT::f32, Action: Expand); |
182 | setOperationAction(Op: ISD::FREM, VT: MVT::f32, Action: Expand); |
183 | setOperationAction(Op: ISD::FP16_TO_FP, VT: MVT::f32, Action: Expand); |
184 | setOperationAction(Op: ISD::FP_TO_FP16, VT: MVT::f32, Action: Expand); |
185 | |
186 | if (Subtarget.is64Bit()) |
187 | setOperationAction(Op: ISD::FRINT, VT: MVT::f32, Action: Legal); |
188 | |
189 | if (!Subtarget.hasBasicD()) { |
190 | setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i32, Action: Custom); |
191 | if (Subtarget.is64Bit()) { |
192 | setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i64, Action: Custom); |
193 | setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i64, Action: Custom); |
194 | } |
195 | } |
196 | } |
197 | |
198 | // Set operations for 'D' feature. |
199 | |
200 | if (Subtarget.hasBasicD()) { |
201 | setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f64, MemVT: MVT::f16, Action: Expand); |
202 | setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f64, MemVT: MVT::f32, Action: Expand); |
203 | setTruncStoreAction(ValVT: MVT::f64, MemVT: MVT::f16, Action: Expand); |
204 | setTruncStoreAction(ValVT: MVT::f64, MemVT: MVT::f32, Action: Expand); |
205 | setCondCodeAction(CCs: FPCCToExpand, VT: MVT::f64, Action: Expand); |
206 | |
207 | setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f64, Action: Expand); |
208 | setOperationAction(Op: ISD::BR_CC, VT: MVT::f64, Action: Expand); |
209 | setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::f64, Action: Legal); |
210 | setOperationAction(Op: ISD::STRICT_FSETCC, VT: MVT::f64, Action: Legal); |
211 | setOperationAction(Op: ISD::FMA, VT: MVT::f64, Action: Legal); |
212 | setOperationAction(Op: ISD::FMINNUM_IEEE, VT: MVT::f64, Action: Legal); |
213 | setOperationAction(Op: ISD::FMAXNUM_IEEE, VT: MVT::f64, Action: Legal); |
214 | setOperationAction(Op: ISD::IS_FPCLASS, VT: MVT::f64, Action: Legal); |
215 | setOperationAction(Op: ISD::FSIN, VT: MVT::f64, Action: Expand); |
216 | setOperationAction(Op: ISD::FCOS, VT: MVT::f64, Action: Expand); |
217 | setOperationAction(Op: ISD::FSINCOS, VT: MVT::f64, Action: Expand); |
218 | setOperationAction(Op: ISD::FPOW, VT: MVT::f64, Action: Expand); |
219 | setOperationAction(Op: ISD::FREM, VT: MVT::f64, Action: Expand); |
220 | setOperationAction(Op: ISD::FP16_TO_FP, VT: MVT::f64, Action: Expand); |
221 | setOperationAction(Op: ISD::FP_TO_FP16, VT: MVT::f64, Action: Expand); |
222 | |
223 | if (Subtarget.is64Bit()) |
224 | setOperationAction(Op: ISD::FRINT, VT: MVT::f64, Action: Legal); |
225 | } |
226 | |
227 | // Set operations for 'LSX' feature. |
228 | |
229 | if (Subtarget.hasExtLSX()) { |
230 | for (MVT VT : MVT::fixedlen_vector_valuetypes()) { |
231 | // Expand all truncating stores and extending loads. |
232 | for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { |
233 | setTruncStoreAction(ValVT: VT, MemVT: InnerVT, Action: Expand); |
234 | setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand); |
235 | setLoadExtAction(ExtType: ISD::ZEXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand); |
236 | setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand); |
237 | } |
238 | // By default everything must be expanded. Then we will selectively turn |
239 | // on ones that can be effectively codegen'd. |
240 | for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) |
241 | setOperationAction(Op, VT, Action: Expand); |
242 | } |
243 | |
244 | for (MVT VT : LSXVTs) { |
245 | setOperationAction(Ops: {ISD::LOAD, ISD::STORE}, VT, Action: Legal); |
246 | setOperationAction(Op: ISD::BITCAST, VT, Action: Legal); |
247 | setOperationAction(Op: ISD::UNDEF, VT, Action: Legal); |
248 | |
249 | setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT, Action: Custom); |
250 | setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT, Action: Legal); |
251 | setOperationAction(Op: ISD::BUILD_VECTOR, VT, Action: Custom); |
252 | |
253 | setOperationAction(Op: ISD::SETCC, VT, Action: Legal); |
254 | setOperationAction(Op: ISD::VSELECT, VT, Action: Legal); |
255 | setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT, Action: Custom); |
256 | } |
257 | for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { |
258 | setOperationAction(Ops: {ISD::ADD, ISD::SUB}, VT, Action: Legal); |
259 | setOperationAction(Ops: {ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT, |
260 | Action: Legal); |
261 | setOperationAction(Ops: {ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM}, |
262 | VT, Action: Legal); |
263 | setOperationAction(Ops: {ISD::AND, ISD::OR, ISD::XOR}, VT, Action: Legal); |
264 | setOperationAction(Ops: {ISD::SHL, ISD::SRA, ISD::SRL}, VT, Action: Legal); |
265 | setOperationAction(Ops: {ISD::CTPOP, ISD::CTLZ}, VT, Action: Legal); |
266 | setOperationAction(Ops: {ISD::MULHS, ISD::MULHU}, VT, Action: Legal); |
267 | setCondCodeAction( |
268 | CCs: {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, |
269 | Action: Expand); |
270 | } |
271 | for (MVT VT : {MVT::v4i32, MVT::v2i64}) { |
272 | setOperationAction(Ops: {ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Action: Legal); |
273 | setOperationAction(Ops: {ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Action: Legal); |
274 | } |
275 | for (MVT VT : {MVT::v4f32, MVT::v2f64}) { |
276 | setOperationAction(Ops: {ISD::FADD, ISD::FSUB}, VT, Action: Legal); |
277 | setOperationAction(Ops: {ISD::FMUL, ISD::FDIV}, VT, Action: Legal); |
278 | setOperationAction(Op: ISD::FMA, VT, Action: Legal); |
279 | setOperationAction(Op: ISD::FSQRT, VT, Action: Legal); |
280 | setOperationAction(Op: ISD::FNEG, VT, Action: Legal); |
281 | setCondCodeAction(CCs: {ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, |
282 | ISD::SETUGE, ISD::SETUGT}, |
283 | VT, Action: Expand); |
284 | } |
285 | } |
286 | |
287 | // Set operations for 'LASX' feature. |
288 | |
289 | if (Subtarget.hasExtLASX()) { |
290 | for (MVT VT : LASXVTs) { |
291 | setOperationAction(Ops: {ISD::LOAD, ISD::STORE}, VT, Action: Legal); |
292 | setOperationAction(Op: ISD::BITCAST, VT, Action: Legal); |
293 | setOperationAction(Op: ISD::UNDEF, VT, Action: Legal); |
294 | |
295 | setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT, Action: Custom); |
296 | setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT, Action: Custom); |
297 | setOperationAction(Op: ISD::BUILD_VECTOR, VT, Action: Custom); |
298 | setOperationAction(Op: ISD::CONCAT_VECTORS, VT, Action: Legal); |
299 | |
300 | setOperationAction(Op: ISD::SETCC, VT, Action: Legal); |
301 | setOperationAction(Op: ISD::VSELECT, VT, Action: Legal); |
302 | setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT, Action: Custom); |
303 | } |
304 | for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) { |
305 | setOperationAction(Ops: {ISD::ADD, ISD::SUB}, VT, Action: Legal); |
306 | setOperationAction(Ops: {ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT, |
307 | Action: Legal); |
308 | setOperationAction(Ops: {ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM}, |
309 | VT, Action: Legal); |
310 | setOperationAction(Ops: {ISD::AND, ISD::OR, ISD::XOR}, VT, Action: Legal); |
311 | setOperationAction(Ops: {ISD::SHL, ISD::SRA, ISD::SRL}, VT, Action: Legal); |
312 | setOperationAction(Ops: {ISD::CTPOP, ISD::CTLZ}, VT, Action: Legal); |
313 | setOperationAction(Ops: {ISD::MULHS, ISD::MULHU}, VT, Action: Legal); |
314 | setCondCodeAction( |
315 | CCs: {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, |
316 | Action: Expand); |
317 | } |
318 | for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) { |
319 | setOperationAction(Ops: {ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Action: Legal); |
320 | setOperationAction(Ops: {ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Action: Legal); |
321 | } |
322 | for (MVT VT : {MVT::v8f32, MVT::v4f64}) { |
323 | setOperationAction(Ops: {ISD::FADD, ISD::FSUB}, VT, Action: Legal); |
324 | setOperationAction(Ops: {ISD::FMUL, ISD::FDIV}, VT, Action: Legal); |
325 | setOperationAction(Op: ISD::FMA, VT, Action: Legal); |
326 | setOperationAction(Op: ISD::FSQRT, VT, Action: Legal); |
327 | setOperationAction(Op: ISD::FNEG, VT, Action: Legal); |
328 | setCondCodeAction(CCs: {ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, |
329 | ISD::SETUGE, ISD::SETUGT}, |
330 | VT, Action: Expand); |
331 | } |
332 | } |
333 | |
334 | // Set DAG combine for LA32 and LA64. |
335 | |
336 | setTargetDAGCombine(ISD::AND); |
337 | setTargetDAGCombine(ISD::OR); |
338 | setTargetDAGCombine(ISD::SRL); |
339 | setTargetDAGCombine(ISD::SETCC); |
340 | |
341 | // Set DAG combine for 'LSX' feature. |
342 | |
343 | if (Subtarget.hasExtLSX()) |
344 | setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); |
345 | |
346 | // Compute derived properties from the register classes. |
347 | computeRegisterProperties(TRI: Subtarget.getRegisterInfo()); |
348 | |
349 | setStackPointerRegisterToSaveRestore(LoongArch::R3); |
350 | |
351 | setBooleanContents(ZeroOrOneBooleanContent); |
352 | setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); |
353 | |
354 | setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen()); |
355 | |
356 | setMinCmpXchgSizeInBits(32); |
357 | |
358 | // Function alignments. |
359 | setMinFunctionAlignment(Align(4)); |
360 | // Set preferred alignments. |
361 | setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment()); |
362 | setPrefLoopAlignment(Subtarget.getPrefLoopAlignment()); |
363 | setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment()); |
364 | } |
365 | |
366 | bool LoongArchTargetLowering::isOffsetFoldingLegal( |
367 | const GlobalAddressSDNode *GA) const { |
368 | // In order to maximise the opportunity for common subexpression elimination, |
369 | // keep a separate ADD node for the global address offset instead of folding |
370 | // it in the global address node. Later peephole optimisations may choose to |
371 | // fold it back in when profitable. |
372 | return false; |
373 | } |
374 | |
375 | SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, |
376 | SelectionDAG &DAG) const { |
377 | switch (Op.getOpcode()) { |
378 | case ISD::ATOMIC_FENCE: |
379 | return lowerATOMIC_FENCE(Op, DAG); |
380 | case ISD::EH_DWARF_CFA: |
381 | return lowerEH_DWARF_CFA(Op, DAG); |
382 | case ISD::GlobalAddress: |
383 | return lowerGlobalAddress(Op, DAG); |
384 | case ISD::GlobalTLSAddress: |
385 | return lowerGlobalTLSAddress(Op, DAG); |
386 | case ISD::INTRINSIC_WO_CHAIN: |
387 | return lowerINTRINSIC_WO_CHAIN(Op, DAG); |
388 | case ISD::INTRINSIC_W_CHAIN: |
389 | return lowerINTRINSIC_W_CHAIN(Op, DAG); |
390 | case ISD::INTRINSIC_VOID: |
391 | return lowerINTRINSIC_VOID(Op, DAG); |
392 | case ISD::BlockAddress: |
393 | return lowerBlockAddress(Op, DAG); |
394 | case ISD::JumpTable: |
395 | return lowerJumpTable(Op, DAG); |
396 | case ISD::SHL_PARTS: |
397 | return lowerShiftLeftParts(Op, DAG); |
398 | case ISD::SRA_PARTS: |
399 | return lowerShiftRightParts(Op, DAG, IsSRA: true); |
400 | case ISD::SRL_PARTS: |
401 | return lowerShiftRightParts(Op, DAG, IsSRA: false); |
402 | case ISD::ConstantPool: |
403 | return lowerConstantPool(Op, DAG); |
404 | case ISD::FP_TO_SINT: |
405 | return lowerFP_TO_SINT(Op, DAG); |
406 | case ISD::BITCAST: |
407 | return lowerBITCAST(Op, DAG); |
408 | case ISD::UINT_TO_FP: |
409 | return lowerUINT_TO_FP(Op, DAG); |
410 | case ISD::SINT_TO_FP: |
411 | return lowerSINT_TO_FP(Op, DAG); |
412 | case ISD::VASTART: |
413 | return lowerVASTART(Op, DAG); |
414 | case ISD::FRAMEADDR: |
415 | return lowerFRAMEADDR(Op, DAG); |
416 | case ISD::RETURNADDR: |
417 | return lowerRETURNADDR(Op, DAG); |
418 | case ISD::WRITE_REGISTER: |
419 | return lowerWRITE_REGISTER(Op, DAG); |
420 | case ISD::INSERT_VECTOR_ELT: |
421 | return lowerINSERT_VECTOR_ELT(Op, DAG); |
422 | case ISD::EXTRACT_VECTOR_ELT: |
423 | return lowerEXTRACT_VECTOR_ELT(Op, DAG); |
424 | case ISD::BUILD_VECTOR: |
425 | return lowerBUILD_VECTOR(Op, DAG); |
426 | case ISD::VECTOR_SHUFFLE: |
427 | return lowerVECTOR_SHUFFLE(Op, DAG); |
428 | } |
429 | return SDValue(); |
430 | } |
431 | |
432 | /// Determine whether a range fits a regular pattern of values. |
433 | /// This function accounts for the possibility of jumping over the End iterator. |
434 | template <typename ValType> |
435 | static bool |
436 | fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin, |
437 | unsigned CheckStride, |
438 | typename SmallVectorImpl<ValType>::const_iterator End, |
439 | ValType ExpectedIndex, unsigned ExpectedIndexStride) { |
440 | auto &I = Begin; |
441 | |
442 | while (I != End) { |
443 | if (*I != -1 && *I != ExpectedIndex) |
444 | return false; |
445 | ExpectedIndex += ExpectedIndexStride; |
446 | |
447 | // Incrementing past End is undefined behaviour so we must increment one |
448 | // step at a time and check for End at each step. |
449 | for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I) |
450 | ; // Empty loop body. |
451 | } |
452 | return true; |
453 | } |
454 | |
455 | /// Lower VECTOR_SHUFFLE into VREPLVEI (if possible). |
456 | /// |
457 | /// VREPLVEI performs vector broadcast based on an element specified by an |
458 | /// integer immediate, with its mask being similar to: |
459 | /// <x, x, x, ...> |
460 | /// where x is any valid index. |
461 | /// |
462 | /// When undef's appear in the mask they are treated as if they were whatever |
463 | /// value is necessary in order to fit the above form. |
464 | static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, |
465 | MVT VT, SDValue V1, SDValue V2, |
466 | SelectionDAG &DAG) { |
467 | int SplatIndex = -1; |
468 | for (const auto &M : Mask) { |
469 | if (M != -1) { |
470 | SplatIndex = M; |
471 | break; |
472 | } |
473 | } |
474 | |
475 | if (SplatIndex == -1) |
476 | return DAG.getUNDEF(VT); |
477 | |
478 | assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index" ); |
479 | if (fitsRegularPattern<int>(Begin: Mask.begin(), CheckStride: 1, End: Mask.end(), ExpectedIndex: SplatIndex, ExpectedIndexStride: 0)) { |
480 | APInt Imm(64, SplatIndex); |
481 | return DAG.getNode(Opcode: LoongArchISD::VREPLVEI, DL, VT, N1: V1, |
482 | N2: DAG.getConstant(Val: Imm, DL, VT: MVT::i64)); |
483 | } |
484 | |
485 | return SDValue(); |
486 | } |
487 | |
488 | /// Lower VECTOR_SHUFFLE into VSHUF4I (if possible). |
489 | /// |
490 | /// VSHUF4I splits the vector into blocks of four elements, then shuffles these |
491 | /// elements according to a <4 x i2> constant (encoded as an integer immediate). |
492 | /// |
493 | /// It is therefore possible to lower into VSHUF4I when the mask takes the form: |
494 | /// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...> |
495 | /// When undef's appear they are treated as if they were whatever value is |
496 | /// necessary in order to fit the above forms. |
497 | /// |
498 | /// For example: |
499 | /// %2 = shufflevector <8 x i16> %0, <8 x i16> undef, |
500 | /// <8 x i32> <i32 3, i32 2, i32 1, i32 0, |
501 | /// i32 7, i32 6, i32 5, i32 4> |
502 | /// is lowered to: |
503 | /// (VSHUF4I_H $v0, $v1, 27) |
504 | /// where the 27 comes from: |
505 | /// 3 + (2 << 2) + (1 << 4) + (0 << 6) |
506 | static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, |
507 | MVT VT, SDValue V1, SDValue V2, |
508 | SelectionDAG &DAG) { |
509 | |
510 | // When the size is less than 4, lower cost instructions may be used. |
511 | if (Mask.size() < 4) |
512 | return SDValue(); |
513 | |
514 | int SubMask[4] = {-1, -1, -1, -1}; |
515 | for (unsigned i = 0; i < 4; ++i) { |
516 | for (unsigned j = i; j < Mask.size(); j += 4) { |
517 | int Idx = Mask[j]; |
518 | |
519 | // Convert from vector index to 4-element subvector index |
520 | // If an index refers to an element outside of the subvector then give up |
521 | if (Idx != -1) { |
522 | Idx -= 4 * (j / 4); |
523 | if (Idx < 0 || Idx >= 4) |
524 | return SDValue(); |
525 | } |
526 | |
527 | // If the mask has an undef, replace it with the current index. |
528 | // Note that it might still be undef if the current index is also undef |
529 | if (SubMask[i] == -1) |
530 | SubMask[i] = Idx; |
531 | // Check that non-undef values are the same as in the mask. If they |
532 | // aren't then give up |
533 | else if (Idx != -1 && Idx != SubMask[i]) |
534 | return SDValue(); |
535 | } |
536 | } |
537 | |
538 | // Calculate the immediate. Replace any remaining undefs with zero |
539 | APInt Imm(64, 0); |
540 | for (int i = 3; i >= 0; --i) { |
541 | int Idx = SubMask[i]; |
542 | |
543 | if (Idx == -1) |
544 | Idx = 0; |
545 | |
546 | Imm <<= 2; |
547 | Imm |= Idx & 0x3; |
548 | } |
549 | |
550 | return DAG.getNode(Opcode: LoongArchISD::VSHUF4I, DL, VT, N1: V1, |
551 | N2: DAG.getConstant(Val: Imm, DL, VT: MVT::i64)); |
552 | } |
553 | |
554 | /// Lower VECTOR_SHUFFLE into VPACKEV (if possible). |
555 | /// |
556 | /// VPACKEV interleaves the even elements from each vector. |
557 | /// |
558 | /// It is possible to lower into VPACKEV when the mask consists of two of the |
559 | /// following forms interleaved: |
560 | /// <0, 2, 4, ...> |
561 | /// <n, n+2, n+4, ...> |
562 | /// where n is the number of elements in the vector. |
563 | /// For example: |
564 | /// <0, 0, 2, 2, 4, 4, ...> |
565 | /// <0, n, 2, n+2, 4, n+4, ...> |
566 | /// |
567 | /// When undef's appear in the mask they are treated as if they were whatever |
568 | /// value is necessary in order to fit the above forms. |
569 | static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef<int> Mask, |
570 | MVT VT, SDValue V1, SDValue V2, |
571 | SelectionDAG &DAG) { |
572 | |
573 | const auto &Begin = Mask.begin(); |
574 | const auto &End = Mask.end(); |
575 | SDValue OriV1 = V1, OriV2 = V2; |
576 | |
577 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 2)) |
578 | V1 = OriV1; |
579 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2)) |
580 | V1 = OriV2; |
581 | else |
582 | return SDValue(); |
583 | |
584 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 2)) |
585 | V2 = OriV1; |
586 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2)) |
587 | V2 = OriV2; |
588 | else |
589 | return SDValue(); |
590 | |
591 | return DAG.getNode(Opcode: LoongArchISD::VPACKEV, DL, VT, N1: V2, N2: V1); |
592 | } |
593 | |
594 | /// Lower VECTOR_SHUFFLE into VPACKOD (if possible). |
595 | /// |
596 | /// VPACKOD interleaves the odd elements from each vector. |
597 | /// |
598 | /// It is possible to lower into VPACKOD when the mask consists of two of the |
599 | /// following forms interleaved: |
600 | /// <1, 3, 5, ...> |
601 | /// <n+1, n+3, n+5, ...> |
602 | /// where n is the number of elements in the vector. |
603 | /// For example: |
604 | /// <1, 1, 3, 3, 5, 5, ...> |
605 | /// <1, n+1, 3, n+3, 5, n+5, ...> |
606 | /// |
607 | /// When undef's appear in the mask they are treated as if they were whatever |
608 | /// value is necessary in order to fit the above forms. |
609 | static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef<int> Mask, |
610 | MVT VT, SDValue V1, SDValue V2, |
611 | SelectionDAG &DAG) { |
612 | |
613 | const auto &Begin = Mask.begin(); |
614 | const auto &End = Mask.end(); |
615 | SDValue OriV1 = V1, OriV2 = V2; |
616 | |
617 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 1, ExpectedIndexStride: 2)) |
618 | V1 = OriV1; |
619 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2)) |
620 | V1 = OriV2; |
621 | else |
622 | return SDValue(); |
623 | |
624 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 1, ExpectedIndexStride: 2)) |
625 | V2 = OriV1; |
626 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2)) |
627 | V2 = OriV2; |
628 | else |
629 | return SDValue(); |
630 | |
631 | return DAG.getNode(Opcode: LoongArchISD::VPACKOD, DL, VT, N1: V2, N2: V1); |
632 | } |
633 | |
634 | /// Lower VECTOR_SHUFFLE into VILVH (if possible). |
635 | /// |
636 | /// VILVH interleaves consecutive elements from the left (highest-indexed) half |
637 | /// of each vector. |
638 | /// |
639 | /// It is possible to lower into VILVH when the mask consists of two of the |
640 | /// following forms interleaved: |
641 | /// <x, x+1, x+2, ...> |
642 | /// <n+x, n+x+1, n+x+2, ...> |
643 | /// where n is the number of elements in the vector and x is half n. |
644 | /// For example: |
645 | /// <x, x, x+1, x+1, x+2, x+2, ...> |
646 | /// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...> |
647 | /// |
648 | /// When undef's appear in the mask they are treated as if they were whatever |
649 | /// value is necessary in order to fit the above forms. |
650 | static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef<int> Mask, |
651 | MVT VT, SDValue V1, SDValue V2, |
652 | SelectionDAG &DAG) { |
653 | |
654 | const auto &Begin = Mask.begin(); |
655 | const auto &End = Mask.end(); |
656 | unsigned HalfSize = Mask.size() / 2; |
657 | SDValue OriV1 = V1, OriV2 = V2; |
658 | |
659 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1)) |
660 | V1 = OriV1; |
661 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 1)) |
662 | V1 = OriV2; |
663 | else |
664 | return SDValue(); |
665 | |
666 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1)) |
667 | V2 = OriV1; |
668 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size() + HalfSize, |
669 | ExpectedIndexStride: 1)) |
670 | V2 = OriV2; |
671 | else |
672 | return SDValue(); |
673 | |
674 | return DAG.getNode(Opcode: LoongArchISD::VILVH, DL, VT, N1: V2, N2: V1); |
675 | } |
676 | |
677 | /// Lower VECTOR_SHUFFLE into VILVL (if possible). |
678 | /// |
679 | /// VILVL interleaves consecutive elements from the right (lowest-indexed) half |
680 | /// of each vector. |
681 | /// |
682 | /// It is possible to lower into VILVL when the mask consists of two of the |
683 | /// following forms interleaved: |
684 | /// <0, 1, 2, ...> |
685 | /// <n, n+1, n+2, ...> |
686 | /// where n is the number of elements in the vector. |
687 | /// For example: |
688 | /// <0, 0, 1, 1, 2, 2, ...> |
689 | /// <0, n, 1, n+1, 2, n+2, ...> |
690 | /// |
691 | /// When undef's appear in the mask they are treated as if they were whatever |
692 | /// value is necessary in order to fit the above forms. |
693 | static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask, |
694 | MVT VT, SDValue V1, SDValue V2, |
695 | SelectionDAG &DAG) { |
696 | |
697 | const auto &Begin = Mask.begin(); |
698 | const auto &End = Mask.end(); |
699 | SDValue OriV1 = V1, OriV2 = V2; |
700 | |
701 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 1)) |
702 | V1 = OriV1; |
703 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 1)) |
704 | V1 = OriV2; |
705 | else |
706 | return SDValue(); |
707 | |
708 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 1)) |
709 | V2 = OriV1; |
710 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 1)) |
711 | V2 = OriV2; |
712 | else |
713 | return SDValue(); |
714 | |
715 | return DAG.getNode(Opcode: LoongArchISD::VILVL, DL, VT, N1: V2, N2: V1); |
716 | } |
717 | |
718 | /// Lower VECTOR_SHUFFLE into VPICKEV (if possible). |
719 | /// |
720 | /// VPICKEV copies the even elements of each vector into the result vector. |
721 | /// |
722 | /// It is possible to lower into VPICKEV when the mask consists of two of the |
723 | /// following forms concatenated: |
724 | /// <0, 2, 4, ...> |
725 | /// <n, n+2, n+4, ...> |
726 | /// where n is the number of elements in the vector. |
727 | /// For example: |
728 | /// <0, 2, 4, ..., 0, 2, 4, ...> |
729 | /// <0, 2, 4, ..., n, n+2, n+4, ...> |
730 | /// |
731 | /// When undef's appear in the mask they are treated as if they were whatever |
732 | /// value is necessary in order to fit the above forms. |
733 | static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask, |
734 | MVT VT, SDValue V1, SDValue V2, |
735 | SelectionDAG &DAG) { |
736 | |
737 | const auto &Begin = Mask.begin(); |
738 | const auto &Mid = Mask.begin() + Mask.size() / 2; |
739 | const auto &End = Mask.end(); |
740 | SDValue OriV1 = V1, OriV2 = V2; |
741 | |
742 | if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: 0, ExpectedIndexStride: 2)) |
743 | V1 = OriV1; |
744 | else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2)) |
745 | V1 = OriV2; |
746 | else |
747 | return SDValue(); |
748 | |
749 | if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: 0, ExpectedIndexStride: 2)) |
750 | V2 = OriV1; |
751 | else if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2)) |
752 | V2 = OriV2; |
753 | |
754 | else |
755 | return SDValue(); |
756 | |
757 | return DAG.getNode(Opcode: LoongArchISD::VPICKEV, DL, VT, N1: V2, N2: V1); |
758 | } |
759 | |
760 | /// Lower VECTOR_SHUFFLE into VPICKOD (if possible). |
761 | /// |
762 | /// VPICKOD copies the odd elements of each vector into the result vector. |
763 | /// |
764 | /// It is possible to lower into VPICKOD when the mask consists of two of the |
765 | /// following forms concatenated: |
766 | /// <1, 3, 5, ...> |
767 | /// <n+1, n+3, n+5, ...> |
768 | /// where n is the number of elements in the vector. |
769 | /// For example: |
770 | /// <1, 3, 5, ..., 1, 3, 5, ...> |
771 | /// <1, 3, 5, ..., n+1, n+3, n+5, ...> |
772 | /// |
773 | /// When undef's appear in the mask they are treated as if they were whatever |
774 | /// value is necessary in order to fit the above forms. |
775 | static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask, |
776 | MVT VT, SDValue V1, SDValue V2, |
777 | SelectionDAG &DAG) { |
778 | |
779 | const auto &Begin = Mask.begin(); |
780 | const auto &Mid = Mask.begin() + Mask.size() / 2; |
781 | const auto &End = Mask.end(); |
782 | SDValue OriV1 = V1, OriV2 = V2; |
783 | |
784 | if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: 1, ExpectedIndexStride: 2)) |
785 | V1 = OriV1; |
786 | else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2)) |
787 | V1 = OriV2; |
788 | else |
789 | return SDValue(); |
790 | |
791 | if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: 1, ExpectedIndexStride: 2)) |
792 | V2 = OriV1; |
793 | else if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2)) |
794 | V2 = OriV2; |
795 | else |
796 | return SDValue(); |
797 | |
798 | return DAG.getNode(Opcode: LoongArchISD::VPICKOD, DL, VT, N1: V2, N2: V1); |
799 | } |
800 | |
801 | /// Lower VECTOR_SHUFFLE into VSHUF. |
802 | /// |
803 | /// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and |
804 | /// adding it as an operand to the resulting VSHUF. |
805 | static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask, |
806 | MVT VT, SDValue V1, SDValue V2, |
807 | SelectionDAG &DAG) { |
808 | |
809 | SmallVector<SDValue, 16> Ops; |
810 | for (auto M : Mask) |
811 | Ops.push_back(Elt: DAG.getConstant(Val: M, DL, VT: MVT::i64)); |
812 | |
813 | EVT MaskVecTy = VT.changeVectorElementTypeToInteger(); |
814 | SDValue MaskVec = DAG.getBuildVector(VT: MaskVecTy, DL, Ops); |
815 | |
816 | // VECTOR_SHUFFLE concatenates the vectors in an vectorwise fashion. |
817 | // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11> |
818 | // VSHF concatenates the vectors in a bitwise fashion: |
819 | // <0b00, 0b01> + <0b10, 0b11> -> |
820 | // 0b0100 + 0b1110 -> 0b01001110 |
821 | // <0b10, 0b11, 0b00, 0b01> |
822 | // We must therefore swap the operands to get the correct result. |
823 | return DAG.getNode(Opcode: LoongArchISD::VSHUF, DL, VT, N1: MaskVec, N2: V2, N3: V1); |
824 | } |
825 | |
826 | /// Dispatching routine to lower various 128-bit LoongArch vector shuffles. |
827 | /// |
828 | /// This routine breaks down the specific type of 128-bit shuffle and |
829 | /// dispatches to the lowering routines accordingly. |
830 | static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT, |
831 | SDValue V1, SDValue V2, SelectionDAG &DAG) { |
832 | assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 || |
833 | VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 || |
834 | VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) && |
835 | "Vector type is unsupported for lsx!" ); |
836 | assert(V1.getSimpleValueType() == V2.getSimpleValueType() && |
837 | "Two operands have different types!" ); |
838 | assert(VT.getVectorNumElements() == Mask.size() && |
839 | "Unexpected mask size for shuffle!" ); |
840 | assert(Mask.size() % 2 == 0 && "Expected even mask size." ); |
841 | |
842 | SDValue Result; |
843 | // TODO: Add more comparison patterns. |
844 | if (V2.isUndef()) { |
845 | if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG))) |
846 | return Result; |
847 | if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG))) |
848 | return Result; |
849 | |
850 | // TODO: This comment may be enabled in the future to better match the |
851 | // pattern for instruction selection. |
852 | /* V2 = V1; */ |
853 | } |
854 | |
855 | // It is recommended not to change the pattern comparison order for better |
856 | // performance. |
857 | if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG))) |
858 | return Result; |
859 | if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG))) |
860 | return Result; |
861 | if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG))) |
862 | return Result; |
863 | if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG))) |
864 | return Result; |
865 | if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG))) |
866 | return Result; |
867 | if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG))) |
868 | return Result; |
869 | if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG))) |
870 | return Result; |
871 | |
872 | return SDValue(); |
873 | } |
874 | |
875 | /// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible). |
876 | /// |
877 | /// It is a XVREPLVEI when the mask is: |
878 | /// <x, x, x, ..., x+n, x+n, x+n, ...> |
879 | /// where the number of x is equal to n and n is half the length of vector. |
880 | /// |
881 | /// When undef's appear in the mask they are treated as if they were whatever |
882 | /// value is necessary in order to fit the above form. |
883 | static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, |
884 | ArrayRef<int> Mask, MVT VT, |
885 | SDValue V1, SDValue V2, |
886 | SelectionDAG &DAG) { |
887 | int SplatIndex = -1; |
888 | for (const auto &M : Mask) { |
889 | if (M != -1) { |
890 | SplatIndex = M; |
891 | break; |
892 | } |
893 | } |
894 | |
895 | if (SplatIndex == -1) |
896 | return DAG.getUNDEF(VT); |
897 | |
898 | const auto &Begin = Mask.begin(); |
899 | const auto &End = Mask.end(); |
900 | unsigned HalfSize = Mask.size() / 2; |
901 | |
902 | assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index" ); |
903 | if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: End - HalfSize, ExpectedIndex: SplatIndex, ExpectedIndexStride: 0) && |
904 | fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 1, End, ExpectedIndex: SplatIndex + HalfSize, |
905 | ExpectedIndexStride: 0)) { |
906 | APInt Imm(64, SplatIndex); |
907 | return DAG.getNode(Opcode: LoongArchISD::VREPLVEI, DL, VT, N1: V1, |
908 | N2: DAG.getConstant(Val: Imm, DL, VT: MVT::i64)); |
909 | } |
910 | |
911 | return SDValue(); |
912 | } |
913 | |
914 | /// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible). |
915 | static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, |
916 | MVT VT, SDValue V1, SDValue V2, |
917 | SelectionDAG &DAG) { |
918 | // When the size is less than or equal to 4, lower cost instructions may be |
919 | // used. |
920 | if (Mask.size() <= 4) |
921 | return SDValue(); |
922 | return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG); |
923 | } |
924 | |
925 | /// Lower VECTOR_SHUFFLE into XVPACKEV (if possible). |
926 | static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask, |
927 | MVT VT, SDValue V1, SDValue V2, |
928 | SelectionDAG &DAG) { |
929 | return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG); |
930 | } |
931 | |
932 | /// Lower VECTOR_SHUFFLE into XVPACKOD (if possible). |
933 | static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask, |
934 | MVT VT, SDValue V1, SDValue V2, |
935 | SelectionDAG &DAG) { |
936 | return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG); |
937 | } |
938 | |
939 | /// Lower VECTOR_SHUFFLE into XVILVH (if possible). |
940 | static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask, |
941 | MVT VT, SDValue V1, SDValue V2, |
942 | SelectionDAG &DAG) { |
943 | |
944 | const auto &Begin = Mask.begin(); |
945 | const auto &End = Mask.end(); |
946 | unsigned HalfSize = Mask.size() / 2; |
947 | unsigned LeftSize = HalfSize / 2; |
948 | SDValue OriV1 = V1, OriV2 = V2; |
949 | |
950 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize, ExpectedIndex: HalfSize - LeftSize, |
951 | ExpectedIndexStride: 1) && |
952 | fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize + LeftSize, ExpectedIndexStride: 1)) |
953 | V1 = OriV1; |
954 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize, |
955 | ExpectedIndex: Mask.size() + HalfSize - LeftSize, ExpectedIndexStride: 1) && |
956 | fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End, |
957 | ExpectedIndex: Mask.size() + HalfSize + LeftSize, ExpectedIndexStride: 1)) |
958 | V1 = OriV2; |
959 | else |
960 | return SDValue(); |
961 | |
962 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize, ExpectedIndex: HalfSize - LeftSize, |
963 | ExpectedIndexStride: 1) && |
964 | fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize + LeftSize, |
965 | ExpectedIndexStride: 1)) |
966 | V2 = OriV1; |
967 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize, |
968 | ExpectedIndex: Mask.size() + HalfSize - LeftSize, ExpectedIndexStride: 1) && |
969 | fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End, |
970 | ExpectedIndex: Mask.size() + HalfSize + LeftSize, ExpectedIndexStride: 1)) |
971 | V2 = OriV2; |
972 | else |
973 | return SDValue(); |
974 | |
975 | return DAG.getNode(Opcode: LoongArchISD::VILVH, DL, VT, N1: V2, N2: V1); |
976 | } |
977 | |
978 | /// Lower VECTOR_SHUFFLE into XVILVL (if possible). |
979 | static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask, |
980 | MVT VT, SDValue V1, SDValue V2, |
981 | SelectionDAG &DAG) { |
982 | |
983 | const auto &Begin = Mask.begin(); |
984 | const auto &End = Mask.end(); |
985 | unsigned HalfSize = Mask.size() / 2; |
986 | SDValue OriV1 = V1, OriV2 = V2; |
987 | |
988 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize, ExpectedIndex: 0, ExpectedIndexStride: 1) && |
989 | fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1)) |
990 | V1 = OriV1; |
991 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize, ExpectedIndex: Mask.size(), ExpectedIndexStride: 1) && |
992 | fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End, |
993 | ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 1)) |
994 | V1 = OriV2; |
995 | else |
996 | return SDValue(); |
997 | |
998 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize, ExpectedIndex: 0, ExpectedIndexStride: 1) && |
999 | fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1)) |
1000 | V2 = OriV1; |
1001 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize, ExpectedIndex: Mask.size(), |
1002 | ExpectedIndexStride: 1) && |
1003 | fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End, |
1004 | ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 1)) |
1005 | V2 = OriV2; |
1006 | else |
1007 | return SDValue(); |
1008 | |
1009 | return DAG.getNode(Opcode: LoongArchISD::VILVL, DL, VT, N1: V2, N2: V1); |
1010 | } |
1011 | |
1012 | /// Lower VECTOR_SHUFFLE into XVPICKEV (if possible). |
1013 | static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask, |
1014 | MVT VT, SDValue V1, SDValue V2, |
1015 | SelectionDAG &DAG) { |
1016 | |
1017 | const auto &Begin = Mask.begin(); |
1018 | const auto &LeftMid = Mask.begin() + Mask.size() / 4; |
1019 | const auto &Mid = Mask.begin() + Mask.size() / 2; |
1020 | const auto &RightMid = Mask.end() - Mask.size() / 4; |
1021 | const auto &End = Mask.end(); |
1022 | unsigned HalfSize = Mask.size() / 2; |
1023 | SDValue OriV1 = V1, OriV2 = V2; |
1024 | |
1025 | if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: 0, ExpectedIndexStride: 2) && |
1026 | fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: HalfSize, ExpectedIndexStride: 2)) |
1027 | V1 = OriV1; |
1028 | else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2) && |
1029 | fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 2)) |
1030 | V1 = OriV2; |
1031 | else |
1032 | return SDValue(); |
1033 | |
1034 | if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: 0, ExpectedIndexStride: 2) && |
1035 | fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 2)) |
1036 | V2 = OriV1; |
1037 | else if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2) && |
1038 | fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 2)) |
1039 | V2 = OriV2; |
1040 | |
1041 | else |
1042 | return SDValue(); |
1043 | |
1044 | return DAG.getNode(Opcode: LoongArchISD::VPICKEV, DL, VT, N1: V2, N2: V1); |
1045 | } |
1046 | |
1047 | /// Lower VECTOR_SHUFFLE into XVPICKOD (if possible). |
1048 | static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask, |
1049 | MVT VT, SDValue V1, SDValue V2, |
1050 | SelectionDAG &DAG) { |
1051 | |
1052 | const auto &Begin = Mask.begin(); |
1053 | const auto &LeftMid = Mask.begin() + Mask.size() / 4; |
1054 | const auto &Mid = Mask.begin() + Mask.size() / 2; |
1055 | const auto &RightMid = Mask.end() - Mask.size() / 4; |
1056 | const auto &End = Mask.end(); |
1057 | unsigned HalfSize = Mask.size() / 2; |
1058 | SDValue OriV1 = V1, OriV2 = V2; |
1059 | |
1060 | if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: 1, ExpectedIndexStride: 2) && |
1061 | fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: HalfSize + 1, ExpectedIndexStride: 2)) |
1062 | V1 = OriV1; |
1063 | else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2) && |
1064 | fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: Mask.size() + HalfSize + 1, |
1065 | ExpectedIndexStride: 2)) |
1066 | V1 = OriV2; |
1067 | else |
1068 | return SDValue(); |
1069 | |
1070 | if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: 1, ExpectedIndexStride: 2) && |
1071 | fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: HalfSize + 1, ExpectedIndexStride: 2)) |
1072 | V2 = OriV1; |
1073 | else if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2) && |
1074 | fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: Mask.size() + HalfSize + 1, |
1075 | ExpectedIndexStride: 2)) |
1076 | V2 = OriV2; |
1077 | else |
1078 | return SDValue(); |
1079 | |
1080 | return DAG.getNode(Opcode: LoongArchISD::VPICKOD, DL, VT, N1: V2, N2: V1); |
1081 | } |
1082 | |
1083 | /// Lower VECTOR_SHUFFLE into XVSHUF (if possible). |
1084 | static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask, |
1085 | MVT VT, SDValue V1, SDValue V2, |
1086 | SelectionDAG &DAG) { |
1087 | |
1088 | int MaskSize = Mask.size(); |
1089 | int HalfSize = Mask.size() / 2; |
1090 | const auto &Begin = Mask.begin(); |
1091 | const auto &Mid = Mask.begin() + HalfSize; |
1092 | const auto &End = Mask.end(); |
1093 | |
1094 | // VECTOR_SHUFFLE concatenates the vectors: |
1095 | // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15> |
1096 | // shuffling -> |
1097 | // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15> |
1098 | // |
1099 | // XVSHUF concatenates the vectors: |
1100 | // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7> |
1101 | // shuffling -> |
1102 | // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7> |
1103 | SmallVector<SDValue, 8> MaskAlloc; |
1104 | for (auto it = Begin; it < Mid; it++) { |
1105 | if (*it < 0) // UNDEF |
1106 | MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: 0, DL, VT: MVT::i64)); |
1107 | else if ((*it >= 0 && *it < HalfSize) || |
1108 | (*it >= MaskSize && *it <= MaskSize + HalfSize)) { |
1109 | int M = *it < HalfSize ? *it : *it - HalfSize; |
1110 | MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: M, DL, VT: MVT::i64)); |
1111 | } else |
1112 | return SDValue(); |
1113 | } |
1114 | assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!" ); |
1115 | |
1116 | for (auto it = Mid; it < End; it++) { |
1117 | if (*it < 0) // UNDEF |
1118 | MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: 0, DL, VT: MVT::i64)); |
1119 | else if ((*it >= HalfSize && *it < MaskSize) || |
1120 | (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) { |
1121 | int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize; |
1122 | MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: M, DL, VT: MVT::i64)); |
1123 | } else |
1124 | return SDValue(); |
1125 | } |
1126 | assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!" ); |
1127 | |
1128 | EVT MaskVecTy = VT.changeVectorElementTypeToInteger(); |
1129 | SDValue MaskVec = DAG.getBuildVector(VT: MaskVecTy, DL, Ops: MaskAlloc); |
1130 | return DAG.getNode(Opcode: LoongArchISD::VSHUF, DL, VT, N1: MaskVec, N2: V2, N3: V1); |
1131 | } |
1132 | |
1133 | /// Shuffle vectors by lane to generate more optimized instructions. |
1134 | /// 256-bit shuffles are always considered as 2-lane 128-bit shuffles. |
1135 | /// |
1136 | /// Therefore, except for the following four cases, other cases are regarded |
1137 | /// as cross-lane shuffles, where optimization is relatively limited. |
1138 | /// |
1139 | /// - Shuffle high, low lanes of two inputs vector |
1140 | /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6> |
1141 | /// - Shuffle low, high lanes of two inputs vector |
1142 | /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5> |
1143 | /// - Shuffle low, low lanes of two inputs vector |
1144 | /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6> |
1145 | /// - Shuffle high, high lanes of two inputs vector |
1146 | /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5> |
1147 | /// |
1148 | /// The first case is the closest to LoongArch instructions and the other |
1149 | /// cases need to be converted to it for processing. |
1150 | /// |
1151 | /// This function may modify V1, V2 and Mask |
1152 | static void canonicalizeShuffleVectorByLane(const SDLoc &DL, |
1153 | MutableArrayRef<int> Mask, MVT VT, |
1154 | SDValue &V1, SDValue &V2, |
1155 | SelectionDAG &DAG) { |
1156 | |
1157 | enum HalfMaskType { HighLaneTy, LowLaneTy, None }; |
1158 | |
1159 | int MaskSize = Mask.size(); |
1160 | int HalfSize = Mask.size() / 2; |
1161 | |
1162 | HalfMaskType preMask = None, postMask = None; |
1163 | |
1164 | if (std::all_of(first: Mask.begin(), last: Mask.begin() + HalfSize, pred: [&](int M) { |
1165 | return M < 0 || (M >= 0 && M < HalfSize) || |
1166 | (M >= MaskSize && M < MaskSize + HalfSize); |
1167 | })) |
1168 | preMask = HighLaneTy; |
1169 | else if (std::all_of(first: Mask.begin(), last: Mask.begin() + HalfSize, pred: [&](int M) { |
1170 | return M < 0 || (M >= HalfSize && M < MaskSize) || |
1171 | (M >= MaskSize + HalfSize && M < MaskSize * 2); |
1172 | })) |
1173 | preMask = LowLaneTy; |
1174 | |
1175 | if (std::all_of(first: Mask.begin() + HalfSize, last: Mask.end(), pred: [&](int M) { |
1176 | return M < 0 || (M >= 0 && M < HalfSize) || |
1177 | (M >= MaskSize && M < MaskSize + HalfSize); |
1178 | })) |
1179 | postMask = HighLaneTy; |
1180 | else if (std::all_of(first: Mask.begin() + HalfSize, last: Mask.end(), pred: [&](int M) { |
1181 | return M < 0 || (M >= HalfSize && M < MaskSize) || |
1182 | (M >= MaskSize + HalfSize && M < MaskSize * 2); |
1183 | })) |
1184 | postMask = LowLaneTy; |
1185 | |
1186 | // The pre-half of mask is high lane type, and the post-half of mask |
1187 | // is low lane type, which is closest to the LoongArch instructions. |
1188 | // |
1189 | // Note: In the LoongArch architecture, the high lane of mask corresponds |
1190 | // to the lower 128-bit of vector register, and the low lane of mask |
1191 | // corresponds the higher 128-bit of vector register. |
1192 | if (preMask == HighLaneTy && postMask == LowLaneTy) { |
1193 | return; |
1194 | } |
1195 | if (preMask == LowLaneTy && postMask == HighLaneTy) { |
1196 | V1 = DAG.getBitcast(VT: MVT::v4i64, V: V1); |
1197 | V1 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V1, |
1198 | N2: DAG.getConstant(Val: 0b01001110, DL, VT: MVT::i64)); |
1199 | V1 = DAG.getBitcast(VT, V: V1); |
1200 | |
1201 | if (!V2.isUndef()) { |
1202 | V2 = DAG.getBitcast(VT: MVT::v4i64, V: V2); |
1203 | V2 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V2, |
1204 | N2: DAG.getConstant(Val: 0b01001110, DL, VT: MVT::i64)); |
1205 | V2 = DAG.getBitcast(VT, V: V2); |
1206 | } |
1207 | |
1208 | for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) { |
1209 | *it = *it < 0 ? *it : *it - HalfSize; |
1210 | } |
1211 | for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) { |
1212 | *it = *it < 0 ? *it : *it + HalfSize; |
1213 | } |
1214 | } else if (preMask == LowLaneTy && postMask == LowLaneTy) { |
1215 | V1 = DAG.getBitcast(VT: MVT::v4i64, V: V1); |
1216 | V1 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V1, |
1217 | N2: DAG.getConstant(Val: 0b11101110, DL, VT: MVT::i64)); |
1218 | V1 = DAG.getBitcast(VT, V: V1); |
1219 | |
1220 | if (!V2.isUndef()) { |
1221 | V2 = DAG.getBitcast(VT: MVT::v4i64, V: V2); |
1222 | V2 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V2, |
1223 | N2: DAG.getConstant(Val: 0b11101110, DL, VT: MVT::i64)); |
1224 | V2 = DAG.getBitcast(VT, V: V2); |
1225 | } |
1226 | |
1227 | for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) { |
1228 | *it = *it < 0 ? *it : *it - HalfSize; |
1229 | } |
1230 | } else if (preMask == HighLaneTy && postMask == HighLaneTy) { |
1231 | V1 = DAG.getBitcast(VT: MVT::v4i64, V: V1); |
1232 | V1 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V1, |
1233 | N2: DAG.getConstant(Val: 0b01000100, DL, VT: MVT::i64)); |
1234 | V1 = DAG.getBitcast(VT, V: V1); |
1235 | |
1236 | if (!V2.isUndef()) { |
1237 | V2 = DAG.getBitcast(VT: MVT::v4i64, V: V2); |
1238 | V2 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V2, |
1239 | N2: DAG.getConstant(Val: 0b01000100, DL, VT: MVT::i64)); |
1240 | V2 = DAG.getBitcast(VT, V: V2); |
1241 | } |
1242 | |
1243 | for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) { |
1244 | *it = *it < 0 ? *it : *it + HalfSize; |
1245 | } |
1246 | } else { // cross-lane |
1247 | return; |
1248 | } |
1249 | } |
1250 | |
1251 | /// Dispatching routine to lower various 256-bit LoongArch vector shuffles. |
1252 | /// |
1253 | /// This routine breaks down the specific type of 256-bit shuffle and |
1254 | /// dispatches to the lowering routines accordingly. |
1255 | static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT, |
1256 | SDValue V1, SDValue V2, SelectionDAG &DAG) { |
1257 | assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 || |
1258 | VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 || |
1259 | VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) && |
1260 | "Vector type is unsupported for lasx!" ); |
1261 | assert(V1.getSimpleValueType() == V2.getSimpleValueType() && |
1262 | "Two operands have different types!" ); |
1263 | assert(VT.getVectorNumElements() == Mask.size() && |
1264 | "Unexpected mask size for shuffle!" ); |
1265 | assert(Mask.size() % 2 == 0 && "Expected even mask size." ); |
1266 | assert(Mask.size() >= 4 && "Mask size is less than 4." ); |
1267 | |
1268 | // canonicalize non cross-lane shuffle vector |
1269 | SmallVector<int> NewMask(Mask); |
1270 | canonicalizeShuffleVectorByLane(DL, Mask: NewMask, VT, V1, V2, DAG); |
1271 | |
1272 | SDValue Result; |
1273 | // TODO: Add more comparison patterns. |
1274 | if (V2.isUndef()) { |
1275 | if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask: NewMask, VT, V1, V2, DAG))) |
1276 | return Result; |
1277 | if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask: NewMask, VT, V1, V2, DAG))) |
1278 | return Result; |
1279 | |
1280 | // TODO: This comment may be enabled in the future to better match the |
1281 | // pattern for instruction selection. |
1282 | /* V2 = V1; */ |
1283 | } |
1284 | |
1285 | // It is recommended not to change the pattern comparison order for better |
1286 | // performance. |
1287 | if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask: NewMask, VT, V1, V2, DAG))) |
1288 | return Result; |
1289 | if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask: NewMask, VT, V1, V2, DAG))) |
1290 | return Result; |
1291 | if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask: NewMask, VT, V1, V2, DAG))) |
1292 | return Result; |
1293 | if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask: NewMask, VT, V1, V2, DAG))) |
1294 | return Result; |
1295 | if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask: NewMask, VT, V1, V2, DAG))) |
1296 | return Result; |
1297 | if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask: NewMask, VT, V1, V2, DAG))) |
1298 | return Result; |
1299 | if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, Mask: NewMask, VT, V1, V2, DAG))) |
1300 | return Result; |
1301 | |
1302 | return SDValue(); |
1303 | } |
1304 | |
1305 | SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, |
1306 | SelectionDAG &DAG) const { |
1307 | ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Val&: Op); |
1308 | ArrayRef<int> OrigMask = SVOp->getMask(); |
1309 | SDValue V1 = Op.getOperand(i: 0); |
1310 | SDValue V2 = Op.getOperand(i: 1); |
1311 | MVT VT = Op.getSimpleValueType(); |
1312 | int NumElements = VT.getVectorNumElements(); |
1313 | SDLoc DL(Op); |
1314 | |
1315 | bool V1IsUndef = V1.isUndef(); |
1316 | bool V2IsUndef = V2.isUndef(); |
1317 | if (V1IsUndef && V2IsUndef) |
1318 | return DAG.getUNDEF(VT); |
1319 | |
1320 | // When we create a shuffle node we put the UNDEF node to second operand, |
1321 | // but in some cases the first operand may be transformed to UNDEF. |
1322 | // In this case we should just commute the node. |
1323 | if (V1IsUndef) |
1324 | return DAG.getCommutedVectorShuffle(SV: *SVOp); |
1325 | |
1326 | // Check for non-undef masks pointing at an undef vector and make the masks |
1327 | // undef as well. This makes it easier to match the shuffle based solely on |
1328 | // the mask. |
1329 | if (V2IsUndef && |
1330 | any_of(Range&: OrigMask, P: [NumElements](int M) { return M >= NumElements; })) { |
1331 | SmallVector<int, 8> NewMask(OrigMask); |
1332 | for (int &M : NewMask) |
1333 | if (M >= NumElements) |
1334 | M = -1; |
1335 | return DAG.getVectorShuffle(VT, dl: DL, N1: V1, N2: V2, Mask: NewMask); |
1336 | } |
1337 | |
1338 | // Check for illegal shuffle mask element index values. |
1339 | int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2); |
1340 | (void)MaskUpperLimit; |
1341 | assert(llvm::all_of(OrigMask, |
1342 | [&](int M) { return -1 <= M && M < MaskUpperLimit; }) && |
1343 | "Out of bounds shuffle index" ); |
1344 | |
1345 | // For each vector width, delegate to a specialized lowering routine. |
1346 | if (VT.is128BitVector()) |
1347 | return lower128BitShuffle(DL, Mask: OrigMask, VT, V1, V2, DAG); |
1348 | |
1349 | if (VT.is256BitVector()) |
1350 | return lower256BitShuffle(DL, Mask: OrigMask, VT, V1, V2, DAG); |
1351 | |
1352 | return SDValue(); |
1353 | } |
1354 | |
1355 | static bool isConstantOrUndef(const SDValue Op) { |
1356 | if (Op->isUndef()) |
1357 | return true; |
1358 | if (isa<ConstantSDNode>(Val: Op)) |
1359 | return true; |
1360 | if (isa<ConstantFPSDNode>(Val: Op)) |
1361 | return true; |
1362 | return false; |
1363 | } |
1364 | |
1365 | static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) { |
1366 | for (unsigned i = 0; i < Op->getNumOperands(); ++i) |
1367 | if (isConstantOrUndef(Op: Op->getOperand(Num: i))) |
1368 | return true; |
1369 | return false; |
1370 | } |
1371 | |
1372 | SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op, |
1373 | SelectionDAG &DAG) const { |
1374 | BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Val&: Op); |
1375 | EVT ResTy = Op->getValueType(ResNo: 0); |
1376 | SDLoc DL(Op); |
1377 | APInt SplatValue, SplatUndef; |
1378 | unsigned SplatBitSize; |
1379 | bool HasAnyUndefs; |
1380 | bool Is128Vec = ResTy.is128BitVector(); |
1381 | bool Is256Vec = ResTy.is256BitVector(); |
1382 | |
1383 | if ((!Subtarget.hasExtLSX() || !Is128Vec) && |
1384 | (!Subtarget.hasExtLASX() || !Is256Vec)) |
1385 | return SDValue(); |
1386 | |
1387 | if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, |
1388 | /*MinSplatBits=*/8) && |
1389 | SplatBitSize <= 64) { |
1390 | // We can only cope with 8, 16, 32, or 64-bit elements. |
1391 | if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 && |
1392 | SplatBitSize != 64) |
1393 | return SDValue(); |
1394 | |
1395 | EVT ViaVecTy; |
1396 | |
1397 | switch (SplatBitSize) { |
1398 | default: |
1399 | return SDValue(); |
1400 | case 8: |
1401 | ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8; |
1402 | break; |
1403 | case 16: |
1404 | ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16; |
1405 | break; |
1406 | case 32: |
1407 | ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32; |
1408 | break; |
1409 | case 64: |
1410 | ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64; |
1411 | break; |
1412 | } |
1413 | |
1414 | // SelectionDAG::getConstant will promote SplatValue appropriately. |
1415 | SDValue Result = DAG.getConstant(Val: SplatValue, DL, VT: ViaVecTy); |
1416 | |
1417 | // Bitcast to the type we originally wanted. |
1418 | if (ViaVecTy != ResTy) |
1419 | Result = DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(Node), VT: ResTy, Operand: Result); |
1420 | |
1421 | return Result; |
1422 | } |
1423 | |
1424 | if (DAG.isSplatValue(V: Op, /*AllowUndefs=*/false)) |
1425 | return Op; |
1426 | |
1427 | if (!isConstantOrUndefBUILD_VECTOR(Op: Node)) { |
1428 | // Use INSERT_VECTOR_ELT operations rather than expand to stores. |
1429 | // The resulting code is the same length as the expansion, but it doesn't |
1430 | // use memory operations. |
1431 | EVT ResTy = Node->getValueType(ResNo: 0); |
1432 | |
1433 | assert(ResTy.isVector()); |
1434 | |
1435 | unsigned NumElts = ResTy.getVectorNumElements(); |
1436 | SDValue Vector = DAG.getUNDEF(VT: ResTy); |
1437 | for (unsigned i = 0; i < NumElts; ++i) { |
1438 | Vector = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: ResTy, N1: Vector, |
1439 | N2: Node->getOperand(Num: i), |
1440 | N3: DAG.getConstant(Val: i, DL, VT: Subtarget.getGRLenVT())); |
1441 | } |
1442 | return Vector; |
1443 | } |
1444 | |
1445 | return SDValue(); |
1446 | } |
1447 | |
1448 | SDValue |
1449 | LoongArchTargetLowering::(SDValue Op, |
1450 | SelectionDAG &DAG) const { |
1451 | EVT VecTy = Op->getOperand(Num: 0)->getValueType(ResNo: 0); |
1452 | SDValue Idx = Op->getOperand(Num: 1); |
1453 | EVT EltTy = VecTy.getVectorElementType(); |
1454 | unsigned NumElts = VecTy.getVectorNumElements(); |
1455 | |
1456 | if (isa<ConstantSDNode>(Val: Idx) && |
1457 | (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 || |
1458 | EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2)) |
1459 | return Op; |
1460 | |
1461 | return SDValue(); |
1462 | } |
1463 | |
1464 | SDValue |
1465 | LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, |
1466 | SelectionDAG &DAG) const { |
1467 | if (isa<ConstantSDNode>(Val: Op->getOperand(Num: 2))) |
1468 | return Op; |
1469 | return SDValue(); |
1470 | } |
1471 | |
1472 | SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op, |
1473 | SelectionDAG &DAG) const { |
1474 | SDLoc DL(Op); |
1475 | SyncScope::ID FenceSSID = |
1476 | static_cast<SyncScope::ID>(Op.getConstantOperandVal(i: 2)); |
1477 | |
1478 | // singlethread fences only synchronize with signal handlers on the same |
1479 | // thread and thus only need to preserve instruction order, not actually |
1480 | // enforce memory ordering. |
1481 | if (FenceSSID == SyncScope::SingleThread) |
1482 | // MEMBARRIER is a compiler barrier; it codegens to a no-op. |
1483 | return DAG.getNode(Opcode: ISD::MEMBARRIER, DL, VT: MVT::Other, Operand: Op.getOperand(i: 0)); |
1484 | |
1485 | return Op; |
1486 | } |
1487 | |
1488 | SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op, |
1489 | SelectionDAG &DAG) const { |
1490 | |
1491 | if (Subtarget.is64Bit() && Op.getOperand(i: 2).getValueType() == MVT::i32) { |
1492 | DAG.getContext()->emitError( |
1493 | ErrorStr: "On LA64, only 64-bit registers can be written." ); |
1494 | return Op.getOperand(i: 0); |
1495 | } |
1496 | |
1497 | if (!Subtarget.is64Bit() && Op.getOperand(i: 2).getValueType() == MVT::i64) { |
1498 | DAG.getContext()->emitError( |
1499 | ErrorStr: "On LA32, only 32-bit registers can be written." ); |
1500 | return Op.getOperand(i: 0); |
1501 | } |
1502 | |
1503 | return Op; |
1504 | } |
1505 | |
1506 | SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op, |
1507 | SelectionDAG &DAG) const { |
1508 | if (!isa<ConstantSDNode>(Val: Op.getOperand(i: 0))) { |
1509 | DAG.getContext()->emitError(ErrorStr: "argument to '__builtin_frame_address' must " |
1510 | "be a constant integer" ); |
1511 | return SDValue(); |
1512 | } |
1513 | |
1514 | MachineFunction &MF = DAG.getMachineFunction(); |
1515 | MF.getFrameInfo().setFrameAddressIsTaken(true); |
1516 | Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF); |
1517 | EVT VT = Op.getValueType(); |
1518 | SDLoc DL(Op); |
1519 | SDValue FrameAddr = DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: DL, Reg: FrameReg, VT); |
1520 | unsigned Depth = Op.getConstantOperandVal(i: 0); |
1521 | int GRLenInBytes = Subtarget.getGRLen() / 8; |
1522 | |
1523 | while (Depth--) { |
1524 | int Offset = -(GRLenInBytes * 2); |
1525 | SDValue Ptr = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: FrameAddr, |
1526 | N2: DAG.getIntPtrConstant(Val: Offset, DL)); |
1527 | FrameAddr = |
1528 | DAG.getLoad(VT, dl: DL, Chain: DAG.getEntryNode(), Ptr, PtrInfo: MachinePointerInfo()); |
1529 | } |
1530 | return FrameAddr; |
1531 | } |
1532 | |
1533 | SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op, |
1534 | SelectionDAG &DAG) const { |
1535 | if (verifyReturnAddressArgumentIsConstant(Op, DAG)) |
1536 | return SDValue(); |
1537 | |
1538 | // Currently only support lowering return address for current frame. |
1539 | if (Op.getConstantOperandVal(i: 0) != 0) { |
1540 | DAG.getContext()->emitError( |
1541 | ErrorStr: "return address can only be determined for the current frame" ); |
1542 | return SDValue(); |
1543 | } |
1544 | |
1545 | MachineFunction &MF = DAG.getMachineFunction(); |
1546 | MF.getFrameInfo().setReturnAddressIsTaken(true); |
1547 | MVT GRLenVT = Subtarget.getGRLenVT(); |
1548 | |
1549 | // Return the value of the return address register, marking it an implicit |
1550 | // live-in. |
1551 | Register Reg = MF.addLiveIn(PReg: Subtarget.getRegisterInfo()->getRARegister(), |
1552 | RC: getRegClassFor(VT: GRLenVT)); |
1553 | return DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: SDLoc(Op), Reg, VT: GRLenVT); |
1554 | } |
1555 | |
1556 | SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op, |
1557 | SelectionDAG &DAG) const { |
1558 | MachineFunction &MF = DAG.getMachineFunction(); |
1559 | auto Size = Subtarget.getGRLen() / 8; |
1560 | auto FI = MF.getFrameInfo().CreateFixedObject(Size, SPOffset: 0, IsImmutable: false); |
1561 | return DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout())); |
1562 | } |
1563 | |
1564 | SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op, |
1565 | SelectionDAG &DAG) const { |
1566 | MachineFunction &MF = DAG.getMachineFunction(); |
1567 | auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>(); |
1568 | |
1569 | SDLoc DL(Op); |
1570 | SDValue FI = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(), |
1571 | VT: getPointerTy(DL: MF.getDataLayout())); |
1572 | |
1573 | // vastart just stores the address of the VarArgsFrameIndex slot into the |
1574 | // memory location argument. |
1575 | const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue(); |
1576 | return DAG.getStore(Chain: Op.getOperand(i: 0), dl: DL, Val: FI, Ptr: Op.getOperand(i: 1), |
1577 | PtrInfo: MachinePointerInfo(SV)); |
1578 | } |
1579 | |
1580 | SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op, |
1581 | SelectionDAG &DAG) const { |
1582 | assert(Subtarget.is64Bit() && Subtarget.hasBasicF() && |
1583 | !Subtarget.hasBasicD() && "unexpected target features" ); |
1584 | |
1585 | SDLoc DL(Op); |
1586 | SDValue Op0 = Op.getOperand(i: 0); |
1587 | if (Op0->getOpcode() == ISD::AND) { |
1588 | auto *C = dyn_cast<ConstantSDNode>(Val: Op0.getOperand(i: 1)); |
1589 | if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF)) |
1590 | return Op; |
1591 | } |
1592 | |
1593 | if (Op0->getOpcode() == LoongArchISD::BSTRPICK && |
1594 | Op0.getConstantOperandVal(i: 1) < UINT64_C(0X1F) && |
1595 | Op0.getConstantOperandVal(i: 2) == UINT64_C(0)) |
1596 | return Op; |
1597 | |
1598 | if (Op0.getOpcode() == ISD::AssertZext && |
1599 | dyn_cast<VTSDNode>(Val: Op0.getOperand(i: 1))->getVT().bitsLT(VT: MVT::i32)) |
1600 | return Op; |
1601 | |
1602 | EVT OpVT = Op0.getValueType(); |
1603 | EVT RetVT = Op.getValueType(); |
1604 | RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT); |
1605 | MakeLibCallOptions CallOptions; |
1606 | CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT, Value: true); |
1607 | SDValue Chain = SDValue(); |
1608 | SDValue Result; |
1609 | std::tie(args&: Result, args&: Chain) = |
1610 | makeLibCall(DAG, LC, RetVT: Op.getValueType(), Ops: Op0, CallOptions, dl: DL, Chain); |
1611 | return Result; |
1612 | } |
1613 | |
1614 | SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op, |
1615 | SelectionDAG &DAG) const { |
1616 | assert(Subtarget.is64Bit() && Subtarget.hasBasicF() && |
1617 | !Subtarget.hasBasicD() && "unexpected target features" ); |
1618 | |
1619 | SDLoc DL(Op); |
1620 | SDValue Op0 = Op.getOperand(i: 0); |
1621 | |
1622 | if ((Op0.getOpcode() == ISD::AssertSext || |
1623 | Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) && |
1624 | dyn_cast<VTSDNode>(Val: Op0.getOperand(i: 1))->getVT().bitsLE(VT: MVT::i32)) |
1625 | return Op; |
1626 | |
1627 | EVT OpVT = Op0.getValueType(); |
1628 | EVT RetVT = Op.getValueType(); |
1629 | RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT); |
1630 | MakeLibCallOptions CallOptions; |
1631 | CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT, Value: true); |
1632 | SDValue Chain = SDValue(); |
1633 | SDValue Result; |
1634 | std::tie(args&: Result, args&: Chain) = |
1635 | makeLibCall(DAG, LC, RetVT: Op.getValueType(), Ops: Op0, CallOptions, dl: DL, Chain); |
1636 | return Result; |
1637 | } |
1638 | |
1639 | SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op, |
1640 | SelectionDAG &DAG) const { |
1641 | |
1642 | SDLoc DL(Op); |
1643 | SDValue Op0 = Op.getOperand(i: 0); |
1644 | |
1645 | if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 && |
1646 | Subtarget.is64Bit() && Subtarget.hasBasicF()) { |
1647 | SDValue NewOp0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op0); |
1648 | return DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64, DL, VT: MVT::f32, Operand: NewOp0); |
1649 | } |
1650 | return Op; |
1651 | } |
1652 | |
1653 | SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op, |
1654 | SelectionDAG &DAG) const { |
1655 | |
1656 | SDLoc DL(Op); |
1657 | |
1658 | if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() && |
1659 | !Subtarget.hasBasicD()) { |
1660 | SDValue Dst = |
1661 | DAG.getNode(Opcode: LoongArchISD::FTINT, DL, VT: MVT::f32, Operand: Op.getOperand(i: 0)); |
1662 | return DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Dst); |
1663 | } |
1664 | |
1665 | EVT FPTy = EVT::getFloatingPointVT(BitWidth: Op.getValueSizeInBits()); |
1666 | SDValue Trunc = DAG.getNode(Opcode: LoongArchISD::FTINT, DL, VT: FPTy, Operand: Op.getOperand(i: 0)); |
1667 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: Op.getValueType(), Operand: Trunc); |
1668 | } |
1669 | |
1670 | static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty, |
1671 | SelectionDAG &DAG, unsigned Flags) { |
1672 | return DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: Flags); |
1673 | } |
1674 | |
1675 | static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty, |
1676 | SelectionDAG &DAG, unsigned Flags) { |
1677 | return DAG.getTargetBlockAddress(BA: N->getBlockAddress(), VT: Ty, Offset: N->getOffset(), |
1678 | TargetFlags: Flags); |
1679 | } |
1680 | |
1681 | static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty, |
1682 | SelectionDAG &DAG, unsigned Flags) { |
1683 | return DAG.getTargetConstantPool(C: N->getConstVal(), VT: Ty, Align: N->getAlign(), |
1684 | Offset: N->getOffset(), TargetFlags: Flags); |
1685 | } |
1686 | |
1687 | static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty, |
1688 | SelectionDAG &DAG, unsigned Flags) { |
1689 | return DAG.getTargetJumpTable(JTI: N->getIndex(), VT: Ty, TargetFlags: Flags); |
1690 | } |
1691 | |
1692 | template <class NodeTy> |
1693 | SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, |
1694 | CodeModel::Model M, |
1695 | bool IsLocal) const { |
1696 | SDLoc DL(N); |
1697 | EVT Ty = getPointerTy(DL: DAG.getDataLayout()); |
1698 | SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); |
1699 | SDValue Load; |
1700 | |
1701 | switch (M) { |
1702 | default: |
1703 | report_fatal_error(reason: "Unsupported code model" ); |
1704 | |
1705 | case CodeModel::Large: { |
1706 | assert(Subtarget.is64Bit() && "Large code model requires LA64" ); |
1707 | |
1708 | // This is not actually used, but is necessary for successfully matching |
1709 | // the PseudoLA_*_LARGE nodes. |
1710 | SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty); |
1711 | if (IsLocal) { |
1712 | // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that |
1713 | // eventually becomes the desired 5-insn code sequence. |
1714 | Load = SDValue(DAG.getMachineNode(Opcode: LoongArch::PseudoLA_PCREL_LARGE, dl: DL, VT: Ty, |
1715 | Op1: Tmp, Op2: Addr), |
1716 | 0); |
1717 | } else { |
1718 | // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that |
1719 | // eventually becomes the desired 5-insn code sequence. |
1720 | Load = SDValue( |
1721 | DAG.getMachineNode(Opcode: LoongArch::PseudoLA_GOT_LARGE, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), |
1722 | 0); |
1723 | } |
1724 | break; |
1725 | } |
1726 | |
1727 | case CodeModel::Small: |
1728 | case CodeModel::Medium: |
1729 | if (IsLocal) { |
1730 | // This generates the pattern (PseudoLA_PCREL sym), which expands to |
1731 | // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)). |
1732 | Load = SDValue( |
1733 | DAG.getMachineNode(Opcode: LoongArch::PseudoLA_PCREL, dl: DL, VT: Ty, Op1: Addr), 0); |
1734 | } else { |
1735 | // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d |
1736 | // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)). |
1737 | Load = |
1738 | SDValue(DAG.getMachineNode(Opcode: LoongArch::PseudoLA_GOT, dl: DL, VT: Ty, Op1: Addr), 0); |
1739 | } |
1740 | } |
1741 | |
1742 | if (!IsLocal) { |
1743 | // Mark the load instruction as invariant to enable hoisting in MachineLICM. |
1744 | MachineFunction &MF = DAG.getMachineFunction(); |
1745 | MachineMemOperand *MemOp = MF.getMachineMemOperand( |
1746 | PtrInfo: MachinePointerInfo::getGOT(MF), |
1747 | f: MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | |
1748 | MachineMemOperand::MOInvariant, |
1749 | MemTy: LLT(Ty.getSimpleVT()), base_alignment: Align(Ty.getFixedSizeInBits() / 8)); |
1750 | DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Load.getNode()), NewMemRefs: {MemOp}); |
1751 | } |
1752 | |
1753 | return Load; |
1754 | } |
1755 | |
1756 | SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op, |
1757 | SelectionDAG &DAG) const { |
1758 | return getAddr(N: cast<BlockAddressSDNode>(Val&: Op), DAG, |
1759 | M: DAG.getTarget().getCodeModel()); |
1760 | } |
1761 | |
1762 | SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op, |
1763 | SelectionDAG &DAG) const { |
1764 | return getAddr(N: cast<JumpTableSDNode>(Val&: Op), DAG, |
1765 | M: DAG.getTarget().getCodeModel()); |
1766 | } |
1767 | |
1768 | SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op, |
1769 | SelectionDAG &DAG) const { |
1770 | return getAddr(N: cast<ConstantPoolSDNode>(Val&: Op), DAG, |
1771 | M: DAG.getTarget().getCodeModel()); |
1772 | } |
1773 | |
1774 | SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op, |
1775 | SelectionDAG &DAG) const { |
1776 | GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Val&: Op); |
1777 | assert(N->getOffset() == 0 && "unexpected offset in global node" ); |
1778 | auto CM = DAG.getTarget().getCodeModel(); |
1779 | const GlobalValue *GV = N->getGlobal(); |
1780 | |
1781 | if (GV->isDSOLocal() && isa<GlobalVariable>(Val: GV)) { |
1782 | if (auto GCM = dyn_cast<GlobalVariable>(Val: GV)->getCodeModel()) |
1783 | CM = *GCM; |
1784 | } |
1785 | |
1786 | return getAddr(N, DAG, M: CM, IsLocal: GV->isDSOLocal()); |
1787 | } |
1788 | |
1789 | SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, |
1790 | SelectionDAG &DAG, |
1791 | unsigned Opc, bool UseGOT, |
1792 | bool Large) const { |
1793 | SDLoc DL(N); |
1794 | EVT Ty = getPointerTy(DL: DAG.getDataLayout()); |
1795 | MVT GRLenVT = Subtarget.getGRLenVT(); |
1796 | |
1797 | // This is not actually used, but is necessary for successfully matching the |
1798 | // PseudoLA_*_LARGE nodes. |
1799 | SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty); |
1800 | SDValue Addr = DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: 0); |
1801 | SDValue Offset = Large |
1802 | ? SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), 0) |
1803 | : SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Addr), 0); |
1804 | if (UseGOT) { |
1805 | // Mark the load instruction as invariant to enable hoisting in MachineLICM. |
1806 | MachineFunction &MF = DAG.getMachineFunction(); |
1807 | MachineMemOperand *MemOp = MF.getMachineMemOperand( |
1808 | PtrInfo: MachinePointerInfo::getGOT(MF), |
1809 | f: MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | |
1810 | MachineMemOperand::MOInvariant, |
1811 | MemTy: LLT(Ty.getSimpleVT()), base_alignment: Align(Ty.getFixedSizeInBits() / 8)); |
1812 | DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Offset.getNode()), NewMemRefs: {MemOp}); |
1813 | } |
1814 | |
1815 | // Add the thread pointer. |
1816 | return DAG.getNode(Opcode: ISD::ADD, DL, VT: Ty, N1: Offset, |
1817 | N2: DAG.getRegister(Reg: LoongArch::R2, VT: GRLenVT)); |
1818 | } |
1819 | |
1820 | SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, |
1821 | SelectionDAG &DAG, |
1822 | unsigned Opc, |
1823 | bool Large) const { |
1824 | SDLoc DL(N); |
1825 | EVT Ty = getPointerTy(DL: DAG.getDataLayout()); |
1826 | IntegerType *CallTy = Type::getIntNTy(C&: *DAG.getContext(), N: Ty.getSizeInBits()); |
1827 | |
1828 | // This is not actually used, but is necessary for successfully matching the |
1829 | // PseudoLA_*_LARGE nodes. |
1830 | SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty); |
1831 | |
1832 | // Use a PC-relative addressing mode to access the dynamic GOT address. |
1833 | SDValue Addr = DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: 0); |
1834 | SDValue Load = Large ? SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), 0) |
1835 | : SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Addr), 0); |
1836 | |
1837 | // Prepare argument list to generate call. |
1838 | ArgListTy Args; |
1839 | ArgListEntry Entry; |
1840 | Entry.Node = Load; |
1841 | Entry.Ty = CallTy; |
1842 | Args.push_back(x: Entry); |
1843 | |
1844 | // Setup call to __tls_get_addr. |
1845 | TargetLowering::CallLoweringInfo CLI(DAG); |
1846 | CLI.setDebugLoc(DL) |
1847 | .setChain(DAG.getEntryNode()) |
1848 | .setLibCallee(CC: CallingConv::C, ResultType: CallTy, |
1849 | Target: DAG.getExternalSymbol(Sym: "__tls_get_addr" , VT: Ty), |
1850 | ArgsList: std::move(Args)); |
1851 | |
1852 | return LowerCallTo(CLI).first; |
1853 | } |
1854 | |
1855 | SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N, |
1856 | SelectionDAG &DAG, unsigned Opc, |
1857 | bool Large) const { |
1858 | SDLoc DL(N); |
1859 | EVT Ty = getPointerTy(DL: DAG.getDataLayout()); |
1860 | const GlobalValue *GV = N->getGlobal(); |
1861 | |
1862 | // This is not actually used, but is necessary for successfully matching the |
1863 | // PseudoLA_*_LARGE nodes. |
1864 | SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty); |
1865 | |
1866 | // Use a PC-relative addressing mode to access the global dynamic GOT address. |
1867 | // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym). |
1868 | SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: 0); |
1869 | return Large ? SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), 0) |
1870 | : SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Addr), 0); |
1871 | } |
1872 | |
1873 | SDValue |
1874 | LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op, |
1875 | SelectionDAG &DAG) const { |
1876 | if (DAG.getMachineFunction().getFunction().getCallingConv() == |
1877 | CallingConv::GHC) |
1878 | report_fatal_error(reason: "In GHC calling convention TLS is not supported" ); |
1879 | |
1880 | bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large; |
1881 | assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64" ); |
1882 | |
1883 | GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Val&: Op); |
1884 | assert(N->getOffset() == 0 && "unexpected offset in global node" ); |
1885 | |
1886 | if (DAG.getTarget().useEmulatedTLS()) |
1887 | report_fatal_error(reason: "the emulated TLS is prohibited" , |
1888 | /*GenCrashDiag=*/gen_crash_diag: false); |
1889 | |
1890 | bool IsDesc = DAG.getTarget().useTLSDESC(); |
1891 | |
1892 | switch (getTargetMachine().getTLSModel(GV: N->getGlobal())) { |
1893 | case TLSModel::GeneralDynamic: |
1894 | // In this model, application code calls the dynamic linker function |
1895 | // __tls_get_addr to locate TLS offsets into the dynamic thread vector at |
1896 | // runtime. |
1897 | if (!IsDesc) |
1898 | return getDynamicTLSAddr(N, DAG, |
1899 | Opc: Large ? LoongArch::PseudoLA_TLS_GD_LARGE |
1900 | : LoongArch::PseudoLA_TLS_GD, |
1901 | Large); |
1902 | break; |
1903 | case TLSModel::LocalDynamic: |
1904 | // Same as GeneralDynamic, except for assembly modifiers and relocation |
1905 | // records. |
1906 | if (!IsDesc) |
1907 | return getDynamicTLSAddr(N, DAG, |
1908 | Opc: Large ? LoongArch::PseudoLA_TLS_LD_LARGE |
1909 | : LoongArch::PseudoLA_TLS_LD, |
1910 | Large); |
1911 | break; |
1912 | case TLSModel::InitialExec: |
1913 | // This model uses the GOT to resolve TLS offsets. |
1914 | return getStaticTLSAddr(N, DAG, |
1915 | Opc: Large ? LoongArch::PseudoLA_TLS_IE_LARGE |
1916 | : LoongArch::PseudoLA_TLS_IE, |
1917 | /*UseGOT=*/true, Large); |
1918 | case TLSModel::LocalExec: |
1919 | // This model is used when static linking as the TLS offsets are resolved |
1920 | // during program linking. |
1921 | // |
1922 | // This node doesn't need an extra argument for the large code model. |
1923 | return getStaticTLSAddr(N, DAG, Opc: LoongArch::PseudoLA_TLS_LE, |
1924 | /*UseGOT=*/false); |
1925 | } |
1926 | |
1927 | return getTLSDescAddr(N, DAG, |
1928 | Opc: Large ? LoongArch::PseudoLA_TLS_DESC_PC_LARGE |
1929 | : LoongArch::PseudoLA_TLS_DESC_PC, |
1930 | Large); |
1931 | } |
1932 | |
1933 | template <unsigned N> |
1934 | static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, |
1935 | SelectionDAG &DAG, bool IsSigned = false) { |
1936 | auto *CImm = cast<ConstantSDNode>(Val: Op->getOperand(Num: ImmOp)); |
1937 | // Check the ImmArg. |
1938 | if ((IsSigned && !isInt<N>(CImm->getSExtValue())) || |
1939 | (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) { |
1940 | DAG.getContext()->emitError(ErrorStr: Op->getOperationName(G: 0) + |
1941 | ": argument out of range." ); |
1942 | return DAG.getNode(Opcode: ISD::UNDEF, DL: SDLoc(Op), VT: Op.getValueType()); |
1943 | } |
1944 | return SDValue(); |
1945 | } |
1946 | |
1947 | SDValue |
1948 | LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, |
1949 | SelectionDAG &DAG) const { |
1950 | SDLoc DL(Op); |
1951 | switch (Op.getConstantOperandVal(i: 0)) { |
1952 | default: |
1953 | return SDValue(); // Don't custom lower most intrinsics. |
1954 | case Intrinsic::thread_pointer: { |
1955 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
1956 | return DAG.getRegister(Reg: LoongArch::R2, VT: PtrVT); |
1957 | } |
1958 | case Intrinsic::loongarch_lsx_vpickve2gr_d: |
1959 | case Intrinsic::loongarch_lsx_vpickve2gr_du: |
1960 | case Intrinsic::loongarch_lsx_vreplvei_d: |
1961 | case Intrinsic::loongarch_lasx_xvrepl128vei_d: |
1962 | return checkIntrinsicImmArg<1>(Op, ImmOp: 2, DAG); |
1963 | case Intrinsic::loongarch_lsx_vreplvei_w: |
1964 | case Intrinsic::loongarch_lasx_xvrepl128vei_w: |
1965 | case Intrinsic::loongarch_lasx_xvpickve2gr_d: |
1966 | case Intrinsic::loongarch_lasx_xvpickve2gr_du: |
1967 | case Intrinsic::loongarch_lasx_xvpickve_d: |
1968 | case Intrinsic::loongarch_lasx_xvpickve_d_f: |
1969 | return checkIntrinsicImmArg<2>(Op, ImmOp: 2, DAG); |
1970 | case Intrinsic::loongarch_lasx_xvinsve0_d: |
1971 | return checkIntrinsicImmArg<2>(Op, ImmOp: 3, DAG); |
1972 | case Intrinsic::loongarch_lsx_vsat_b: |
1973 | case Intrinsic::loongarch_lsx_vsat_bu: |
1974 | case Intrinsic::loongarch_lsx_vrotri_b: |
1975 | case Intrinsic::loongarch_lsx_vsllwil_h_b: |
1976 | case Intrinsic::loongarch_lsx_vsllwil_hu_bu: |
1977 | case Intrinsic::loongarch_lsx_vsrlri_b: |
1978 | case Intrinsic::loongarch_lsx_vsrari_b: |
1979 | case Intrinsic::loongarch_lsx_vreplvei_h: |
1980 | case Intrinsic::loongarch_lasx_xvsat_b: |
1981 | case Intrinsic::loongarch_lasx_xvsat_bu: |
1982 | case Intrinsic::loongarch_lasx_xvrotri_b: |
1983 | case Intrinsic::loongarch_lasx_xvsllwil_h_b: |
1984 | case Intrinsic::loongarch_lasx_xvsllwil_hu_bu: |
1985 | case Intrinsic::loongarch_lasx_xvsrlri_b: |
1986 | case Intrinsic::loongarch_lasx_xvsrari_b: |
1987 | case Intrinsic::loongarch_lasx_xvrepl128vei_h: |
1988 | case Intrinsic::loongarch_lasx_xvpickve_w: |
1989 | case Intrinsic::loongarch_lasx_xvpickve_w_f: |
1990 | return checkIntrinsicImmArg<3>(Op, ImmOp: 2, DAG); |
1991 | case Intrinsic::loongarch_lasx_xvinsve0_w: |
1992 | return checkIntrinsicImmArg<3>(Op, ImmOp: 3, DAG); |
1993 | case Intrinsic::loongarch_lsx_vsat_h: |
1994 | case Intrinsic::loongarch_lsx_vsat_hu: |
1995 | case Intrinsic::loongarch_lsx_vrotri_h: |
1996 | case Intrinsic::loongarch_lsx_vsllwil_w_h: |
1997 | case Intrinsic::loongarch_lsx_vsllwil_wu_hu: |
1998 | case Intrinsic::loongarch_lsx_vsrlri_h: |
1999 | case Intrinsic::loongarch_lsx_vsrari_h: |
2000 | case Intrinsic::loongarch_lsx_vreplvei_b: |
2001 | case Intrinsic::loongarch_lasx_xvsat_h: |
2002 | case Intrinsic::loongarch_lasx_xvsat_hu: |
2003 | case Intrinsic::loongarch_lasx_xvrotri_h: |
2004 | case Intrinsic::loongarch_lasx_xvsllwil_w_h: |
2005 | case Intrinsic::loongarch_lasx_xvsllwil_wu_hu: |
2006 | case Intrinsic::loongarch_lasx_xvsrlri_h: |
2007 | case Intrinsic::loongarch_lasx_xvsrari_h: |
2008 | case Intrinsic::loongarch_lasx_xvrepl128vei_b: |
2009 | return checkIntrinsicImmArg<4>(Op, ImmOp: 2, DAG); |
2010 | case Intrinsic::loongarch_lsx_vsrlni_b_h: |
2011 | case Intrinsic::loongarch_lsx_vsrani_b_h: |
2012 | case Intrinsic::loongarch_lsx_vsrlrni_b_h: |
2013 | case Intrinsic::loongarch_lsx_vsrarni_b_h: |
2014 | case Intrinsic::loongarch_lsx_vssrlni_b_h: |
2015 | case Intrinsic::loongarch_lsx_vssrani_b_h: |
2016 | case Intrinsic::loongarch_lsx_vssrlni_bu_h: |
2017 | case Intrinsic::loongarch_lsx_vssrani_bu_h: |
2018 | case Intrinsic::loongarch_lsx_vssrlrni_b_h: |
2019 | case Intrinsic::loongarch_lsx_vssrarni_b_h: |
2020 | case Intrinsic::loongarch_lsx_vssrlrni_bu_h: |
2021 | case Intrinsic::loongarch_lsx_vssrarni_bu_h: |
2022 | case Intrinsic::loongarch_lasx_xvsrlni_b_h: |
2023 | case Intrinsic::loongarch_lasx_xvsrani_b_h: |
2024 | case Intrinsic::loongarch_lasx_xvsrlrni_b_h: |
2025 | case Intrinsic::loongarch_lasx_xvsrarni_b_h: |
2026 | case Intrinsic::loongarch_lasx_xvssrlni_b_h: |
2027 | case Intrinsic::loongarch_lasx_xvssrani_b_h: |
2028 | case Intrinsic::loongarch_lasx_xvssrlni_bu_h: |
2029 | case Intrinsic::loongarch_lasx_xvssrani_bu_h: |
2030 | case Intrinsic::loongarch_lasx_xvssrlrni_b_h: |
2031 | case Intrinsic::loongarch_lasx_xvssrarni_b_h: |
2032 | case Intrinsic::loongarch_lasx_xvssrlrni_bu_h: |
2033 | case Intrinsic::loongarch_lasx_xvssrarni_bu_h: |
2034 | return checkIntrinsicImmArg<4>(Op, ImmOp: 3, DAG); |
2035 | case Intrinsic::loongarch_lsx_vsat_w: |
2036 | case Intrinsic::loongarch_lsx_vsat_wu: |
2037 | case Intrinsic::loongarch_lsx_vrotri_w: |
2038 | case Intrinsic::loongarch_lsx_vsllwil_d_w: |
2039 | case Intrinsic::loongarch_lsx_vsllwil_du_wu: |
2040 | case Intrinsic::loongarch_lsx_vsrlri_w: |
2041 | case Intrinsic::loongarch_lsx_vsrari_w: |
2042 | case Intrinsic::loongarch_lsx_vslei_bu: |
2043 | case Intrinsic::loongarch_lsx_vslei_hu: |
2044 | case Intrinsic::loongarch_lsx_vslei_wu: |
2045 | case Intrinsic::loongarch_lsx_vslei_du: |
2046 | case Intrinsic::loongarch_lsx_vslti_bu: |
2047 | case Intrinsic::loongarch_lsx_vslti_hu: |
2048 | case Intrinsic::loongarch_lsx_vslti_wu: |
2049 | case Intrinsic::loongarch_lsx_vslti_du: |
2050 | case Intrinsic::loongarch_lsx_vbsll_v: |
2051 | case Intrinsic::loongarch_lsx_vbsrl_v: |
2052 | case Intrinsic::loongarch_lasx_xvsat_w: |
2053 | case Intrinsic::loongarch_lasx_xvsat_wu: |
2054 | case Intrinsic::loongarch_lasx_xvrotri_w: |
2055 | case Intrinsic::loongarch_lasx_xvsllwil_d_w: |
2056 | case Intrinsic::loongarch_lasx_xvsllwil_du_wu: |
2057 | case Intrinsic::loongarch_lasx_xvsrlri_w: |
2058 | case Intrinsic::loongarch_lasx_xvsrari_w: |
2059 | case Intrinsic::loongarch_lasx_xvslei_bu: |
2060 | case Intrinsic::loongarch_lasx_xvslei_hu: |
2061 | case Intrinsic::loongarch_lasx_xvslei_wu: |
2062 | case Intrinsic::loongarch_lasx_xvslei_du: |
2063 | case Intrinsic::loongarch_lasx_xvslti_bu: |
2064 | case Intrinsic::loongarch_lasx_xvslti_hu: |
2065 | case Intrinsic::loongarch_lasx_xvslti_wu: |
2066 | case Intrinsic::loongarch_lasx_xvslti_du: |
2067 | case Intrinsic::loongarch_lasx_xvbsll_v: |
2068 | case Intrinsic::loongarch_lasx_xvbsrl_v: |
2069 | return checkIntrinsicImmArg<5>(Op, ImmOp: 2, DAG); |
2070 | case Intrinsic::loongarch_lsx_vseqi_b: |
2071 | case Intrinsic::loongarch_lsx_vseqi_h: |
2072 | case Intrinsic::loongarch_lsx_vseqi_w: |
2073 | case Intrinsic::loongarch_lsx_vseqi_d: |
2074 | case Intrinsic::loongarch_lsx_vslei_b: |
2075 | case Intrinsic::loongarch_lsx_vslei_h: |
2076 | case Intrinsic::loongarch_lsx_vslei_w: |
2077 | case Intrinsic::loongarch_lsx_vslei_d: |
2078 | case Intrinsic::loongarch_lsx_vslti_b: |
2079 | case Intrinsic::loongarch_lsx_vslti_h: |
2080 | case Intrinsic::loongarch_lsx_vslti_w: |
2081 | case Intrinsic::loongarch_lsx_vslti_d: |
2082 | case Intrinsic::loongarch_lasx_xvseqi_b: |
2083 | case Intrinsic::loongarch_lasx_xvseqi_h: |
2084 | case Intrinsic::loongarch_lasx_xvseqi_w: |
2085 | case Intrinsic::loongarch_lasx_xvseqi_d: |
2086 | case Intrinsic::loongarch_lasx_xvslei_b: |
2087 | case Intrinsic::loongarch_lasx_xvslei_h: |
2088 | case Intrinsic::loongarch_lasx_xvslei_w: |
2089 | case Intrinsic::loongarch_lasx_xvslei_d: |
2090 | case Intrinsic::loongarch_lasx_xvslti_b: |
2091 | case Intrinsic::loongarch_lasx_xvslti_h: |
2092 | case Intrinsic::loongarch_lasx_xvslti_w: |
2093 | case Intrinsic::loongarch_lasx_xvslti_d: |
2094 | return checkIntrinsicImmArg<5>(Op, ImmOp: 2, DAG, /*IsSigned=*/true); |
2095 | case Intrinsic::loongarch_lsx_vsrlni_h_w: |
2096 | case Intrinsic::loongarch_lsx_vsrani_h_w: |
2097 | case Intrinsic::loongarch_lsx_vsrlrni_h_w: |
2098 | case Intrinsic::loongarch_lsx_vsrarni_h_w: |
2099 | case Intrinsic::loongarch_lsx_vssrlni_h_w: |
2100 | case Intrinsic::loongarch_lsx_vssrani_h_w: |
2101 | case Intrinsic::loongarch_lsx_vssrlni_hu_w: |
2102 | case Intrinsic::loongarch_lsx_vssrani_hu_w: |
2103 | case Intrinsic::loongarch_lsx_vssrlrni_h_w: |
2104 | case Intrinsic::loongarch_lsx_vssrarni_h_w: |
2105 | case Intrinsic::loongarch_lsx_vssrlrni_hu_w: |
2106 | case Intrinsic::loongarch_lsx_vssrarni_hu_w: |
2107 | case Intrinsic::loongarch_lsx_vfrstpi_b: |
2108 | case Intrinsic::loongarch_lsx_vfrstpi_h: |
2109 | case Intrinsic::loongarch_lasx_xvsrlni_h_w: |
2110 | case Intrinsic::loongarch_lasx_xvsrani_h_w: |
2111 | case Intrinsic::loongarch_lasx_xvsrlrni_h_w: |
2112 | case Intrinsic::loongarch_lasx_xvsrarni_h_w: |
2113 | case Intrinsic::loongarch_lasx_xvssrlni_h_w: |
2114 | case Intrinsic::loongarch_lasx_xvssrani_h_w: |
2115 | case Intrinsic::loongarch_lasx_xvssrlni_hu_w: |
2116 | case Intrinsic::loongarch_lasx_xvssrani_hu_w: |
2117 | case Intrinsic::loongarch_lasx_xvssrlrni_h_w: |
2118 | case Intrinsic::loongarch_lasx_xvssrarni_h_w: |
2119 | case Intrinsic::loongarch_lasx_xvssrlrni_hu_w: |
2120 | case Intrinsic::loongarch_lasx_xvssrarni_hu_w: |
2121 | case Intrinsic::loongarch_lasx_xvfrstpi_b: |
2122 | case Intrinsic::loongarch_lasx_xvfrstpi_h: |
2123 | return checkIntrinsicImmArg<5>(Op, ImmOp: 3, DAG); |
2124 | case Intrinsic::loongarch_lsx_vsat_d: |
2125 | case Intrinsic::loongarch_lsx_vsat_du: |
2126 | case Intrinsic::loongarch_lsx_vrotri_d: |
2127 | case Intrinsic::loongarch_lsx_vsrlri_d: |
2128 | case Intrinsic::loongarch_lsx_vsrari_d: |
2129 | case Intrinsic::loongarch_lasx_xvsat_d: |
2130 | case Intrinsic::loongarch_lasx_xvsat_du: |
2131 | case Intrinsic::loongarch_lasx_xvrotri_d: |
2132 | case Intrinsic::loongarch_lasx_xvsrlri_d: |
2133 | case Intrinsic::loongarch_lasx_xvsrari_d: |
2134 | return checkIntrinsicImmArg<6>(Op, ImmOp: 2, DAG); |
2135 | case Intrinsic::loongarch_lsx_vsrlni_w_d: |
2136 | case Intrinsic::loongarch_lsx_vsrani_w_d: |
2137 | case Intrinsic::loongarch_lsx_vsrlrni_w_d: |
2138 | case Intrinsic::loongarch_lsx_vsrarni_w_d: |
2139 | case Intrinsic::loongarch_lsx_vssrlni_w_d: |
2140 | case Intrinsic::loongarch_lsx_vssrani_w_d: |
2141 | case Intrinsic::loongarch_lsx_vssrlni_wu_d: |
2142 | case Intrinsic::loongarch_lsx_vssrani_wu_d: |
2143 | case Intrinsic::loongarch_lsx_vssrlrni_w_d: |
2144 | case Intrinsic::loongarch_lsx_vssrarni_w_d: |
2145 | case Intrinsic::loongarch_lsx_vssrlrni_wu_d: |
2146 | case Intrinsic::loongarch_lsx_vssrarni_wu_d: |
2147 | case Intrinsic::loongarch_lasx_xvsrlni_w_d: |
2148 | case Intrinsic::loongarch_lasx_xvsrani_w_d: |
2149 | case Intrinsic::loongarch_lasx_xvsrlrni_w_d: |
2150 | case Intrinsic::loongarch_lasx_xvsrarni_w_d: |
2151 | case Intrinsic::loongarch_lasx_xvssrlni_w_d: |
2152 | case Intrinsic::loongarch_lasx_xvssrani_w_d: |
2153 | case Intrinsic::loongarch_lasx_xvssrlni_wu_d: |
2154 | case Intrinsic::loongarch_lasx_xvssrani_wu_d: |
2155 | case Intrinsic::loongarch_lasx_xvssrlrni_w_d: |
2156 | case Intrinsic::loongarch_lasx_xvssrarni_w_d: |
2157 | case Intrinsic::loongarch_lasx_xvssrlrni_wu_d: |
2158 | case Intrinsic::loongarch_lasx_xvssrarni_wu_d: |
2159 | return checkIntrinsicImmArg<6>(Op, ImmOp: 3, DAG); |
2160 | case Intrinsic::loongarch_lsx_vsrlni_d_q: |
2161 | case Intrinsic::loongarch_lsx_vsrani_d_q: |
2162 | case Intrinsic::loongarch_lsx_vsrlrni_d_q: |
2163 | case Intrinsic::loongarch_lsx_vsrarni_d_q: |
2164 | case Intrinsic::loongarch_lsx_vssrlni_d_q: |
2165 | case Intrinsic::loongarch_lsx_vssrani_d_q: |
2166 | case Intrinsic::loongarch_lsx_vssrlni_du_q: |
2167 | case Intrinsic::loongarch_lsx_vssrani_du_q: |
2168 | case Intrinsic::loongarch_lsx_vssrlrni_d_q: |
2169 | case Intrinsic::loongarch_lsx_vssrarni_d_q: |
2170 | case Intrinsic::loongarch_lsx_vssrlrni_du_q: |
2171 | case Intrinsic::loongarch_lsx_vssrarni_du_q: |
2172 | case Intrinsic::loongarch_lasx_xvsrlni_d_q: |
2173 | case Intrinsic::loongarch_lasx_xvsrani_d_q: |
2174 | case Intrinsic::loongarch_lasx_xvsrlrni_d_q: |
2175 | case Intrinsic::loongarch_lasx_xvsrarni_d_q: |
2176 | case Intrinsic::loongarch_lasx_xvssrlni_d_q: |
2177 | case Intrinsic::loongarch_lasx_xvssrani_d_q: |
2178 | case Intrinsic::loongarch_lasx_xvssrlni_du_q: |
2179 | case Intrinsic::loongarch_lasx_xvssrani_du_q: |
2180 | case Intrinsic::loongarch_lasx_xvssrlrni_d_q: |
2181 | case Intrinsic::loongarch_lasx_xvssrarni_d_q: |
2182 | case Intrinsic::loongarch_lasx_xvssrlrni_du_q: |
2183 | case Intrinsic::loongarch_lasx_xvssrarni_du_q: |
2184 | return checkIntrinsicImmArg<7>(Op, ImmOp: 3, DAG); |
2185 | case Intrinsic::loongarch_lsx_vnori_b: |
2186 | case Intrinsic::loongarch_lsx_vshuf4i_b: |
2187 | case Intrinsic::loongarch_lsx_vshuf4i_h: |
2188 | case Intrinsic::loongarch_lsx_vshuf4i_w: |
2189 | case Intrinsic::loongarch_lasx_xvnori_b: |
2190 | case Intrinsic::loongarch_lasx_xvshuf4i_b: |
2191 | case Intrinsic::loongarch_lasx_xvshuf4i_h: |
2192 | case Intrinsic::loongarch_lasx_xvshuf4i_w: |
2193 | case Intrinsic::loongarch_lasx_xvpermi_d: |
2194 | return checkIntrinsicImmArg<8>(Op, ImmOp: 2, DAG); |
2195 | case Intrinsic::loongarch_lsx_vshuf4i_d: |
2196 | case Intrinsic::loongarch_lsx_vpermi_w: |
2197 | case Intrinsic::loongarch_lsx_vbitseli_b: |
2198 | case Intrinsic::loongarch_lsx_vextrins_b: |
2199 | case Intrinsic::loongarch_lsx_vextrins_h: |
2200 | case Intrinsic::loongarch_lsx_vextrins_w: |
2201 | case Intrinsic::loongarch_lsx_vextrins_d: |
2202 | case Intrinsic::loongarch_lasx_xvshuf4i_d: |
2203 | case Intrinsic::loongarch_lasx_xvpermi_w: |
2204 | case Intrinsic::loongarch_lasx_xvpermi_q: |
2205 | case Intrinsic::loongarch_lasx_xvbitseli_b: |
2206 | case Intrinsic::loongarch_lasx_xvextrins_b: |
2207 | case Intrinsic::loongarch_lasx_xvextrins_h: |
2208 | case Intrinsic::loongarch_lasx_xvextrins_w: |
2209 | case Intrinsic::loongarch_lasx_xvextrins_d: |
2210 | return checkIntrinsicImmArg<8>(Op, ImmOp: 3, DAG); |
2211 | case Intrinsic::loongarch_lsx_vrepli_b: |
2212 | case Intrinsic::loongarch_lsx_vrepli_h: |
2213 | case Intrinsic::loongarch_lsx_vrepli_w: |
2214 | case Intrinsic::loongarch_lsx_vrepli_d: |
2215 | case Intrinsic::loongarch_lasx_xvrepli_b: |
2216 | case Intrinsic::loongarch_lasx_xvrepli_h: |
2217 | case Intrinsic::loongarch_lasx_xvrepli_w: |
2218 | case Intrinsic::loongarch_lasx_xvrepli_d: |
2219 | return checkIntrinsicImmArg<10>(Op, ImmOp: 1, DAG, /*IsSigned=*/true); |
2220 | case Intrinsic::loongarch_lsx_vldi: |
2221 | case Intrinsic::loongarch_lasx_xvldi: |
2222 | return checkIntrinsicImmArg<13>(Op, ImmOp: 1, DAG, /*IsSigned=*/true); |
2223 | } |
2224 | } |
2225 | |
2226 | // Helper function that emits error message for intrinsics with chain and return |
2227 | // merge values of a UNDEF and the chain. |
2228 | static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, |
2229 | StringRef ErrorMsg, |
2230 | SelectionDAG &DAG) { |
2231 | DAG.getContext()->emitError(ErrorStr: Op->getOperationName(G: 0) + ": " + ErrorMsg + "." ); |
2232 | return DAG.getMergeValues(Ops: {DAG.getUNDEF(VT: Op.getValueType()), Op.getOperand(i: 0)}, |
2233 | dl: SDLoc(Op)); |
2234 | } |
2235 | |
2236 | SDValue |
2237 | LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, |
2238 | SelectionDAG &DAG) const { |
2239 | SDLoc DL(Op); |
2240 | MVT GRLenVT = Subtarget.getGRLenVT(); |
2241 | EVT VT = Op.getValueType(); |
2242 | SDValue Chain = Op.getOperand(i: 0); |
2243 | const StringRef ErrorMsgOOR = "argument out of range" ; |
2244 | const StringRef ErrorMsgReqLA64 = "requires loongarch64" ; |
2245 | const StringRef ErrorMsgReqF = "requires basic 'f' target feature" ; |
2246 | |
2247 | switch (Op.getConstantOperandVal(i: 1)) { |
2248 | default: |
2249 | return Op; |
2250 | case Intrinsic::loongarch_crc_w_b_w: |
2251 | case Intrinsic::loongarch_crc_w_h_w: |
2252 | case Intrinsic::loongarch_crc_w_w_w: |
2253 | case Intrinsic::loongarch_crc_w_d_w: |
2254 | case Intrinsic::loongarch_crcc_w_b_w: |
2255 | case Intrinsic::loongarch_crcc_w_h_w: |
2256 | case Intrinsic::loongarch_crcc_w_w_w: |
2257 | case Intrinsic::loongarch_crcc_w_d_w: |
2258 | return emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG); |
2259 | case Intrinsic::loongarch_csrrd_w: |
2260 | case Intrinsic::loongarch_csrrd_d: { |
2261 | unsigned Imm = Op.getConstantOperandVal(i: 2); |
2262 | return !isUInt<14>(x: Imm) |
2263 | ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2264 | : DAG.getNode(Opcode: LoongArchISD::CSRRD, DL, ResultTys: {GRLenVT, MVT::Other}, |
2265 | Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
2266 | } |
2267 | case Intrinsic::loongarch_csrwr_w: |
2268 | case Intrinsic::loongarch_csrwr_d: { |
2269 | unsigned Imm = Op.getConstantOperandVal(i: 3); |
2270 | return !isUInt<14>(x: Imm) |
2271 | ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2272 | : DAG.getNode(Opcode: LoongArchISD::CSRWR, DL, ResultTys: {GRLenVT, MVT::Other}, |
2273 | Ops: {Chain, Op.getOperand(i: 2), |
2274 | DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
2275 | } |
2276 | case Intrinsic::loongarch_csrxchg_w: |
2277 | case Intrinsic::loongarch_csrxchg_d: { |
2278 | unsigned Imm = Op.getConstantOperandVal(i: 4); |
2279 | return !isUInt<14>(x: Imm) |
2280 | ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2281 | : DAG.getNode(Opcode: LoongArchISD::CSRXCHG, DL, ResultTys: {GRLenVT, MVT::Other}, |
2282 | Ops: {Chain, Op.getOperand(i: 2), Op.getOperand(i: 3), |
2283 | DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
2284 | } |
2285 | case Intrinsic::loongarch_iocsrrd_d: { |
2286 | return DAG.getNode( |
2287 | Opcode: LoongArchISD::IOCSRRD_D, DL, ResultTys: {GRLenVT, MVT::Other}, |
2288 | Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op.getOperand(i: 2))}); |
2289 | } |
2290 | #define IOCSRRD_CASE(NAME, NODE) \ |
2291 | case Intrinsic::loongarch_##NAME: { \ |
2292 | return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \ |
2293 | {Chain, Op.getOperand(2)}); \ |
2294 | } |
2295 | IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B); |
2296 | IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H); |
2297 | IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W); |
2298 | #undef IOCSRRD_CASE |
2299 | case Intrinsic::loongarch_cpucfg: { |
2300 | return DAG.getNode(Opcode: LoongArchISD::CPUCFG, DL, ResultTys: {GRLenVT, MVT::Other}, |
2301 | Ops: {Chain, Op.getOperand(i: 2)}); |
2302 | } |
2303 | case Intrinsic::loongarch_lddir_d: { |
2304 | unsigned Imm = Op.getConstantOperandVal(i: 3); |
2305 | return !isUInt<8>(x: Imm) |
2306 | ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2307 | : Op; |
2308 | } |
2309 | case Intrinsic::loongarch_movfcsr2gr: { |
2310 | if (!Subtarget.hasBasicF()) |
2311 | return emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgReqF, DAG); |
2312 | unsigned Imm = Op.getConstantOperandVal(i: 2); |
2313 | return !isUInt<2>(x: Imm) |
2314 | ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2315 | : DAG.getNode(Opcode: LoongArchISD::MOVFCSR2GR, DL, ResultTys: {VT, MVT::Other}, |
2316 | Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
2317 | } |
2318 | case Intrinsic::loongarch_lsx_vld: |
2319 | case Intrinsic::loongarch_lsx_vldrepl_b: |
2320 | case Intrinsic::loongarch_lasx_xvld: |
2321 | case Intrinsic::loongarch_lasx_xvldrepl_b: |
2322 | return !isInt<12>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue()) |
2323 | ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2324 | : SDValue(); |
2325 | case Intrinsic::loongarch_lsx_vldrepl_h: |
2326 | case Intrinsic::loongarch_lasx_xvldrepl_h: |
2327 | return !isShiftedInt<11, 1>( |
2328 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue()) |
2329 | ? emitIntrinsicWithChainErrorMessage( |
2330 | Op, ErrorMsg: "argument out of range or not a multiple of 2" , DAG) |
2331 | : SDValue(); |
2332 | case Intrinsic::loongarch_lsx_vldrepl_w: |
2333 | case Intrinsic::loongarch_lasx_xvldrepl_w: |
2334 | return !isShiftedInt<10, 2>( |
2335 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue()) |
2336 | ? emitIntrinsicWithChainErrorMessage( |
2337 | Op, ErrorMsg: "argument out of range or not a multiple of 4" , DAG) |
2338 | : SDValue(); |
2339 | case Intrinsic::loongarch_lsx_vldrepl_d: |
2340 | case Intrinsic::loongarch_lasx_xvldrepl_d: |
2341 | return !isShiftedInt<9, 3>( |
2342 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue()) |
2343 | ? emitIntrinsicWithChainErrorMessage( |
2344 | Op, ErrorMsg: "argument out of range or not a multiple of 8" , DAG) |
2345 | : SDValue(); |
2346 | } |
2347 | } |
2348 | |
2349 | // Helper function that emits error message for intrinsics with void return |
2350 | // value and return the chain. |
2351 | static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, |
2352 | SelectionDAG &DAG) { |
2353 | |
2354 | DAG.getContext()->emitError(ErrorStr: Op->getOperationName(G: 0) + ": " + ErrorMsg + "." ); |
2355 | return Op.getOperand(i: 0); |
2356 | } |
2357 | |
2358 | SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, |
2359 | SelectionDAG &DAG) const { |
2360 | SDLoc DL(Op); |
2361 | MVT GRLenVT = Subtarget.getGRLenVT(); |
2362 | SDValue Chain = Op.getOperand(i: 0); |
2363 | uint64_t IntrinsicEnum = Op.getConstantOperandVal(i: 1); |
2364 | SDValue Op2 = Op.getOperand(i: 2); |
2365 | const StringRef ErrorMsgOOR = "argument out of range" ; |
2366 | const StringRef ErrorMsgReqLA64 = "requires loongarch64" ; |
2367 | const StringRef ErrorMsgReqLA32 = "requires loongarch32" ; |
2368 | const StringRef ErrorMsgReqF = "requires basic 'f' target feature" ; |
2369 | |
2370 | switch (IntrinsicEnum) { |
2371 | default: |
2372 | // TODO: Add more Intrinsics. |
2373 | return SDValue(); |
2374 | case Intrinsic::loongarch_cacop_d: |
2375 | case Intrinsic::loongarch_cacop_w: { |
2376 | if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit()) |
2377 | return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG); |
2378 | if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit()) |
2379 | return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA32, DAG); |
2380 | // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12) |
2381 | unsigned Imm1 = Op2->getAsZExtVal(); |
2382 | int Imm2 = cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue(); |
2383 | if (!isUInt<5>(x: Imm1) || !isInt<12>(x: Imm2)) |
2384 | return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG); |
2385 | return Op; |
2386 | } |
2387 | case Intrinsic::loongarch_dbar: { |
2388 | unsigned Imm = Op2->getAsZExtVal(); |
2389 | return !isUInt<15>(x: Imm) |
2390 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2391 | : DAG.getNode(Opcode: LoongArchISD::DBAR, DL, VT: MVT::Other, N1: Chain, |
2392 | N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT)); |
2393 | } |
2394 | case Intrinsic::loongarch_ibar: { |
2395 | unsigned Imm = Op2->getAsZExtVal(); |
2396 | return !isUInt<15>(x: Imm) |
2397 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2398 | : DAG.getNode(Opcode: LoongArchISD::IBAR, DL, VT: MVT::Other, N1: Chain, |
2399 | N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT)); |
2400 | } |
2401 | case Intrinsic::loongarch_break: { |
2402 | unsigned Imm = Op2->getAsZExtVal(); |
2403 | return !isUInt<15>(x: Imm) |
2404 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2405 | : DAG.getNode(Opcode: LoongArchISD::BREAK, DL, VT: MVT::Other, N1: Chain, |
2406 | N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT)); |
2407 | } |
2408 | case Intrinsic::loongarch_movgr2fcsr: { |
2409 | if (!Subtarget.hasBasicF()) |
2410 | return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqF, DAG); |
2411 | unsigned Imm = Op2->getAsZExtVal(); |
2412 | return !isUInt<2>(x: Imm) |
2413 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2414 | : DAG.getNode(Opcode: LoongArchISD::MOVGR2FCSR, DL, VT: MVT::Other, N1: Chain, |
2415 | N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT), |
2416 | N3: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: GRLenVT, |
2417 | Operand: Op.getOperand(i: 3))); |
2418 | } |
2419 | case Intrinsic::loongarch_syscall: { |
2420 | unsigned Imm = Op2->getAsZExtVal(); |
2421 | return !isUInt<15>(x: Imm) |
2422 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2423 | : DAG.getNode(Opcode: LoongArchISD::SYSCALL, DL, VT: MVT::Other, N1: Chain, |
2424 | N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT)); |
2425 | } |
2426 | #define IOCSRWR_CASE(NAME, NODE) \ |
2427 | case Intrinsic::loongarch_##NAME: { \ |
2428 | SDValue Op3 = Op.getOperand(3); \ |
2429 | return Subtarget.is64Bit() \ |
2430 | ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \ |
2431 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \ |
2432 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \ |
2433 | : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \ |
2434 | Op3); \ |
2435 | } |
2436 | IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B); |
2437 | IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H); |
2438 | IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W); |
2439 | #undef IOCSRWR_CASE |
2440 | case Intrinsic::loongarch_iocsrwr_d: { |
2441 | return !Subtarget.is64Bit() |
2442 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG) |
2443 | : DAG.getNode(Opcode: LoongArchISD::IOCSRWR_D, DL, VT: MVT::Other, N1: Chain, |
2444 | N2: Op2, |
2445 | N3: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, |
2446 | Operand: Op.getOperand(i: 3))); |
2447 | } |
2448 | #define ASRT_LE_GT_CASE(NAME) \ |
2449 | case Intrinsic::loongarch_##NAME: { \ |
2450 | return !Subtarget.is64Bit() \ |
2451 | ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \ |
2452 | : Op; \ |
2453 | } |
2454 | ASRT_LE_GT_CASE(asrtle_d) |
2455 | ASRT_LE_GT_CASE(asrtgt_d) |
2456 | #undef ASRT_LE_GT_CASE |
2457 | case Intrinsic::loongarch_ldpte_d: { |
2458 | unsigned Imm = Op.getConstantOperandVal(i: 3); |
2459 | return !Subtarget.is64Bit() |
2460 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG) |
2461 | : !isUInt<8>(x: Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2462 | : Op; |
2463 | } |
2464 | case Intrinsic::loongarch_lsx_vst: |
2465 | case Intrinsic::loongarch_lasx_xvst: |
2466 | return !isInt<12>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) |
2467 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2468 | : SDValue(); |
2469 | case Intrinsic::loongarch_lasx_xvstelm_b: |
2470 | return (!isInt<8>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
2471 | !isUInt<5>(x: Op.getConstantOperandVal(i: 5))) |
2472 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2473 | : SDValue(); |
2474 | case Intrinsic::loongarch_lsx_vstelm_b: |
2475 | return (!isInt<8>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
2476 | !isUInt<4>(x: Op.getConstantOperandVal(i: 5))) |
2477 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2478 | : SDValue(); |
2479 | case Intrinsic::loongarch_lasx_xvstelm_h: |
2480 | return (!isShiftedInt<8, 1>( |
2481 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
2482 | !isUInt<4>(x: Op.getConstantOperandVal(i: 5))) |
2483 | ? emitIntrinsicErrorMessage( |
2484 | Op, ErrorMsg: "argument out of range or not a multiple of 2" , DAG) |
2485 | : SDValue(); |
2486 | case Intrinsic::loongarch_lsx_vstelm_h: |
2487 | return (!isShiftedInt<8, 1>( |
2488 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
2489 | !isUInt<3>(x: Op.getConstantOperandVal(i: 5))) |
2490 | ? emitIntrinsicErrorMessage( |
2491 | Op, ErrorMsg: "argument out of range or not a multiple of 2" , DAG) |
2492 | : SDValue(); |
2493 | case Intrinsic::loongarch_lasx_xvstelm_w: |
2494 | return (!isShiftedInt<8, 2>( |
2495 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
2496 | !isUInt<3>(x: Op.getConstantOperandVal(i: 5))) |
2497 | ? emitIntrinsicErrorMessage( |
2498 | Op, ErrorMsg: "argument out of range or not a multiple of 4" , DAG) |
2499 | : SDValue(); |
2500 | case Intrinsic::loongarch_lsx_vstelm_w: |
2501 | return (!isShiftedInt<8, 2>( |
2502 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
2503 | !isUInt<2>(x: Op.getConstantOperandVal(i: 5))) |
2504 | ? emitIntrinsicErrorMessage( |
2505 | Op, ErrorMsg: "argument out of range or not a multiple of 4" , DAG) |
2506 | : SDValue(); |
2507 | case Intrinsic::loongarch_lasx_xvstelm_d: |
2508 | return (!isShiftedInt<8, 3>( |
2509 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
2510 | !isUInt<2>(x: Op.getConstantOperandVal(i: 5))) |
2511 | ? emitIntrinsicErrorMessage( |
2512 | Op, ErrorMsg: "argument out of range or not a multiple of 8" , DAG) |
2513 | : SDValue(); |
2514 | case Intrinsic::loongarch_lsx_vstelm_d: |
2515 | return (!isShiftedInt<8, 3>( |
2516 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
2517 | !isUInt<1>(x: Op.getConstantOperandVal(i: 5))) |
2518 | ? emitIntrinsicErrorMessage( |
2519 | Op, ErrorMsg: "argument out of range or not a multiple of 8" , DAG) |
2520 | : SDValue(); |
2521 | } |
2522 | } |
2523 | |
2524 | SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op, |
2525 | SelectionDAG &DAG) const { |
2526 | SDLoc DL(Op); |
2527 | SDValue Lo = Op.getOperand(i: 0); |
2528 | SDValue Hi = Op.getOperand(i: 1); |
2529 | SDValue Shamt = Op.getOperand(i: 2); |
2530 | EVT VT = Lo.getValueType(); |
2531 | |
2532 | // if Shamt-GRLen < 0: // Shamt < GRLen |
2533 | // Lo = Lo << Shamt |
2534 | // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt)) |
2535 | // else: |
2536 | // Lo = 0 |
2537 | // Hi = Lo << (Shamt-GRLen) |
2538 | |
2539 | SDValue Zero = DAG.getConstant(Val: 0, DL, VT); |
2540 | SDValue One = DAG.getConstant(Val: 1, DL, VT); |
2541 | SDValue MinusGRLen = DAG.getConstant(Val: -(int)Subtarget.getGRLen(), DL, VT); |
2542 | SDValue GRLenMinus1 = DAG.getConstant(Val: Subtarget.getGRLen() - 1, DL, VT); |
2543 | SDValue ShamtMinusGRLen = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shamt, N2: MinusGRLen); |
2544 | SDValue GRLenMinus1Shamt = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Shamt, N2: GRLenMinus1); |
2545 | |
2546 | SDValue LoTrue = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Lo, N2: Shamt); |
2547 | SDValue ShiftRight1Lo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo, N2: One); |
2548 | SDValue ShiftRightLo = |
2549 | DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: ShiftRight1Lo, N2: GRLenMinus1Shamt); |
2550 | SDValue ShiftLeftHi = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi, N2: Shamt); |
2551 | SDValue HiTrue = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShiftLeftHi, N2: ShiftRightLo); |
2552 | SDValue HiFalse = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Lo, N2: ShamtMinusGRLen); |
2553 | |
2554 | SDValue CC = DAG.getSetCC(DL, VT, LHS: ShamtMinusGRLen, RHS: Zero, Cond: ISD::SETLT); |
2555 | |
2556 | Lo = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: LoTrue, N3: Zero); |
2557 | Hi = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: HiTrue, N3: HiFalse); |
2558 | |
2559 | SDValue Parts[2] = {Lo, Hi}; |
2560 | return DAG.getMergeValues(Ops: Parts, dl: DL); |
2561 | } |
2562 | |
2563 | SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op, |
2564 | SelectionDAG &DAG, |
2565 | bool IsSRA) const { |
2566 | SDLoc DL(Op); |
2567 | SDValue Lo = Op.getOperand(i: 0); |
2568 | SDValue Hi = Op.getOperand(i: 1); |
2569 | SDValue Shamt = Op.getOperand(i: 2); |
2570 | EVT VT = Lo.getValueType(); |
2571 | |
2572 | // SRA expansion: |
2573 | // if Shamt-GRLen < 0: // Shamt < GRLen |
2574 | // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1)) |
2575 | // Hi = Hi >>s Shamt |
2576 | // else: |
2577 | // Lo = Hi >>s (Shamt-GRLen); |
2578 | // Hi = Hi >>s (GRLen-1) |
2579 | // |
2580 | // SRL expansion: |
2581 | // if Shamt-GRLen < 0: // Shamt < GRLen |
2582 | // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1)) |
2583 | // Hi = Hi >>u Shamt |
2584 | // else: |
2585 | // Lo = Hi >>u (Shamt-GRLen); |
2586 | // Hi = 0; |
2587 | |
2588 | unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL; |
2589 | |
2590 | SDValue Zero = DAG.getConstant(Val: 0, DL, VT); |
2591 | SDValue One = DAG.getConstant(Val: 1, DL, VT); |
2592 | SDValue MinusGRLen = DAG.getConstant(Val: -(int)Subtarget.getGRLen(), DL, VT); |
2593 | SDValue GRLenMinus1 = DAG.getConstant(Val: Subtarget.getGRLen() - 1, DL, VT); |
2594 | SDValue ShamtMinusGRLen = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shamt, N2: MinusGRLen); |
2595 | SDValue GRLenMinus1Shamt = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Shamt, N2: GRLenMinus1); |
2596 | |
2597 | SDValue ShiftRightLo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo, N2: Shamt); |
2598 | SDValue ShiftLeftHi1 = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi, N2: One); |
2599 | SDValue ShiftLeftHi = |
2600 | DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: ShiftLeftHi1, N2: GRLenMinus1Shamt); |
2601 | SDValue LoTrue = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShiftRightLo, N2: ShiftLeftHi); |
2602 | SDValue HiTrue = DAG.getNode(Opcode: ShiftRightOp, DL, VT, N1: Hi, N2: Shamt); |
2603 | SDValue LoFalse = DAG.getNode(Opcode: ShiftRightOp, DL, VT, N1: Hi, N2: ShamtMinusGRLen); |
2604 | SDValue HiFalse = |
2605 | IsSRA ? DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: Hi, N2: GRLenMinus1) : Zero; |
2606 | |
2607 | SDValue CC = DAG.getSetCC(DL, VT, LHS: ShamtMinusGRLen, RHS: Zero, Cond: ISD::SETLT); |
2608 | |
2609 | Lo = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: LoTrue, N3: LoFalse); |
2610 | Hi = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: HiTrue, N3: HiFalse); |
2611 | |
2612 | SDValue Parts[2] = {Lo, Hi}; |
2613 | return DAG.getMergeValues(Ops: Parts, dl: DL); |
2614 | } |
2615 | |
2616 | // Returns the opcode of the target-specific SDNode that implements the 32-bit |
2617 | // form of the given Opcode. |
2618 | static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) { |
2619 | switch (Opcode) { |
2620 | default: |
2621 | llvm_unreachable("Unexpected opcode" ); |
2622 | case ISD::UDIV: |
2623 | return LoongArchISD::DIV_WU; |
2624 | case ISD::UREM: |
2625 | return LoongArchISD::MOD_WU; |
2626 | case ISD::SHL: |
2627 | return LoongArchISD::SLL_W; |
2628 | case ISD::SRA: |
2629 | return LoongArchISD::SRA_W; |
2630 | case ISD::SRL: |
2631 | return LoongArchISD::SRL_W; |
2632 | case ISD::ROTL: |
2633 | case ISD::ROTR: |
2634 | return LoongArchISD::ROTR_W; |
2635 | case ISD::CTTZ: |
2636 | return LoongArchISD::CTZ_W; |
2637 | case ISD::CTLZ: |
2638 | return LoongArchISD::CLZ_W; |
2639 | } |
2640 | } |
2641 | |
2642 | // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG |
2643 | // node. Because i8/i16/i32 isn't a legal type for LA64, these operations would |
2644 | // otherwise be promoted to i64, making it difficult to select the |
2645 | // SLL_W/.../*W later one because the fact the operation was originally of |
2646 | // type i8/i16/i32 is lost. |
2647 | static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, |
2648 | unsigned ExtOpc = ISD::ANY_EXTEND) { |
2649 | SDLoc DL(N); |
2650 | LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(Opcode: N->getOpcode()); |
2651 | SDValue NewOp0, NewRes; |
2652 | |
2653 | switch (NumOp) { |
2654 | default: |
2655 | llvm_unreachable("Unexpected NumOp" ); |
2656 | case 1: { |
2657 | NewOp0 = DAG.getNode(Opcode: ExtOpc, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0)); |
2658 | NewRes = DAG.getNode(Opcode: WOpcode, DL, VT: MVT::i64, Operand: NewOp0); |
2659 | break; |
2660 | } |
2661 | case 2: { |
2662 | NewOp0 = DAG.getNode(Opcode: ExtOpc, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0)); |
2663 | SDValue NewOp1 = DAG.getNode(Opcode: ExtOpc, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1)); |
2664 | if (N->getOpcode() == ISD::ROTL) { |
2665 | SDValue TmpOp = DAG.getConstant(Val: 32, DL, VT: MVT::i64); |
2666 | NewOp1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: MVT::i64, N1: TmpOp, N2: NewOp1); |
2667 | } |
2668 | NewRes = DAG.getNode(Opcode: WOpcode, DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1); |
2669 | break; |
2670 | } |
2671 | // TODO:Handle more NumOp. |
2672 | } |
2673 | |
2674 | // ReplaceNodeResults requires we maintain the same type for the return |
2675 | // value. |
2676 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: NewRes); |
2677 | } |
2678 | |
2679 | // Converts the given 32-bit operation to a i64 operation with signed extension |
2680 | // semantic to reduce the signed extension instructions. |
2681 | static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) { |
2682 | SDLoc DL(N); |
2683 | SDValue NewOp0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0)); |
2684 | SDValue NewOp1 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1)); |
2685 | SDValue NewWOp = DAG.getNode(Opcode: N->getOpcode(), DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1); |
2686 | SDValue NewRes = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: MVT::i64, N1: NewWOp, |
2687 | N2: DAG.getValueType(MVT::i32)); |
2688 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: NewRes); |
2689 | } |
2690 | |
2691 | // Helper function that emits error message for intrinsics with/without chain |
2692 | // and return a UNDEF or and the chain as the results. |
2693 | static void emitErrorAndReplaceIntrinsicResults( |
2694 | SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG, |
2695 | StringRef ErrorMsg, bool WithChain = true) { |
2696 | DAG.getContext()->emitError(ErrorStr: N->getOperationName(G: 0) + ": " + ErrorMsg + "." ); |
2697 | Results.push_back(Elt: DAG.getUNDEF(VT: N->getValueType(ResNo: 0))); |
2698 | if (!WithChain) |
2699 | return; |
2700 | Results.push_back(Elt: N->getOperand(Num: 0)); |
2701 | } |
2702 | |
2703 | template <unsigned N> |
2704 | static void |
2705 | replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results, |
2706 | SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, |
2707 | unsigned ResOp) { |
2708 | const StringRef ErrorMsgOOR = "argument out of range" ; |
2709 | unsigned Imm = Node->getConstantOperandVal(Num: 2); |
2710 | if (!isUInt<N>(Imm)) { |
2711 | emitErrorAndReplaceIntrinsicResults(N: Node, Results, DAG, ErrorMsg: ErrorMsgOOR, |
2712 | /*WithChain=*/false); |
2713 | return; |
2714 | } |
2715 | SDLoc DL(Node); |
2716 | SDValue Vec = Node->getOperand(Num: 1); |
2717 | |
2718 | SDValue PickElt = |
2719 | DAG.getNode(Opcode: ResOp, DL, VT: Subtarget.getGRLenVT(), N1: Vec, |
2720 | N2: DAG.getConstant(Val: Imm, DL, VT: Subtarget.getGRLenVT()), |
2721 | N3: DAG.getValueType(Vec.getValueType().getVectorElementType())); |
2722 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: Node->getValueType(ResNo: 0), |
2723 | Operand: PickElt.getValue(R: 0))); |
2724 | } |
2725 | |
2726 | static void replaceVecCondBranchResults(SDNode *N, |
2727 | SmallVectorImpl<SDValue> &Results, |
2728 | SelectionDAG &DAG, |
2729 | const LoongArchSubtarget &Subtarget, |
2730 | unsigned ResOp) { |
2731 | SDLoc DL(N); |
2732 | SDValue Vec = N->getOperand(Num: 1); |
2733 | |
2734 | SDValue CB = DAG.getNode(Opcode: ResOp, DL, VT: Subtarget.getGRLenVT(), Operand: Vec); |
2735 | Results.push_back( |
2736 | Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: CB.getValue(R: 0))); |
2737 | } |
2738 | |
2739 | static void |
2740 | replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results, |
2741 | SelectionDAG &DAG, |
2742 | const LoongArchSubtarget &Subtarget) { |
2743 | switch (N->getConstantOperandVal(Num: 0)) { |
2744 | default: |
2745 | llvm_unreachable("Unexpected Intrinsic." ); |
2746 | case Intrinsic::loongarch_lsx_vpickve2gr_b: |
2747 | replaceVPICKVE2GRResults<4>(Node: N, Results, DAG, Subtarget, |
2748 | ResOp: LoongArchISD::VPICK_SEXT_ELT); |
2749 | break; |
2750 | case Intrinsic::loongarch_lsx_vpickve2gr_h: |
2751 | case Intrinsic::loongarch_lasx_xvpickve2gr_w: |
2752 | replaceVPICKVE2GRResults<3>(Node: N, Results, DAG, Subtarget, |
2753 | ResOp: LoongArchISD::VPICK_SEXT_ELT); |
2754 | break; |
2755 | case Intrinsic::loongarch_lsx_vpickve2gr_w: |
2756 | replaceVPICKVE2GRResults<2>(Node: N, Results, DAG, Subtarget, |
2757 | ResOp: LoongArchISD::VPICK_SEXT_ELT); |
2758 | break; |
2759 | case Intrinsic::loongarch_lsx_vpickve2gr_bu: |
2760 | replaceVPICKVE2GRResults<4>(Node: N, Results, DAG, Subtarget, |
2761 | ResOp: LoongArchISD::VPICK_ZEXT_ELT); |
2762 | break; |
2763 | case Intrinsic::loongarch_lsx_vpickve2gr_hu: |
2764 | case Intrinsic::loongarch_lasx_xvpickve2gr_wu: |
2765 | replaceVPICKVE2GRResults<3>(Node: N, Results, DAG, Subtarget, |
2766 | ResOp: LoongArchISD::VPICK_ZEXT_ELT); |
2767 | break; |
2768 | case Intrinsic::loongarch_lsx_vpickve2gr_wu: |
2769 | replaceVPICKVE2GRResults<2>(Node: N, Results, DAG, Subtarget, |
2770 | ResOp: LoongArchISD::VPICK_ZEXT_ELT); |
2771 | break; |
2772 | case Intrinsic::loongarch_lsx_bz_b: |
2773 | case Intrinsic::loongarch_lsx_bz_h: |
2774 | case Intrinsic::loongarch_lsx_bz_w: |
2775 | case Intrinsic::loongarch_lsx_bz_d: |
2776 | case Intrinsic::loongarch_lasx_xbz_b: |
2777 | case Intrinsic::loongarch_lasx_xbz_h: |
2778 | case Intrinsic::loongarch_lasx_xbz_w: |
2779 | case Intrinsic::loongarch_lasx_xbz_d: |
2780 | replaceVecCondBranchResults(N, Results, DAG, Subtarget, |
2781 | ResOp: LoongArchISD::VALL_ZERO); |
2782 | break; |
2783 | case Intrinsic::loongarch_lsx_bz_v: |
2784 | case Intrinsic::loongarch_lasx_xbz_v: |
2785 | replaceVecCondBranchResults(N, Results, DAG, Subtarget, |
2786 | ResOp: LoongArchISD::VANY_ZERO); |
2787 | break; |
2788 | case Intrinsic::loongarch_lsx_bnz_b: |
2789 | case Intrinsic::loongarch_lsx_bnz_h: |
2790 | case Intrinsic::loongarch_lsx_bnz_w: |
2791 | case Intrinsic::loongarch_lsx_bnz_d: |
2792 | case Intrinsic::loongarch_lasx_xbnz_b: |
2793 | case Intrinsic::loongarch_lasx_xbnz_h: |
2794 | case Intrinsic::loongarch_lasx_xbnz_w: |
2795 | case Intrinsic::loongarch_lasx_xbnz_d: |
2796 | replaceVecCondBranchResults(N, Results, DAG, Subtarget, |
2797 | ResOp: LoongArchISD::VALL_NONZERO); |
2798 | break; |
2799 | case Intrinsic::loongarch_lsx_bnz_v: |
2800 | case Intrinsic::loongarch_lasx_xbnz_v: |
2801 | replaceVecCondBranchResults(N, Results, DAG, Subtarget, |
2802 | ResOp: LoongArchISD::VANY_NONZERO); |
2803 | break; |
2804 | } |
2805 | } |
2806 | |
2807 | void LoongArchTargetLowering::ReplaceNodeResults( |
2808 | SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { |
2809 | SDLoc DL(N); |
2810 | EVT VT = N->getValueType(ResNo: 0); |
2811 | switch (N->getOpcode()) { |
2812 | default: |
2813 | llvm_unreachable("Don't know how to legalize this operation" ); |
2814 | case ISD::ADD: |
2815 | case ISD::SUB: |
2816 | assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && |
2817 | "Unexpected custom legalisation" ); |
2818 | Results.push_back(Elt: customLegalizeToWOpWithSExt(N, DAG)); |
2819 | break; |
2820 | case ISD::UDIV: |
2821 | case ISD::UREM: |
2822 | assert(VT == MVT::i32 && Subtarget.is64Bit() && |
2823 | "Unexpected custom legalisation" ); |
2824 | Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 2, ExtOpc: ISD::SIGN_EXTEND)); |
2825 | break; |
2826 | case ISD::SHL: |
2827 | case ISD::SRA: |
2828 | case ISD::SRL: |
2829 | assert(VT == MVT::i32 && Subtarget.is64Bit() && |
2830 | "Unexpected custom legalisation" ); |
2831 | if (N->getOperand(Num: 1).getOpcode() != ISD::Constant) { |
2832 | Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 2)); |
2833 | break; |
2834 | } |
2835 | break; |
2836 | case ISD::ROTL: |
2837 | case ISD::ROTR: |
2838 | assert(VT == MVT::i32 && Subtarget.is64Bit() && |
2839 | "Unexpected custom legalisation" ); |
2840 | Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 2)); |
2841 | break; |
2842 | case ISD::FP_TO_SINT: { |
2843 | assert(VT == MVT::i32 && Subtarget.is64Bit() && |
2844 | "Unexpected custom legalisation" ); |
2845 | SDValue Src = N->getOperand(Num: 0); |
2846 | EVT FVT = EVT::getFloatingPointVT(BitWidth: N->getValueSizeInBits(ResNo: 0)); |
2847 | if (getTypeAction(Context&: *DAG.getContext(), VT: Src.getValueType()) != |
2848 | TargetLowering::TypeSoftenFloat) { |
2849 | SDValue Dst = DAG.getNode(Opcode: LoongArchISD::FTINT, DL, VT: FVT, Operand: Src); |
2850 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Dst)); |
2851 | return; |
2852 | } |
2853 | // If the FP type needs to be softened, emit a library call using the 'si' |
2854 | // version. If we left it to default legalization we'd end up with 'di'. |
2855 | RTLIB::Libcall LC; |
2856 | LC = RTLIB::getFPTOSINT(OpVT: Src.getValueType(), RetVT: VT); |
2857 | MakeLibCallOptions CallOptions; |
2858 | EVT OpVT = Src.getValueType(); |
2859 | CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT: VT, Value: true); |
2860 | SDValue Chain = SDValue(); |
2861 | SDValue Result; |
2862 | std::tie(args&: Result, args&: Chain) = |
2863 | makeLibCall(DAG, LC, RetVT: VT, Ops: Src, CallOptions, dl: DL, Chain); |
2864 | Results.push_back(Elt: Result); |
2865 | break; |
2866 | } |
2867 | case ISD::BITCAST: { |
2868 | SDValue Src = N->getOperand(Num: 0); |
2869 | EVT SrcVT = Src.getValueType(); |
2870 | if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() && |
2871 | Subtarget.hasBasicF()) { |
2872 | SDValue Dst = |
2873 | DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Src); |
2874 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Dst)); |
2875 | } |
2876 | break; |
2877 | } |
2878 | case ISD::FP_TO_UINT: { |
2879 | assert(VT == MVT::i32 && Subtarget.is64Bit() && |
2880 | "Unexpected custom legalisation" ); |
2881 | auto &TLI = DAG.getTargetLoweringInfo(); |
2882 | SDValue Tmp1, Tmp2; |
2883 | TLI.expandFP_TO_UINT(N, Result&: Tmp1, Chain&: Tmp2, DAG); |
2884 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Tmp1)); |
2885 | break; |
2886 | } |
2887 | case ISD::BSWAP: { |
2888 | SDValue Src = N->getOperand(Num: 0); |
2889 | assert((VT == MVT::i16 || VT == MVT::i32) && |
2890 | "Unexpected custom legalization" ); |
2891 | MVT GRLenVT = Subtarget.getGRLenVT(); |
2892 | SDValue NewSrc = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: GRLenVT, Operand: Src); |
2893 | SDValue Tmp; |
2894 | switch (VT.getSizeInBits()) { |
2895 | default: |
2896 | llvm_unreachable("Unexpected operand width" ); |
2897 | case 16: |
2898 | Tmp = DAG.getNode(Opcode: LoongArchISD::REVB_2H, DL, VT: GRLenVT, Operand: NewSrc); |
2899 | break; |
2900 | case 32: |
2901 | // Only LA64 will get to here due to the size mismatch between VT and |
2902 | // GRLenVT, LA32 lowering is directly defined in LoongArchInstrInfo. |
2903 | Tmp = DAG.getNode(Opcode: LoongArchISD::REVB_2W, DL, VT: GRLenVT, Operand: NewSrc); |
2904 | break; |
2905 | } |
2906 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Tmp)); |
2907 | break; |
2908 | } |
2909 | case ISD::BITREVERSE: { |
2910 | SDValue Src = N->getOperand(Num: 0); |
2911 | assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) && |
2912 | "Unexpected custom legalization" ); |
2913 | MVT GRLenVT = Subtarget.getGRLenVT(); |
2914 | SDValue NewSrc = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: GRLenVT, Operand: Src); |
2915 | SDValue Tmp; |
2916 | switch (VT.getSizeInBits()) { |
2917 | default: |
2918 | llvm_unreachable("Unexpected operand width" ); |
2919 | case 8: |
2920 | Tmp = DAG.getNode(Opcode: LoongArchISD::BITREV_4B, DL, VT: GRLenVT, Operand: NewSrc); |
2921 | break; |
2922 | case 32: |
2923 | Tmp = DAG.getNode(Opcode: LoongArchISD::BITREV_W, DL, VT: GRLenVT, Operand: NewSrc); |
2924 | break; |
2925 | } |
2926 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Tmp)); |
2927 | break; |
2928 | } |
2929 | case ISD::CTLZ: |
2930 | case ISD::CTTZ: { |
2931 | assert(VT == MVT::i32 && Subtarget.is64Bit() && |
2932 | "Unexpected custom legalisation" ); |
2933 | Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 1)); |
2934 | break; |
2935 | } |
2936 | case ISD::INTRINSIC_W_CHAIN: { |
2937 | SDValue Chain = N->getOperand(Num: 0); |
2938 | SDValue Op2 = N->getOperand(Num: 2); |
2939 | MVT GRLenVT = Subtarget.getGRLenVT(); |
2940 | const StringRef ErrorMsgOOR = "argument out of range" ; |
2941 | const StringRef ErrorMsgReqLA64 = "requires loongarch64" ; |
2942 | const StringRef ErrorMsgReqF = "requires basic 'f' target feature" ; |
2943 | |
2944 | switch (N->getConstantOperandVal(Num: 1)) { |
2945 | default: |
2946 | llvm_unreachable("Unexpected Intrinsic." ); |
2947 | case Intrinsic::loongarch_movfcsr2gr: { |
2948 | if (!Subtarget.hasBasicF()) { |
2949 | emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgReqF); |
2950 | return; |
2951 | } |
2952 | unsigned Imm = Op2->getAsZExtVal(); |
2953 | if (!isUInt<2>(x: Imm)) { |
2954 | emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR); |
2955 | return; |
2956 | } |
2957 | SDValue MOVFCSR2GRResults = DAG.getNode( |
2958 | Opcode: LoongArchISD::MOVFCSR2GR, DL: SDLoc(N), ResultTys: {MVT::i64, MVT::Other}, |
2959 | Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
2960 | Results.push_back( |
2961 | Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: MOVFCSR2GRResults.getValue(R: 0))); |
2962 | Results.push_back(Elt: MOVFCSR2GRResults.getValue(R: 1)); |
2963 | break; |
2964 | } |
2965 | #define CRC_CASE_EXT_BINARYOP(NAME, NODE) \ |
2966 | case Intrinsic::loongarch_##NAME: { \ |
2967 | SDValue NODE = DAG.getNode( \ |
2968 | LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \ |
2969 | {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \ |
2970 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \ |
2971 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \ |
2972 | Results.push_back(NODE.getValue(1)); \ |
2973 | break; \ |
2974 | } |
2975 | CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W) |
2976 | CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W) |
2977 | CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W) |
2978 | CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W) |
2979 | CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W) |
2980 | CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W) |
2981 | #undef CRC_CASE_EXT_BINARYOP |
2982 | |
2983 | #define CRC_CASE_EXT_UNARYOP(NAME, NODE) \ |
2984 | case Intrinsic::loongarch_##NAME: { \ |
2985 | SDValue NODE = DAG.getNode( \ |
2986 | LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \ |
2987 | {Chain, Op2, \ |
2988 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \ |
2989 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \ |
2990 | Results.push_back(NODE.getValue(1)); \ |
2991 | break; \ |
2992 | } |
2993 | CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W) |
2994 | CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W) |
2995 | #undef CRC_CASE_EXT_UNARYOP |
2996 | #define CSR_CASE(ID) \ |
2997 | case Intrinsic::loongarch_##ID: { \ |
2998 | if (!Subtarget.is64Bit()) \ |
2999 | emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \ |
3000 | break; \ |
3001 | } |
3002 | CSR_CASE(csrrd_d); |
3003 | CSR_CASE(csrwr_d); |
3004 | CSR_CASE(csrxchg_d); |
3005 | CSR_CASE(iocsrrd_d); |
3006 | #undef CSR_CASE |
3007 | case Intrinsic::loongarch_csrrd_w: { |
3008 | unsigned Imm = Op2->getAsZExtVal(); |
3009 | if (!isUInt<14>(x: Imm)) { |
3010 | emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR); |
3011 | return; |
3012 | } |
3013 | SDValue CSRRDResults = |
3014 | DAG.getNode(Opcode: LoongArchISD::CSRRD, DL, ResultTys: {GRLenVT, MVT::Other}, |
3015 | Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
3016 | Results.push_back( |
3017 | Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRRDResults.getValue(R: 0))); |
3018 | Results.push_back(Elt: CSRRDResults.getValue(R: 1)); |
3019 | break; |
3020 | } |
3021 | case Intrinsic::loongarch_csrwr_w: { |
3022 | unsigned Imm = N->getConstantOperandVal(Num: 3); |
3023 | if (!isUInt<14>(x: Imm)) { |
3024 | emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR); |
3025 | return; |
3026 | } |
3027 | SDValue CSRWRResults = |
3028 | DAG.getNode(Opcode: LoongArchISD::CSRWR, DL, ResultTys: {GRLenVT, MVT::Other}, |
3029 | Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op2), |
3030 | DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
3031 | Results.push_back( |
3032 | Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRWRResults.getValue(R: 0))); |
3033 | Results.push_back(Elt: CSRWRResults.getValue(R: 1)); |
3034 | break; |
3035 | } |
3036 | case Intrinsic::loongarch_csrxchg_w: { |
3037 | unsigned Imm = N->getConstantOperandVal(Num: 4); |
3038 | if (!isUInt<14>(x: Imm)) { |
3039 | emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR); |
3040 | return; |
3041 | } |
3042 | SDValue CSRXCHGResults = DAG.getNode( |
3043 | Opcode: LoongArchISD::CSRXCHG, DL, ResultTys: {GRLenVT, MVT::Other}, |
3044 | Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op2), |
3045 | DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 3)), |
3046 | DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
3047 | Results.push_back( |
3048 | Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRXCHGResults.getValue(R: 0))); |
3049 | Results.push_back(Elt: CSRXCHGResults.getValue(R: 1)); |
3050 | break; |
3051 | } |
3052 | #define IOCSRRD_CASE(NAME, NODE) \ |
3053 | case Intrinsic::loongarch_##NAME: { \ |
3054 | SDValue IOCSRRDResults = \ |
3055 | DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \ |
3056 | {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \ |
3057 | Results.push_back( \ |
3058 | DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \ |
3059 | Results.push_back(IOCSRRDResults.getValue(1)); \ |
3060 | break; \ |
3061 | } |
3062 | IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B); |
3063 | IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H); |
3064 | IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W); |
3065 | #undef IOCSRRD_CASE |
3066 | case Intrinsic::loongarch_cpucfg: { |
3067 | SDValue CPUCFGResults = |
3068 | DAG.getNode(Opcode: LoongArchISD::CPUCFG, DL, ResultTys: {GRLenVT, MVT::Other}, |
3069 | Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op2)}); |
3070 | Results.push_back( |
3071 | Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CPUCFGResults.getValue(R: 0))); |
3072 | Results.push_back(Elt: CPUCFGResults.getValue(R: 1)); |
3073 | break; |
3074 | } |
3075 | case Intrinsic::loongarch_lddir_d: { |
3076 | if (!Subtarget.is64Bit()) { |
3077 | emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgReqLA64); |
3078 | return; |
3079 | } |
3080 | break; |
3081 | } |
3082 | } |
3083 | break; |
3084 | } |
3085 | case ISD::READ_REGISTER: { |
3086 | if (Subtarget.is64Bit()) |
3087 | DAG.getContext()->emitError( |
3088 | ErrorStr: "On LA64, only 64-bit registers can be read." ); |
3089 | else |
3090 | DAG.getContext()->emitError( |
3091 | ErrorStr: "On LA32, only 32-bit registers can be read." ); |
3092 | Results.push_back(Elt: DAG.getUNDEF(VT)); |
3093 | Results.push_back(Elt: N->getOperand(Num: 0)); |
3094 | break; |
3095 | } |
3096 | case ISD::INTRINSIC_WO_CHAIN: { |
3097 | replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget); |
3098 | break; |
3099 | } |
3100 | } |
3101 | } |
3102 | |
3103 | static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, |
3104 | TargetLowering::DAGCombinerInfo &DCI, |
3105 | const LoongArchSubtarget &Subtarget) { |
3106 | if (DCI.isBeforeLegalizeOps()) |
3107 | return SDValue(); |
3108 | |
3109 | SDValue FirstOperand = N->getOperand(Num: 0); |
3110 | SDValue SecondOperand = N->getOperand(Num: 1); |
3111 | unsigned FirstOperandOpc = FirstOperand.getOpcode(); |
3112 | EVT ValTy = N->getValueType(ResNo: 0); |
3113 | SDLoc DL(N); |
3114 | uint64_t lsb, msb; |
3115 | unsigned SMIdx, SMLen; |
3116 | ConstantSDNode *CN; |
3117 | SDValue NewOperand; |
3118 | MVT GRLenVT = Subtarget.getGRLenVT(); |
3119 | |
3120 | // Op's second operand must be a shifted mask. |
3121 | if (!(CN = dyn_cast<ConstantSDNode>(Val&: SecondOperand)) || |
3122 | !isShiftedMask_64(Value: CN->getZExtValue(), MaskIdx&: SMIdx, MaskLen&: SMLen)) |
3123 | return SDValue(); |
3124 | |
3125 | if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) { |
3126 | // Pattern match BSTRPICK. |
3127 | // $dst = and ((sra or srl) $src , lsb), (2**len - 1) |
3128 | // => BSTRPICK $dst, $src, msb, lsb |
3129 | // where msb = lsb + len - 1 |
3130 | |
3131 | // The second operand of the shift must be an immediate. |
3132 | if (!(CN = dyn_cast<ConstantSDNode>(Val: FirstOperand.getOperand(i: 1)))) |
3133 | return SDValue(); |
3134 | |
3135 | lsb = CN->getZExtValue(); |
3136 | |
3137 | // Return if the shifted mask does not start at bit 0 or the sum of its |
3138 | // length and lsb exceeds the word's size. |
3139 | if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits()) |
3140 | return SDValue(); |
3141 | |
3142 | NewOperand = FirstOperand.getOperand(i: 0); |
3143 | } else { |
3144 | // Pattern match BSTRPICK. |
3145 | // $dst = and $src, (2**len- 1) , if len > 12 |
3146 | // => BSTRPICK $dst, $src, msb, lsb |
3147 | // where lsb = 0 and msb = len - 1 |
3148 | |
3149 | // If the mask is <= 0xfff, andi can be used instead. |
3150 | if (CN->getZExtValue() <= 0xfff) |
3151 | return SDValue(); |
3152 | |
3153 | // Return if the MSB exceeds. |
3154 | if (SMIdx + SMLen > ValTy.getSizeInBits()) |
3155 | return SDValue(); |
3156 | |
3157 | if (SMIdx > 0) { |
3158 | // Omit if the constant has more than 2 uses. This a conservative |
3159 | // decision. Whether it is a win depends on the HW microarchitecture. |
3160 | // However it should always be better for 1 and 2 uses. |
3161 | if (CN->use_size() > 2) |
3162 | return SDValue(); |
3163 | // Return if the constant can be composed by a single LU12I.W. |
3164 | if ((CN->getZExtValue() & 0xfff) == 0) |
3165 | return SDValue(); |
3166 | // Return if the constand can be composed by a single ADDI with |
3167 | // the zero register. |
3168 | if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0) |
3169 | return SDValue(); |
3170 | } |
3171 | |
3172 | lsb = SMIdx; |
3173 | NewOperand = FirstOperand; |
3174 | } |
3175 | |
3176 | msb = lsb + SMLen - 1; |
3177 | SDValue NR0 = DAG.getNode(Opcode: LoongArchISD::BSTRPICK, DL, VT: ValTy, N1: NewOperand, |
3178 | N2: DAG.getConstant(Val: msb, DL, VT: GRLenVT), |
3179 | N3: DAG.getConstant(Val: lsb, DL, VT: GRLenVT)); |
3180 | if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0) |
3181 | return NR0; |
3182 | // Try to optimize to |
3183 | // bstrpick $Rd, $Rs, msb, lsb |
3184 | // slli $Rd, $Rd, lsb |
3185 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: ValTy, N1: NR0, |
3186 | N2: DAG.getConstant(Val: lsb, DL, VT: GRLenVT)); |
3187 | } |
3188 | |
3189 | static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, |
3190 | TargetLowering::DAGCombinerInfo &DCI, |
3191 | const LoongArchSubtarget &Subtarget) { |
3192 | if (DCI.isBeforeLegalizeOps()) |
3193 | return SDValue(); |
3194 | |
3195 | // $dst = srl (and $src, Mask), Shamt |
3196 | // => |
3197 | // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt |
3198 | // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1 |
3199 | // |
3200 | |
3201 | SDValue FirstOperand = N->getOperand(Num: 0); |
3202 | ConstantSDNode *CN; |
3203 | EVT ValTy = N->getValueType(ResNo: 0); |
3204 | SDLoc DL(N); |
3205 | MVT GRLenVT = Subtarget.getGRLenVT(); |
3206 | unsigned MaskIdx, MaskLen; |
3207 | uint64_t Shamt; |
3208 | |
3209 | // The first operand must be an AND and the second operand of the AND must be |
3210 | // a shifted mask. |
3211 | if (FirstOperand.getOpcode() != ISD::AND || |
3212 | !(CN = dyn_cast<ConstantSDNode>(Val: FirstOperand.getOperand(i: 1))) || |
3213 | !isShiftedMask_64(Value: CN->getZExtValue(), MaskIdx, MaskLen)) |
3214 | return SDValue(); |
3215 | |
3216 | // The second operand (shift amount) must be an immediate. |
3217 | if (!(CN = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1)))) |
3218 | return SDValue(); |
3219 | |
3220 | Shamt = CN->getZExtValue(); |
3221 | if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1) |
3222 | return DAG.getNode(Opcode: LoongArchISD::BSTRPICK, DL, VT: ValTy, |
3223 | N1: FirstOperand->getOperand(Num: 0), |
3224 | N2: DAG.getConstant(Val: MaskIdx + MaskLen - 1, DL, VT: GRLenVT), |
3225 | N3: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT)); |
3226 | |
3227 | return SDValue(); |
3228 | } |
3229 | |
3230 | static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, |
3231 | TargetLowering::DAGCombinerInfo &DCI, |
3232 | const LoongArchSubtarget &Subtarget) { |
3233 | MVT GRLenVT = Subtarget.getGRLenVT(); |
3234 | EVT ValTy = N->getValueType(ResNo: 0); |
3235 | SDValue N0 = N->getOperand(Num: 0), N1 = N->getOperand(Num: 1); |
3236 | ConstantSDNode *CN0, *CN1; |
3237 | SDLoc DL(N); |
3238 | unsigned ValBits = ValTy.getSizeInBits(); |
3239 | unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1; |
3240 | unsigned Shamt; |
3241 | bool SwapAndRetried = false; |
3242 | |
3243 | if (DCI.isBeforeLegalizeOps()) |
3244 | return SDValue(); |
3245 | |
3246 | if (ValBits != 32 && ValBits != 64) |
3247 | return SDValue(); |
3248 | |
3249 | Retry: |
3250 | // 1st pattern to match BSTRINS: |
3251 | // R = or (and X, mask0), (and (shl Y, lsb), mask1) |
3252 | // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1 |
3253 | // => |
3254 | // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1) |
3255 | if (N0.getOpcode() == ISD::AND && |
3256 | (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) && |
3257 | isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) && |
3258 | N1.getOpcode() == ISD::AND && N1.getOperand(i: 0).getOpcode() == ISD::SHL && |
3259 | (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) && |
3260 | isShiftedMask_64(Value: CN1->getZExtValue(), MaskIdx&: MaskIdx1, MaskLen&: MaskLen1) && |
3261 | MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 && |
3262 | (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) && |
3263 | (Shamt = CN1->getZExtValue()) == MaskIdx0 && |
3264 | (MaskIdx0 + MaskLen0 <= ValBits)) { |
3265 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n" ); |
3266 | return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0), |
3267 | N2: N1.getOperand(i: 0).getOperand(i: 0), |
3268 | N3: DAG.getConstant(Val: (MaskIdx0 + MaskLen0 - 1), DL, VT: GRLenVT), |
3269 | N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT)); |
3270 | } |
3271 | |
3272 | // 2nd pattern to match BSTRINS: |
3273 | // R = or (and X, mask0), (shl (and Y, mask1), lsb) |
3274 | // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb) |
3275 | // => |
3276 | // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1) |
3277 | if (N0.getOpcode() == ISD::AND && |
3278 | (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) && |
3279 | isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) && |
3280 | N1.getOpcode() == ISD::SHL && N1.getOperand(i: 0).getOpcode() == ISD::AND && |
3281 | (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) && |
3282 | (Shamt = CN1->getZExtValue()) == MaskIdx0 && |
3283 | (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) && |
3284 | isShiftedMask_64(Value: CN1->getZExtValue(), MaskIdx&: MaskIdx1, MaskLen&: MaskLen1) && |
3285 | MaskLen0 == MaskLen1 && MaskIdx1 == 0 && |
3286 | (MaskIdx0 + MaskLen0 <= ValBits)) { |
3287 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n" ); |
3288 | return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0), |
3289 | N2: N1.getOperand(i: 0).getOperand(i: 0), |
3290 | N3: DAG.getConstant(Val: (MaskIdx0 + MaskLen0 - 1), DL, VT: GRLenVT), |
3291 | N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT)); |
3292 | } |
3293 | |
3294 | // 3rd pattern to match BSTRINS: |
3295 | // R = or (and X, mask0), (and Y, mask1) |
3296 | // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0 |
3297 | // => |
3298 | // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb |
3299 | // where msb = lsb + size - 1 |
3300 | if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND && |
3301 | (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) && |
3302 | isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) && |
3303 | (MaskIdx0 + MaskLen0 <= 64) && |
3304 | (CN1 = dyn_cast<ConstantSDNode>(Val: N1->getOperand(Num: 1))) && |
3305 | (CN1->getSExtValue() & CN0->getSExtValue()) == 0) { |
3306 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n" ); |
3307 | return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0), |
3308 | N2: DAG.getNode(Opcode: ISD::SRL, DL, VT: N1->getValueType(ResNo: 0), N1, |
3309 | N2: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT)), |
3310 | N3: DAG.getConstant(Val: ValBits == 32 |
3311 | ? (MaskIdx0 + (MaskLen0 & 31) - 1) |
3312 | : (MaskIdx0 + MaskLen0 - 1), |
3313 | DL, VT: GRLenVT), |
3314 | N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT)); |
3315 | } |
3316 | |
3317 | // 4th pattern to match BSTRINS: |
3318 | // R = or (and X, mask), (shl Y, shamt) |
3319 | // where mask = (2**shamt - 1) |
3320 | // => |
3321 | // R = BSTRINS X, Y, ValBits - 1, shamt |
3322 | // where ValBits = 32 or 64 |
3323 | if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL && |
3324 | (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) && |
3325 | isShiftedMask_64(Value: CN0->getZExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) && |
3326 | MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) && |
3327 | (Shamt = CN1->getZExtValue()) == MaskLen0 && |
3328 | (MaskIdx0 + MaskLen0 <= ValBits)) { |
3329 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n" ); |
3330 | return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0), |
3331 | N2: N1.getOperand(i: 0), |
3332 | N3: DAG.getConstant(Val: (ValBits - 1), DL, VT: GRLenVT), |
3333 | N4: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT)); |
3334 | } |
3335 | |
3336 | // 5th pattern to match BSTRINS: |
3337 | // R = or (and X, mask), const |
3338 | // where ~mask = (2**size - 1) << lsb, mask & const = 0 |
3339 | // => |
3340 | // R = BSTRINS X, (const >> lsb), msb, lsb |
3341 | // where msb = lsb + size - 1 |
3342 | if (N0.getOpcode() == ISD::AND && |
3343 | (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) && |
3344 | isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) && |
3345 | (CN1 = dyn_cast<ConstantSDNode>(Val&: N1)) && |
3346 | (CN1->getSExtValue() & CN0->getSExtValue()) == 0) { |
3347 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n" ); |
3348 | return DAG.getNode( |
3349 | Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0), |
3350 | N2: DAG.getConstant(Val: CN1->getSExtValue() >> MaskIdx0, DL, VT: ValTy), |
3351 | N3: DAG.getConstant(Val: ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1) |
3352 | : (MaskIdx0 + MaskLen0 - 1), |
3353 | DL, VT: GRLenVT), |
3354 | N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT)); |
3355 | } |
3356 | |
3357 | // 6th pattern. |
3358 | // a = b | ((c & mask) << shamt), where all positions in b to be overwritten |
3359 | // by the incoming bits are known to be zero. |
3360 | // => |
3361 | // a = BSTRINS b, c, shamt + MaskLen - 1, shamt |
3362 | // |
3363 | // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th |
3364 | // pattern is more common than the 1st. So we put the 1st before the 6th in |
3365 | // order to match as many nodes as possible. |
3366 | ConstantSDNode *CNMask, *CNShamt; |
3367 | unsigned MaskIdx, MaskLen; |
3368 | if (N1.getOpcode() == ISD::SHL && N1.getOperand(i: 0).getOpcode() == ISD::AND && |
3369 | (CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) && |
3370 | isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen) && |
3371 | MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) && |
3372 | CNShamt->getZExtValue() + MaskLen <= ValBits) { |
3373 | Shamt = CNShamt->getZExtValue(); |
3374 | APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt); |
3375 | if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) { |
3376 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n" ); |
3377 | return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0, |
3378 | N2: N1.getOperand(i: 0).getOperand(i: 0), |
3379 | N3: DAG.getConstant(Val: Shamt + MaskLen - 1, DL, VT: GRLenVT), |
3380 | N4: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT)); |
3381 | } |
3382 | } |
3383 | |
3384 | // 7th pattern. |
3385 | // a = b | ((c << shamt) & shifted_mask), where all positions in b to be |
3386 | // overwritten by the incoming bits are known to be zero. |
3387 | // => |
3388 | // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx |
3389 | // |
3390 | // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd |
3391 | // before the 7th in order to match as many nodes as possible. |
3392 | if (N1.getOpcode() == ISD::AND && |
3393 | (CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) && |
3394 | isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen) && |
3395 | N1.getOperand(i: 0).getOpcode() == ISD::SHL && |
3396 | (CNShamt = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) && |
3397 | CNShamt->getZExtValue() == MaskIdx) { |
3398 | APInt ShMask(ValBits, CNMask->getZExtValue()); |
3399 | if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) { |
3400 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n" ); |
3401 | return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0, |
3402 | N2: N1.getOperand(i: 0).getOperand(i: 0), |
3403 | N3: DAG.getConstant(Val: MaskIdx + MaskLen - 1, DL, VT: GRLenVT), |
3404 | N4: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT)); |
3405 | } |
3406 | } |
3407 | |
3408 | // (or a, b) and (or b, a) are equivalent, so swap the operands and retry. |
3409 | if (!SwapAndRetried) { |
3410 | std::swap(a&: N0, b&: N1); |
3411 | SwapAndRetried = true; |
3412 | goto Retry; |
3413 | } |
3414 | |
3415 | SwapAndRetried = false; |
3416 | Retry2: |
3417 | // 8th pattern. |
3418 | // a = b | (c & shifted_mask), where all positions in b to be overwritten by |
3419 | // the incoming bits are known to be zero. |
3420 | // => |
3421 | // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx |
3422 | // |
3423 | // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So |
3424 | // we put it here in order to match as many nodes as possible or generate less |
3425 | // instructions. |
3426 | if (N1.getOpcode() == ISD::AND && |
3427 | (CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) && |
3428 | isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen)) { |
3429 | APInt ShMask(ValBits, CNMask->getZExtValue()); |
3430 | if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) { |
3431 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n" ); |
3432 | return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0, |
3433 | N2: DAG.getNode(Opcode: ISD::SRL, DL, VT: N1->getValueType(ResNo: 0), |
3434 | N1: N1->getOperand(Num: 0), |
3435 | N2: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT)), |
3436 | N3: DAG.getConstant(Val: MaskIdx + MaskLen - 1, DL, VT: GRLenVT), |
3437 | N4: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT)); |
3438 | } |
3439 | } |
3440 | // Swap N0/N1 and retry. |
3441 | if (!SwapAndRetried) { |
3442 | std::swap(a&: N0, b&: N1); |
3443 | SwapAndRetried = true; |
3444 | goto Retry2; |
3445 | } |
3446 | |
3447 | return SDValue(); |
3448 | } |
3449 | |
3450 | static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) { |
3451 | ExtType = ISD::NON_EXTLOAD; |
3452 | |
3453 | switch (V.getNode()->getOpcode()) { |
3454 | case ISD::LOAD: { |
3455 | LoadSDNode *LoadNode = cast<LoadSDNode>(Val: V.getNode()); |
3456 | if ((LoadNode->getMemoryVT() == MVT::i8) || |
3457 | (LoadNode->getMemoryVT() == MVT::i16)) { |
3458 | ExtType = LoadNode->getExtensionType(); |
3459 | return true; |
3460 | } |
3461 | return false; |
3462 | } |
3463 | case ISD::AssertSext: { |
3464 | VTSDNode *TypeNode = cast<VTSDNode>(Val: V.getNode()->getOperand(Num: 1)); |
3465 | if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) { |
3466 | ExtType = ISD::SEXTLOAD; |
3467 | return true; |
3468 | } |
3469 | return false; |
3470 | } |
3471 | case ISD::AssertZext: { |
3472 | VTSDNode *TypeNode = cast<VTSDNode>(Val: V.getNode()->getOperand(Num: 1)); |
3473 | if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) { |
3474 | ExtType = ISD::ZEXTLOAD; |
3475 | return true; |
3476 | } |
3477 | return false; |
3478 | } |
3479 | default: |
3480 | return false; |
3481 | } |
3482 | |
3483 | return false; |
3484 | } |
3485 | |
3486 | // Eliminate redundant truncation and zero-extension nodes. |
3487 | // * Case 1: |
3488 | // +------------+ +------------+ +------------+ |
3489 | // | Input1 | | Input2 | | CC | |
3490 | // +------------+ +------------+ +------------+ |
3491 | // | | | |
3492 | // V V +----+ |
3493 | // +------------+ +------------+ | |
3494 | // | TRUNCATE | | TRUNCATE | | |
3495 | // +------------+ +------------+ | |
3496 | // | | | |
3497 | // V V | |
3498 | // +------------+ +------------+ | |
3499 | // | ZERO_EXT | | ZERO_EXT | | |
3500 | // +------------+ +------------+ | |
3501 | // | | | |
3502 | // | +-------------+ | |
3503 | // V V | | |
3504 | // +----------------+ | | |
3505 | // | AND | | | |
3506 | // +----------------+ | | |
3507 | // | | | |
3508 | // +---------------+ | | |
3509 | // | | | |
3510 | // V V V |
3511 | // +-------------+ |
3512 | // | CMP | |
3513 | // +-------------+ |
3514 | // * Case 2: |
3515 | // +------------+ +------------+ +-------------+ +------------+ +------------+ |
3516 | // | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC | |
3517 | // +------------+ +------------+ +-------------+ +------------+ +------------+ |
3518 | // | | | | | |
3519 | // V | | | | |
3520 | // +------------+ | | | | |
3521 | // | XOR |<---------------------+ | | |
3522 | // +------------+ | | | |
3523 | // | | | | |
3524 | // V V +---------------+ | |
3525 | // +------------+ +------------+ | | |
3526 | // | TRUNCATE | | TRUNCATE | | +-------------------------+ |
3527 | // +------------+ +------------+ | | |
3528 | // | | | | |
3529 | // V V | | |
3530 | // +------------+ +------------+ | | |
3531 | // | ZERO_EXT | | ZERO_EXT | | | |
3532 | // +------------+ +------------+ | | |
3533 | // | | | | |
3534 | // V V | | |
3535 | // +----------------+ | | |
3536 | // | AND | | | |
3537 | // +----------------+ | | |
3538 | // | | | |
3539 | // +---------------+ | | |
3540 | // | | | |
3541 | // V V V |
3542 | // +-------------+ |
3543 | // | CMP | |
3544 | // +-------------+ |
3545 | static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG, |
3546 | TargetLowering::DAGCombinerInfo &DCI, |
3547 | const LoongArchSubtarget &Subtarget) { |
3548 | ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get(); |
3549 | |
3550 | SDNode *AndNode = N->getOperand(Num: 0).getNode(); |
3551 | if (AndNode->getOpcode() != ISD::AND) |
3552 | return SDValue(); |
3553 | |
3554 | SDValue AndInputValue2 = AndNode->getOperand(Num: 1); |
3555 | if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND) |
3556 | return SDValue(); |
3557 | |
3558 | SDValue CmpInputValue = N->getOperand(Num: 1); |
3559 | SDValue AndInputValue1 = AndNode->getOperand(Num: 0); |
3560 | if (AndInputValue1.getOpcode() == ISD::XOR) { |
3561 | if (CC != ISD::SETEQ && CC != ISD::SETNE) |
3562 | return SDValue(); |
3563 | ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val: AndInputValue1.getOperand(i: 1)); |
3564 | if (!CN || CN->getSExtValue() != -1) |
3565 | return SDValue(); |
3566 | CN = dyn_cast<ConstantSDNode>(Val&: CmpInputValue); |
3567 | if (!CN || CN->getSExtValue() != 0) |
3568 | return SDValue(); |
3569 | AndInputValue1 = AndInputValue1.getOperand(i: 0); |
3570 | if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND) |
3571 | return SDValue(); |
3572 | } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) { |
3573 | if (AndInputValue2 != CmpInputValue) |
3574 | return SDValue(); |
3575 | } else { |
3576 | return SDValue(); |
3577 | } |
3578 | |
3579 | SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(Num: 0); |
3580 | if (TruncValue1.getOpcode() != ISD::TRUNCATE) |
3581 | return SDValue(); |
3582 | |
3583 | SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(Num: 0); |
3584 | if (TruncValue2.getOpcode() != ISD::TRUNCATE) |
3585 | return SDValue(); |
3586 | |
3587 | SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(Num: 0); |
3588 | SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(Num: 0); |
3589 | ISD::LoadExtType ExtType1; |
3590 | ISD::LoadExtType ExtType2; |
3591 | |
3592 | if (!checkValueWidth(V: TruncInputValue1, ExtType&: ExtType1) || |
3593 | !checkValueWidth(V: TruncInputValue2, ExtType&: ExtType2)) |
3594 | return SDValue(); |
3595 | |
3596 | if (TruncInputValue1->getValueType(ResNo: 0) != TruncInputValue2->getValueType(ResNo: 0) || |
3597 | AndNode->getValueType(ResNo: 0) != TruncInputValue1->getValueType(ResNo: 0)) |
3598 | return SDValue(); |
3599 | |
3600 | if ((ExtType2 != ISD::ZEXTLOAD) && |
3601 | ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD))) |
3602 | return SDValue(); |
3603 | |
3604 | // These truncation and zero-extension nodes are not necessary, remove them. |
3605 | SDValue NewAnd = DAG.getNode(Opcode: ISD::AND, DL: SDLoc(N), VT: AndNode->getValueType(ResNo: 0), |
3606 | N1: TruncInputValue1, N2: TruncInputValue2); |
3607 | SDValue NewSetCC = |
3608 | DAG.getSetCC(DL: SDLoc(N), VT: N->getValueType(ResNo: 0), LHS: NewAnd, RHS: TruncInputValue2, Cond: CC); |
3609 | DAG.ReplaceAllUsesWith(From: N, To: NewSetCC.getNode()); |
3610 | return SDValue(N, 0); |
3611 | } |
3612 | |
3613 | // Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b. |
3614 | static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, |
3615 | TargetLowering::DAGCombinerInfo &DCI, |
3616 | const LoongArchSubtarget &Subtarget) { |
3617 | if (DCI.isBeforeLegalizeOps()) |
3618 | return SDValue(); |
3619 | |
3620 | SDValue Src = N->getOperand(Num: 0); |
3621 | if (Src.getOpcode() != LoongArchISD::REVB_2W) |
3622 | return SDValue(); |
3623 | |
3624 | return DAG.getNode(Opcode: LoongArchISD::BITREV_4B, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), |
3625 | Operand: Src.getOperand(i: 0)); |
3626 | } |
3627 | |
3628 | template <unsigned N> |
3629 | static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, |
3630 | SelectionDAG &DAG, |
3631 | const LoongArchSubtarget &Subtarget, |
3632 | bool IsSigned = false) { |
3633 | SDLoc DL(Node); |
3634 | auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: ImmOp)); |
3635 | // Check the ImmArg. |
3636 | if ((IsSigned && !isInt<N>(CImm->getSExtValue())) || |
3637 | (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) { |
3638 | DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) + |
3639 | ": argument out of range." ); |
3640 | return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: Subtarget.getGRLenVT()); |
3641 | } |
3642 | return DAG.getConstant(Val: CImm->getZExtValue(), DL, VT: Subtarget.getGRLenVT()); |
3643 | } |
3644 | |
3645 | template <unsigned N> |
3646 | static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, |
3647 | SelectionDAG &DAG, bool IsSigned = false) { |
3648 | SDLoc DL(Node); |
3649 | EVT ResTy = Node->getValueType(ResNo: 0); |
3650 | auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: ImmOp)); |
3651 | |
3652 | // Check the ImmArg. |
3653 | if ((IsSigned && !isInt<N>(CImm->getSExtValue())) || |
3654 | (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) { |
3655 | DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) + |
3656 | ": argument out of range." ); |
3657 | return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy); |
3658 | } |
3659 | return DAG.getConstant( |
3660 | Val: APInt(ResTy.getScalarType().getSizeInBits(), |
3661 | IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned), |
3662 | DL, VT: ResTy); |
3663 | } |
3664 | |
3665 | static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) { |
3666 | SDLoc DL(Node); |
3667 | EVT ResTy = Node->getValueType(ResNo: 0); |
3668 | SDValue Vec = Node->getOperand(Num: 2); |
3669 | SDValue Mask = DAG.getConstant(Val: Vec.getScalarValueSizeInBits() - 1, DL, VT: ResTy); |
3670 | return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Vec, N2: Mask); |
3671 | } |
3672 | |
3673 | static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) { |
3674 | SDLoc DL(Node); |
3675 | EVT ResTy = Node->getValueType(ResNo: 0); |
3676 | SDValue One = DAG.getConstant(Val: 1, DL, VT: ResTy); |
3677 | SDValue Bit = |
3678 | DAG.getNode(Opcode: ISD::SHL, DL, VT: ResTy, N1: One, N2: truncateVecElts(Node, DAG)); |
3679 | |
3680 | return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Node->getOperand(Num: 1), |
3681 | N2: DAG.getNOT(DL, Val: Bit, VT: ResTy)); |
3682 | } |
3683 | |
3684 | template <unsigned N> |
3685 | static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) { |
3686 | SDLoc DL(Node); |
3687 | EVT ResTy = Node->getValueType(ResNo: 0); |
3688 | auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2)); |
3689 | // Check the unsigned ImmArg. |
3690 | if (!isUInt<N>(CImm->getZExtValue())) { |
3691 | DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) + |
3692 | ": argument out of range." ); |
3693 | return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy); |
3694 | } |
3695 | |
3696 | APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); |
3697 | SDValue Mask = DAG.getConstant(Val: ~BitImm, DL, VT: ResTy); |
3698 | |
3699 | return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Node->getOperand(Num: 1), N2: Mask); |
3700 | } |
3701 | |
3702 | template <unsigned N> |
3703 | static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) { |
3704 | SDLoc DL(Node); |
3705 | EVT ResTy = Node->getValueType(ResNo: 0); |
3706 | auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2)); |
3707 | // Check the unsigned ImmArg. |
3708 | if (!isUInt<N>(CImm->getZExtValue())) { |
3709 | DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) + |
3710 | ": argument out of range." ); |
3711 | return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy); |
3712 | } |
3713 | |
3714 | APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); |
3715 | SDValue BitImm = DAG.getConstant(Val: Imm, DL, VT: ResTy); |
3716 | return DAG.getNode(Opcode: ISD::OR, DL, VT: ResTy, N1: Node->getOperand(Num: 1), N2: BitImm); |
3717 | } |
3718 | |
3719 | template <unsigned N> |
3720 | static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) { |
3721 | SDLoc DL(Node); |
3722 | EVT ResTy = Node->getValueType(ResNo: 0); |
3723 | auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2)); |
3724 | // Check the unsigned ImmArg. |
3725 | if (!isUInt<N>(CImm->getZExtValue())) { |
3726 | DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) + |
3727 | ": argument out of range." ); |
3728 | return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy); |
3729 | } |
3730 | |
3731 | APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); |
3732 | SDValue BitImm = DAG.getConstant(Val: Imm, DL, VT: ResTy); |
3733 | return DAG.getNode(Opcode: ISD::XOR, DL, VT: ResTy, N1: Node->getOperand(Num: 1), N2: BitImm); |
3734 | } |
3735 | |
3736 | static SDValue |
3737 | performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, |
3738 | TargetLowering::DAGCombinerInfo &DCI, |
3739 | const LoongArchSubtarget &Subtarget) { |
3740 | SDLoc DL(N); |
3741 | switch (N->getConstantOperandVal(Num: 0)) { |
3742 | default: |
3743 | break; |
3744 | case Intrinsic::loongarch_lsx_vadd_b: |
3745 | case Intrinsic::loongarch_lsx_vadd_h: |
3746 | case Intrinsic::loongarch_lsx_vadd_w: |
3747 | case Intrinsic::loongarch_lsx_vadd_d: |
3748 | case Intrinsic::loongarch_lasx_xvadd_b: |
3749 | case Intrinsic::loongarch_lasx_xvadd_h: |
3750 | case Intrinsic::loongarch_lasx_xvadd_w: |
3751 | case Intrinsic::loongarch_lasx_xvadd_d: |
3752 | return DAG.getNode(Opcode: ISD::ADD, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3753 | N2: N->getOperand(Num: 2)); |
3754 | case Intrinsic::loongarch_lsx_vaddi_bu: |
3755 | case Intrinsic::loongarch_lsx_vaddi_hu: |
3756 | case Intrinsic::loongarch_lsx_vaddi_wu: |
3757 | case Intrinsic::loongarch_lsx_vaddi_du: |
3758 | case Intrinsic::loongarch_lasx_xvaddi_bu: |
3759 | case Intrinsic::loongarch_lasx_xvaddi_hu: |
3760 | case Intrinsic::loongarch_lasx_xvaddi_wu: |
3761 | case Intrinsic::loongarch_lasx_xvaddi_du: |
3762 | return DAG.getNode(Opcode: ISD::ADD, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3763 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
3764 | case Intrinsic::loongarch_lsx_vsub_b: |
3765 | case Intrinsic::loongarch_lsx_vsub_h: |
3766 | case Intrinsic::loongarch_lsx_vsub_w: |
3767 | case Intrinsic::loongarch_lsx_vsub_d: |
3768 | case Intrinsic::loongarch_lasx_xvsub_b: |
3769 | case Intrinsic::loongarch_lasx_xvsub_h: |
3770 | case Intrinsic::loongarch_lasx_xvsub_w: |
3771 | case Intrinsic::loongarch_lasx_xvsub_d: |
3772 | return DAG.getNode(Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3773 | N2: N->getOperand(Num: 2)); |
3774 | case Intrinsic::loongarch_lsx_vsubi_bu: |
3775 | case Intrinsic::loongarch_lsx_vsubi_hu: |
3776 | case Intrinsic::loongarch_lsx_vsubi_wu: |
3777 | case Intrinsic::loongarch_lsx_vsubi_du: |
3778 | case Intrinsic::loongarch_lasx_xvsubi_bu: |
3779 | case Intrinsic::loongarch_lasx_xvsubi_hu: |
3780 | case Intrinsic::loongarch_lasx_xvsubi_wu: |
3781 | case Intrinsic::loongarch_lasx_xvsubi_du: |
3782 | return DAG.getNode(Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3783 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
3784 | case Intrinsic::loongarch_lsx_vneg_b: |
3785 | case Intrinsic::loongarch_lsx_vneg_h: |
3786 | case Intrinsic::loongarch_lsx_vneg_w: |
3787 | case Intrinsic::loongarch_lsx_vneg_d: |
3788 | case Intrinsic::loongarch_lasx_xvneg_b: |
3789 | case Intrinsic::loongarch_lasx_xvneg_h: |
3790 | case Intrinsic::loongarch_lasx_xvneg_w: |
3791 | case Intrinsic::loongarch_lasx_xvneg_d: |
3792 | return DAG.getNode( |
3793 | Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: 0), |
3794 | N1: DAG.getConstant( |
3795 | Val: APInt(N->getValueType(ResNo: 0).getScalarType().getSizeInBits(), 0, |
3796 | /*isSigned=*/true), |
3797 | DL: SDLoc(N), VT: N->getValueType(ResNo: 0)), |
3798 | N2: N->getOperand(Num: 1)); |
3799 | case Intrinsic::loongarch_lsx_vmax_b: |
3800 | case Intrinsic::loongarch_lsx_vmax_h: |
3801 | case Intrinsic::loongarch_lsx_vmax_w: |
3802 | case Intrinsic::loongarch_lsx_vmax_d: |
3803 | case Intrinsic::loongarch_lasx_xvmax_b: |
3804 | case Intrinsic::loongarch_lasx_xvmax_h: |
3805 | case Intrinsic::loongarch_lasx_xvmax_w: |
3806 | case Intrinsic::loongarch_lasx_xvmax_d: |
3807 | return DAG.getNode(Opcode: ISD::SMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3808 | N2: N->getOperand(Num: 2)); |
3809 | case Intrinsic::loongarch_lsx_vmax_bu: |
3810 | case Intrinsic::loongarch_lsx_vmax_hu: |
3811 | case Intrinsic::loongarch_lsx_vmax_wu: |
3812 | case Intrinsic::loongarch_lsx_vmax_du: |
3813 | case Intrinsic::loongarch_lasx_xvmax_bu: |
3814 | case Intrinsic::loongarch_lasx_xvmax_hu: |
3815 | case Intrinsic::loongarch_lasx_xvmax_wu: |
3816 | case Intrinsic::loongarch_lasx_xvmax_du: |
3817 | return DAG.getNode(Opcode: ISD::UMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3818 | N2: N->getOperand(Num: 2)); |
3819 | case Intrinsic::loongarch_lsx_vmaxi_b: |
3820 | case Intrinsic::loongarch_lsx_vmaxi_h: |
3821 | case Intrinsic::loongarch_lsx_vmaxi_w: |
3822 | case Intrinsic::loongarch_lsx_vmaxi_d: |
3823 | case Intrinsic::loongarch_lasx_xvmaxi_b: |
3824 | case Intrinsic::loongarch_lasx_xvmaxi_h: |
3825 | case Intrinsic::loongarch_lasx_xvmaxi_w: |
3826 | case Intrinsic::loongarch_lasx_xvmaxi_d: |
3827 | return DAG.getNode(Opcode: ISD::SMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3828 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG, /*IsSigned=*/true)); |
3829 | case Intrinsic::loongarch_lsx_vmaxi_bu: |
3830 | case Intrinsic::loongarch_lsx_vmaxi_hu: |
3831 | case Intrinsic::loongarch_lsx_vmaxi_wu: |
3832 | case Intrinsic::loongarch_lsx_vmaxi_du: |
3833 | case Intrinsic::loongarch_lasx_xvmaxi_bu: |
3834 | case Intrinsic::loongarch_lasx_xvmaxi_hu: |
3835 | case Intrinsic::loongarch_lasx_xvmaxi_wu: |
3836 | case Intrinsic::loongarch_lasx_xvmaxi_du: |
3837 | return DAG.getNode(Opcode: ISD::UMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3838 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
3839 | case Intrinsic::loongarch_lsx_vmin_b: |
3840 | case Intrinsic::loongarch_lsx_vmin_h: |
3841 | case Intrinsic::loongarch_lsx_vmin_w: |
3842 | case Intrinsic::loongarch_lsx_vmin_d: |
3843 | case Intrinsic::loongarch_lasx_xvmin_b: |
3844 | case Intrinsic::loongarch_lasx_xvmin_h: |
3845 | case Intrinsic::loongarch_lasx_xvmin_w: |
3846 | case Intrinsic::loongarch_lasx_xvmin_d: |
3847 | return DAG.getNode(Opcode: ISD::SMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3848 | N2: N->getOperand(Num: 2)); |
3849 | case Intrinsic::loongarch_lsx_vmin_bu: |
3850 | case Intrinsic::loongarch_lsx_vmin_hu: |
3851 | case Intrinsic::loongarch_lsx_vmin_wu: |
3852 | case Intrinsic::loongarch_lsx_vmin_du: |
3853 | case Intrinsic::loongarch_lasx_xvmin_bu: |
3854 | case Intrinsic::loongarch_lasx_xvmin_hu: |
3855 | case Intrinsic::loongarch_lasx_xvmin_wu: |
3856 | case Intrinsic::loongarch_lasx_xvmin_du: |
3857 | return DAG.getNode(Opcode: ISD::UMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3858 | N2: N->getOperand(Num: 2)); |
3859 | case Intrinsic::loongarch_lsx_vmini_b: |
3860 | case Intrinsic::loongarch_lsx_vmini_h: |
3861 | case Intrinsic::loongarch_lsx_vmini_w: |
3862 | case Intrinsic::loongarch_lsx_vmini_d: |
3863 | case Intrinsic::loongarch_lasx_xvmini_b: |
3864 | case Intrinsic::loongarch_lasx_xvmini_h: |
3865 | case Intrinsic::loongarch_lasx_xvmini_w: |
3866 | case Intrinsic::loongarch_lasx_xvmini_d: |
3867 | return DAG.getNode(Opcode: ISD::SMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3868 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG, /*IsSigned=*/true)); |
3869 | case Intrinsic::loongarch_lsx_vmini_bu: |
3870 | case Intrinsic::loongarch_lsx_vmini_hu: |
3871 | case Intrinsic::loongarch_lsx_vmini_wu: |
3872 | case Intrinsic::loongarch_lsx_vmini_du: |
3873 | case Intrinsic::loongarch_lasx_xvmini_bu: |
3874 | case Intrinsic::loongarch_lasx_xvmini_hu: |
3875 | case Intrinsic::loongarch_lasx_xvmini_wu: |
3876 | case Intrinsic::loongarch_lasx_xvmini_du: |
3877 | return DAG.getNode(Opcode: ISD::UMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3878 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
3879 | case Intrinsic::loongarch_lsx_vmul_b: |
3880 | case Intrinsic::loongarch_lsx_vmul_h: |
3881 | case Intrinsic::loongarch_lsx_vmul_w: |
3882 | case Intrinsic::loongarch_lsx_vmul_d: |
3883 | case Intrinsic::loongarch_lasx_xvmul_b: |
3884 | case Intrinsic::loongarch_lasx_xvmul_h: |
3885 | case Intrinsic::loongarch_lasx_xvmul_w: |
3886 | case Intrinsic::loongarch_lasx_xvmul_d: |
3887 | return DAG.getNode(Opcode: ISD::MUL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3888 | N2: N->getOperand(Num: 2)); |
3889 | case Intrinsic::loongarch_lsx_vmadd_b: |
3890 | case Intrinsic::loongarch_lsx_vmadd_h: |
3891 | case Intrinsic::loongarch_lsx_vmadd_w: |
3892 | case Intrinsic::loongarch_lsx_vmadd_d: |
3893 | case Intrinsic::loongarch_lasx_xvmadd_b: |
3894 | case Intrinsic::loongarch_lasx_xvmadd_h: |
3895 | case Intrinsic::loongarch_lasx_xvmadd_w: |
3896 | case Intrinsic::loongarch_lasx_xvmadd_d: { |
3897 | EVT ResTy = N->getValueType(ResNo: 0); |
3898 | return DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 1), |
3899 | N2: DAG.getNode(Opcode: ISD::MUL, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 2), |
3900 | N2: N->getOperand(Num: 3))); |
3901 | } |
3902 | case Intrinsic::loongarch_lsx_vmsub_b: |
3903 | case Intrinsic::loongarch_lsx_vmsub_h: |
3904 | case Intrinsic::loongarch_lsx_vmsub_w: |
3905 | case Intrinsic::loongarch_lsx_vmsub_d: |
3906 | case Intrinsic::loongarch_lasx_xvmsub_b: |
3907 | case Intrinsic::loongarch_lasx_xvmsub_h: |
3908 | case Intrinsic::loongarch_lasx_xvmsub_w: |
3909 | case Intrinsic::loongarch_lasx_xvmsub_d: { |
3910 | EVT ResTy = N->getValueType(ResNo: 0); |
3911 | return DAG.getNode(Opcode: ISD::SUB, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 1), |
3912 | N2: DAG.getNode(Opcode: ISD::MUL, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 2), |
3913 | N2: N->getOperand(Num: 3))); |
3914 | } |
3915 | case Intrinsic::loongarch_lsx_vdiv_b: |
3916 | case Intrinsic::loongarch_lsx_vdiv_h: |
3917 | case Intrinsic::loongarch_lsx_vdiv_w: |
3918 | case Intrinsic::loongarch_lsx_vdiv_d: |
3919 | case Intrinsic::loongarch_lasx_xvdiv_b: |
3920 | case Intrinsic::loongarch_lasx_xvdiv_h: |
3921 | case Intrinsic::loongarch_lasx_xvdiv_w: |
3922 | case Intrinsic::loongarch_lasx_xvdiv_d: |
3923 | return DAG.getNode(Opcode: ISD::SDIV, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3924 | N2: N->getOperand(Num: 2)); |
3925 | case Intrinsic::loongarch_lsx_vdiv_bu: |
3926 | case Intrinsic::loongarch_lsx_vdiv_hu: |
3927 | case Intrinsic::loongarch_lsx_vdiv_wu: |
3928 | case Intrinsic::loongarch_lsx_vdiv_du: |
3929 | case Intrinsic::loongarch_lasx_xvdiv_bu: |
3930 | case Intrinsic::loongarch_lasx_xvdiv_hu: |
3931 | case Intrinsic::loongarch_lasx_xvdiv_wu: |
3932 | case Intrinsic::loongarch_lasx_xvdiv_du: |
3933 | return DAG.getNode(Opcode: ISD::UDIV, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3934 | N2: N->getOperand(Num: 2)); |
3935 | case Intrinsic::loongarch_lsx_vmod_b: |
3936 | case Intrinsic::loongarch_lsx_vmod_h: |
3937 | case Intrinsic::loongarch_lsx_vmod_w: |
3938 | case Intrinsic::loongarch_lsx_vmod_d: |
3939 | case Intrinsic::loongarch_lasx_xvmod_b: |
3940 | case Intrinsic::loongarch_lasx_xvmod_h: |
3941 | case Intrinsic::loongarch_lasx_xvmod_w: |
3942 | case Intrinsic::loongarch_lasx_xvmod_d: |
3943 | return DAG.getNode(Opcode: ISD::SREM, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3944 | N2: N->getOperand(Num: 2)); |
3945 | case Intrinsic::loongarch_lsx_vmod_bu: |
3946 | case Intrinsic::loongarch_lsx_vmod_hu: |
3947 | case Intrinsic::loongarch_lsx_vmod_wu: |
3948 | case Intrinsic::loongarch_lsx_vmod_du: |
3949 | case Intrinsic::loongarch_lasx_xvmod_bu: |
3950 | case Intrinsic::loongarch_lasx_xvmod_hu: |
3951 | case Intrinsic::loongarch_lasx_xvmod_wu: |
3952 | case Intrinsic::loongarch_lasx_xvmod_du: |
3953 | return DAG.getNode(Opcode: ISD::UREM, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3954 | N2: N->getOperand(Num: 2)); |
3955 | case Intrinsic::loongarch_lsx_vand_v: |
3956 | case Intrinsic::loongarch_lasx_xvand_v: |
3957 | return DAG.getNode(Opcode: ISD::AND, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3958 | N2: N->getOperand(Num: 2)); |
3959 | case Intrinsic::loongarch_lsx_vor_v: |
3960 | case Intrinsic::loongarch_lasx_xvor_v: |
3961 | return DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3962 | N2: N->getOperand(Num: 2)); |
3963 | case Intrinsic::loongarch_lsx_vxor_v: |
3964 | case Intrinsic::loongarch_lasx_xvxor_v: |
3965 | return DAG.getNode(Opcode: ISD::XOR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3966 | N2: N->getOperand(Num: 2)); |
3967 | case Intrinsic::loongarch_lsx_vnor_v: |
3968 | case Intrinsic::loongarch_lasx_xvnor_v: { |
3969 | SDValue Res = DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3970 | N2: N->getOperand(Num: 2)); |
3971 | return DAG.getNOT(DL, Val: Res, VT: Res->getValueType(ResNo: 0)); |
3972 | } |
3973 | case Intrinsic::loongarch_lsx_vandi_b: |
3974 | case Intrinsic::loongarch_lasx_xvandi_b: |
3975 | return DAG.getNode(Opcode: ISD::AND, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3976 | N2: lowerVectorSplatImm<8>(Node: N, ImmOp: 2, DAG)); |
3977 | case Intrinsic::loongarch_lsx_vori_b: |
3978 | case Intrinsic::loongarch_lasx_xvori_b: |
3979 | return DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3980 | N2: lowerVectorSplatImm<8>(Node: N, ImmOp: 2, DAG)); |
3981 | case Intrinsic::loongarch_lsx_vxori_b: |
3982 | case Intrinsic::loongarch_lasx_xvxori_b: |
3983 | return DAG.getNode(Opcode: ISD::XOR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3984 | N2: lowerVectorSplatImm<8>(Node: N, ImmOp: 2, DAG)); |
3985 | case Intrinsic::loongarch_lsx_vsll_b: |
3986 | case Intrinsic::loongarch_lsx_vsll_h: |
3987 | case Intrinsic::loongarch_lsx_vsll_w: |
3988 | case Intrinsic::loongarch_lsx_vsll_d: |
3989 | case Intrinsic::loongarch_lasx_xvsll_b: |
3990 | case Intrinsic::loongarch_lasx_xvsll_h: |
3991 | case Intrinsic::loongarch_lasx_xvsll_w: |
3992 | case Intrinsic::loongarch_lasx_xvsll_d: |
3993 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3994 | N2: truncateVecElts(Node: N, DAG)); |
3995 | case Intrinsic::loongarch_lsx_vslli_b: |
3996 | case Intrinsic::loongarch_lasx_xvslli_b: |
3997 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3998 | N2: lowerVectorSplatImm<3>(Node: N, ImmOp: 2, DAG)); |
3999 | case Intrinsic::loongarch_lsx_vslli_h: |
4000 | case Intrinsic::loongarch_lasx_xvslli_h: |
4001 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4002 | N2: lowerVectorSplatImm<4>(Node: N, ImmOp: 2, DAG)); |
4003 | case Intrinsic::loongarch_lsx_vslli_w: |
4004 | case Intrinsic::loongarch_lasx_xvslli_w: |
4005 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4006 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
4007 | case Intrinsic::loongarch_lsx_vslli_d: |
4008 | case Intrinsic::loongarch_lasx_xvslli_d: |
4009 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4010 | N2: lowerVectorSplatImm<6>(Node: N, ImmOp: 2, DAG)); |
4011 | case Intrinsic::loongarch_lsx_vsrl_b: |
4012 | case Intrinsic::loongarch_lsx_vsrl_h: |
4013 | case Intrinsic::loongarch_lsx_vsrl_w: |
4014 | case Intrinsic::loongarch_lsx_vsrl_d: |
4015 | case Intrinsic::loongarch_lasx_xvsrl_b: |
4016 | case Intrinsic::loongarch_lasx_xvsrl_h: |
4017 | case Intrinsic::loongarch_lasx_xvsrl_w: |
4018 | case Intrinsic::loongarch_lasx_xvsrl_d: |
4019 | return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4020 | N2: truncateVecElts(Node: N, DAG)); |
4021 | case Intrinsic::loongarch_lsx_vsrli_b: |
4022 | case Intrinsic::loongarch_lasx_xvsrli_b: |
4023 | return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4024 | N2: lowerVectorSplatImm<3>(Node: N, ImmOp: 2, DAG)); |
4025 | case Intrinsic::loongarch_lsx_vsrli_h: |
4026 | case Intrinsic::loongarch_lasx_xvsrli_h: |
4027 | return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4028 | N2: lowerVectorSplatImm<4>(Node: N, ImmOp: 2, DAG)); |
4029 | case Intrinsic::loongarch_lsx_vsrli_w: |
4030 | case Intrinsic::loongarch_lasx_xvsrli_w: |
4031 | return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4032 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
4033 | case Intrinsic::loongarch_lsx_vsrli_d: |
4034 | case Intrinsic::loongarch_lasx_xvsrli_d: |
4035 | return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4036 | N2: lowerVectorSplatImm<6>(Node: N, ImmOp: 2, DAG)); |
4037 | case Intrinsic::loongarch_lsx_vsra_b: |
4038 | case Intrinsic::loongarch_lsx_vsra_h: |
4039 | case Intrinsic::loongarch_lsx_vsra_w: |
4040 | case Intrinsic::loongarch_lsx_vsra_d: |
4041 | case Intrinsic::loongarch_lasx_xvsra_b: |
4042 | case Intrinsic::loongarch_lasx_xvsra_h: |
4043 | case Intrinsic::loongarch_lasx_xvsra_w: |
4044 | case Intrinsic::loongarch_lasx_xvsra_d: |
4045 | return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4046 | N2: truncateVecElts(Node: N, DAG)); |
4047 | case Intrinsic::loongarch_lsx_vsrai_b: |
4048 | case Intrinsic::loongarch_lasx_xvsrai_b: |
4049 | return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4050 | N2: lowerVectorSplatImm<3>(Node: N, ImmOp: 2, DAG)); |
4051 | case Intrinsic::loongarch_lsx_vsrai_h: |
4052 | case Intrinsic::loongarch_lasx_xvsrai_h: |
4053 | return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4054 | N2: lowerVectorSplatImm<4>(Node: N, ImmOp: 2, DAG)); |
4055 | case Intrinsic::loongarch_lsx_vsrai_w: |
4056 | case Intrinsic::loongarch_lasx_xvsrai_w: |
4057 | return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4058 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
4059 | case Intrinsic::loongarch_lsx_vsrai_d: |
4060 | case Intrinsic::loongarch_lasx_xvsrai_d: |
4061 | return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4062 | N2: lowerVectorSplatImm<6>(Node: N, ImmOp: 2, DAG)); |
4063 | case Intrinsic::loongarch_lsx_vclz_b: |
4064 | case Intrinsic::loongarch_lsx_vclz_h: |
4065 | case Intrinsic::loongarch_lsx_vclz_w: |
4066 | case Intrinsic::loongarch_lsx_vclz_d: |
4067 | case Intrinsic::loongarch_lasx_xvclz_b: |
4068 | case Intrinsic::loongarch_lasx_xvclz_h: |
4069 | case Intrinsic::loongarch_lasx_xvclz_w: |
4070 | case Intrinsic::loongarch_lasx_xvclz_d: |
4071 | return DAG.getNode(Opcode: ISD::CTLZ, DL, VT: N->getValueType(ResNo: 0), Operand: N->getOperand(Num: 1)); |
4072 | case Intrinsic::loongarch_lsx_vpcnt_b: |
4073 | case Intrinsic::loongarch_lsx_vpcnt_h: |
4074 | case Intrinsic::loongarch_lsx_vpcnt_w: |
4075 | case Intrinsic::loongarch_lsx_vpcnt_d: |
4076 | case Intrinsic::loongarch_lasx_xvpcnt_b: |
4077 | case Intrinsic::loongarch_lasx_xvpcnt_h: |
4078 | case Intrinsic::loongarch_lasx_xvpcnt_w: |
4079 | case Intrinsic::loongarch_lasx_xvpcnt_d: |
4080 | return DAG.getNode(Opcode: ISD::CTPOP, DL, VT: N->getValueType(ResNo: 0), Operand: N->getOperand(Num: 1)); |
4081 | case Intrinsic::loongarch_lsx_vbitclr_b: |
4082 | case Intrinsic::loongarch_lsx_vbitclr_h: |
4083 | case Intrinsic::loongarch_lsx_vbitclr_w: |
4084 | case Intrinsic::loongarch_lsx_vbitclr_d: |
4085 | case Intrinsic::loongarch_lasx_xvbitclr_b: |
4086 | case Intrinsic::loongarch_lasx_xvbitclr_h: |
4087 | case Intrinsic::loongarch_lasx_xvbitclr_w: |
4088 | case Intrinsic::loongarch_lasx_xvbitclr_d: |
4089 | return lowerVectorBitClear(Node: N, DAG); |
4090 | case Intrinsic::loongarch_lsx_vbitclri_b: |
4091 | case Intrinsic::loongarch_lasx_xvbitclri_b: |
4092 | return lowerVectorBitClearImm<3>(Node: N, DAG); |
4093 | case Intrinsic::loongarch_lsx_vbitclri_h: |
4094 | case Intrinsic::loongarch_lasx_xvbitclri_h: |
4095 | return lowerVectorBitClearImm<4>(Node: N, DAG); |
4096 | case Intrinsic::loongarch_lsx_vbitclri_w: |
4097 | case Intrinsic::loongarch_lasx_xvbitclri_w: |
4098 | return lowerVectorBitClearImm<5>(Node: N, DAG); |
4099 | case Intrinsic::loongarch_lsx_vbitclri_d: |
4100 | case Intrinsic::loongarch_lasx_xvbitclri_d: |
4101 | return lowerVectorBitClearImm<6>(Node: N, DAG); |
4102 | case Intrinsic::loongarch_lsx_vbitset_b: |
4103 | case Intrinsic::loongarch_lsx_vbitset_h: |
4104 | case Intrinsic::loongarch_lsx_vbitset_w: |
4105 | case Intrinsic::loongarch_lsx_vbitset_d: |
4106 | case Intrinsic::loongarch_lasx_xvbitset_b: |
4107 | case Intrinsic::loongarch_lasx_xvbitset_h: |
4108 | case Intrinsic::loongarch_lasx_xvbitset_w: |
4109 | case Intrinsic::loongarch_lasx_xvbitset_d: { |
4110 | EVT VecTy = N->getValueType(ResNo: 0); |
4111 | SDValue One = DAG.getConstant(Val: 1, DL, VT: VecTy); |
4112 | return DAG.getNode( |
4113 | Opcode: ISD::OR, DL, VT: VecTy, N1: N->getOperand(Num: 1), |
4114 | N2: DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: One, N2: truncateVecElts(Node: N, DAG))); |
4115 | } |
4116 | case Intrinsic::loongarch_lsx_vbitseti_b: |
4117 | case Intrinsic::loongarch_lasx_xvbitseti_b: |
4118 | return lowerVectorBitSetImm<3>(Node: N, DAG); |
4119 | case Intrinsic::loongarch_lsx_vbitseti_h: |
4120 | case Intrinsic::loongarch_lasx_xvbitseti_h: |
4121 | return lowerVectorBitSetImm<4>(Node: N, DAG); |
4122 | case Intrinsic::loongarch_lsx_vbitseti_w: |
4123 | case Intrinsic::loongarch_lasx_xvbitseti_w: |
4124 | return lowerVectorBitSetImm<5>(Node: N, DAG); |
4125 | case Intrinsic::loongarch_lsx_vbitseti_d: |
4126 | case Intrinsic::loongarch_lasx_xvbitseti_d: |
4127 | return lowerVectorBitSetImm<6>(Node: N, DAG); |
4128 | case Intrinsic::loongarch_lsx_vbitrev_b: |
4129 | case Intrinsic::loongarch_lsx_vbitrev_h: |
4130 | case Intrinsic::loongarch_lsx_vbitrev_w: |
4131 | case Intrinsic::loongarch_lsx_vbitrev_d: |
4132 | case Intrinsic::loongarch_lasx_xvbitrev_b: |
4133 | case Intrinsic::loongarch_lasx_xvbitrev_h: |
4134 | case Intrinsic::loongarch_lasx_xvbitrev_w: |
4135 | case Intrinsic::loongarch_lasx_xvbitrev_d: { |
4136 | EVT VecTy = N->getValueType(ResNo: 0); |
4137 | SDValue One = DAG.getConstant(Val: 1, DL, VT: VecTy); |
4138 | return DAG.getNode( |
4139 | Opcode: ISD::XOR, DL, VT: VecTy, N1: N->getOperand(Num: 1), |
4140 | N2: DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: One, N2: truncateVecElts(Node: N, DAG))); |
4141 | } |
4142 | case Intrinsic::loongarch_lsx_vbitrevi_b: |
4143 | case Intrinsic::loongarch_lasx_xvbitrevi_b: |
4144 | return lowerVectorBitRevImm<3>(Node: N, DAG); |
4145 | case Intrinsic::loongarch_lsx_vbitrevi_h: |
4146 | case Intrinsic::loongarch_lasx_xvbitrevi_h: |
4147 | return lowerVectorBitRevImm<4>(Node: N, DAG); |
4148 | case Intrinsic::loongarch_lsx_vbitrevi_w: |
4149 | case Intrinsic::loongarch_lasx_xvbitrevi_w: |
4150 | return lowerVectorBitRevImm<5>(Node: N, DAG); |
4151 | case Intrinsic::loongarch_lsx_vbitrevi_d: |
4152 | case Intrinsic::loongarch_lasx_xvbitrevi_d: |
4153 | return lowerVectorBitRevImm<6>(Node: N, DAG); |
4154 | case Intrinsic::loongarch_lsx_vfadd_s: |
4155 | case Intrinsic::loongarch_lsx_vfadd_d: |
4156 | case Intrinsic::loongarch_lasx_xvfadd_s: |
4157 | case Intrinsic::loongarch_lasx_xvfadd_d: |
4158 | return DAG.getNode(Opcode: ISD::FADD, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4159 | N2: N->getOperand(Num: 2)); |
4160 | case Intrinsic::loongarch_lsx_vfsub_s: |
4161 | case Intrinsic::loongarch_lsx_vfsub_d: |
4162 | case Intrinsic::loongarch_lasx_xvfsub_s: |
4163 | case Intrinsic::loongarch_lasx_xvfsub_d: |
4164 | return DAG.getNode(Opcode: ISD::FSUB, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4165 | N2: N->getOperand(Num: 2)); |
4166 | case Intrinsic::loongarch_lsx_vfmul_s: |
4167 | case Intrinsic::loongarch_lsx_vfmul_d: |
4168 | case Intrinsic::loongarch_lasx_xvfmul_s: |
4169 | case Intrinsic::loongarch_lasx_xvfmul_d: |
4170 | return DAG.getNode(Opcode: ISD::FMUL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4171 | N2: N->getOperand(Num: 2)); |
4172 | case Intrinsic::loongarch_lsx_vfdiv_s: |
4173 | case Intrinsic::loongarch_lsx_vfdiv_d: |
4174 | case Intrinsic::loongarch_lasx_xvfdiv_s: |
4175 | case Intrinsic::loongarch_lasx_xvfdiv_d: |
4176 | return DAG.getNode(Opcode: ISD::FDIV, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4177 | N2: N->getOperand(Num: 2)); |
4178 | case Intrinsic::loongarch_lsx_vfmadd_s: |
4179 | case Intrinsic::loongarch_lsx_vfmadd_d: |
4180 | case Intrinsic::loongarch_lasx_xvfmadd_s: |
4181 | case Intrinsic::loongarch_lasx_xvfmadd_d: |
4182 | return DAG.getNode(Opcode: ISD::FMA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4183 | N2: N->getOperand(Num: 2), N3: N->getOperand(Num: 3)); |
4184 | case Intrinsic::loongarch_lsx_vinsgr2vr_b: |
4185 | return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), |
4186 | N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2), |
4187 | N3: legalizeIntrinsicImmArg<4>(Node: N, ImmOp: 3, DAG, Subtarget)); |
4188 | case Intrinsic::loongarch_lsx_vinsgr2vr_h: |
4189 | case Intrinsic::loongarch_lasx_xvinsgr2vr_w: |
4190 | return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), |
4191 | N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2), |
4192 | N3: legalizeIntrinsicImmArg<3>(Node: N, ImmOp: 3, DAG, Subtarget)); |
4193 | case Intrinsic::loongarch_lsx_vinsgr2vr_w: |
4194 | case Intrinsic::loongarch_lasx_xvinsgr2vr_d: |
4195 | return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), |
4196 | N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2), |
4197 | N3: legalizeIntrinsicImmArg<2>(Node: N, ImmOp: 3, DAG, Subtarget)); |
4198 | case Intrinsic::loongarch_lsx_vinsgr2vr_d: |
4199 | return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), |
4200 | N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2), |
4201 | N3: legalizeIntrinsicImmArg<1>(Node: N, ImmOp: 3, DAG, Subtarget)); |
4202 | case Intrinsic::loongarch_lsx_vreplgr2vr_b: |
4203 | case Intrinsic::loongarch_lsx_vreplgr2vr_h: |
4204 | case Intrinsic::loongarch_lsx_vreplgr2vr_w: |
4205 | case Intrinsic::loongarch_lsx_vreplgr2vr_d: |
4206 | case Intrinsic::loongarch_lasx_xvreplgr2vr_b: |
4207 | case Intrinsic::loongarch_lasx_xvreplgr2vr_h: |
4208 | case Intrinsic::loongarch_lasx_xvreplgr2vr_w: |
4209 | case Intrinsic::loongarch_lasx_xvreplgr2vr_d: { |
4210 | EVT ResTy = N->getValueType(ResNo: 0); |
4211 | SmallVector<SDValue> Ops(ResTy.getVectorNumElements(), N->getOperand(Num: 1)); |
4212 | return DAG.getBuildVector(VT: ResTy, DL, Ops); |
4213 | } |
4214 | case Intrinsic::loongarch_lsx_vreplve_b: |
4215 | case Intrinsic::loongarch_lsx_vreplve_h: |
4216 | case Intrinsic::loongarch_lsx_vreplve_w: |
4217 | case Intrinsic::loongarch_lsx_vreplve_d: |
4218 | case Intrinsic::loongarch_lasx_xvreplve_b: |
4219 | case Intrinsic::loongarch_lasx_xvreplve_h: |
4220 | case Intrinsic::loongarch_lasx_xvreplve_w: |
4221 | case Intrinsic::loongarch_lasx_xvreplve_d: |
4222 | return DAG.getNode(Opcode: LoongArchISD::VREPLVE, DL, VT: N->getValueType(ResNo: 0), |
4223 | N1: N->getOperand(Num: 1), |
4224 | N2: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getGRLenVT(), |
4225 | Operand: N->getOperand(Num: 2))); |
4226 | } |
4227 | return SDValue(); |
4228 | } |
4229 | |
4230 | SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, |
4231 | DAGCombinerInfo &DCI) const { |
4232 | SelectionDAG &DAG = DCI.DAG; |
4233 | switch (N->getOpcode()) { |
4234 | default: |
4235 | break; |
4236 | case ISD::AND: |
4237 | return performANDCombine(N, DAG, DCI, Subtarget); |
4238 | case ISD::OR: |
4239 | return performORCombine(N, DAG, DCI, Subtarget); |
4240 | case ISD::SETCC: |
4241 | return performSETCCCombine(N, DAG, DCI, Subtarget); |
4242 | case ISD::SRL: |
4243 | return performSRLCombine(N, DAG, DCI, Subtarget); |
4244 | case LoongArchISD::BITREV_W: |
4245 | return performBITREV_WCombine(N, DAG, DCI, Subtarget); |
4246 | case ISD::INTRINSIC_WO_CHAIN: |
4247 | return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget); |
4248 | } |
4249 | return SDValue(); |
4250 | } |
4251 | |
4252 | static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI, |
4253 | MachineBasicBlock *MBB) { |
4254 | if (!ZeroDivCheck) |
4255 | return MBB; |
4256 | |
4257 | // Build instructions: |
4258 | // MBB: |
4259 | // div(or mod) $dst, $dividend, $divisor |
4260 | // bnez $divisor, SinkMBB |
4261 | // BreakMBB: |
4262 | // break 7 // BRK_DIVZERO |
4263 | // SinkMBB: |
4264 | // fallthrough |
4265 | const BasicBlock *LLVM_BB = MBB->getBasicBlock(); |
4266 | MachineFunction::iterator It = ++MBB->getIterator(); |
4267 | MachineFunction *MF = MBB->getParent(); |
4268 | auto BreakMBB = MF->CreateMachineBasicBlock(BB: LLVM_BB); |
4269 | auto SinkMBB = MF->CreateMachineBasicBlock(BB: LLVM_BB); |
4270 | MF->insert(MBBI: It, MBB: BreakMBB); |
4271 | MF->insert(MBBI: It, MBB: SinkMBB); |
4272 | |
4273 | // Transfer the remainder of MBB and its successor edges to SinkMBB. |
4274 | SinkMBB->splice(Where: SinkMBB->end(), Other: MBB, From: std::next(x: MI.getIterator()), To: MBB->end()); |
4275 | SinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB); |
4276 | |
4277 | const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo(); |
4278 | DebugLoc DL = MI.getDebugLoc(); |
4279 | MachineOperand &Divisor = MI.getOperand(i: 2); |
4280 | Register DivisorReg = Divisor.getReg(); |
4281 | |
4282 | // MBB: |
4283 | BuildMI(BB: MBB, MIMD: DL, MCID: TII.get(Opcode: LoongArch::BNEZ)) |
4284 | .addReg(RegNo: DivisorReg, flags: getKillRegState(B: Divisor.isKill())) |
4285 | .addMBB(MBB: SinkMBB); |
4286 | MBB->addSuccessor(Succ: BreakMBB); |
4287 | MBB->addSuccessor(Succ: SinkMBB); |
4288 | |
4289 | // BreakMBB: |
4290 | // See linux header file arch/loongarch/include/uapi/asm/break.h for the |
4291 | // definition of BRK_DIVZERO. |
4292 | BuildMI(BB: BreakMBB, MIMD: DL, MCID: TII.get(Opcode: LoongArch::BREAK)).addImm(Val: 7 /*BRK_DIVZERO*/); |
4293 | BreakMBB->addSuccessor(Succ: SinkMBB); |
4294 | |
4295 | // Clear Divisor's kill flag. |
4296 | Divisor.setIsKill(false); |
4297 | |
4298 | return SinkMBB; |
4299 | } |
4300 | |
4301 | static MachineBasicBlock * |
4302 | emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, |
4303 | const LoongArchSubtarget &Subtarget) { |
4304 | unsigned CondOpc; |
4305 | switch (MI.getOpcode()) { |
4306 | default: |
4307 | llvm_unreachable("Unexpected opcode" ); |
4308 | case LoongArch::PseudoVBZ: |
4309 | CondOpc = LoongArch::VSETEQZ_V; |
4310 | break; |
4311 | case LoongArch::PseudoVBZ_B: |
4312 | CondOpc = LoongArch::VSETANYEQZ_B; |
4313 | break; |
4314 | case LoongArch::PseudoVBZ_H: |
4315 | CondOpc = LoongArch::VSETANYEQZ_H; |
4316 | break; |
4317 | case LoongArch::PseudoVBZ_W: |
4318 | CondOpc = LoongArch::VSETANYEQZ_W; |
4319 | break; |
4320 | case LoongArch::PseudoVBZ_D: |
4321 | CondOpc = LoongArch::VSETANYEQZ_D; |
4322 | break; |
4323 | case LoongArch::PseudoVBNZ: |
4324 | CondOpc = LoongArch::VSETNEZ_V; |
4325 | break; |
4326 | case LoongArch::PseudoVBNZ_B: |
4327 | CondOpc = LoongArch::VSETALLNEZ_B; |
4328 | break; |
4329 | case LoongArch::PseudoVBNZ_H: |
4330 | CondOpc = LoongArch::VSETALLNEZ_H; |
4331 | break; |
4332 | case LoongArch::PseudoVBNZ_W: |
4333 | CondOpc = LoongArch::VSETALLNEZ_W; |
4334 | break; |
4335 | case LoongArch::PseudoVBNZ_D: |
4336 | CondOpc = LoongArch::VSETALLNEZ_D; |
4337 | break; |
4338 | case LoongArch::PseudoXVBZ: |
4339 | CondOpc = LoongArch::XVSETEQZ_V; |
4340 | break; |
4341 | case LoongArch::PseudoXVBZ_B: |
4342 | CondOpc = LoongArch::XVSETANYEQZ_B; |
4343 | break; |
4344 | case LoongArch::PseudoXVBZ_H: |
4345 | CondOpc = LoongArch::XVSETANYEQZ_H; |
4346 | break; |
4347 | case LoongArch::PseudoXVBZ_W: |
4348 | CondOpc = LoongArch::XVSETANYEQZ_W; |
4349 | break; |
4350 | case LoongArch::PseudoXVBZ_D: |
4351 | CondOpc = LoongArch::XVSETANYEQZ_D; |
4352 | break; |
4353 | case LoongArch::PseudoXVBNZ: |
4354 | CondOpc = LoongArch::XVSETNEZ_V; |
4355 | break; |
4356 | case LoongArch::PseudoXVBNZ_B: |
4357 | CondOpc = LoongArch::XVSETALLNEZ_B; |
4358 | break; |
4359 | case LoongArch::PseudoXVBNZ_H: |
4360 | CondOpc = LoongArch::XVSETALLNEZ_H; |
4361 | break; |
4362 | case LoongArch::PseudoXVBNZ_W: |
4363 | CondOpc = LoongArch::XVSETALLNEZ_W; |
4364 | break; |
4365 | case LoongArch::PseudoXVBNZ_D: |
4366 | CondOpc = LoongArch::XVSETALLNEZ_D; |
4367 | break; |
4368 | } |
4369 | |
4370 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
4371 | const BasicBlock *LLVM_BB = BB->getBasicBlock(); |
4372 | DebugLoc DL = MI.getDebugLoc(); |
4373 | MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); |
4374 | MachineFunction::iterator It = ++BB->getIterator(); |
4375 | |
4376 | MachineFunction *F = BB->getParent(); |
4377 | MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(BB: LLVM_BB); |
4378 | MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(BB: LLVM_BB); |
4379 | MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(BB: LLVM_BB); |
4380 | |
4381 | F->insert(MBBI: It, MBB: FalseBB); |
4382 | F->insert(MBBI: It, MBB: TrueBB); |
4383 | F->insert(MBBI: It, MBB: SinkBB); |
4384 | |
4385 | // Transfer the remainder of MBB and its successor edges to Sink. |
4386 | SinkBB->splice(Where: SinkBB->end(), Other: BB, From: std::next(x: MI.getIterator()), To: BB->end()); |
4387 | SinkBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB); |
4388 | |
4389 | // Insert the real instruction to BB. |
4390 | Register FCC = MRI.createVirtualRegister(RegClass: &LoongArch::CFRRegClass); |
4391 | BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: CondOpc), DestReg: FCC).addReg(RegNo: MI.getOperand(i: 1).getReg()); |
4392 | |
4393 | // Insert branch. |
4394 | BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::BCNEZ)).addReg(RegNo: FCC).addMBB(MBB: TrueBB); |
4395 | BB->addSuccessor(Succ: FalseBB); |
4396 | BB->addSuccessor(Succ: TrueBB); |
4397 | |
4398 | // FalseBB. |
4399 | Register RD1 = MRI.createVirtualRegister(RegClass: &LoongArch::GPRRegClass); |
4400 | BuildMI(BB: FalseBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::ADDI_W), DestReg: RD1) |
4401 | .addReg(RegNo: LoongArch::R0) |
4402 | .addImm(Val: 0); |
4403 | BuildMI(BB: FalseBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::PseudoBR)).addMBB(MBB: SinkBB); |
4404 | FalseBB->addSuccessor(Succ: SinkBB); |
4405 | |
4406 | // TrueBB. |
4407 | Register RD2 = MRI.createVirtualRegister(RegClass: &LoongArch::GPRRegClass); |
4408 | BuildMI(BB: TrueBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::ADDI_W), DestReg: RD2) |
4409 | .addReg(RegNo: LoongArch::R0) |
4410 | .addImm(Val: 1); |
4411 | TrueBB->addSuccessor(Succ: SinkBB); |
4412 | |
4413 | // SinkBB: merge the results. |
4414 | BuildMI(BB&: *SinkBB, I: SinkBB->begin(), MIMD: DL, MCID: TII->get(Opcode: LoongArch::PHI), |
4415 | DestReg: MI.getOperand(i: 0).getReg()) |
4416 | .addReg(RegNo: RD1) |
4417 | .addMBB(MBB: FalseBB) |
4418 | .addReg(RegNo: RD2) |
4419 | .addMBB(MBB: TrueBB); |
4420 | |
4421 | // The pseudo instruction is gone now. |
4422 | MI.eraseFromParent(); |
4423 | return SinkBB; |
4424 | } |
4425 | |
4426 | static MachineBasicBlock * |
4427 | emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, |
4428 | const LoongArchSubtarget &Subtarget) { |
4429 | unsigned InsOp; |
4430 | unsigned HalfSize; |
4431 | switch (MI.getOpcode()) { |
4432 | default: |
4433 | llvm_unreachable("Unexpected opcode" ); |
4434 | case LoongArch::PseudoXVINSGR2VR_B: |
4435 | HalfSize = 16; |
4436 | InsOp = LoongArch::VINSGR2VR_B; |
4437 | break; |
4438 | case LoongArch::PseudoXVINSGR2VR_H: |
4439 | HalfSize = 8; |
4440 | InsOp = LoongArch::VINSGR2VR_H; |
4441 | break; |
4442 | } |
4443 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
4444 | const TargetRegisterClass *RC = &LoongArch::LASX256RegClass; |
4445 | const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass; |
4446 | DebugLoc DL = MI.getDebugLoc(); |
4447 | MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); |
4448 | // XDst = vector_insert XSrc, Elt, Idx |
4449 | Register XDst = MI.getOperand(i: 0).getReg(); |
4450 | Register XSrc = MI.getOperand(i: 1).getReg(); |
4451 | Register Elt = MI.getOperand(i: 2).getReg(); |
4452 | unsigned Idx = MI.getOperand(i: 3).getImm(); |
4453 | |
4454 | Register ScratchReg1 = XSrc; |
4455 | if (Idx >= HalfSize) { |
4456 | ScratchReg1 = MRI.createVirtualRegister(RegClass: RC); |
4457 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::XVPERMI_Q), DestReg: ScratchReg1) |
4458 | .addReg(RegNo: XSrc) |
4459 | .addReg(RegNo: XSrc) |
4460 | .addImm(Val: 1); |
4461 | } |
4462 | |
4463 | Register ScratchSubReg1 = MRI.createVirtualRegister(RegClass: SubRC); |
4464 | Register ScratchSubReg2 = MRI.createVirtualRegister(RegClass: SubRC); |
4465 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::COPY), DestReg: ScratchSubReg1) |
4466 | .addReg(RegNo: ScratchReg1, flags: 0, SubReg: LoongArch::sub_128); |
4467 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: InsOp), DestReg: ScratchSubReg2) |
4468 | .addReg(RegNo: ScratchSubReg1) |
4469 | .addReg(RegNo: Elt) |
4470 | .addImm(Val: Idx >= HalfSize ? Idx - HalfSize : Idx); |
4471 | |
4472 | Register ScratchReg2 = XDst; |
4473 | if (Idx >= HalfSize) |
4474 | ScratchReg2 = MRI.createVirtualRegister(RegClass: RC); |
4475 | |
4476 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::SUBREG_TO_REG), DestReg: ScratchReg2) |
4477 | .addImm(Val: 0) |
4478 | .addReg(RegNo: ScratchSubReg2) |
4479 | .addImm(Val: LoongArch::sub_128); |
4480 | |
4481 | if (Idx >= HalfSize) |
4482 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::XVPERMI_Q), DestReg: XDst) |
4483 | .addReg(RegNo: XSrc) |
4484 | .addReg(RegNo: ScratchReg2) |
4485 | .addImm(Val: 2); |
4486 | |
4487 | MI.eraseFromParent(); |
4488 | return BB; |
4489 | } |
4490 | |
4491 | MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( |
4492 | MachineInstr &MI, MachineBasicBlock *BB) const { |
4493 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
4494 | DebugLoc DL = MI.getDebugLoc(); |
4495 | |
4496 | switch (MI.getOpcode()) { |
4497 | default: |
4498 | llvm_unreachable("Unexpected instr type to insert" ); |
4499 | case LoongArch::DIV_W: |
4500 | case LoongArch::DIV_WU: |
4501 | case LoongArch::MOD_W: |
4502 | case LoongArch::MOD_WU: |
4503 | case LoongArch::DIV_D: |
4504 | case LoongArch::DIV_DU: |
4505 | case LoongArch::MOD_D: |
4506 | case LoongArch::MOD_DU: |
4507 | return insertDivByZeroTrap(MI, MBB: BB); |
4508 | break; |
4509 | case LoongArch::WRFCSR: { |
4510 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::MOVGR2FCSR), |
4511 | DestReg: LoongArch::FCSR0 + MI.getOperand(i: 0).getImm()) |
4512 | .addReg(RegNo: MI.getOperand(i: 1).getReg()); |
4513 | MI.eraseFromParent(); |
4514 | return BB; |
4515 | } |
4516 | case LoongArch::RDFCSR: { |
4517 | MachineInstr *ReadFCSR = |
4518 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::MOVFCSR2GR), |
4519 | DestReg: MI.getOperand(i: 0).getReg()) |
4520 | .addReg(RegNo: LoongArch::FCSR0 + MI.getOperand(i: 1).getImm()); |
4521 | ReadFCSR->getOperand(i: 1).setIsUndef(); |
4522 | MI.eraseFromParent(); |
4523 | return BB; |
4524 | } |
4525 | case LoongArch::PseudoVBZ: |
4526 | case LoongArch::PseudoVBZ_B: |
4527 | case LoongArch::PseudoVBZ_H: |
4528 | case LoongArch::PseudoVBZ_W: |
4529 | case LoongArch::PseudoVBZ_D: |
4530 | case LoongArch::PseudoVBNZ: |
4531 | case LoongArch::PseudoVBNZ_B: |
4532 | case LoongArch::PseudoVBNZ_H: |
4533 | case LoongArch::PseudoVBNZ_W: |
4534 | case LoongArch::PseudoVBNZ_D: |
4535 | case LoongArch::PseudoXVBZ: |
4536 | case LoongArch::PseudoXVBZ_B: |
4537 | case LoongArch::PseudoXVBZ_H: |
4538 | case LoongArch::PseudoXVBZ_W: |
4539 | case LoongArch::PseudoXVBZ_D: |
4540 | case LoongArch::PseudoXVBNZ: |
4541 | case LoongArch::PseudoXVBNZ_B: |
4542 | case LoongArch::PseudoXVBNZ_H: |
4543 | case LoongArch::PseudoXVBNZ_W: |
4544 | case LoongArch::PseudoXVBNZ_D: |
4545 | return emitVecCondBranchPseudo(MI, BB, Subtarget); |
4546 | case LoongArch::PseudoXVINSGR2VR_B: |
4547 | case LoongArch::PseudoXVINSGR2VR_H: |
4548 | return emitPseudoXVINSGR2VR(MI, BB, Subtarget); |
4549 | } |
4550 | } |
4551 | |
4552 | bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses( |
4553 | EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, |
4554 | unsigned *Fast) const { |
4555 | if (!Subtarget.hasUAL()) |
4556 | return false; |
4557 | |
4558 | // TODO: set reasonable speed number. |
4559 | if (Fast) |
4560 | *Fast = 1; |
4561 | return true; |
4562 | } |
4563 | |
4564 | const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { |
4565 | switch ((LoongArchISD::NodeType)Opcode) { |
4566 | case LoongArchISD::FIRST_NUMBER: |
4567 | break; |
4568 | |
4569 | #define NODE_NAME_CASE(node) \ |
4570 | case LoongArchISD::node: \ |
4571 | return "LoongArchISD::" #node; |
4572 | |
4573 | // TODO: Add more target-dependent nodes later. |
4574 | NODE_NAME_CASE(CALL) |
4575 | NODE_NAME_CASE(CALL_MEDIUM) |
4576 | NODE_NAME_CASE(CALL_LARGE) |
4577 | NODE_NAME_CASE(RET) |
4578 | NODE_NAME_CASE(TAIL) |
4579 | NODE_NAME_CASE(TAIL_MEDIUM) |
4580 | NODE_NAME_CASE(TAIL_LARGE) |
4581 | NODE_NAME_CASE(SLL_W) |
4582 | NODE_NAME_CASE(SRA_W) |
4583 | NODE_NAME_CASE(SRL_W) |
4584 | NODE_NAME_CASE(BSTRINS) |
4585 | NODE_NAME_CASE(BSTRPICK) |
4586 | NODE_NAME_CASE(MOVGR2FR_W_LA64) |
4587 | NODE_NAME_CASE(MOVFR2GR_S_LA64) |
4588 | NODE_NAME_CASE(FTINT) |
4589 | NODE_NAME_CASE(REVB_2H) |
4590 | NODE_NAME_CASE(REVB_2W) |
4591 | NODE_NAME_CASE(BITREV_4B) |
4592 | NODE_NAME_CASE(BITREV_W) |
4593 | NODE_NAME_CASE(ROTR_W) |
4594 | NODE_NAME_CASE(ROTL_W) |
4595 | NODE_NAME_CASE(DIV_WU) |
4596 | NODE_NAME_CASE(MOD_WU) |
4597 | NODE_NAME_CASE(CLZ_W) |
4598 | NODE_NAME_CASE(CTZ_W) |
4599 | NODE_NAME_CASE(DBAR) |
4600 | NODE_NAME_CASE(IBAR) |
4601 | NODE_NAME_CASE(BREAK) |
4602 | NODE_NAME_CASE(SYSCALL) |
4603 | NODE_NAME_CASE(CRC_W_B_W) |
4604 | NODE_NAME_CASE(CRC_W_H_W) |
4605 | NODE_NAME_CASE(CRC_W_W_W) |
4606 | NODE_NAME_CASE(CRC_W_D_W) |
4607 | NODE_NAME_CASE(CRCC_W_B_W) |
4608 | NODE_NAME_CASE(CRCC_W_H_W) |
4609 | NODE_NAME_CASE(CRCC_W_W_W) |
4610 | NODE_NAME_CASE(CRCC_W_D_W) |
4611 | NODE_NAME_CASE(CSRRD) |
4612 | NODE_NAME_CASE(CSRWR) |
4613 | NODE_NAME_CASE(CSRXCHG) |
4614 | NODE_NAME_CASE(IOCSRRD_B) |
4615 | NODE_NAME_CASE(IOCSRRD_H) |
4616 | NODE_NAME_CASE(IOCSRRD_W) |
4617 | NODE_NAME_CASE(IOCSRRD_D) |
4618 | NODE_NAME_CASE(IOCSRWR_B) |
4619 | NODE_NAME_CASE(IOCSRWR_H) |
4620 | NODE_NAME_CASE(IOCSRWR_W) |
4621 | NODE_NAME_CASE(IOCSRWR_D) |
4622 | NODE_NAME_CASE(CPUCFG) |
4623 | NODE_NAME_CASE(MOVGR2FCSR) |
4624 | NODE_NAME_CASE(MOVFCSR2GR) |
4625 | NODE_NAME_CASE(CACOP_D) |
4626 | NODE_NAME_CASE(CACOP_W) |
4627 | NODE_NAME_CASE(VSHUF) |
4628 | NODE_NAME_CASE(VPICKEV) |
4629 | NODE_NAME_CASE(VPICKOD) |
4630 | NODE_NAME_CASE(VPACKEV) |
4631 | NODE_NAME_CASE(VPACKOD) |
4632 | NODE_NAME_CASE(VILVL) |
4633 | NODE_NAME_CASE(VILVH) |
4634 | NODE_NAME_CASE(VSHUF4I) |
4635 | NODE_NAME_CASE(VREPLVEI) |
4636 | NODE_NAME_CASE(XVPERMI) |
4637 | NODE_NAME_CASE(VPICK_SEXT_ELT) |
4638 | NODE_NAME_CASE(VPICK_ZEXT_ELT) |
4639 | NODE_NAME_CASE(VREPLVE) |
4640 | NODE_NAME_CASE(VALL_ZERO) |
4641 | NODE_NAME_CASE(VANY_ZERO) |
4642 | NODE_NAME_CASE(VALL_NONZERO) |
4643 | NODE_NAME_CASE(VANY_NONZERO) |
4644 | } |
4645 | #undef NODE_NAME_CASE |
4646 | return nullptr; |
4647 | } |
4648 | |
4649 | //===----------------------------------------------------------------------===// |
4650 | // Calling Convention Implementation |
4651 | //===----------------------------------------------------------------------===// |
4652 | |
4653 | // Eight general-purpose registers a0-a7 used for passing integer arguments, |
4654 | // with a0-a1 reused to return values. Generally, the GPRs are used to pass |
4655 | // fixed-point arguments, and floating-point arguments when no FPR is available |
4656 | // or with soft float ABI. |
4657 | const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6, |
4658 | LoongArch::R7, LoongArch::R8, LoongArch::R9, |
4659 | LoongArch::R10, LoongArch::R11}; |
4660 | // Eight floating-point registers fa0-fa7 used for passing floating-point |
4661 | // arguments, and fa0-fa1 are also used to return values. |
4662 | const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2, |
4663 | LoongArch::F3, LoongArch::F4, LoongArch::F5, |
4664 | LoongArch::F6, LoongArch::F7}; |
4665 | // FPR32 and FPR64 alias each other. |
4666 | const MCPhysReg ArgFPR64s[] = { |
4667 | LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64, |
4668 | LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64}; |
4669 | |
4670 | const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2, |
4671 | LoongArch::VR3, LoongArch::VR4, LoongArch::VR5, |
4672 | LoongArch::VR6, LoongArch::VR7}; |
4673 | |
4674 | const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2, |
4675 | LoongArch::XR3, LoongArch::XR4, LoongArch::XR5, |
4676 | LoongArch::XR6, LoongArch::XR7}; |
4677 | |
4678 | // Pass a 2*GRLen argument that has been split into two GRLen values through |
4679 | // registers or the stack as necessary. |
4680 | static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, |
4681 | CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, |
4682 | unsigned ValNo2, MVT ValVT2, MVT LocVT2, |
4683 | ISD::ArgFlagsTy ArgFlags2) { |
4684 | unsigned GRLenInBytes = GRLen / 8; |
4685 | if (Register Reg = State.AllocateReg(Regs: ArgGPRs)) { |
4686 | // At least one half can be passed via register. |
4687 | State.addLoc(V: CCValAssign::getReg(ValNo: VA1.getValNo(), ValVT: VA1.getValVT(), RegNo: Reg, |
4688 | LocVT: VA1.getLocVT(), HTP: CCValAssign::Full)); |
4689 | } else { |
4690 | // Both halves must be passed on the stack, with proper alignment. |
4691 | Align StackAlign = |
4692 | std::max(a: Align(GRLenInBytes), b: ArgFlags1.getNonZeroOrigAlign()); |
4693 | State.addLoc( |
4694 | V: CCValAssign::getMem(ValNo: VA1.getValNo(), ValVT: VA1.getValVT(), |
4695 | Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: StackAlign), |
4696 | LocVT: VA1.getLocVT(), HTP: CCValAssign::Full)); |
4697 | State.addLoc(V: CCValAssign::getMem( |
4698 | ValNo: ValNo2, ValVT: ValVT2, Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: Align(GRLenInBytes)), |
4699 | LocVT: LocVT2, HTP: CCValAssign::Full)); |
4700 | return false; |
4701 | } |
4702 | if (Register Reg = State.AllocateReg(Regs: ArgGPRs)) { |
4703 | // The second half can also be passed via register. |
4704 | State.addLoc( |
4705 | V: CCValAssign::getReg(ValNo: ValNo2, ValVT: ValVT2, RegNo: Reg, LocVT: LocVT2, HTP: CCValAssign::Full)); |
4706 | } else { |
4707 | // The second half is passed via the stack, without additional alignment. |
4708 | State.addLoc(V: CCValAssign::getMem( |
4709 | ValNo: ValNo2, ValVT: ValVT2, Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: Align(GRLenInBytes)), |
4710 | LocVT: LocVT2, HTP: CCValAssign::Full)); |
4711 | } |
4712 | return false; |
4713 | } |
4714 | |
4715 | // Implements the LoongArch calling convention. Returns true upon failure. |
4716 | static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, |
4717 | unsigned ValNo, MVT ValVT, |
4718 | CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, |
4719 | CCState &State, bool IsFixed, bool IsRet, |
4720 | Type *OrigTy) { |
4721 | unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits(); |
4722 | assert((GRLen == 32 || GRLen == 64) && "Unspport GRLen" ); |
4723 | MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64; |
4724 | MVT LocVT = ValVT; |
4725 | |
4726 | // Any return value split into more than two values can't be returned |
4727 | // directly. |
4728 | if (IsRet && ValNo > 1) |
4729 | return true; |
4730 | |
4731 | // If passing a variadic argument, or if no FPR is available. |
4732 | bool UseGPRForFloat = true; |
4733 | |
4734 | switch (ABI) { |
4735 | default: |
4736 | llvm_unreachable("Unexpected ABI" ); |
4737 | break; |
4738 | case LoongArchABI::ABI_ILP32F: |
4739 | case LoongArchABI::ABI_LP64F: |
4740 | case LoongArchABI::ABI_ILP32D: |
4741 | case LoongArchABI::ABI_LP64D: |
4742 | UseGPRForFloat = !IsFixed; |
4743 | break; |
4744 | case LoongArchABI::ABI_ILP32S: |
4745 | case LoongArchABI::ABI_LP64S: |
4746 | break; |
4747 | } |
4748 | |
4749 | // FPR32 and FPR64 alias each other. |
4750 | if (State.getFirstUnallocated(Regs: ArgFPR32s) == std::size(ArgFPR32s)) |
4751 | UseGPRForFloat = true; |
4752 | |
4753 | if (UseGPRForFloat && ValVT == MVT::f32) { |
4754 | LocVT = GRLenVT; |
4755 | LocInfo = CCValAssign::BCvt; |
4756 | } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) { |
4757 | LocVT = MVT::i64; |
4758 | LocInfo = CCValAssign::BCvt; |
4759 | } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) { |
4760 | // TODO: Handle passing f64 on LA32 with D feature. |
4761 | report_fatal_error(reason: "Passing f64 with GPR on LA32 is undefined" ); |
4762 | } |
4763 | |
4764 | // If this is a variadic argument, the LoongArch calling convention requires |
4765 | // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8 |
4766 | // byte alignment. An aligned register should be used regardless of whether |
4767 | // the original argument was split during legalisation or not. The argument |
4768 | // will not be passed by registers if the original type is larger than |
4769 | // 2*GRLen, so the register alignment rule does not apply. |
4770 | unsigned TwoGRLenInBytes = (2 * GRLen) / 8; |
4771 | if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes && |
4772 | DL.getTypeAllocSize(Ty: OrigTy) == TwoGRLenInBytes) { |
4773 | unsigned RegIdx = State.getFirstUnallocated(Regs: ArgGPRs); |
4774 | // Skip 'odd' register if necessary. |
4775 | if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1) |
4776 | State.AllocateReg(Regs: ArgGPRs); |
4777 | } |
4778 | |
4779 | SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs(); |
4780 | SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags = |
4781 | State.getPendingArgFlags(); |
4782 | |
4783 | assert(PendingLocs.size() == PendingArgFlags.size() && |
4784 | "PendingLocs and PendingArgFlags out of sync" ); |
4785 | |
4786 | // Split arguments might be passed indirectly, so keep track of the pending |
4787 | // values. |
4788 | if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) { |
4789 | LocVT = GRLenVT; |
4790 | LocInfo = CCValAssign::Indirect; |
4791 | PendingLocs.push_back( |
4792 | Elt: CCValAssign::getPending(ValNo, ValVT, LocVT, HTP: LocInfo)); |
4793 | PendingArgFlags.push_back(Elt: ArgFlags); |
4794 | if (!ArgFlags.isSplitEnd()) { |
4795 | return false; |
4796 | } |
4797 | } |
4798 | |
4799 | // If the split argument only had two elements, it should be passed directly |
4800 | // in registers or on the stack. |
4801 | if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() && |
4802 | PendingLocs.size() <= 2) { |
4803 | assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()" ); |
4804 | // Apply the normal calling convention rules to the first half of the |
4805 | // split argument. |
4806 | CCValAssign VA = PendingLocs[0]; |
4807 | ISD::ArgFlagsTy AF = PendingArgFlags[0]; |
4808 | PendingLocs.clear(); |
4809 | PendingArgFlags.clear(); |
4810 | return CC_LoongArchAssign2GRLen(GRLen, State, VA1: VA, ArgFlags1: AF, ValNo2: ValNo, ValVT2: ValVT, LocVT2: LocVT, |
4811 | ArgFlags2: ArgFlags); |
4812 | } |
4813 | |
4814 | // Allocate to a register if possible, or else a stack slot. |
4815 | Register Reg; |
4816 | unsigned StoreSizeBytes = GRLen / 8; |
4817 | Align StackAlign = Align(GRLen / 8); |
4818 | |
4819 | if (ValVT == MVT::f32 && !UseGPRForFloat) |
4820 | Reg = State.AllocateReg(Regs: ArgFPR32s); |
4821 | else if (ValVT == MVT::f64 && !UseGPRForFloat) |
4822 | Reg = State.AllocateReg(Regs: ArgFPR64s); |
4823 | else if (ValVT.is128BitVector()) |
4824 | Reg = State.AllocateReg(Regs: ArgVRs); |
4825 | else if (ValVT.is256BitVector()) |
4826 | Reg = State.AllocateReg(Regs: ArgXRs); |
4827 | else |
4828 | Reg = State.AllocateReg(Regs: ArgGPRs); |
4829 | |
4830 | unsigned StackOffset = |
4831 | Reg ? 0 : State.AllocateStack(Size: StoreSizeBytes, Alignment: StackAlign); |
4832 | |
4833 | // If we reach this point and PendingLocs is non-empty, we must be at the |
4834 | // end of a split argument that must be passed indirectly. |
4835 | if (!PendingLocs.empty()) { |
4836 | assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()" ); |
4837 | assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()" ); |
4838 | for (auto &It : PendingLocs) { |
4839 | if (Reg) |
4840 | It.convertToReg(RegNo: Reg); |
4841 | else |
4842 | It.convertToMem(Offset: StackOffset); |
4843 | State.addLoc(V: It); |
4844 | } |
4845 | PendingLocs.clear(); |
4846 | PendingArgFlags.clear(); |
4847 | return false; |
4848 | } |
4849 | assert((!UseGPRForFloat || LocVT == GRLenVT) && |
4850 | "Expected an GRLenVT at this stage" ); |
4851 | |
4852 | if (Reg) { |
4853 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
4854 | return false; |
4855 | } |
4856 | |
4857 | // When a floating-point value is passed on the stack, no bit-cast is needed. |
4858 | if (ValVT.isFloatingPoint()) { |
4859 | LocVT = ValVT; |
4860 | LocInfo = CCValAssign::Full; |
4861 | } |
4862 | |
4863 | State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo)); |
4864 | return false; |
4865 | } |
4866 | |
4867 | void LoongArchTargetLowering::analyzeInputArgs( |
4868 | MachineFunction &MF, CCState &CCInfo, |
4869 | const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet, |
4870 | LoongArchCCAssignFn Fn) const { |
4871 | FunctionType *FType = MF.getFunction().getFunctionType(); |
4872 | for (unsigned i = 0, e = Ins.size(); i != e; ++i) { |
4873 | MVT ArgVT = Ins[i].VT; |
4874 | Type *ArgTy = nullptr; |
4875 | if (IsRet) |
4876 | ArgTy = FType->getReturnType(); |
4877 | else if (Ins[i].isOrigArg()) |
4878 | ArgTy = FType->getParamType(i: Ins[i].getOrigArgIndex()); |
4879 | LoongArchABI::ABI ABI = |
4880 | MF.getSubtarget<LoongArchSubtarget>().getTargetABI(); |
4881 | if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags, |
4882 | CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) { |
4883 | LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT |
4884 | << '\n'); |
4885 | llvm_unreachable("" ); |
4886 | } |
4887 | } |
4888 | } |
4889 | |
4890 | void LoongArchTargetLowering::analyzeOutputArgs( |
4891 | MachineFunction &MF, CCState &CCInfo, |
4892 | const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet, |
4893 | CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const { |
4894 | for (unsigned i = 0, e = Outs.size(); i != e; ++i) { |
4895 | MVT ArgVT = Outs[i].VT; |
4896 | Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; |
4897 | LoongArchABI::ABI ABI = |
4898 | MF.getSubtarget<LoongArchSubtarget>().getTargetABI(); |
4899 | if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags, |
4900 | CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) { |
4901 | LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT |
4902 | << "\n" ); |
4903 | llvm_unreachable("" ); |
4904 | } |
4905 | } |
4906 | } |
4907 | |
4908 | // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect |
4909 | // values. |
4910 | static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, |
4911 | const CCValAssign &VA, const SDLoc &DL) { |
4912 | switch (VA.getLocInfo()) { |
4913 | default: |
4914 | llvm_unreachable("Unexpected CCValAssign::LocInfo" ); |
4915 | case CCValAssign::Full: |
4916 | case CCValAssign::Indirect: |
4917 | break; |
4918 | case CCValAssign::BCvt: |
4919 | if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) |
4920 | Val = DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64, DL, VT: MVT::f32, Operand: Val); |
4921 | else |
4922 | Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: VA.getValVT(), Operand: Val); |
4923 | break; |
4924 | } |
4925 | return Val; |
4926 | } |
4927 | |
4928 | static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, |
4929 | const CCValAssign &VA, const SDLoc &DL, |
4930 | const ISD::InputArg &In, |
4931 | const LoongArchTargetLowering &TLI) { |
4932 | MachineFunction &MF = DAG.getMachineFunction(); |
4933 | MachineRegisterInfo &RegInfo = MF.getRegInfo(); |
4934 | EVT LocVT = VA.getLocVT(); |
4935 | SDValue Val; |
4936 | const TargetRegisterClass *RC = TLI.getRegClassFor(VT: LocVT.getSimpleVT()); |
4937 | Register VReg = RegInfo.createVirtualRegister(RegClass: RC); |
4938 | RegInfo.addLiveIn(Reg: VA.getLocReg(), vreg: VReg); |
4939 | Val = DAG.getCopyFromReg(Chain, dl: DL, Reg: VReg, VT: LocVT); |
4940 | |
4941 | // If input is sign extended from 32 bits, note it for the OptW pass. |
4942 | if (In.isOrigArg()) { |
4943 | Argument *OrigArg = MF.getFunction().getArg(i: In.getOrigArgIndex()); |
4944 | if (OrigArg->getType()->isIntegerTy()) { |
4945 | unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth(); |
4946 | // An input zero extended from i31 can also be considered sign extended. |
4947 | if ((BitWidth <= 32 && In.Flags.isSExt()) || |
4948 | (BitWidth < 32 && In.Flags.isZExt())) { |
4949 | LoongArchMachineFunctionInfo *LAFI = |
4950 | MF.getInfo<LoongArchMachineFunctionInfo>(); |
4951 | LAFI->addSExt32Register(Reg: VReg); |
4952 | } |
4953 | } |
4954 | } |
4955 | |
4956 | return convertLocVTToValVT(DAG, Val, VA, DL); |
4957 | } |
4958 | |
4959 | // The caller is responsible for loading the full value if the argument is |
4960 | // passed with CCValAssign::Indirect. |
4961 | static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, |
4962 | const CCValAssign &VA, const SDLoc &DL) { |
4963 | MachineFunction &MF = DAG.getMachineFunction(); |
4964 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
4965 | EVT ValVT = VA.getValVT(); |
4966 | int FI = MFI.CreateFixedObject(Size: ValVT.getStoreSize(), SPOffset: VA.getLocMemOffset(), |
4967 | /*IsImmutable=*/true); |
4968 | SDValue FIN = DAG.getFrameIndex( |
4969 | FI, VT: MVT::getIntegerVT(BitWidth: DAG.getDataLayout().getPointerSizeInBits(AS: 0))); |
4970 | |
4971 | ISD::LoadExtType ExtType; |
4972 | switch (VA.getLocInfo()) { |
4973 | default: |
4974 | llvm_unreachable("Unexpected CCValAssign::LocInfo" ); |
4975 | case CCValAssign::Full: |
4976 | case CCValAssign::Indirect: |
4977 | case CCValAssign::BCvt: |
4978 | ExtType = ISD::NON_EXTLOAD; |
4979 | break; |
4980 | } |
4981 | return DAG.getExtLoad( |
4982 | ExtType, dl: DL, VT: VA.getLocVT(), Chain, Ptr: FIN, |
4983 | PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI), MemVT: ValVT); |
4984 | } |
4985 | |
4986 | static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, |
4987 | const CCValAssign &VA, const SDLoc &DL) { |
4988 | EVT LocVT = VA.getLocVT(); |
4989 | |
4990 | switch (VA.getLocInfo()) { |
4991 | default: |
4992 | llvm_unreachable("Unexpected CCValAssign::LocInfo" ); |
4993 | case CCValAssign::Full: |
4994 | break; |
4995 | case CCValAssign::BCvt: |
4996 | if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) |
4997 | Val = DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Val); |
4998 | else |
4999 | Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: LocVT, Operand: Val); |
5000 | break; |
5001 | } |
5002 | return Val; |
5003 | } |
5004 | |
5005 | static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, |
5006 | CCValAssign::LocInfo LocInfo, |
5007 | ISD::ArgFlagsTy ArgFlags, CCState &State) { |
5008 | if (LocVT == MVT::i32 || LocVT == MVT::i64) { |
5009 | // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim |
5010 | // s0 s1 s2 s3 s4 s5 s6 s7 s8 |
5011 | static const MCPhysReg GPRList[] = { |
5012 | LoongArch::R23, LoongArch::R24, LoongArch::R25, |
5013 | LoongArch::R26, LoongArch::R27, LoongArch::R28, |
5014 | LoongArch::R29, LoongArch::R30, LoongArch::R31}; |
5015 | if (unsigned Reg = State.AllocateReg(Regs: GPRList)) { |
5016 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
5017 | return false; |
5018 | } |
5019 | } |
5020 | |
5021 | if (LocVT == MVT::f32) { |
5022 | // Pass in STG registers: F1, F2, F3, F4 |
5023 | // fs0,fs1,fs2,fs3 |
5024 | static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25, |
5025 | LoongArch::F26, LoongArch::F27}; |
5026 | if (unsigned Reg = State.AllocateReg(Regs: FPR32List)) { |
5027 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
5028 | return false; |
5029 | } |
5030 | } |
5031 | |
5032 | if (LocVT == MVT::f64) { |
5033 | // Pass in STG registers: D1, D2, D3, D4 |
5034 | // fs4,fs5,fs6,fs7 |
5035 | static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64, |
5036 | LoongArch::F30_64, LoongArch::F31_64}; |
5037 | if (unsigned Reg = State.AllocateReg(Regs: FPR64List)) { |
5038 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
5039 | return false; |
5040 | } |
5041 | } |
5042 | |
5043 | report_fatal_error(reason: "No registers left in GHC calling convention" ); |
5044 | return true; |
5045 | } |
5046 | |
// Transform physical registers into virtual registers.
//
// Lowers the incoming (formal) arguments of the current function: a location
// is assigned to every entry of Ins, each value is read from its register or
// fixed stack slot, and the resulting values are appended to InVals in order.
// Arguments passed indirectly are loaded through the pointer that was passed.
// For variadic functions the argument GPRs that were not consumed by fixed
// arguments are additionally stored to a save area on the stack.
//
// Only the C, Fast and GHC calling conventions are accepted; GHC additionally
// requires the BasicF and BasicD subtarget features. Returns the (possibly
// updated) chain.
SDValue LoongArchTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  MachineFunction &MF = DAG.getMachineFunction();

  // Reject calling conventions this lowering does not know about.
  switch (CallConv) {
  default:
    llvm_unreachable("Unsupported calling convention" );
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  case CallingConv::GHC:
    // GHC assigns values to both FPR32 and FPR64 registers (see
    // CC_LoongArch_GHC), so both floating-point features must be present.
    if (!MF.getSubtarget().hasFeature(Feature: LoongArch::FeatureBasicF) ||
        !MF.getSubtarget().hasFeature(Feature: LoongArch::FeatureBasicD))
      report_fatal_error(
          reason: "GHC calling convention requires the F and D extensions" );
  }

  EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
  MVT GRLenVT = Subtarget.getGRLenVT();
  unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
  // Used with varargs to accumulate store chains.
  std::vector<SDValue> OutChains;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::GHC)
    CCInfo.AnalyzeFormalArguments(Ins, Fn: CC_LoongArch_GHC);
  else
    analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, Fn: CC_LoongArch);

  // Materialise every argument. ArgLocs and Ins are indexed in lock step.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue;
    if (VA.isRegLoc())
      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, In: Ins[i], TLI: *this);
    else
      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // If the original argument was split and passed by reference, we need to
      // load all parts of it here (using the same address).
      InVals.push_back(Elt: DAG.getLoad(VT: VA.getValVT(), dl: DL, Chain, Ptr: ArgValue,
                                   PtrInfo: MachinePointerInfo()));
      unsigned ArgIndex = Ins[i].OrigArgIndex;
      unsigned ArgPartOffset = Ins[i].PartOffset;
      assert(ArgPartOffset == 0);
      // Consume every following part that belongs to the same original
      // argument; each one is loaded from ArgValue plus its part offset.
      while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[i + 1];
        unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
        SDValue Offset = DAG.getIntPtrConstant(Val: PartOffset, DL);
        SDValue Address = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: ArgValue, N2: Offset);
        InVals.push_back(Elt: DAG.getLoad(VT: PartVA.getValVT(), dl: DL, Chain, Ptr: Address,
                                     PtrInfo: MachinePointerInfo()));
        ++i;
      }
      continue;
    }
    InVals.push_back(Elt: ArgValue);
  }

  if (IsVarArg) {
    ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
    // Index of the first argument GPR not used by a fixed argument.
    unsigned Idx = CCInfo.getFirstUnallocated(Regs: ArgRegs);
    const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();

    // Offset of the first variable argument from stack pointer, and size of
    // the vararg save area. For now, the varargs save area is either zero or
    // large enough to hold a0-a7.
    int VaArgOffset, VarArgsSaveSize;

    // If all registers are allocated, then all varargs must be passed on the
    // stack and we don't need to save any argregs.
    if (ArgRegs.size() == Idx) {
      VaArgOffset = CCInfo.getStackSize();
      VarArgsSaveSize = 0;
    } else {
      // One GRLen-sized slot per remaining register, placed at negative
      // offsets (VaArgOffset is the negated save-area size).
      VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
      VaArgOffset = -VarArgsSaveSize;
    }

    // Record the frame index of the first variable argument
    // which is a value necessary to VASTART.
    int FI = MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset, IsImmutable: true);
    LoongArchFI->setVarArgsFrameIndex(FI);

    // If saving an odd number of registers then create an extra stack slot to
    // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
    // offsets to even-numbered registered remain 2*GRLen-aligned.
    if (Idx % 2) {
      MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset - (int)GRLenInBytes,
                            IsImmutable: true);
      VarArgsSaveSize += GRLenInBytes;
    }

    // Copy the integer registers that may have been used for passing varargs
    // to the vararg save area.
    for (unsigned I = Idx; I < ArgRegs.size();
         ++I, VaArgOffset += GRLenInBytes) {
      const Register Reg = RegInfo.createVirtualRegister(RegClass: RC);
      RegInfo.addLiveIn(Reg: ArgRegs[I], vreg: Reg);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl: DL, Reg, VT: GRLenVT);
      FI = MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset, IsImmutable: true);
      SDValue PtrOff = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout()));
      SDValue Store = DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: PtrOff,
                                   PtrInfo: MachinePointerInfo::getFixedStack(MF, FI));
      // Clear the IR value attached to the store's memory operand.
      cast<StoreSDNode>(Val: Store.getNode())
          ->getMemOperand()
          ->setValue((Value *)nullptr);
      OutChains.push_back(x: Store);
    }
    LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
  }

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(x: Chain);
    Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
  }

  return Chain;
}
5177 | |
5178 | bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { |
5179 | return CI->isTailCall(); |
5180 | } |
5181 | |
5182 | // Check if the return value is used as only a return value, as otherwise |
5183 | // we can't perform a tail-call. |
5184 | bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N, |
5185 | SDValue &Chain) const { |
5186 | if (N->getNumValues() != 1) |
5187 | return false; |
5188 | if (!N->hasNUsesOfValue(NUses: 1, Value: 0)) |
5189 | return false; |
5190 | |
5191 | SDNode *Copy = *N->use_begin(); |
5192 | if (Copy->getOpcode() != ISD::CopyToReg) |
5193 | return false; |
5194 | |
5195 | // If the ISD::CopyToReg has a glue operand, we conservatively assume it |
5196 | // isn't safe to perform a tail call. |
5197 | if (Copy->getGluedNode()) |
5198 | return false; |
5199 | |
5200 | // The copy must be used by a LoongArchISD::RET, and nothing else. |
5201 | bool HasRet = false; |
5202 | for (SDNode *Node : Copy->uses()) { |
5203 | if (Node->getOpcode() != LoongArchISD::RET) |
5204 | return false; |
5205 | HasRet = true; |
5206 | } |
5207 | |
5208 | if (!HasRet) |
5209 | return false; |
5210 | |
5211 | Chain = Copy->getOperand(Num: 0); |
5212 | return true; |
5213 | } |
5214 | |
5215 | // Check whether the call is eligible for tail call optimization. |
5216 | bool LoongArchTargetLowering::isEligibleForTailCallOptimization( |
5217 | CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, |
5218 | const SmallVectorImpl<CCValAssign> &ArgLocs) const { |
5219 | |
5220 | auto CalleeCC = CLI.CallConv; |
5221 | auto &Outs = CLI.Outs; |
5222 | auto &Caller = MF.getFunction(); |
5223 | auto CallerCC = Caller.getCallingConv(); |
5224 | |
5225 | // Do not tail call opt if the stack is used to pass parameters. |
5226 | if (CCInfo.getStackSize() != 0) |
5227 | return false; |
5228 | |
5229 | // Do not tail call opt if any parameters need to be passed indirectly. |
5230 | for (auto &VA : ArgLocs) |
5231 | if (VA.getLocInfo() == CCValAssign::Indirect) |
5232 | return false; |
5233 | |
5234 | // Do not tail call opt if either caller or callee uses struct return |
5235 | // semantics. |
5236 | auto IsCallerStructRet = Caller.hasStructRetAttr(); |
5237 | auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); |
5238 | if (IsCallerStructRet || IsCalleeStructRet) |
5239 | return false; |
5240 | |
5241 | // Do not tail call opt if either the callee or caller has a byval argument. |
5242 | for (auto &Arg : Outs) |
5243 | if (Arg.Flags.isByVal()) |
5244 | return false; |
5245 | |
5246 | // The callee has to preserve all registers the caller needs to preserve. |
5247 | const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
5248 | const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); |
5249 | if (CalleeCC != CallerCC) { |
5250 | const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); |
5251 | if (!TRI->regmaskSubsetEqual(mask0: CallerPreserved, mask1: CalleePreserved)) |
5252 | return false; |
5253 | } |
5254 | return true; |
5255 | } |
5256 | |
5257 | static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) { |
5258 | return DAG.getDataLayout().getPrefTypeAlign( |
5259 | Ty: VT.getTypeForEVT(Context&: *DAG.getContext())); |
5260 | } |
5261 | |
// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
// and output parameter nodes.
//
// Steps, in order:
//   1. Assign a location to every outgoing argument.
//   2. Decide whether a requested tail call is actually possible (CLI's
//      IsTailCall flag is updated in place; a failed musttail is fatal).
//   3. Copy byval arguments into local stack objects.
//   4. Move each argument to its register or outgoing stack slot; indirect
//      arguments are first spilled to a temporary whose address is passed.
//   5. Emit the call (or tail-call) node selected by the code model.
//   6. For non-tail calls, close the call sequence and copy the returned
//      values out of their registers into InVals.
// Returns the resulting chain (for a tail call, the tail-call node itself).
SDValue
LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                   SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
  MVT GRLenVT = Subtarget.getGRLenVT();
  // Reference, not a copy: the decision made below is written back to CLI.
  bool &IsTailCall = CLI.IsTailCall;

  MachineFunction &MF = DAG.getMachineFunction();

  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign> ArgLocs;
  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::GHC)
    ArgCCInfo.AnalyzeCallOperands(Outs, Fn: CC_LoongArch_GHC);
  else
    analyzeOutputArgs(MF, CCInfo&: ArgCCInfo, Outs, /*IsRet=*/false, CLI: &CLI, Fn: CC_LoongArch);

  // Check if it's really possible to do a tail call.
  if (IsTailCall)
    IsTailCall = isEligibleForTailCallOptimization(CCInfo&: ArgCCInfo, CLI, MF, ArgLocs);

  if (IsTailCall)
    ++NumTailCalls;
  else if (CLI.CB && CLI.CB->isMustTailCall())
    report_fatal_error(reason: "failed to perform tail call elimination on a call "
                       "site marked musttail" );

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getStackSize();

  // Create local copies for byval args.
  SmallVector<SDValue> ByValArgs;
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    if (!Flags.isByVal())
      continue;

    SDValue Arg = OutVals[i];
    unsigned Size = Flags.getByValSize();
    Align Alignment = Flags.getNonZeroByValAlign();

    int FI =
        MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/isSpillSlot: false);
    SDValue FIPtr = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout()));
    SDValue SizeNode = DAG.getConstant(Val: Size, DL, VT: GRLenVT);

    // The memcpy is threaded onto Chain so it is ordered before the call.
    Chain = DAG.getMemcpy(Chain, dl: DL, Dst: FIPtr, Src: Arg, Size: SizeNode, Alignment,
                          /*IsVolatile=*/isVol: false,
                          /*AlwaysInline=*/false, /*CI=*/nullptr, OverrideTailCall: std::nullopt,
                          DstPtrInfo: MachinePointerInfo(), SrcPtrInfo: MachinePointerInfo());
    ByValArgs.push_back(Elt: FIPtr);
  }

  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: 0, DL: CLI.DL);

  // Copy argument values to their designated locations.
  // i walks ArgLocs/Outs/OutVals in lock step; j walks ByValArgs.
  SmallVector<std::pair<Register, SDValue>> RegsToPass;
  SmallVector<SDValue> MemOpChains;
  SDValue StackPtr;
  for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    // Promote the value if needed.
    // For now, only handle fully promoted and indirect arguments.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      Align StackAlign =
          std::max(a: getPrefTypeAlign(VT: Outs[i].ArgVT, DAG),
                   b: getPrefTypeAlign(VT: ArgValue.getValueType(), DAG));
      TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
      // If the original argument was split and passed by reference, we need to
      // store the required parts of it here (and pass just one address).
      unsigned ArgIndex = Outs[i].OrigArgIndex;
      unsigned ArgPartOffset = Outs[i].PartOffset;
      assert(ArgPartOffset == 0);
      // Calculate the total size to store. We don't have access to what we're
      // actually storing other than performing the loop and collecting the
      // info.
      SmallVector<std::pair<SDValue, SDValue>> Parts;
      while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[i + 1];
        unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
        SDValue Offset = DAG.getIntPtrConstant(Val: PartOffset, DL);
        EVT PartVT = PartValue.getValueType();

        StoredSize += PartVT.getStoreSize();
        StackAlign = std::max(a: StackAlign, b: getPrefTypeAlign(VT: PartVT, DAG));
        Parts.push_back(Elt: std::make_pair(x&: PartValue, y&: Offset));
        ++i;
      }
      // The temporary is sized and aligned for all parts together.
      SDValue SpillSlot = DAG.CreateStackTemporary(Bytes: StoredSize, Alignment: StackAlign);
      int FI = cast<FrameIndexSDNode>(Val&: SpillSlot)->getIndex();
      // First part goes to the start of the slot ...
      MemOpChains.push_back(
          Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: SpillSlot,
                       PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)));
      // ... and every remaining part to the slot address plus its offset.
      for (const auto &Part : Parts) {
        SDValue PartValue = Part.first;
        SDValue PartOffset = Part.second;
        SDValue Address =
            DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: SpillSlot, N2: PartOffset);
        MemOpChains.push_back(
            Elt: DAG.getStore(Chain, dl: DL, Val: PartValue, Ptr: Address,
                         PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)));
      }
      // From here on the argument is represented by the slot's address.
      ArgValue = SpillSlot;
    } else {
      ArgValue = convertValVTToLocVT(DAG, Val: ArgValue, VA, DL);
    }

    // Use local copy if it is a byval arg.
    if (Flags.isByVal())
      ArgValue = ByValArgs[j++];

    if (VA.isRegLoc()) {
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: ArgValue));
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory" );
      assert(!IsTailCall && "Tail call not allowed if stack is used "
                            "for passing parameters" );

      // Work out the address of the stack slot.
      // The stack pointer (R3) is read once and reused for later arguments.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, dl: DL, Reg: LoongArch::R3, VT: PtrVT);
      SDValue Address =
          DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr,
                      N2: DAG.getIntPtrConstant(Val: VA.getLocMemOffset(), DL));

      // Emit the store.
      MemOpChains.push_back(
          Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: Address, PtrInfo: MachinePointerInfo()));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: MemOpChains);

  SDValue Glue;

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: Reg.first, N: Reg.second, Glue);
    Glue = Chain.getValue(R: 1);
  }

  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
  // split it and then direct call can be matched by PseudoCALL.
  // Symbols assumed DSO-local get MO_CALL; all others get MO_CALL_PLT.
  if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Val&: Callee)) {
    const GlobalValue *GV = S->getGlobal();
    unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
                           ? LoongArchII::MO_CALL
                           : LoongArchII::MO_CALL_PLT;
    Callee = DAG.getTargetGlobalAddress(GV: S->getGlobal(), DL, VT: PtrVT, offset: 0, TargetFlags: OpFlags);
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Val&: Callee)) {
    unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV: nullptr)
                           ? LoongArchII::MO_CALL
                           : LoongArchII::MO_CALL_PLT;
    Callee = DAG.getTargetExternalSymbol(Sym: S->getSymbol(), VT: PtrVT, TargetFlags: OpFlags);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue> Ops;
  Ops.push_back(Elt: Chain);
  Ops.push_back(Elt: Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(Elt: DAG.getRegister(Reg: Reg.first, VT: Reg.second.getValueType()));

  if (!IsTailCall) {
    // Add a register mask operand representing the call-preserved registers.
    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
    const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
    assert(Mask && "Missing call preserved mask for calling convention" );
    Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask));
  }

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Elt: Glue);

  // Emit the call.
  // The node opcode depends on both the code model and whether this is a
  // tail call.
  SDVTList NodeTys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue);
  unsigned Op;
  switch (DAG.getTarget().getCodeModel()) {
  default:
    report_fatal_error(reason: "Unsupported code model" );
  case CodeModel::Small:
    Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
    break;
  case CodeModel::Medium:
    assert(Subtarget.is64Bit() && "Medium code model requires LA64" );
    Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
    break;
  case CodeModel::Large:
    assert(Subtarget.is64Bit() && "Large code model requires LA64" );
    Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
    break;
  }

  // A tail call ends here: there is no call sequence to close and no return
  // value to copy out.
  if (IsTailCall) {
    MF.getFrameInfo().setHasTailCall();
    SDValue Ret = DAG.getNode(Opcode: Op, DL, VTList: NodeTys, Ops);
    DAG.addNoMergeSiteInfo(Node: Ret.getNode(), NoMerge: CLI.NoMerge);
    return Ret;
  }

  Chain = DAG.getNode(Opcode: Op, DL, VTList: NodeTys, Ops);
  DAG.addNoMergeSiteInfo(Node: Chain.getNode(), NoMerge: CLI.NoMerge);
  Glue = Chain.getValue(R: 1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytes, Size2: 0, Glue, DL);
  Glue = Chain.getValue(R: 1);

  // Assign locations to each value returned by this call.
  // NOTE(review): CC_LoongArch is used here for every calling convention,
  // including GHC - confirm this is intended.
  SmallVector<CCValAssign> RVLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
  analyzeInputArgs(MF, CCInfo&: RetCCInfo, Ins, /*IsRet=*/true, Fn: CC_LoongArch);

  // Copy all of the result registers out of their specified physreg.
  for (auto &VA : RVLocs) {
    // Copy the value out.
    SDValue RetValue =
        DAG.getCopyFromReg(Chain, dl: DL, Reg: VA.getLocReg(), VT: VA.getLocVT(), Glue);
    // Glue the RetValue to the end of the call sequence.
    Chain = RetValue.getValue(R: 1);
    Glue = RetValue.getValue(R: 2);

    RetValue = convertLocVTToValVT(DAG, Val: RetValue, VA, DL);

    InVals.push_back(Elt: RetValue);
  }

  return Chain;
}
5516 | |
5517 | bool LoongArchTargetLowering::CanLowerReturn( |
5518 | CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, |
5519 | const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { |
5520 | SmallVector<CCValAssign> RVLocs; |
5521 | CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); |
5522 | |
5523 | for (unsigned i = 0, e = Outs.size(); i != e; ++i) { |
5524 | LoongArchABI::ABI ABI = |
5525 | MF.getSubtarget<LoongArchSubtarget>().getTargetABI(); |
5526 | if (CC_LoongArch(DL: MF.getDataLayout(), ABI, ValNo: i, ValVT: Outs[i].VT, LocInfo: CCValAssign::Full, |
5527 | ArgFlags: Outs[i].Flags, State&: CCInfo, /*IsFixed=*/true, /*IsRet=*/true, |
5528 | OrigTy: nullptr)) |
5529 | return false; |
5530 | } |
5531 | return true; |
5532 | } |
5533 | |
5534 | SDValue LoongArchTargetLowering::LowerReturn( |
5535 | SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, |
5536 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
5537 | const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, |
5538 | SelectionDAG &DAG) const { |
5539 | // Stores the assignment of the return value to a location. |
5540 | SmallVector<CCValAssign> RVLocs; |
5541 | |
5542 | // Info about the registers and stack slot. |
5543 | CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, |
5544 | *DAG.getContext()); |
5545 | |
5546 | analyzeOutputArgs(MF&: DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true, |
5547 | CLI: nullptr, Fn: CC_LoongArch); |
5548 | if (CallConv == CallingConv::GHC && !RVLocs.empty()) |
5549 | report_fatal_error(reason: "GHC functions return void only" ); |
5550 | SDValue Glue; |
5551 | SmallVector<SDValue, 4> RetOps(1, Chain); |
5552 | |
5553 | // Copy the result values into the output registers. |
5554 | for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) { |
5555 | CCValAssign &VA = RVLocs[i]; |
5556 | assert(VA.isRegLoc() && "Can only return in registers!" ); |
5557 | |
5558 | // Handle a 'normal' return. |
5559 | SDValue Val = convertValVTToLocVT(DAG, Val: OutVals[i], VA, DL); |
5560 | Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: VA.getLocReg(), N: Val, Glue); |
5561 | |
5562 | // Guarantee that all emitted copies are stuck together. |
5563 | Glue = Chain.getValue(R: 1); |
5564 | RetOps.push_back(Elt: DAG.getRegister(Reg: VA.getLocReg(), VT: VA.getLocVT())); |
5565 | } |
5566 | |
5567 | RetOps[0] = Chain; // Update chain. |
5568 | |
5569 | // Add the glue node if we have it. |
5570 | if (Glue.getNode()) |
5571 | RetOps.push_back(Elt: Glue); |
5572 | |
5573 | return DAG.getNode(Opcode: LoongArchISD::RET, DL, VT: MVT::Other, Ops: RetOps); |
5574 | } |
5575 | |
5576 | bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, |
5577 | bool ForCodeSize) const { |
5578 | // TODO: Maybe need more checks here after vector extension is supported. |
5579 | if (VT == MVT::f32 && !Subtarget.hasBasicF()) |
5580 | return false; |
5581 | if (VT == MVT::f64 && !Subtarget.hasBasicD()) |
5582 | return false; |
5583 | return (Imm.isZero() || Imm.isExactlyValue(V: +1.0)); |
5584 | } |
5585 | |
5586 | bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const { |
5587 | return true; |
5588 | } |
5589 | |
5590 | bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const { |
5591 | return true; |
5592 | } |
5593 | |
5594 | bool LoongArchTargetLowering::shouldInsertFencesForAtomic( |
5595 | const Instruction *I) const { |
5596 | if (!Subtarget.is64Bit()) |
5597 | return isa<LoadInst>(Val: I) || isa<StoreInst>(Val: I); |
5598 | |
5599 | if (isa<LoadInst>(Val: I)) |
5600 | return true; |
5601 | |
5602 | // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not |
5603 | // require fences beacuse we can use amswap_db.[w/d]. |
5604 | Type *Ty = I->getOperand(i: 0)->getType(); |
5605 | if (isa<StoreInst>(Val: I) && Ty->isIntegerTy()) { |
5606 | unsigned Size = Ty->getIntegerBitWidth(); |
5607 | return (Size == 8 || Size == 16); |
5608 | } |
5609 | |
5610 | return false; |
5611 | } |
5612 | |
5613 | EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL, |
5614 | LLVMContext &Context, |
5615 | EVT VT) const { |
5616 | if (!VT.isVector()) |
5617 | return getPointerTy(DL); |
5618 | return VT.changeVectorElementTypeToInteger(); |
5619 | } |
5620 | |
5621 | bool LoongArchTargetLowering::hasAndNot(SDValue Y) const { |
5622 | // TODO: Support vectors. |
5623 | return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Val: Y); |
5624 | } |
5625 | |
5626 | bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, |
5627 | const CallInst &I, |
5628 | MachineFunction &MF, |
5629 | unsigned Intrinsic) const { |
5630 | switch (Intrinsic) { |
5631 | default: |
5632 | return false; |
5633 | case Intrinsic::loongarch_masked_atomicrmw_xchg_i32: |
5634 | case Intrinsic::loongarch_masked_atomicrmw_add_i32: |
5635 | case Intrinsic::loongarch_masked_atomicrmw_sub_i32: |
5636 | case Intrinsic::loongarch_masked_atomicrmw_nand_i32: |
5637 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
5638 | Info.memVT = MVT::i32; |
5639 | Info.ptrVal = I.getArgOperand(i: 0); |
5640 | Info.offset = 0; |
5641 | Info.align = Align(4); |
5642 | Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore | |
5643 | MachineMemOperand::MOVolatile; |
5644 | return true; |
5645 | // TODO: Add more Intrinsics later. |
5646 | } |
5647 | } |
5648 | |
5649 | TargetLowering::AtomicExpansionKind |
5650 | LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { |
5651 | // TODO: Add more AtomicRMWInst that needs to be extended. |
5652 | |
5653 | // Since floating-point operation requires a non-trivial set of data |
5654 | // operations, use CmpXChg to expand. |
5655 | if (AI->isFloatingPointOperation() || |
5656 | AI->getOperation() == AtomicRMWInst::UIncWrap || |
5657 | AI->getOperation() == AtomicRMWInst::UDecWrap) |
5658 | return AtomicExpansionKind::CmpXChg; |
5659 | |
5660 | unsigned Size = AI->getType()->getPrimitiveSizeInBits(); |
5661 | if (Size == 8 || Size == 16) |
5662 | return AtomicExpansionKind::MaskedIntrinsic; |
5663 | return AtomicExpansionKind::None; |
5664 | } |
5665 | |
5666 | static Intrinsic::ID |
5667 | getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, |
5668 | AtomicRMWInst::BinOp BinOp) { |
5669 | if (GRLen == 64) { |
5670 | switch (BinOp) { |
5671 | default: |
5672 | llvm_unreachable("Unexpected AtomicRMW BinOp" ); |
5673 | case AtomicRMWInst::Xchg: |
5674 | return Intrinsic::loongarch_masked_atomicrmw_xchg_i64; |
5675 | case AtomicRMWInst::Add: |
5676 | return Intrinsic::loongarch_masked_atomicrmw_add_i64; |
5677 | case AtomicRMWInst::Sub: |
5678 | return Intrinsic::loongarch_masked_atomicrmw_sub_i64; |
5679 | case AtomicRMWInst::Nand: |
5680 | return Intrinsic::loongarch_masked_atomicrmw_nand_i64; |
5681 | case AtomicRMWInst::UMax: |
5682 | return Intrinsic::loongarch_masked_atomicrmw_umax_i64; |
5683 | case AtomicRMWInst::UMin: |
5684 | return Intrinsic::loongarch_masked_atomicrmw_umin_i64; |
5685 | case AtomicRMWInst::Max: |
5686 | return Intrinsic::loongarch_masked_atomicrmw_max_i64; |
5687 | case AtomicRMWInst::Min: |
5688 | return Intrinsic::loongarch_masked_atomicrmw_min_i64; |
5689 | // TODO: support other AtomicRMWInst. |
5690 | } |
5691 | } |
5692 | |
5693 | if (GRLen == 32) { |
5694 | switch (BinOp) { |
5695 | default: |
5696 | llvm_unreachable("Unexpected AtomicRMW BinOp" ); |
5697 | case AtomicRMWInst::Xchg: |
5698 | return Intrinsic::loongarch_masked_atomicrmw_xchg_i32; |
5699 | case AtomicRMWInst::Add: |
5700 | return Intrinsic::loongarch_masked_atomicrmw_add_i32; |
5701 | case AtomicRMWInst::Sub: |
5702 | return Intrinsic::loongarch_masked_atomicrmw_sub_i32; |
5703 | case AtomicRMWInst::Nand: |
5704 | return Intrinsic::loongarch_masked_atomicrmw_nand_i32; |
5705 | // TODO: support other AtomicRMWInst. |
5706 | } |
5707 | } |
5708 | |
5709 | llvm_unreachable("Unexpected GRLen\n" ); |
5710 | } |
5711 | |
5712 | TargetLowering::AtomicExpansionKind |
5713 | LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR( |
5714 | AtomicCmpXchgInst *CI) const { |
5715 | unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits(); |
5716 | if (Size == 8 || Size == 16) |
5717 | return AtomicExpansionKind::MaskedIntrinsic; |
5718 | return AtomicExpansionKind::None; |
5719 | } |
5720 | |
5721 | Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( |
5722 | IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, |
5723 | Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { |
5724 | AtomicOrdering FailOrd = CI->getFailureOrdering(); |
5725 | Value *FailureOrdering = |
5726 | Builder.getIntN(N: Subtarget.getGRLen(), C: static_cast<uint64_t>(FailOrd)); |
5727 | |
5728 | // TODO: Support cmpxchg on LA32. |
5729 | Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64; |
5730 | CmpVal = Builder.CreateSExt(V: CmpVal, DestTy: Builder.getInt64Ty()); |
5731 | NewVal = Builder.CreateSExt(V: NewVal, DestTy: Builder.getInt64Ty()); |
5732 | Mask = Builder.CreateSExt(V: Mask, DestTy: Builder.getInt64Ty()); |
5733 | Type *Tys[] = {AlignedAddr->getType()}; |
5734 | Function *MaskedCmpXchg = |
5735 | Intrinsic::getDeclaration(M: CI->getModule(), id: CmpXchgIntrID, Tys); |
5736 | Value *Result = Builder.CreateCall( |
5737 | Callee: MaskedCmpXchg, Args: {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering}); |
5738 | Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty()); |
5739 | return Result; |
5740 | } |
5741 | |
5742 | Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic( |
5743 | IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, |
5744 | Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const { |
5745 | // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace |
5746 | // the atomic instruction with an AtomicRMWInst::And/Or with appropriate |
5747 | // mask, as this produces better code than the LL/SC loop emitted by |
5748 | // int_loongarch_masked_atomicrmw_xchg. |
5749 | if (AI->getOperation() == AtomicRMWInst::Xchg && |
5750 | isa<ConstantInt>(Val: AI->getValOperand())) { |
5751 | ConstantInt *CVal = cast<ConstantInt>(Val: AI->getValOperand()); |
5752 | if (CVal->isZero()) |
5753 | return Builder.CreateAtomicRMW(Op: AtomicRMWInst::And, Ptr: AlignedAddr, |
5754 | Val: Builder.CreateNot(V: Mask, Name: "Inv_Mask" ), |
5755 | Align: AI->getAlign(), Ordering: Ord); |
5756 | if (CVal->isMinusOne()) |
5757 | return Builder.CreateAtomicRMW(Op: AtomicRMWInst::Or, Ptr: AlignedAddr, Val: Mask, |
5758 | Align: AI->getAlign(), Ordering: Ord); |
5759 | } |
5760 | |
5761 | unsigned GRLen = Subtarget.getGRLen(); |
5762 | Value *Ordering = |
5763 | Builder.getIntN(N: GRLen, C: static_cast<uint64_t>(AI->getOrdering())); |
5764 | Type *Tys[] = {AlignedAddr->getType()}; |
5765 | Function *LlwOpScwLoop = Intrinsic::getDeclaration( |
5766 | M: AI->getModule(), |
5767 | id: getIntrinsicForMaskedAtomicRMWBinOp(GRLen, BinOp: AI->getOperation()), Tys); |
5768 | |
5769 | if (GRLen == 64) { |
5770 | Incr = Builder.CreateSExt(V: Incr, DestTy: Builder.getInt64Ty()); |
5771 | Mask = Builder.CreateSExt(V: Mask, DestTy: Builder.getInt64Ty()); |
5772 | ShiftAmt = Builder.CreateSExt(V: ShiftAmt, DestTy: Builder.getInt64Ty()); |
5773 | } |
5774 | |
5775 | Value *Result; |
5776 | |
5777 | // Must pass the shift amount needed to sign extend the loaded value prior |
5778 | // to performing a signed comparison for min/max. ShiftAmt is the number of |
5779 | // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which |
5780 | // is the number of bits to left+right shift the value in order to |
5781 | // sign-extend. |
5782 | if (AI->getOperation() == AtomicRMWInst::Min || |
5783 | AI->getOperation() == AtomicRMWInst::Max) { |
5784 | const DataLayout &DL = AI->getDataLayout(); |
5785 | unsigned ValWidth = |
5786 | DL.getTypeStoreSizeInBits(Ty: AI->getValOperand()->getType()); |
5787 | Value *SextShamt = |
5788 | Builder.CreateSub(LHS: Builder.getIntN(N: GRLen, C: GRLen - ValWidth), RHS: ShiftAmt); |
5789 | Result = Builder.CreateCall(Callee: LlwOpScwLoop, |
5790 | Args: {AlignedAddr, Incr, Mask, SextShamt, Ordering}); |
5791 | } else { |
5792 | Result = |
5793 | Builder.CreateCall(Callee: LlwOpScwLoop, Args: {AlignedAddr, Incr, Mask, Ordering}); |
5794 | } |
5795 | |
5796 | if (GRLen == 64) |
5797 | Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty()); |
5798 | return Result; |
5799 | } |
5800 | |
5801 | bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd( |
5802 | const MachineFunction &MF, EVT VT) const { |
5803 | VT = VT.getScalarType(); |
5804 | |
5805 | if (!VT.isSimple()) |
5806 | return false; |
5807 | |
5808 | switch (VT.getSimpleVT().SimpleTy) { |
5809 | case MVT::f32: |
5810 | case MVT::f64: |
5811 | return true; |
5812 | default: |
5813 | break; |
5814 | } |
5815 | |
5816 | return false; |
5817 | } |
5818 | |
5819 | Register LoongArchTargetLowering::getExceptionPointerRegister( |
5820 | const Constant *PersonalityFn) const { |
5821 | return LoongArch::R4; |
5822 | } |
5823 | |
5824 | Register LoongArchTargetLowering::getExceptionSelectorRegister( |
5825 | const Constant *PersonalityFn) const { |
5826 | return LoongArch::R5; |
5827 | } |
5828 | |
5829 | //===----------------------------------------------------------------------===// |
5830 | // LoongArch Inline Assembly Support |
5831 | //===----------------------------------------------------------------------===// |
5832 | |
5833 | LoongArchTargetLowering::ConstraintType |
5834 | LoongArchTargetLowering::getConstraintType(StringRef Constraint) const { |
5835 | // LoongArch specific constraints in GCC: config/loongarch/constraints.md |
5836 | // |
5837 | // 'f': A floating-point register (if available). |
5838 | // 'k': A memory operand whose address is formed by a base register and |
5839 | // (optionally scaled) index register. |
5840 | // 'l': A signed 16-bit constant. |
5841 | // 'm': A memory operand whose address is formed by a base register and |
5842 | // offset that is suitable for use in instructions with the same |
5843 | // addressing mode as st.w and ld.w. |
5844 | // 'I': A signed 12-bit constant (for arithmetic instructions). |
5845 | // 'J': Integer zero. |
5846 | // 'K': An unsigned 12-bit constant (for logic instructions). |
5847 | // "ZB": An address that is held in a general-purpose register. The offset is |
5848 | // zero. |
5849 | // "ZC": A memory operand whose address is formed by a base register and |
5850 | // offset that is suitable for use in instructions with the same |
5851 | // addressing mode as ll.w and sc.w. |
5852 | if (Constraint.size() == 1) { |
5853 | switch (Constraint[0]) { |
5854 | default: |
5855 | break; |
5856 | case 'f': |
5857 | return C_RegisterClass; |
5858 | case 'l': |
5859 | case 'I': |
5860 | case 'J': |
5861 | case 'K': |
5862 | return C_Immediate; |
5863 | case 'k': |
5864 | return C_Memory; |
5865 | } |
5866 | } |
5867 | |
5868 | if (Constraint == "ZC" || Constraint == "ZB" ) |
5869 | return C_Memory; |
5870 | |
5871 | // 'm' is handled here. |
5872 | return TargetLowering::getConstraintType(Constraint); |
5873 | } |
5874 | |
5875 | InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint( |
5876 | StringRef ConstraintCode) const { |
5877 | return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode) |
5878 | .Case(S: "k" , Value: InlineAsm::ConstraintCode::k) |
5879 | .Case(S: "ZB" , Value: InlineAsm::ConstraintCode::ZB) |
5880 | .Case(S: "ZC" , Value: InlineAsm::ConstraintCode::ZC) |
5881 | .Default(Value: TargetLowering::getInlineAsmMemConstraint(ConstraintCode)); |
5882 | } |
5883 | |
5884 | std::pair<unsigned, const TargetRegisterClass *> |
5885 | LoongArchTargetLowering::getRegForInlineAsmConstraint( |
5886 | const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { |
5887 | // First, see if this is a constraint that directly corresponds to a LoongArch |
5888 | // register class. |
5889 | if (Constraint.size() == 1) { |
5890 | switch (Constraint[0]) { |
5891 | case 'r': |
5892 | // TODO: Support fixed vectors up to GRLen? |
5893 | if (VT.isVector()) |
5894 | break; |
5895 | return std::make_pair(x: 0U, y: &LoongArch::GPRRegClass); |
5896 | case 'f': |
5897 | if (Subtarget.hasBasicF() && VT == MVT::f32) |
5898 | return std::make_pair(x: 0U, y: &LoongArch::FPR32RegClass); |
5899 | if (Subtarget.hasBasicD() && VT == MVT::f64) |
5900 | return std::make_pair(x: 0U, y: &LoongArch::FPR64RegClass); |
5901 | if (Subtarget.hasExtLSX() && |
5902 | TRI->isTypeLegalForClass(RC: LoongArch::LSX128RegClass, T: VT)) |
5903 | return std::make_pair(x: 0U, y: &LoongArch::LSX128RegClass); |
5904 | if (Subtarget.hasExtLASX() && |
5905 | TRI->isTypeLegalForClass(RC: LoongArch::LASX256RegClass, T: VT)) |
5906 | return std::make_pair(x: 0U, y: &LoongArch::LASX256RegClass); |
5907 | break; |
5908 | default: |
5909 | break; |
5910 | } |
5911 | } |
5912 | |
5913 | // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen |
5914 | // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm |
5915 | // constraints while the official register name is prefixed with a '$'. So we |
5916 | // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.) |
5917 | // before it being parsed. And TargetLowering::getRegForInlineAsmConstraint is |
5918 | // case insensitive, so no need to convert the constraint to upper case here. |
5919 | // |
5920 | // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly |
5921 | // decode the usage of register name aliases into their official names. And |
5922 | // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use |
5923 | // official register names. |
5924 | if (Constraint.starts_with(Prefix: "{$r" ) || Constraint.starts_with(Prefix: "{$f" ) || |
5925 | Constraint.starts_with(Prefix: "{$vr" ) || Constraint.starts_with(Prefix: "{$xr" )) { |
5926 | bool IsFP = Constraint[2] == 'f'; |
5927 | std::pair<StringRef, StringRef> Temp = Constraint.split(Separator: '$'); |
5928 | std::pair<unsigned, const TargetRegisterClass *> R; |
5929 | R = TargetLowering::getRegForInlineAsmConstraint( |
5930 | TRI, Constraint: join_items(Separator: "" , Items&: Temp.first, Items&: Temp.second), VT); |
5931 | // Match those names to the widest floating point register type available. |
5932 | if (IsFP) { |
5933 | unsigned RegNo = R.first; |
5934 | if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) { |
5935 | if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) { |
5936 | unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64; |
5937 | return std::make_pair(x&: DReg, y: &LoongArch::FPR64RegClass); |
5938 | } |
5939 | } |
5940 | } |
5941 | return R; |
5942 | } |
5943 | |
5944 | return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); |
5945 | } |
5946 | |
5947 | void LoongArchTargetLowering::LowerAsmOperandForConstraint( |
5948 | SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops, |
5949 | SelectionDAG &DAG) const { |
5950 | // Currently only support length 1 constraints. |
5951 | if (Constraint.size() == 1) { |
5952 | switch (Constraint[0]) { |
5953 | case 'l': |
5954 | // Validate & create a 16-bit signed immediate operand. |
5955 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) { |
5956 | uint64_t CVal = C->getSExtValue(); |
5957 | if (isInt<16>(x: CVal)) |
5958 | Ops.push_back( |
5959 | x: DAG.getTargetConstant(Val: CVal, DL: SDLoc(Op), VT: Subtarget.getGRLenVT())); |
5960 | } |
5961 | return; |
5962 | case 'I': |
5963 | // Validate & create a 12-bit signed immediate operand. |
5964 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) { |
5965 | uint64_t CVal = C->getSExtValue(); |
5966 | if (isInt<12>(x: CVal)) |
5967 | Ops.push_back( |
5968 | x: DAG.getTargetConstant(Val: CVal, DL: SDLoc(Op), VT: Subtarget.getGRLenVT())); |
5969 | } |
5970 | return; |
5971 | case 'J': |
5972 | // Validate & create an integer zero operand. |
5973 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) |
5974 | if (C->getZExtValue() == 0) |
5975 | Ops.push_back( |
5976 | x: DAG.getTargetConstant(Val: 0, DL: SDLoc(Op), VT: Subtarget.getGRLenVT())); |
5977 | return; |
5978 | case 'K': |
5979 | // Validate & create a 12-bit unsigned immediate operand. |
5980 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) { |
5981 | uint64_t CVal = C->getZExtValue(); |
5982 | if (isUInt<12>(x: CVal)) |
5983 | Ops.push_back( |
5984 | x: DAG.getTargetConstant(Val: CVal, DL: SDLoc(Op), VT: Subtarget.getGRLenVT())); |
5985 | } |
5986 | return; |
5987 | default: |
5988 | break; |
5989 | } |
5990 | } |
5991 | TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); |
5992 | } |
5993 | |
5994 | #define GET_REGISTER_MATCHER |
5995 | #include "LoongArchGenAsmMatcher.inc" |
5996 | |
5997 | Register |
5998 | LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT, |
5999 | const MachineFunction &MF) const { |
6000 | std::pair<StringRef, StringRef> Name = StringRef(RegName).split(Separator: '$'); |
6001 | std::string NewRegName = Name.second.str(); |
6002 | Register Reg = MatchRegisterAltName(Name: NewRegName); |
6003 | if (Reg == LoongArch::NoRegister) |
6004 | Reg = MatchRegisterName(Name: NewRegName); |
6005 | if (Reg == LoongArch::NoRegister) |
6006 | report_fatal_error( |
6007 | reason: Twine("Invalid register name \"" + StringRef(RegName) + "\"." )); |
6008 | BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF); |
6009 | if (!ReservedRegs.test(Idx: Reg)) |
6010 | report_fatal_error(reason: Twine("Trying to obtain non-reserved register \"" + |
6011 | StringRef(RegName) + "\"." )); |
6012 | return Reg; |
6013 | } |
6014 | |
6015 | bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context, |
6016 | EVT VT, SDValue C) const { |
6017 | // TODO: Support vectors. |
6018 | if (!VT.isScalarInteger()) |
6019 | return false; |
6020 | |
6021 | // Omit the optimization if the data size exceeds GRLen. |
6022 | if (VT.getSizeInBits() > Subtarget.getGRLen()) |
6023 | return false; |
6024 | |
6025 | if (auto *ConstNode = dyn_cast<ConstantSDNode>(Val: C.getNode())) { |
6026 | const APInt &Imm = ConstNode->getAPIntValue(); |
6027 | // Break MUL into (SLLI + ADD/SUB) or ALSL. |
6028 | if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() || |
6029 | (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2()) |
6030 | return true; |
6031 | // Break MUL into (ALSL x, (SLLI x, imm0), imm1). |
6032 | if (ConstNode->hasOneUse() && |
6033 | ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() || |
6034 | (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2())) |
6035 | return true; |
6036 | // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)), |
6037 | // in which the immediate has two set bits. Or Break (MUL x, imm) |
6038 | // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate |
6039 | // equals to (1 << s0) - (1 << s1). |
6040 | if (ConstNode->hasOneUse() && !(Imm.sge(RHS: -2048) && Imm.sle(RHS: 4095))) { |
6041 | unsigned Shifts = Imm.countr_zero(); |
6042 | // Reject immediates which can be composed via a single LUI. |
6043 | if (Shifts >= 12) |
6044 | return false; |
6045 | // Reject multiplications can be optimized to |
6046 | // (SLLI (ALSL x, x, 1/2/3/4), s). |
6047 | APInt ImmPop = Imm.ashr(ShiftAmt: Shifts); |
6048 | if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17) |
6049 | return false; |
6050 | // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`, |
6051 | // since it needs one more instruction than other 3 cases. |
6052 | APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true); |
6053 | if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() || |
6054 | (ImmSmall - Imm).isPowerOf2()) |
6055 | return true; |
6056 | } |
6057 | } |
6058 | |
6059 | return false; |
6060 | } |
6061 | |
6062 | bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL, |
6063 | const AddrMode &AM, |
6064 | Type *Ty, unsigned AS, |
6065 | Instruction *I) const { |
6066 | // LoongArch has four basic addressing modes: |
6067 | // 1. reg |
6068 | // 2. reg + 12-bit signed offset |
6069 | // 3. reg + 14-bit signed offset left-shifted by 2 |
6070 | // 4. reg1 + reg2 |
6071 | // TODO: Add more checks after support vector extension. |
6072 | |
6073 | // No global is ever allowed as a base. |
6074 | if (AM.BaseGV) |
6075 | return false; |
6076 | |
6077 | // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2 |
6078 | // with `UAL` feature. |
6079 | if (!isInt<12>(x: AM.BaseOffs) && |
6080 | !(isShiftedInt<14, 2>(x: AM.BaseOffs) && Subtarget.hasUAL())) |
6081 | return false; |
6082 | |
6083 | switch (AM.Scale) { |
6084 | case 0: |
6085 | // "r+i" or just "i", depending on HasBaseReg. |
6086 | break; |
6087 | case 1: |
6088 | // "r+r+i" is not allowed. |
6089 | if (AM.HasBaseReg && AM.BaseOffs) |
6090 | return false; |
6091 | // Otherwise we have "r+r" or "r+i". |
6092 | break; |
6093 | case 2: |
6094 | // "2*r+r" or "2*r+i" is not allowed. |
6095 | if (AM.HasBaseReg || AM.BaseOffs) |
6096 | return false; |
6097 | // Allow "2*r" as "r+r". |
6098 | break; |
6099 | default: |
6100 | return false; |
6101 | } |
6102 | |
6103 | return true; |
6104 | } |
6105 | |
6106 | bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const { |
6107 | return isInt<12>(x: Imm); |
6108 | } |
6109 | |
6110 | bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const { |
6111 | return isInt<12>(x: Imm); |
6112 | } |
6113 | |
6114 | bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { |
6115 | // Zexts are free if they can be combined with a load. |
6116 | // Don't advertise i32->i64 zextload as being free for LA64. It interacts |
6117 | // poorly with type legalization of compares preferring sext. |
6118 | if (auto *LD = dyn_cast<LoadSDNode>(Val)) { |
6119 | EVT MemVT = LD->getMemoryVT(); |
6120 | if ((MemVT == MVT::i8 || MemVT == MVT::i16) && |
6121 | (LD->getExtensionType() == ISD::NON_EXTLOAD || |
6122 | LD->getExtensionType() == ISD::ZEXTLOAD)) |
6123 | return true; |
6124 | } |
6125 | |
6126 | return TargetLowering::isZExtFree(Val, VT2); |
6127 | } |
6128 | |
6129 | bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, |
6130 | EVT DstVT) const { |
6131 | return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64; |
6132 | } |
6133 | |
6134 | bool LoongArchTargetLowering::signExtendConstant(const ConstantInt *CI) const { |
6135 | return Subtarget.is64Bit() && CI->getType()->isIntegerTy(Bitwidth: 32); |
6136 | } |
6137 | |
6138 | bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const { |
6139 | // TODO: Support vectors. |
6140 | if (Y.getValueType().isVector()) |
6141 | return false; |
6142 | |
6143 | return !isa<ConstantSDNode>(Val: Y); |
6144 | } |
6145 | |
6146 | ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const { |
6147 | // TODO: LAMCAS will use amcas{_DB,}.[bhwd] which does not require extension. |
6148 | return ISD::SIGN_EXTEND; |
6149 | } |
6150 | |
6151 | bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall( |
6152 | EVT Type, bool IsSigned) const { |
6153 | if (Subtarget.is64Bit() && Type == MVT::i32) |
6154 | return true; |
6155 | |
6156 | return IsSigned; |
6157 | } |
6158 | |
6159 | bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const { |
6160 | // Return false to suppress the unnecessary extensions if the LibCall |
6161 | // arguments or return value is a float narrower than GRLEN on a soft FP ABI. |
6162 | if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() && |
6163 | Type.getSizeInBits() < Subtarget.getGRLen())) |
6164 | return false; |
6165 | return true; |
6166 | } |
6167 | |