1 | //=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the interfaces that LoongArch uses to lower LLVM code into |
10 | // a selection DAG. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "LoongArchISelLowering.h" |
15 | #include "LoongArch.h" |
16 | #include "LoongArchMachineFunctionInfo.h" |
17 | #include "LoongArchRegisterInfo.h" |
18 | #include "LoongArchSubtarget.h" |
19 | #include "LoongArchTargetMachine.h" |
20 | #include "MCTargetDesc/LoongArchBaseInfo.h" |
21 | #include "MCTargetDesc/LoongArchMCTargetDesc.h" |
22 | #include "llvm/ADT/Statistic.h" |
23 | #include "llvm/ADT/StringExtras.h" |
24 | #include "llvm/CodeGen/ISDOpcodes.h" |
25 | #include "llvm/CodeGen/RuntimeLibcallUtil.h" |
26 | #include "llvm/CodeGen/SelectionDAGNodes.h" |
27 | #include "llvm/IR/IRBuilder.h" |
28 | #include "llvm/IR/IntrinsicsLoongArch.h" |
29 | #include "llvm/Support/CodeGen.h" |
30 | #include "llvm/Support/Debug.h" |
31 | #include "llvm/Support/ErrorHandling.h" |
32 | #include "llvm/Support/KnownBits.h" |
33 | #include "llvm/Support/MathExtras.h" |
34 | |
35 | using namespace llvm; |
36 | |
37 | #define DEBUG_TYPE "loongarch-isel-lowering" |
38 | |
STATISTIC(NumTailCalls, "Number of tail calls");

static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
                                  cl::desc("Trap on integer division by zero."),
                                  cl::init(false));
44 | |
45 | LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, |
46 | const LoongArchSubtarget &STI) |
47 | : TargetLowering(TM), Subtarget(STI) { |
48 | |
49 | MVT GRLenVT = Subtarget.getGRLenVT(); |
50 | |
51 | // Set up the register classes. |
52 | |
53 | addRegisterClass(VT: GRLenVT, RC: &LoongArch::GPRRegClass); |
54 | if (Subtarget.hasBasicF()) |
55 | addRegisterClass(VT: MVT::f32, RC: &LoongArch::FPR32RegClass); |
56 | if (Subtarget.hasBasicD()) |
57 | addRegisterClass(VT: MVT::f64, RC: &LoongArch::FPR64RegClass); |
58 | |
59 | static const MVT::SimpleValueType LSXVTs[] = { |
60 | MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64}; |
61 | static const MVT::SimpleValueType LASXVTs[] = { |
62 | MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64}; |
63 | |
64 | if (Subtarget.hasExtLSX()) |
65 | for (MVT VT : LSXVTs) |
66 | addRegisterClass(VT, RC: &LoongArch::LSX128RegClass); |
67 | |
68 | if (Subtarget.hasExtLASX()) |
69 | for (MVT VT : LASXVTs) |
70 | addRegisterClass(VT, RC: &LoongArch::LASX256RegClass); |
71 | |
72 | // Set operations for LA32 and LA64. |
73 | |
74 | setLoadExtAction(ExtTypes: {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, ValVT: GRLenVT, |
75 | MemVT: MVT::i1, Action: Promote); |
76 | |
77 | setOperationAction(Op: ISD::SHL_PARTS, VT: GRLenVT, Action: Custom); |
78 | setOperationAction(Op: ISD::SRA_PARTS, VT: GRLenVT, Action: Custom); |
79 | setOperationAction(Op: ISD::SRL_PARTS, VT: GRLenVT, Action: Custom); |
80 | setOperationAction(Op: ISD::FP_TO_SINT, VT: GRLenVT, Action: Custom); |
81 | setOperationAction(Op: ISD::ROTL, VT: GRLenVT, Action: Expand); |
82 | setOperationAction(Op: ISD::CTPOP, VT: GRLenVT, Action: Expand); |
83 | |
84 | setOperationAction(Ops: {ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool, |
85 | ISD::JumpTable, ISD::GlobalTLSAddress}, |
86 | VT: GRLenVT, Action: Custom); |
87 | |
88 | setOperationAction(Op: ISD::EH_DWARF_CFA, VT: GRLenVT, Action: Custom); |
89 | |
90 | setOperationAction(Op: ISD::DYNAMIC_STACKALLOC, VT: GRLenVT, Action: Expand); |
91 | setOperationAction(Ops: {ISD::STACKSAVE, ISD::STACKRESTORE}, VT: MVT::Other, Action: Expand); |
92 | setOperationAction(Op: ISD::VASTART, VT: MVT::Other, Action: Custom); |
93 | setOperationAction(Ops: {ISD::VAARG, ISD::VACOPY, ISD::VAEND}, VT: MVT::Other, Action: Expand); |
94 | |
95 | setOperationAction(Op: ISD::DEBUGTRAP, VT: MVT::Other, Action: Legal); |
96 | setOperationAction(Op: ISD::TRAP, VT: MVT::Other, Action: Legal); |
97 | |
98 | setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::Other, Action: Custom); |
99 | setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::Other, Action: Custom); |
100 | setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::Other, Action: Custom); |
101 | |
  // Expand bitreverse.i16 with native-width bitrev and shift for now, until
  // we know which of sll and revb.2h is faster.
104 | setOperationAction(Op: ISD::BITREVERSE, VT: MVT::i8, Action: Custom); |
105 | setOperationAction(Op: ISD::BITREVERSE, VT: GRLenVT, Action: Legal); |
106 | |
107 | // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and |
108 | // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16 |
109 | // and i32 could still be byte-swapped relatively cheaply. |
110 | setOperationAction(Op: ISD::BSWAP, VT: MVT::i16, Action: Custom); |
111 | |
112 | setOperationAction(Op: ISD::BR_JT, VT: MVT::Other, Action: Expand); |
113 | setOperationAction(Op: ISD::BR_CC, VT: GRLenVT, Action: Expand); |
114 | setOperationAction(Op: ISD::SELECT_CC, VT: GRLenVT, Action: Expand); |
115 | setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::i1, Action: Expand); |
116 | setOperationAction(Ops: {ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT: GRLenVT, Action: Expand); |
117 | |
118 | setOperationAction(Op: ISD::FP_TO_UINT, VT: GRLenVT, Action: Custom); |
119 | setOperationAction(Op: ISD::UINT_TO_FP, VT: GRLenVT, Action: Expand); |
120 | |
121 | // Set operations for LA64 only. |
122 | |
123 | if (Subtarget.is64Bit()) { |
124 | setOperationAction(Op: ISD::ADD, VT: MVT::i32, Action: Custom); |
125 | setOperationAction(Op: ISD::SUB, VT: MVT::i32, Action: Custom); |
126 | setOperationAction(Op: ISD::SHL, VT: MVT::i32, Action: Custom); |
127 | setOperationAction(Op: ISD::SRA, VT: MVT::i32, Action: Custom); |
128 | setOperationAction(Op: ISD::SRL, VT: MVT::i32, Action: Custom); |
129 | setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::i32, Action: Custom); |
130 | setOperationAction(Op: ISD::BITCAST, VT: MVT::i32, Action: Custom); |
131 | setOperationAction(Op: ISD::ROTR, VT: MVT::i32, Action: Custom); |
132 | setOperationAction(Op: ISD::ROTL, VT: MVT::i32, Action: Custom); |
133 | setOperationAction(Op: ISD::CTTZ, VT: MVT::i32, Action: Custom); |
134 | setOperationAction(Op: ISD::CTLZ, VT: MVT::i32, Action: Custom); |
135 | setOperationAction(Op: ISD::EH_DWARF_CFA, VT: MVT::i32, Action: Custom); |
136 | setOperationAction(Op: ISD::READ_REGISTER, VT: MVT::i32, Action: Custom); |
137 | setOperationAction(Op: ISD::WRITE_REGISTER, VT: MVT::i32, Action: Custom); |
138 | setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::i32, Action: Custom); |
139 | setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::i32, Action: Custom); |
140 | setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::i32, Action: Custom); |
141 | |
142 | setOperationAction(Op: ISD::BITREVERSE, VT: MVT::i32, Action: Custom); |
143 | setOperationAction(Op: ISD::BSWAP, VT: MVT::i32, Action: Custom); |
144 | setOperationAction(Ops: {ISD::UDIV, ISD::UREM}, VT: MVT::i32, Action: Custom); |
145 | } |
146 | |
147 | // Set operations for LA32 only. |
148 | |
149 | if (!Subtarget.is64Bit()) { |
150 | setOperationAction(Op: ISD::READ_REGISTER, VT: MVT::i64, Action: Custom); |
151 | setOperationAction(Op: ISD::WRITE_REGISTER, VT: MVT::i64, Action: Custom); |
152 | setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::i64, Action: Custom); |
153 | setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::i64, Action: Custom); |
154 | setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::i64, Action: Custom); |
155 | } |
156 | |
157 | setOperationAction(Op: ISD::ATOMIC_FENCE, VT: MVT::Other, Action: Custom); |
158 | |
159 | static const ISD::CondCode FPCCToExpand[] = { |
160 | ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE, |
161 | ISD::SETGE, ISD::SETNE, ISD::SETGT}; |
162 | |
163 | // Set operations for 'F' feature. |
164 | |
165 | if (Subtarget.hasBasicF()) { |
166 | setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f32, MemVT: MVT::f16, Action: Expand); |
167 | setTruncStoreAction(ValVT: MVT::f32, MemVT: MVT::f16, Action: Expand); |
168 | setCondCodeAction(CCs: FPCCToExpand, VT: MVT::f32, Action: Expand); |
169 | |
170 | setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f32, Action: Expand); |
171 | setOperationAction(Op: ISD::BR_CC, VT: MVT::f32, Action: Expand); |
172 | setOperationAction(Op: ISD::FMA, VT: MVT::f32, Action: Legal); |
173 | setOperationAction(Op: ISD::FMINNUM_IEEE, VT: MVT::f32, Action: Legal); |
174 | setOperationAction(Op: ISD::FMAXNUM_IEEE, VT: MVT::f32, Action: Legal); |
175 | setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::f32, Action: Legal); |
176 | setOperationAction(Op: ISD::STRICT_FSETCC, VT: MVT::f32, Action: Legal); |
177 | setOperationAction(Op: ISD::IS_FPCLASS, VT: MVT::f32, Action: Legal); |
178 | setOperationAction(Op: ISD::FSIN, VT: MVT::f32, Action: Expand); |
179 | setOperationAction(Op: ISD::FCOS, VT: MVT::f32, Action: Expand); |
180 | setOperationAction(Op: ISD::FSINCOS, VT: MVT::f32, Action: Expand); |
181 | setOperationAction(Op: ISD::FPOW, VT: MVT::f32, Action: Expand); |
182 | setOperationAction(Op: ISD::FREM, VT: MVT::f32, Action: Expand); |
183 | setOperationAction(Op: ISD::FP16_TO_FP, VT: MVT::f32, Action: Expand); |
184 | setOperationAction(Op: ISD::FP_TO_FP16, VT: MVT::f32, Action: Expand); |
185 | |
186 | if (Subtarget.is64Bit()) |
187 | setOperationAction(Op: ISD::FRINT, VT: MVT::f32, Action: Legal); |
188 | |
189 | if (!Subtarget.hasBasicD()) { |
190 | setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i32, Action: Custom); |
191 | if (Subtarget.is64Bit()) { |
192 | setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i64, Action: Custom); |
193 | setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i64, Action: Custom); |
194 | } |
195 | } |
196 | } |
197 | |
198 | // Set operations for 'D' feature. |
199 | |
200 | if (Subtarget.hasBasicD()) { |
201 | setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f64, MemVT: MVT::f16, Action: Expand); |
202 | setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f64, MemVT: MVT::f32, Action: Expand); |
203 | setTruncStoreAction(ValVT: MVT::f64, MemVT: MVT::f16, Action: Expand); |
204 | setTruncStoreAction(ValVT: MVT::f64, MemVT: MVT::f32, Action: Expand); |
205 | setCondCodeAction(CCs: FPCCToExpand, VT: MVT::f64, Action: Expand); |
206 | |
207 | setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f64, Action: Expand); |
208 | setOperationAction(Op: ISD::BR_CC, VT: MVT::f64, Action: Expand); |
209 | setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::f64, Action: Legal); |
210 | setOperationAction(Op: ISD::STRICT_FSETCC, VT: MVT::f64, Action: Legal); |
211 | setOperationAction(Op: ISD::FMA, VT: MVT::f64, Action: Legal); |
212 | setOperationAction(Op: ISD::FMINNUM_IEEE, VT: MVT::f64, Action: Legal); |
213 | setOperationAction(Op: ISD::FMAXNUM_IEEE, VT: MVT::f64, Action: Legal); |
214 | setOperationAction(Op: ISD::IS_FPCLASS, VT: MVT::f64, Action: Legal); |
215 | setOperationAction(Op: ISD::FSIN, VT: MVT::f64, Action: Expand); |
216 | setOperationAction(Op: ISD::FCOS, VT: MVT::f64, Action: Expand); |
217 | setOperationAction(Op: ISD::FSINCOS, VT: MVT::f64, Action: Expand); |
218 | setOperationAction(Op: ISD::FPOW, VT: MVT::f64, Action: Expand); |
219 | setOperationAction(Op: ISD::FREM, VT: MVT::f64, Action: Expand); |
220 | setOperationAction(Op: ISD::FP16_TO_FP, VT: MVT::f64, Action: Expand); |
221 | setOperationAction(Op: ISD::FP_TO_FP16, VT: MVT::f64, Action: Expand); |
222 | |
223 | if (Subtarget.is64Bit()) |
224 | setOperationAction(Op: ISD::FRINT, VT: MVT::f64, Action: Legal); |
225 | } |
226 | |
227 | // Set operations for 'LSX' feature. |
228 | |
229 | if (Subtarget.hasExtLSX()) { |
230 | for (MVT VT : MVT::fixedlen_vector_valuetypes()) { |
231 | // Expand all truncating stores and extending loads. |
232 | for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { |
233 | setTruncStoreAction(ValVT: VT, MemVT: InnerVT, Action: Expand); |
234 | setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand); |
235 | setLoadExtAction(ExtType: ISD::ZEXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand); |
236 | setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand); |
237 | } |
238 | // By default everything must be expanded. Then we will selectively turn |
239 | // on ones that can be effectively codegen'd. |
240 | for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) |
241 | setOperationAction(Op, VT, Action: Expand); |
242 | } |
243 | |
244 | for (MVT VT : LSXVTs) { |
245 | setOperationAction(Ops: {ISD::LOAD, ISD::STORE}, VT, Action: Legal); |
246 | setOperationAction(Op: ISD::BITCAST, VT, Action: Legal); |
247 | setOperationAction(Op: ISD::UNDEF, VT, Action: Legal); |
248 | |
249 | setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT, Action: Custom); |
250 | setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT, Action: Legal); |
251 | setOperationAction(Op: ISD::BUILD_VECTOR, VT, Action: Custom); |
252 | |
253 | setOperationAction(Op: ISD::SETCC, VT, Action: Legal); |
254 | setOperationAction(Op: ISD::VSELECT, VT, Action: Legal); |
255 | setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT, Action: Custom); |
256 | } |
257 | for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { |
258 | setOperationAction(Ops: {ISD::ADD, ISD::SUB}, VT, Action: Legal); |
259 | setOperationAction(Ops: {ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT, |
260 | Action: Legal); |
261 | setOperationAction(Ops: {ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM}, |
262 | VT, Action: Legal); |
263 | setOperationAction(Ops: {ISD::AND, ISD::OR, ISD::XOR}, VT, Action: Legal); |
264 | setOperationAction(Ops: {ISD::SHL, ISD::SRA, ISD::SRL}, VT, Action: Legal); |
265 | setOperationAction(Ops: {ISD::CTPOP, ISD::CTLZ}, VT, Action: Legal); |
266 | setOperationAction(Ops: {ISD::MULHS, ISD::MULHU}, VT, Action: Legal); |
267 | setCondCodeAction( |
268 | CCs: {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, |
269 | Action: Expand); |
270 | } |
271 | for (MVT VT : {MVT::v4i32, MVT::v2i64}) { |
272 | setOperationAction(Ops: {ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Action: Legal); |
273 | setOperationAction(Ops: {ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Action: Legal); |
274 | } |
275 | for (MVT VT : {MVT::v4f32, MVT::v2f64}) { |
276 | setOperationAction(Ops: {ISD::FADD, ISD::FSUB}, VT, Action: Legal); |
277 | setOperationAction(Ops: {ISD::FMUL, ISD::FDIV}, VT, Action: Legal); |
278 | setOperationAction(Op: ISD::FMA, VT, Action: Legal); |
279 | setOperationAction(Op: ISD::FSQRT, VT, Action: Legal); |
280 | setOperationAction(Op: ISD::FNEG, VT, Action: Legal); |
281 | setCondCodeAction(CCs: {ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, |
282 | ISD::SETUGE, ISD::SETUGT}, |
283 | VT, Action: Expand); |
284 | } |
285 | } |
286 | |
287 | // Set operations for 'LASX' feature. |
288 | |
289 | if (Subtarget.hasExtLASX()) { |
290 | for (MVT VT : LASXVTs) { |
291 | setOperationAction(Ops: {ISD::LOAD, ISD::STORE}, VT, Action: Legal); |
292 | setOperationAction(Op: ISD::BITCAST, VT, Action: Legal); |
293 | setOperationAction(Op: ISD::UNDEF, VT, Action: Legal); |
294 | |
295 | setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT, Action: Custom); |
296 | setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT, Action: Custom); |
297 | setOperationAction(Op: ISD::BUILD_VECTOR, VT, Action: Custom); |
298 | |
299 | setOperationAction(Op: ISD::SETCC, VT, Action: Legal); |
300 | setOperationAction(Op: ISD::VSELECT, VT, Action: Legal); |
301 | setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT, Action: Custom); |
302 | } |
303 | for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) { |
304 | setOperationAction(Ops: {ISD::ADD, ISD::SUB}, VT, Action: Legal); |
305 | setOperationAction(Ops: {ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT, |
306 | Action: Legal); |
307 | setOperationAction(Ops: {ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM}, |
308 | VT, Action: Legal); |
309 | setOperationAction(Ops: {ISD::AND, ISD::OR, ISD::XOR}, VT, Action: Legal); |
310 | setOperationAction(Ops: {ISD::SHL, ISD::SRA, ISD::SRL}, VT, Action: Legal); |
311 | setOperationAction(Ops: {ISD::CTPOP, ISD::CTLZ}, VT, Action: Legal); |
312 | setOperationAction(Ops: {ISD::MULHS, ISD::MULHU}, VT, Action: Legal); |
313 | setCondCodeAction( |
314 | CCs: {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT, |
315 | Action: Expand); |
316 | } |
317 | for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) { |
318 | setOperationAction(Ops: {ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Action: Legal); |
319 | setOperationAction(Ops: {ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Action: Legal); |
320 | } |
321 | for (MVT VT : {MVT::v8f32, MVT::v4f64}) { |
322 | setOperationAction(Ops: {ISD::FADD, ISD::FSUB}, VT, Action: Legal); |
323 | setOperationAction(Ops: {ISD::FMUL, ISD::FDIV}, VT, Action: Legal); |
324 | setOperationAction(Op: ISD::FMA, VT, Action: Legal); |
325 | setOperationAction(Op: ISD::FSQRT, VT, Action: Legal); |
326 | setOperationAction(Op: ISD::FNEG, VT, Action: Legal); |
327 | setCondCodeAction(CCs: {ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, |
328 | ISD::SETUGE, ISD::SETUGT}, |
329 | VT, Action: Expand); |
330 | } |
331 | } |
332 | |
333 | // Set DAG combine for LA32 and LA64. |
334 | |
335 | setTargetDAGCombine(ISD::AND); |
336 | setTargetDAGCombine(ISD::OR); |
337 | setTargetDAGCombine(ISD::SRL); |
338 | setTargetDAGCombine(ISD::SETCC); |
339 | |
340 | // Set DAG combine for 'LSX' feature. |
341 | |
342 | if (Subtarget.hasExtLSX()) |
343 | setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); |
344 | |
345 | // Compute derived properties from the register classes. |
346 | computeRegisterProperties(TRI: Subtarget.getRegisterInfo()); |
347 | |
348 | setStackPointerRegisterToSaveRestore(LoongArch::R3); |
349 | |
350 | setBooleanContents(ZeroOrOneBooleanContent); |
351 | setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); |
352 | |
353 | setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen()); |
354 | |
355 | setMinCmpXchgSizeInBits(32); |
356 | |
357 | // Function alignments. |
358 | setMinFunctionAlignment(Align(4)); |
359 | // Set preferred alignments. |
360 | setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment()); |
361 | setPrefLoopAlignment(Subtarget.getPrefLoopAlignment()); |
362 | setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment()); |
363 | } |
364 | |
365 | bool LoongArchTargetLowering::isOffsetFoldingLegal( |
366 | const GlobalAddressSDNode *GA) const { |
367 | // In order to maximise the opportunity for common subexpression elimination, |
368 | // keep a separate ADD node for the global address offset instead of folding |
369 | // it in the global address node. Later peephole optimisations may choose to |
370 | // fold it back in when profitable. |
371 | return false; |
372 | } |
373 | |
374 | SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, |
375 | SelectionDAG &DAG) const { |
376 | switch (Op.getOpcode()) { |
377 | case ISD::ATOMIC_FENCE: |
378 | return lowerATOMIC_FENCE(Op, DAG); |
379 | case ISD::EH_DWARF_CFA: |
380 | return lowerEH_DWARF_CFA(Op, DAG); |
381 | case ISD::GlobalAddress: |
382 | return lowerGlobalAddress(Op, DAG); |
383 | case ISD::GlobalTLSAddress: |
384 | return lowerGlobalTLSAddress(Op, DAG); |
385 | case ISD::INTRINSIC_WO_CHAIN: |
386 | return lowerINTRINSIC_WO_CHAIN(Op, DAG); |
387 | case ISD::INTRINSIC_W_CHAIN: |
388 | return lowerINTRINSIC_W_CHAIN(Op, DAG); |
389 | case ISD::INTRINSIC_VOID: |
390 | return lowerINTRINSIC_VOID(Op, DAG); |
391 | case ISD::BlockAddress: |
392 | return lowerBlockAddress(Op, DAG); |
393 | case ISD::JumpTable: |
394 | return lowerJumpTable(Op, DAG); |
395 | case ISD::SHL_PARTS: |
396 | return lowerShiftLeftParts(Op, DAG); |
397 | case ISD::SRA_PARTS: |
398 | return lowerShiftRightParts(Op, DAG, IsSRA: true); |
399 | case ISD::SRL_PARTS: |
400 | return lowerShiftRightParts(Op, DAG, IsSRA: false); |
401 | case ISD::ConstantPool: |
402 | return lowerConstantPool(Op, DAG); |
403 | case ISD::FP_TO_SINT: |
404 | return lowerFP_TO_SINT(Op, DAG); |
405 | case ISD::BITCAST: |
406 | return lowerBITCAST(Op, DAG); |
407 | case ISD::UINT_TO_FP: |
408 | return lowerUINT_TO_FP(Op, DAG); |
409 | case ISD::SINT_TO_FP: |
410 | return lowerSINT_TO_FP(Op, DAG); |
411 | case ISD::VASTART: |
412 | return lowerVASTART(Op, DAG); |
413 | case ISD::FRAMEADDR: |
414 | return lowerFRAMEADDR(Op, DAG); |
415 | case ISD::RETURNADDR: |
416 | return lowerRETURNADDR(Op, DAG); |
417 | case ISD::WRITE_REGISTER: |
418 | return lowerWRITE_REGISTER(Op, DAG); |
419 | case ISD::INSERT_VECTOR_ELT: |
420 | return lowerINSERT_VECTOR_ELT(Op, DAG); |
421 | case ISD::EXTRACT_VECTOR_ELT: |
422 | return lowerEXTRACT_VECTOR_ELT(Op, DAG); |
423 | case ISD::BUILD_VECTOR: |
424 | return lowerBUILD_VECTOR(Op, DAG); |
425 | case ISD::VECTOR_SHUFFLE: |
426 | return lowerVECTOR_SHUFFLE(Op, DAG); |
427 | } |
428 | return SDValue(); |
429 | } |
430 | |
431 | /// Determine whether a range fits a regular pattern of values. |
432 | /// This function accounts for the possibility of jumping over the End iterator. |
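/// For example, with CheckStride = 1, ExpectedIndex = 0 and
/// ExpectedIndexStride = 2, the masks <0, 2, 4, 6> and <0, -1, 4, -1> both
/// fit, since -1 (undef) entries are accepted in place of any expected value.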
433 | template <typename ValType> |
434 | static bool |
435 | fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin, |
436 | unsigned CheckStride, |
437 | typename SmallVectorImpl<ValType>::const_iterator End, |
438 | ValType ExpectedIndex, unsigned ExpectedIndexStride) { |
439 | auto &I = Begin; |
440 | |
441 | while (I != End) { |
442 | if (*I != -1 && *I != ExpectedIndex) |
443 | return false; |
444 | ExpectedIndex += ExpectedIndexStride; |
445 | |
446 | // Incrementing past End is undefined behaviour so we must increment one |
447 | // step at a time and check for End at each step. |
448 | for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I) |
449 | ; // Empty loop body. |
450 | } |
451 | return true; |
452 | } |
453 | |
454 | /// Lower VECTOR_SHUFFLE into VREPLVEI (if possible). |
455 | /// |
456 | /// VREPLVEI performs vector broadcast based on an element specified by an |
457 | /// integer immediate, with its mask being similar to: |
458 | /// <x, x, x, ...> |
459 | /// where x is any valid index. |
460 | /// |
461 | /// When undef's appear in the mask they are treated as if they were whatever |
462 | /// value is necessary in order to fit the above form. |
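///
/// For example (illustrative), the v4i32 shuffle
///   %2 = shufflevector <4 x i32> %0, <4 x i32> undef,
///                      <4 x i32> <i32 1, i32 1, i32 1, i32 1>
/// could be lowered to something like:
///   (VREPLVEI_W $v0, $v1, 1)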
463 | static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, |
464 | MVT VT, SDValue V1, SDValue V2, |
465 | SelectionDAG &DAG) { |
466 | int SplatIndex = -1; |
467 | for (const auto &M : Mask) { |
468 | if (M != -1) { |
469 | SplatIndex = M; |
470 | break; |
471 | } |
472 | } |
473 | |
474 | if (SplatIndex == -1) |
475 | return DAG.getUNDEF(VT); |
476 | |
477 | assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index" ); |
478 | if (fitsRegularPattern<int>(Begin: Mask.begin(), CheckStride: 1, End: Mask.end(), ExpectedIndex: SplatIndex, ExpectedIndexStride: 0)) { |
479 | APInt Imm(64, SplatIndex); |
480 | return DAG.getNode(Opcode: LoongArchISD::VREPLVEI, DL, VT, N1: V1, |
481 | N2: DAG.getConstant(Val: Imm, DL, VT: MVT::i64)); |
482 | } |
483 | |
484 | return SDValue(); |
485 | } |
486 | |
487 | /// Lower VECTOR_SHUFFLE into VSHUF4I (if possible). |
488 | /// |
489 | /// VSHUF4I splits the vector into blocks of four elements, then shuffles these |
490 | /// elements according to a <4 x i2> constant (encoded as an integer immediate). |
491 | /// |
492 | /// It is therefore possible to lower into VSHUF4I when the mask takes the form: |
493 | /// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...> |
494 | /// When undef's appear they are treated as if they were whatever value is |
495 | /// necessary in order to fit the above forms. |
496 | /// |
497 | /// For example: |
498 | /// %2 = shufflevector <8 x i16> %0, <8 x i16> undef, |
499 | /// <8 x i32> <i32 3, i32 2, i32 1, i32 0, |
500 | /// i32 7, i32 6, i32 5, i32 4> |
501 | /// is lowered to: |
502 | /// (VSHUF4I_H $v0, $v1, 27) |
503 | /// where the 27 comes from: |
504 | /// 3 + (2 << 2) + (1 << 4) + (0 << 6) |
505 | static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, |
506 | MVT VT, SDValue V1, SDValue V2, |
507 | SelectionDAG &DAG) { |
508 | |
509 | // When the size is less than 4, lower cost instructions may be used. |
510 | if (Mask.size() < 4) |
511 | return SDValue(); |
512 | |
513 | int SubMask[4] = {-1, -1, -1, -1}; |
514 | for (unsigned i = 0; i < 4; ++i) { |
515 | for (unsigned j = i; j < Mask.size(); j += 4) { |
516 | int Idx = Mask[j]; |
517 | |
518 | // Convert from vector index to 4-element subvector index |
519 | // If an index refers to an element outside of the subvector then give up |
520 | if (Idx != -1) { |
521 | Idx -= 4 * (j / 4); |
522 | if (Idx < 0 || Idx >= 4) |
523 | return SDValue(); |
524 | } |
525 | |
526 | // If the mask has an undef, replace it with the current index. |
527 | // Note that it might still be undef if the current index is also undef |
528 | if (SubMask[i] == -1) |
529 | SubMask[i] = Idx; |
530 | // Check that non-undef values are the same as in the mask. If they |
531 | // aren't then give up |
532 | else if (Idx != -1 && Idx != SubMask[i]) |
533 | return SDValue(); |
534 | } |
535 | } |
536 | |
537 | // Calculate the immediate. Replace any remaining undefs with zero |
538 | APInt Imm(64, 0); |
539 | for (int i = 3; i >= 0; --i) { |
540 | int Idx = SubMask[i]; |
541 | |
542 | if (Idx == -1) |
543 | Idx = 0; |
544 | |
545 | Imm <<= 2; |
546 | Imm |= Idx & 0x3; |
547 | } |
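  // The resulting immediate equals SubMask[0] | SubMask[1] << 2 |
  // SubMask[2] << 4 | SubMask[3] << 6, matching the encoding shown in the
  // example above.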
548 | |
549 | return DAG.getNode(Opcode: LoongArchISD::VSHUF4I, DL, VT, N1: V1, |
550 | N2: DAG.getConstant(Val: Imm, DL, VT: MVT::i64)); |
551 | } |
552 | |
553 | /// Lower VECTOR_SHUFFLE into VPACKEV (if possible). |
554 | /// |
555 | /// VPACKEV interleaves the even elements from each vector. |
556 | /// |
557 | /// It is possible to lower into VPACKEV when the mask consists of two of the |
558 | /// following forms interleaved: |
559 | /// <0, 2, 4, ...> |
560 | /// <n, n+2, n+4, ...> |
561 | /// where n is the number of elements in the vector. |
562 | /// For example: |
563 | /// <0, 0, 2, 2, 4, 4, ...> |
564 | /// <0, n, 2, n+2, 4, n+4, ...> |
565 | /// |
566 | /// When undef's appear in the mask they are treated as if they were whatever |
567 | /// value is necessary in order to fit the above forms. |
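///
/// For instance, for a v4i32 shuffle the mask <0, 4, 2, 6> (the even elements
/// of both inputs, interleaved) is one concrete instance of the forms above.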
568 | static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef<int> Mask, |
569 | MVT VT, SDValue V1, SDValue V2, |
570 | SelectionDAG &DAG) { |
571 | |
572 | const auto &Begin = Mask.begin(); |
573 | const auto &End = Mask.end(); |
574 | SDValue OriV1 = V1, OriV2 = V2; |
575 | |
576 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 2)) |
577 | V1 = OriV1; |
578 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2)) |
579 | V1 = OriV2; |
580 | else |
581 | return SDValue(); |
582 | |
583 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 2)) |
584 | V2 = OriV1; |
585 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2)) |
586 | V2 = OriV2; |
587 | else |
588 | return SDValue(); |
589 | |
590 | return DAG.getNode(Opcode: LoongArchISD::VPACKEV, DL, VT, N1: V2, N2: V1); |
591 | } |
592 | |
593 | /// Lower VECTOR_SHUFFLE into VPACKOD (if possible). |
594 | /// |
595 | /// VPACKOD interleaves the odd elements from each vector. |
596 | /// |
597 | /// It is possible to lower into VPACKOD when the mask consists of two of the |
598 | /// following forms interleaved: |
599 | /// <1, 3, 5, ...> |
600 | /// <n+1, n+3, n+5, ...> |
601 | /// where n is the number of elements in the vector. |
602 | /// For example: |
603 | /// <1, 1, 3, 3, 5, 5, ...> |
604 | /// <1, n+1, 3, n+3, 5, n+5, ...> |
605 | /// |
606 | /// When undef's appear in the mask they are treated as if they were whatever |
607 | /// value is necessary in order to fit the above forms. |
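///
/// For instance, for a v4i32 shuffle the mask <1, 5, 3, 7> (the odd elements
/// of both inputs, interleaved) is one concrete instance of the forms above.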
608 | static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef<int> Mask, |
609 | MVT VT, SDValue V1, SDValue V2, |
610 | SelectionDAG &DAG) { |
611 | |
612 | const auto &Begin = Mask.begin(); |
613 | const auto &End = Mask.end(); |
614 | SDValue OriV1 = V1, OriV2 = V2; |
615 | |
616 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 1, ExpectedIndexStride: 2)) |
617 | V1 = OriV1; |
618 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2)) |
619 | V1 = OriV2; |
620 | else |
621 | return SDValue(); |
622 | |
623 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 1, ExpectedIndexStride: 2)) |
624 | V2 = OriV1; |
625 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2)) |
626 | V2 = OriV2; |
627 | else |
628 | return SDValue(); |
629 | |
630 | return DAG.getNode(Opcode: LoongArchISD::VPACKOD, DL, VT, N1: V2, N2: V1); |
631 | } |
632 | |
633 | /// Lower VECTOR_SHUFFLE into VILVH (if possible). |
634 | /// |
635 | /// VILVH interleaves consecutive elements from the left (highest-indexed) half |
636 | /// of each vector. |
637 | /// |
638 | /// It is possible to lower into VILVH when the mask consists of two of the |
639 | /// following forms interleaved: |
640 | /// <x, x+1, x+2, ...> |
641 | /// <n+x, n+x+1, n+x+2, ...> |
642 | /// where n is the number of elements in the vector and x is half n. |
643 | /// For example: |
644 | /// <x, x, x+1, x+1, x+2, x+2, ...> |
645 | /// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...> |
646 | /// |
647 | /// When undef's appear in the mask they are treated as if they were whatever |
648 | /// value is necessary in order to fit the above forms. |
649 | static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef<int> Mask, |
650 | MVT VT, SDValue V1, SDValue V2, |
651 | SelectionDAG &DAG) { |
652 | |
653 | const auto &Begin = Mask.begin(); |
654 | const auto &End = Mask.end(); |
655 | unsigned HalfSize = Mask.size() / 2; |
656 | SDValue OriV1 = V1, OriV2 = V2; |
657 | |
658 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1)) |
659 | V1 = OriV1; |
660 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 1)) |
661 | V1 = OriV2; |
662 | else |
663 | return SDValue(); |
664 | |
665 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1)) |
666 | V2 = OriV1; |
667 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size() + HalfSize, |
668 | ExpectedIndexStride: 1)) |
669 | V2 = OriV2; |
670 | else |
671 | return SDValue(); |
672 | |
673 | return DAG.getNode(Opcode: LoongArchISD::VILVH, DL, VT, N1: V2, N2: V1); |
674 | } |
675 | |
676 | /// Lower VECTOR_SHUFFLE into VILVL (if possible). |
677 | /// |
678 | /// VILVL interleaves consecutive elements from the right (lowest-indexed) half |
679 | /// of each vector. |
680 | /// |
681 | /// It is possible to lower into VILVL when the mask consists of two of the |
682 | /// following forms interleaved: |
683 | /// <0, 1, 2, ...> |
684 | /// <n, n+1, n+2, ...> |
685 | /// where n is the number of elements in the vector. |
686 | /// For example: |
687 | /// <0, 0, 1, 1, 2, 2, ...> |
688 | /// <0, n, 1, n+1, 2, n+2, ...> |
689 | /// |
690 | /// When undef's appear in the mask they are treated as if they were whatever |
691 | /// value is necessary in order to fit the above forms. |
692 | static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask, |
693 | MVT VT, SDValue V1, SDValue V2, |
694 | SelectionDAG &DAG) { |
695 | |
696 | const auto &Begin = Mask.begin(); |
697 | const auto &End = Mask.end(); |
698 | SDValue OriV1 = V1, OriV2 = V2; |
699 | |
700 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 1)) |
701 | V1 = OriV1; |
702 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 1)) |
703 | V1 = OriV2; |
704 | else |
705 | return SDValue(); |
706 | |
707 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 1)) |
708 | V2 = OriV1; |
709 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 1)) |
710 | V2 = OriV2; |
711 | else |
712 | return SDValue(); |
713 | |
714 | return DAG.getNode(Opcode: LoongArchISD::VILVL, DL, VT, N1: V2, N2: V1); |
715 | } |
716 | |
717 | /// Lower VECTOR_SHUFFLE into VPICKEV (if possible). |
718 | /// |
719 | /// VPICKEV copies the even elements of each vector into the result vector. |
720 | /// |
721 | /// It is possible to lower into VPICKEV when the mask consists of two of the |
722 | /// following forms concatenated: |
723 | /// <0, 2, 4, ...> |
724 | /// <n, n+2, n+4, ...> |
725 | /// where n is the number of elements in the vector. |
726 | /// For example: |
727 | /// <0, 2, 4, ..., 0, 2, 4, ...> |
728 | /// <0, 2, 4, ..., n, n+2, n+4, ...> |
729 | /// |
730 | /// When undef's appear in the mask they are treated as if they were whatever |
731 | /// value is necessary in order to fit the above forms. |
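///
/// For instance, for a v4i32 shuffle the mask <0, 2, 4, 6> (the even elements
/// of the first input followed by the even elements of the second) is one
/// concrete instance of the forms above.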
732 | static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask, |
733 | MVT VT, SDValue V1, SDValue V2, |
734 | SelectionDAG &DAG) { |
735 | |
736 | const auto &Begin = Mask.begin(); |
737 | const auto &Mid = Mask.begin() + Mask.size() / 2; |
738 | const auto &End = Mask.end(); |
739 | SDValue OriV1 = V1, OriV2 = V2; |
740 | |
741 | if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: 0, ExpectedIndexStride: 2)) |
742 | V1 = OriV1; |
743 | else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2)) |
744 | V1 = OriV2; |
745 | else |
746 | return SDValue(); |
747 | |
748 | if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: 0, ExpectedIndexStride: 2)) |
749 | V2 = OriV1; |
750 | else if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2)) |
    V2 = OriV2;
  else
    return SDValue();
755 | |
756 | return DAG.getNode(Opcode: LoongArchISD::VPICKEV, DL, VT, N1: V2, N2: V1); |
757 | } |
758 | |
759 | /// Lower VECTOR_SHUFFLE into VPICKOD (if possible). |
760 | /// |
761 | /// VPICKOD copies the odd elements of each vector into the result vector. |
762 | /// |
763 | /// It is possible to lower into VPICKOD when the mask consists of two of the |
764 | /// following forms concatenated: |
765 | /// <1, 3, 5, ...> |
766 | /// <n+1, n+3, n+5, ...> |
767 | /// where n is the number of elements in the vector. |
768 | /// For example: |
769 | /// <1, 3, 5, ..., 1, 3, 5, ...> |
770 | /// <1, 3, 5, ..., n+1, n+3, n+5, ...> |
771 | /// |
772 | /// When undef's appear in the mask they are treated as if they were whatever |
773 | /// value is necessary in order to fit the above forms. |
774 | static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask, |
775 | MVT VT, SDValue V1, SDValue V2, |
776 | SelectionDAG &DAG) { |
777 | |
778 | const auto &Begin = Mask.begin(); |
779 | const auto &Mid = Mask.begin() + Mask.size() / 2; |
780 | const auto &End = Mask.end(); |
781 | SDValue OriV1 = V1, OriV2 = V2; |
782 | |
783 | if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: 1, ExpectedIndexStride: 2)) |
784 | V1 = OriV1; |
785 | else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2)) |
786 | V1 = OriV2; |
787 | else |
788 | return SDValue(); |
789 | |
790 | if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: 1, ExpectedIndexStride: 2)) |
791 | V2 = OriV1; |
792 | else if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2)) |
793 | V2 = OriV2; |
794 | else |
795 | return SDValue(); |
796 | |
797 | return DAG.getNode(Opcode: LoongArchISD::VPICKOD, DL, VT, N1: V2, N2: V1); |
798 | } |
799 | |
800 | /// Lower VECTOR_SHUFFLE into VSHUF. |
801 | /// |
802 | /// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and |
803 | /// adding it as an operand to the resulting VSHUF. |
804 | static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask, |
805 | MVT VT, SDValue V1, SDValue V2, |
806 | SelectionDAG &DAG) { |
807 | |
808 | SmallVector<SDValue, 16> Ops; |
809 | for (auto M : Mask) |
810 | Ops.push_back(Elt: DAG.getConstant(Val: M, DL, VT: MVT::i64)); |
811 | |
812 | EVT MaskVecTy = VT.changeVectorElementTypeToInteger(); |
813 | SDValue MaskVec = DAG.getBuildVector(VT: MaskVecTy, DL, Ops); |
814 | |
  // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
  //   <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
  // VSHUF concatenates the vectors in a bitwise fashion:
  //   <0b00, 0b01> + <0b10, 0b11> ->
  //   0b0100 + 0b1110 -> 0b01001110
  //   <0b10, 0b11, 0b00, 0b01>
  // We must therefore swap the operands to get the correct result.
822 | return DAG.getNode(Opcode: LoongArchISD::VSHUF, DL, VT, N1: MaskVec, N2: V2, N3: V1); |
823 | } |
824 | |
825 | /// Dispatching routine to lower various 128-bit LoongArch vector shuffles. |
826 | /// |
827 | /// This routine breaks down the specific type of 128-bit shuffle and |
828 | /// dispatches to the lowering routines accordingly. |
829 | static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT, |
830 | SDValue V1, SDValue V2, SelectionDAG &DAG) { |
831 | assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 || |
832 | VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 || |
833 | VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) && |
834 | "Vector type is unsupported for lsx!" ); |
835 | assert(V1.getSimpleValueType() == V2.getSimpleValueType() && |
836 | "Two operands have different types!" ); |
837 | assert(VT.getVectorNumElements() == Mask.size() && |
838 | "Unexpected mask size for shuffle!" ); |
839 | assert(Mask.size() % 2 == 0 && "Expected even mask size." ); |
840 | |
841 | SDValue Result; |
842 | // TODO: Add more comparison patterns. |
843 | if (V2.isUndef()) { |
844 | if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG))) |
845 | return Result; |
846 | if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG))) |
847 | return Result; |
848 | |
    // TODO: The commented-out assignment below may be enabled in the future to
    // better match the pattern for instruction selection.
    /* V2 = V1; */
852 | } |
853 | |
  // For better performance, it is recommended not to change the order of the
  // following pattern checks.
856 | if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG))) |
857 | return Result; |
858 | if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG))) |
859 | return Result; |
860 | if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG))) |
861 | return Result; |
862 | if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG))) |
863 | return Result; |
864 | if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG))) |
865 | return Result; |
866 | if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG))) |
867 | return Result; |
868 | if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG))) |
869 | return Result; |
870 | |
871 | return SDValue(); |
872 | } |
873 | |
874 | /// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible). |
875 | /// |
/// It is an XVREPLVEI when the mask is:
///   <x, x, x, ..., x+n, x+n, x+n, ...>
/// where x appears n times followed by n copies of x+n, and n is half the
/// number of vector elements.
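/// For example, for v8i32 the mask <1, 1, 1, 1, 5, 5, 5, 5> fits this form
/// with x = 1 and n = 4.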
879 | /// |
880 | /// When undef's appear in the mask they are treated as if they were whatever |
881 | /// value is necessary in order to fit the above form. |
882 | static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, |
883 | ArrayRef<int> Mask, MVT VT, |
884 | SDValue V1, SDValue V2, |
885 | SelectionDAG &DAG) { |
886 | int SplatIndex = -1; |
887 | for (const auto &M : Mask) { |
888 | if (M != -1) { |
889 | SplatIndex = M; |
890 | break; |
891 | } |
892 | } |
893 | |
894 | if (SplatIndex == -1) |
895 | return DAG.getUNDEF(VT); |
896 | |
897 | const auto &Begin = Mask.begin(); |
898 | const auto &End = Mask.end(); |
899 | unsigned HalfSize = Mask.size() / 2; |
900 | |
901 | assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index" ); |
902 | if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: End - HalfSize, ExpectedIndex: SplatIndex, ExpectedIndexStride: 0) && |
903 | fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 1, End, ExpectedIndex: SplatIndex + HalfSize, |
904 | ExpectedIndexStride: 0)) { |
905 | APInt Imm(64, SplatIndex); |
906 | return DAG.getNode(Opcode: LoongArchISD::VREPLVEI, DL, VT, N1: V1, |
907 | N2: DAG.getConstant(Val: Imm, DL, VT: MVT::i64)); |
908 | } |
909 | |
910 | return SDValue(); |
911 | } |
912 | |
913 | /// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible). |
914 | static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, |
915 | MVT VT, SDValue V1, SDValue V2, |
916 | SelectionDAG &DAG) { |
917 | // When the size is less than or equal to 4, lower cost instructions may be |
918 | // used. |
919 | if (Mask.size() <= 4) |
920 | return SDValue(); |
921 | return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG); |
922 | } |
923 | |
924 | /// Lower VECTOR_SHUFFLE into XVPACKEV (if possible). |
925 | static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask, |
926 | MVT VT, SDValue V1, SDValue V2, |
927 | SelectionDAG &DAG) { |
928 | return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG); |
929 | } |
930 | |
931 | /// Lower VECTOR_SHUFFLE into XVPACKOD (if possible). |
932 | static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask, |
933 | MVT VT, SDValue V1, SDValue V2, |
934 | SelectionDAG &DAG) { |
935 | return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG); |
936 | } |
937 | |
938 | /// Lower VECTOR_SHUFFLE into XVILVH (if possible). |
939 | static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask, |
940 | MVT VT, SDValue V1, SDValue V2, |
941 | SelectionDAG &DAG) { |
942 | |
943 | const auto &Begin = Mask.begin(); |
944 | const auto &End = Mask.end(); |
945 | unsigned HalfSize = Mask.size() / 2; |
946 | unsigned LeftSize = HalfSize / 2; |
947 | SDValue OriV1 = V1, OriV2 = V2; |
948 | |
949 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize, ExpectedIndex: HalfSize - LeftSize, |
950 | ExpectedIndexStride: 1) && |
951 | fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize + LeftSize, ExpectedIndexStride: 1)) |
952 | V1 = OriV1; |
953 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize, |
954 | ExpectedIndex: Mask.size() + HalfSize - LeftSize, ExpectedIndexStride: 1) && |
955 | fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End, |
956 | ExpectedIndex: Mask.size() + HalfSize + LeftSize, ExpectedIndexStride: 1)) |
957 | V1 = OriV2; |
958 | else |
959 | return SDValue(); |
960 | |
961 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize, ExpectedIndex: HalfSize - LeftSize, |
962 | ExpectedIndexStride: 1) && |
963 | fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize + LeftSize, |
964 | ExpectedIndexStride: 1)) |
965 | V2 = OriV1; |
966 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize, |
967 | ExpectedIndex: Mask.size() + HalfSize - LeftSize, ExpectedIndexStride: 1) && |
968 | fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End, |
969 | ExpectedIndex: Mask.size() + HalfSize + LeftSize, ExpectedIndexStride: 1)) |
970 | V2 = OriV2; |
971 | else |
972 | return SDValue(); |
973 | |
974 | return DAG.getNode(Opcode: LoongArchISD::VILVH, DL, VT, N1: V2, N2: V1); |
975 | } |
976 | |
977 | /// Lower VECTOR_SHUFFLE into XVILVL (if possible). |
978 | static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask, |
979 | MVT VT, SDValue V1, SDValue V2, |
980 | SelectionDAG &DAG) { |
981 | |
982 | const auto &Begin = Mask.begin(); |
983 | const auto &End = Mask.end(); |
984 | unsigned HalfSize = Mask.size() / 2; |
985 | SDValue OriV1 = V1, OriV2 = V2; |
986 | |
987 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize, ExpectedIndex: 0, ExpectedIndexStride: 1) && |
988 | fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1)) |
989 | V1 = OriV1; |
990 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize, ExpectedIndex: Mask.size(), ExpectedIndexStride: 1) && |
991 | fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End, |
992 | ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 1)) |
993 | V1 = OriV2; |
994 | else |
995 | return SDValue(); |
996 | |
997 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize, ExpectedIndex: 0, ExpectedIndexStride: 1) && |
998 | fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1)) |
999 | V2 = OriV1; |
1000 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize, ExpectedIndex: Mask.size(), |
1001 | ExpectedIndexStride: 1) && |
1002 | fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End, |
1003 | ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 1)) |
1004 | V2 = OriV2; |
1005 | else |
1006 | return SDValue(); |
1007 | |
1008 | return DAG.getNode(Opcode: LoongArchISD::VILVL, DL, VT, N1: V2, N2: V1); |
1009 | } |
1010 | |
1011 | /// Lower VECTOR_SHUFFLE into XVPICKEV (if possible). |
1012 | static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask, |
1013 | MVT VT, SDValue V1, SDValue V2, |
1014 | SelectionDAG &DAG) { |
1015 | |
1016 | const auto &Begin = Mask.begin(); |
1017 | const auto &LeftMid = Mask.begin() + Mask.size() / 4; |
1018 | const auto &Mid = Mask.begin() + Mask.size() / 2; |
1019 | const auto &RightMid = Mask.end() - Mask.size() / 4; |
1020 | const auto &End = Mask.end(); |
1021 | unsigned HalfSize = Mask.size() / 2; |
1022 | SDValue OriV1 = V1, OriV2 = V2; |
1023 | |
1024 | if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: 0, ExpectedIndexStride: 2) && |
1025 | fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: HalfSize, ExpectedIndexStride: 2)) |
1026 | V1 = OriV1; |
1027 | else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2) && |
1028 | fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 2)) |
1029 | V1 = OriV2; |
1030 | else |
1031 | return SDValue(); |
1032 | |
1033 | if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: 0, ExpectedIndexStride: 2) && |
1034 | fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 2)) |
1035 | V2 = OriV1; |
1036 | else if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2) && |
1037 | fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 2)) |
    V2 = OriV2;
  else
    return SDValue();
1042 | |
1043 | return DAG.getNode(Opcode: LoongArchISD::VPICKEV, DL, VT, N1: V2, N2: V1); |
1044 | } |
1045 | |
1046 | /// Lower VECTOR_SHUFFLE into XVPICKOD (if possible). |
1047 | static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask, |
1048 | MVT VT, SDValue V1, SDValue V2, |
1049 | SelectionDAG &DAG) { |
1050 | |
1051 | const auto &Begin = Mask.begin(); |
1052 | const auto &LeftMid = Mask.begin() + Mask.size() / 4; |
1053 | const auto &Mid = Mask.begin() + Mask.size() / 2; |
1054 | const auto &RightMid = Mask.end() - Mask.size() / 4; |
1055 | const auto &End = Mask.end(); |
1056 | unsigned HalfSize = Mask.size() / 2; |
1057 | SDValue OriV1 = V1, OriV2 = V2; |
1058 | |
1059 | if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: 1, ExpectedIndexStride: 2) && |
1060 | fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: HalfSize + 1, ExpectedIndexStride: 2)) |
1061 | V1 = OriV1; |
1062 | else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2) && |
1063 | fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: Mask.size() + HalfSize + 1, |
1064 | ExpectedIndexStride: 2)) |
1065 | V1 = OriV2; |
1066 | else |
1067 | return SDValue(); |
1068 | |
1069 | if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: 1, ExpectedIndexStride: 2) && |
1070 | fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: HalfSize + 1, ExpectedIndexStride: 2)) |
1071 | V2 = OriV1; |
1072 | else if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2) && |
1073 | fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: Mask.size() + HalfSize + 1, |
1074 | ExpectedIndexStride: 2)) |
1075 | V2 = OriV2; |
1076 | else |
1077 | return SDValue(); |
1078 | |
1079 | return DAG.getNode(Opcode: LoongArchISD::VPICKOD, DL, VT, N1: V2, N2: V1); |
1080 | } |
1081 | |
1082 | /// Lower VECTOR_SHUFFLE into XVSHUF (if possible). |
1083 | static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask, |
1084 | MVT VT, SDValue V1, SDValue V2, |
1085 | SelectionDAG &DAG) { |
1086 | |
1087 | int MaskSize = Mask.size(); |
1088 | int HalfSize = Mask.size() / 2; |
1089 | const auto &Begin = Mask.begin(); |
1090 | const auto &Mid = Mask.begin() + HalfSize; |
1091 | const auto &End = Mask.end(); |
1092 | |
1093 | // VECTOR_SHUFFLE concatenates the vectors: |
1094 | // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15> |
1095 | // shuffling -> |
1096 | // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15> |
1097 | // |
1098 | // XVSHUF concatenates the vectors: |
1099 | // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7> |
1100 | // shuffling -> |
1101 | // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7> |
1102 | SmallVector<SDValue, 8> MaskAlloc; |
1103 | for (auto it = Begin; it < Mid; it++) { |
1104 | if (*it < 0) // UNDEF |
1105 | MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: 0, DL, VT: MVT::i64)); |
    else if ((*it >= 0 && *it < HalfSize) ||
             (*it >= MaskSize && *it < MaskSize + HalfSize)) {
1108 | int M = *it < HalfSize ? *it : *it - HalfSize; |
1109 | MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: M, DL, VT: MVT::i64)); |
1110 | } else |
1111 | return SDValue(); |
1112 | } |
1113 | assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!" ); |
1114 | |
1115 | for (auto it = Mid; it < End; it++) { |
1116 | if (*it < 0) // UNDEF |
1117 | MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: 0, DL, VT: MVT::i64)); |
1118 | else if ((*it >= HalfSize && *it < MaskSize) || |
1119 | (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) { |
1120 | int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize; |
1121 | MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: M, DL, VT: MVT::i64)); |
1122 | } else |
1123 | return SDValue(); |
1124 | } |
1125 | assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!" ); |
1126 | |
1127 | EVT MaskVecTy = VT.changeVectorElementTypeToInteger(); |
1128 | SDValue MaskVec = DAG.getBuildVector(VT: MaskVecTy, DL, Ops: MaskAlloc); |
1129 | return DAG.getNode(Opcode: LoongArchISD::VSHUF, DL, VT, N1: MaskVec, N2: V2, N3: V1); |
1130 | } |
1131 | |
1132 | /// Shuffle vectors by lane to generate more optimized instructions. |
1133 | /// 256-bit shuffles are always considered as 2-lane 128-bit shuffles. |
1134 | /// |
1135 | /// Therefore, except for the following four cases, other cases are regarded |
1136 | /// as cross-lane shuffles, where optimization is relatively limited. |
1137 | /// |
/// - Shuffle high, low lanes of the two input vectors
///     <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
/// - Shuffle low, high lanes of the two input vectors
///     <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
/// - Shuffle low, low lanes of the two input vectors
///     <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
/// - Shuffle high, high lanes of the two input vectors
///     <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
1146 | /// |
1147 | /// The first case is the closest to LoongArch instructions and the other |
1148 | /// cases need to be converted to it for processing. |
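/// The conversion is done by permuting the 128-bit halves of the inputs with
/// XVPERMI (on v4i64) and remapping the mask indices accordingly.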
1149 | /// |
1150 | /// This function may modify V1, V2 and Mask |
1151 | static void canonicalizeShuffleVectorByLane(const SDLoc &DL, |
1152 | MutableArrayRef<int> Mask, MVT VT, |
1153 | SDValue &V1, SDValue &V2, |
1154 | SelectionDAG &DAG) { |
1155 | |
1156 | enum HalfMaskType { HighLaneTy, LowLaneTy, None }; |
1157 | |
1158 | int MaskSize = Mask.size(); |
1159 | int HalfSize = Mask.size() / 2; |
1160 | |
1161 | HalfMaskType preMask = None, postMask = None; |
1162 | |
1163 | if (std::all_of(first: Mask.begin(), last: Mask.begin() + HalfSize, pred: [&](int M) { |
1164 | return M < 0 || (M >= 0 && M < HalfSize) || |
1165 | (M >= MaskSize && M < MaskSize + HalfSize); |
1166 | })) |
1167 | preMask = HighLaneTy; |
1168 | else if (std::all_of(first: Mask.begin(), last: Mask.begin() + HalfSize, pred: [&](int M) { |
1169 | return M < 0 || (M >= HalfSize && M < MaskSize) || |
1170 | (M >= MaskSize + HalfSize && M < MaskSize * 2); |
1171 | })) |
1172 | preMask = LowLaneTy; |
1173 | |
1174 | if (std::all_of(first: Mask.begin() + HalfSize, last: Mask.end(), pred: [&](int M) { |
1175 | return M < 0 || (M >= 0 && M < HalfSize) || |
1176 | (M >= MaskSize && M < MaskSize + HalfSize); |
1177 | })) |
1178 | postMask = HighLaneTy; |
1179 | else if (std::all_of(first: Mask.begin() + HalfSize, last: Mask.end(), pred: [&](int M) { |
1180 | return M < 0 || (M >= HalfSize && M < MaskSize) || |
1181 | (M >= MaskSize + HalfSize && M < MaskSize * 2); |
1182 | })) |
1183 | postMask = LowLaneTy; |
1184 | |
  // The pre-half of the mask is of high-lane type and the post-half of the
  // mask is of low-lane type, which is closest to the LoongArch instructions.
  //
  // Note: In the LoongArch architecture, the high lane of the mask corresponds
  // to the lower 128 bits of the vector register, and the low lane of the mask
  // corresponds to the higher 128 bits of the vector register.
1191 | if (preMask == HighLaneTy && postMask == LowLaneTy) { |
1192 | return; |
1193 | } |
1194 | if (preMask == LowLaneTy && postMask == HighLaneTy) { |
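    // XVPERMI on v4i64 with immediate 0b01001110 selects the 64-bit elements
    // <2, 3, 0, 1>, i.e. it swaps the two 128-bit halves of each register.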
1195 | V1 = DAG.getBitcast(VT: MVT::v4i64, V: V1); |
1196 | V1 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V1, |
1197 | N2: DAG.getConstant(Val: 0b01001110, DL, VT: MVT::i64)); |
1198 | V1 = DAG.getBitcast(VT, V: V1); |
1199 | |
1200 | if (!V2.isUndef()) { |
1201 | V2 = DAG.getBitcast(VT: MVT::v4i64, V: V2); |
1202 | V2 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V2, |
1203 | N2: DAG.getConstant(Val: 0b01001110, DL, VT: MVT::i64)); |
1204 | V2 = DAG.getBitcast(VT, V: V2); |
1205 | } |
1206 | |
1207 | for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) { |
1208 | *it = *it < 0 ? *it : *it - HalfSize; |
1209 | } |
1210 | for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) { |
1211 | *it = *it < 0 ? *it : *it + HalfSize; |
1212 | } |
1213 | } else if (preMask == LowLaneTy && postMask == LowLaneTy) { |
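    // Immediate 0b11101110 selects the 64-bit elements <2, 3, 2, 3>,
    // broadcasting the high 128-bit half of the register into both halves.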
1214 | V1 = DAG.getBitcast(VT: MVT::v4i64, V: V1); |
1215 | V1 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V1, |
1216 | N2: DAG.getConstant(Val: 0b11101110, DL, VT: MVT::i64)); |
1217 | V1 = DAG.getBitcast(VT, V: V1); |
1218 | |
1219 | if (!V2.isUndef()) { |
1220 | V2 = DAG.getBitcast(VT: MVT::v4i64, V: V2); |
1221 | V2 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V2, |
1222 | N2: DAG.getConstant(Val: 0b11101110, DL, VT: MVT::i64)); |
1223 | V2 = DAG.getBitcast(VT, V: V2); |
1224 | } |
1225 | |
1226 | for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) { |
1227 | *it = *it < 0 ? *it : *it - HalfSize; |
1228 | } |
1229 | } else if (preMask == HighLaneTy && postMask == HighLaneTy) { |
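// Both halves of the mask only reference the high lane, i.e. the lower
// 128 bits of each input register. Broadcast that half into both lanes
// (the XVPERMI immediate 0b01000100 selects elements <0, 1, 0, 1>) and
// rebase the second half of the mask.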
1230 | V1 = DAG.getBitcast(VT: MVT::v4i64, V: V1); |
1231 | V1 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V1, |
1232 | N2: DAG.getConstant(Val: 0b01000100, DL, VT: MVT::i64)); |
1233 | V1 = DAG.getBitcast(VT, V: V1); |
1234 | |
1235 | if (!V2.isUndef()) { |
1236 | V2 = DAG.getBitcast(VT: MVT::v4i64, V: V2); |
1237 | V2 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V2, |
1238 | N2: DAG.getConstant(Val: 0b01000100, DL, VT: MVT::i64)); |
1239 | V2 = DAG.getBitcast(VT, V: V2); |
1240 | } |
1241 | |
1242 | for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) { |
1243 | *it = *it < 0 ? *it : *it + HalfSize; |
1244 | } |
1245 | } else { // cross-lane |
1246 | return; |
1247 | } |
1248 | } |
1249 | |
1250 | /// Dispatching routine to lower various 256-bit LoongArch vector shuffles. |
1251 | /// |
1252 | /// This routine breaks down the specific type of 256-bit shuffle and |
1253 | /// dispatches to the lowering routines accordingly. |
1254 | static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT, |
1255 | SDValue V1, SDValue V2, SelectionDAG &DAG) { |
1256 | assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 || |
1257 | VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 || |
1258 | VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) && |
1259 | "Vector type is unsupported for lasx!" ); |
1260 | assert(V1.getSimpleValueType() == V2.getSimpleValueType() && |
1261 | "Two operands have different types!" ); |
1262 | assert(VT.getVectorNumElements() == Mask.size() && |
1263 | "Unexpected mask size for shuffle!" ); |
1264 | assert(Mask.size() % 2 == 0 && "Expected even mask size." ); |
1265 | assert(Mask.size() >= 4 && "Mask size is less than 4." ); |
1266 | |
1267 | // Canonicalize non-cross-lane shuffle vectors.
1268 | SmallVector<int> NewMask(Mask); |
1269 | canonicalizeShuffleVectorByLane(DL, Mask: NewMask, VT, V1, V2, DAG); |
1270 | |
1271 | SDValue Result; |
1272 | // TODO: Add more comparison patterns. |
1273 | if (V2.isUndef()) { |
1274 | if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask: NewMask, VT, V1, V2, DAG))) |
1275 | return Result; |
1276 | if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask: NewMask, VT, V1, V2, DAG))) |
1277 | return Result; |
1278 | |
1279 | // TODO: The commented-out assignment below may be enabled in the future to
1280 | // better match the pattern for instruction selection.
1281 | /* V2 = V1; */ |
1282 | } |
1283 | |
1284 | // For better performance, it is recommended not to change the order of the
1285 | // following pattern comparisons.
1286 | if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask: NewMask, VT, V1, V2, DAG))) |
1287 | return Result; |
1288 | if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask: NewMask, VT, V1, V2, DAG))) |
1289 | return Result; |
1290 | if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask: NewMask, VT, V1, V2, DAG))) |
1291 | return Result; |
1292 | if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask: NewMask, VT, V1, V2, DAG))) |
1293 | return Result; |
1294 | if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask: NewMask, VT, V1, V2, DAG))) |
1295 | return Result; |
1296 | if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask: NewMask, VT, V1, V2, DAG))) |
1297 | return Result; |
1298 | if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, Mask: NewMask, VT, V1, V2, DAG))) |
1299 | return Result; |
1300 | |
1301 | return SDValue(); |
1302 | } |
1303 | |
1304 | SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, |
1305 | SelectionDAG &DAG) const { |
1306 | ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Val&: Op); |
1307 | ArrayRef<int> OrigMask = SVOp->getMask(); |
1308 | SDValue V1 = Op.getOperand(i: 0); |
1309 | SDValue V2 = Op.getOperand(i: 1); |
1310 | MVT VT = Op.getSimpleValueType(); |
1311 | int NumElements = VT.getVectorNumElements(); |
1312 | SDLoc DL(Op); |
1313 | |
1314 | bool V1IsUndef = V1.isUndef(); |
1315 | bool V2IsUndef = V2.isUndef(); |
1316 | if (V1IsUndef && V2IsUndef) |
1317 | return DAG.getUNDEF(VT); |
1318 | |
1319 | // When we create a shuffle node we put the UNDEF node as the second operand,
1320 | // but in some cases the first operand may be transformed to UNDEF. |
1321 | // In this case we should just commute the node. |
1322 | if (V1IsUndef) |
1323 | return DAG.getCommutedVectorShuffle(SV: *SVOp); |
1324 | |
1325 | // Check for non-undef masks pointing at an undef vector and make the masks |
1326 | // undef as well. This makes it easier to match the shuffle based solely on |
1327 | // the mask. |
1328 | if (V2IsUndef && |
1329 | any_of(Range&: OrigMask, P: [NumElements](int M) { return M >= NumElements; })) { |
1330 | SmallVector<int, 8> NewMask(OrigMask); |
1331 | for (int &M : NewMask) |
1332 | if (M >= NumElements) |
1333 | M = -1; |
1334 | return DAG.getVectorShuffle(VT, dl: DL, N1: V1, N2: V2, Mask: NewMask); |
1335 | } |
1336 | |
1337 | // Check for illegal shuffle mask element index values. |
1338 | int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2); |
1339 | (void)MaskUpperLimit; |
1340 | assert(llvm::all_of(OrigMask, |
1341 | [&](int M) { return -1 <= M && M < MaskUpperLimit; }) && |
1342 | "Out of bounds shuffle index" ); |
1343 | |
1344 | // For each vector width, delegate to a specialized lowering routine. |
1345 | if (VT.is128BitVector()) |
1346 | return lower128BitShuffle(DL, Mask: OrigMask, VT, V1, V2, DAG); |
1347 | |
1348 | if (VT.is256BitVector()) |
1349 | return lower256BitShuffle(DL, Mask: OrigMask, VT, V1, V2, DAG); |
1350 | |
1351 | return SDValue(); |
1352 | } |
1353 | |
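// Return true if Op is an UNDEF value or a constant (integer or floating-point)
// node.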
1354 | static bool isConstantOrUndef(const SDValue Op) { |
1355 | if (Op->isUndef()) |
1356 | return true; |
1357 | if (isa<ConstantSDNode>(Val: Op)) |
1358 | return true; |
1359 | if (isa<ConstantFPSDNode>(Val: Op)) |
1360 | return true; |
1361 | return false; |
1362 | } |
1363 | |
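// Return true if any operand of the BUILD_VECTOR node is a constant or UNDEF.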
1364 | static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) { |
1365 | for (unsigned i = 0; i < Op->getNumOperands(); ++i) |
1366 | if (isConstantOrUndef(Op: Op->getOperand(Num: i))) |
1367 | return true; |
1368 | return false; |
1369 | } |
1370 | |
1371 | SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op, |
1372 | SelectionDAG &DAG) const { |
1373 | BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Val&: Op); |
1374 | EVT ResTy = Op->getValueType(ResNo: 0); |
1375 | SDLoc DL(Op); |
1376 | APInt SplatValue, SplatUndef; |
1377 | unsigned SplatBitSize; |
1378 | bool HasAnyUndefs; |
1379 | bool Is128Vec = ResTy.is128BitVector(); |
1380 | bool Is256Vec = ResTy.is256BitVector(); |
1381 | |
1382 | if ((!Subtarget.hasExtLSX() || !Is128Vec) && |
1383 | (!Subtarget.hasExtLASX() || !Is256Vec)) |
1384 | return SDValue(); |
1385 | |
1386 | if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, |
1387 | /*MinSplatBits=*/8) && |
1388 | SplatBitSize <= 64) { |
1389 | // We can only cope with 8, 16, 32, or 64-bit elements. |
1390 | if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 && |
1391 | SplatBitSize != 64) |
1392 | return SDValue(); |
1393 | |
1394 | EVT ViaVecTy; |
1395 | |
1396 | switch (SplatBitSize) { |
1397 | default: |
1398 | return SDValue(); |
1399 | case 8: |
1400 | ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8; |
1401 | break; |
1402 | case 16: |
1403 | ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16; |
1404 | break; |
1405 | case 32: |
1406 | ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32; |
1407 | break; |
1408 | case 64: |
1409 | ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64; |
1410 | break; |
1411 | } |
1412 | |
1413 | // SelectionDAG::getConstant will promote SplatValue appropriately. |
1414 | SDValue Result = DAG.getConstant(Val: SplatValue, DL, VT: ViaVecTy); |
1415 | |
1416 | // Bitcast to the type we originally wanted. |
1417 | if (ViaVecTy != ResTy) |
1418 | Result = DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(Node), VT: ResTy, Operand: Result); |
1419 | |
1420 | return Result; |
1421 | } |
1422 | |
1423 | if (DAG.isSplatValue(V: Op, /*AllowUndefs=*/false)) |
1424 | return Op; |
1425 | |
1426 | if (!isConstantOrUndefBUILD_VECTOR(Op: Node)) { |
1427 | // Use INSERT_VECTOR_ELT operations rather than expand to stores. |
1428 | // The resulting code is the same length as the expansion, but it doesn't |
1429 | // use memory operations. |
1430 | EVT ResTy = Node->getValueType(ResNo: 0); |
1431 | |
1432 | assert(ResTy.isVector()); |
1433 | |
1434 | unsigned NumElts = ResTy.getVectorNumElements(); |
1435 | SDValue Vector = DAG.getUNDEF(VT: ResTy); |
1436 | for (unsigned i = 0; i < NumElts; ++i) { |
1437 | Vector = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: ResTy, N1: Vector, |
1438 | N2: Node->getOperand(Num: i), |
1439 | N3: DAG.getConstant(Val: i, DL, VT: Subtarget.getGRLenVT())); |
1440 | } |
1441 | return Vector; |
1442 | } |
1443 | |
1444 | return SDValue(); |
1445 | } |
1446 | |
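// EXTRACT_VECTOR_ELT is kept as-is when the index is a constant and either the
// element type is 32/64 bits wide or the element lies in the low half of the
// vector; otherwise an empty SDValue is returned to use the default expansion.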
1447 | SDValue |
1448 | LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
1449 | SelectionDAG &DAG) const { |
1450 | EVT VecTy = Op->getOperand(Num: 0)->getValueType(ResNo: 0); |
1451 | SDValue Idx = Op->getOperand(Num: 1); |
1452 | EVT EltTy = VecTy.getVectorElementType(); |
1453 | unsigned NumElts = VecTy.getVectorNumElements(); |
1454 | |
1455 | if (isa<ConstantSDNode>(Val: Idx) && |
1456 | (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 || |
1457 | EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2)) |
1458 | return Op; |
1459 | |
1460 | return SDValue(); |
1461 | } |
1462 | |
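// INSERT_VECTOR_ELT with a constant index can be matched directly; a
// non-constant index falls back to the default expansion.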
1463 | SDValue |
1464 | LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, |
1465 | SelectionDAG &DAG) const { |
1466 | if (isa<ConstantSDNode>(Val: Op->getOperand(Num: 2))) |
1467 | return Op; |
1468 | return SDValue(); |
1469 | } |
1470 | |
1471 | SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op, |
1472 | SelectionDAG &DAG) const { |
1473 | SDLoc DL(Op); |
1474 | SyncScope::ID FenceSSID = |
1475 | static_cast<SyncScope::ID>(Op.getConstantOperandVal(i: 2)); |
1476 | |
1477 | // singlethread fences only synchronize with signal handlers on the same |
1478 | // thread and thus only need to preserve instruction order, not actually |
1479 | // enforce memory ordering. |
1480 | if (FenceSSID == SyncScope::SingleThread) |
1481 | // MEMBARRIER is a compiler barrier; it codegens to a no-op. |
1482 | return DAG.getNode(Opcode: ISD::MEMBARRIER, DL, VT: MVT::Other, Operand: Op.getOperand(i: 0)); |
1483 | |
1484 | return Op; |
1485 | } |
1486 | |
1487 | SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op, |
1488 | SelectionDAG &DAG) const { |
1489 | |
1490 | if (Subtarget.is64Bit() && Op.getOperand(i: 2).getValueType() == MVT::i32) { |
1491 | DAG.getContext()->emitError( |
1492 | ErrorStr: "On LA64, only 64-bit registers can be written." ); |
1493 | return Op.getOperand(i: 0); |
1494 | } |
1495 | |
1496 | if (!Subtarget.is64Bit() && Op.getOperand(i: 2).getValueType() == MVT::i64) { |
1497 | DAG.getContext()->emitError( |
1498 | ErrorStr: "On LA32, only 32-bit registers can be written." ); |
1499 | return Op.getOperand(i: 0); |
1500 | } |
1501 | |
1502 | return Op; |
1503 | } |
1504 | |
1505 | SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op, |
1506 | SelectionDAG &DAG) const { |
1507 | if (!isa<ConstantSDNode>(Val: Op.getOperand(i: 0))) { |
1508 | DAG.getContext()->emitError(ErrorStr: "argument to '__builtin_frame_address' must " |
1509 | "be a constant integer" ); |
1510 | return SDValue(); |
1511 | } |
1512 | |
1513 | MachineFunction &MF = DAG.getMachineFunction(); |
1514 | MF.getFrameInfo().setFrameAddressIsTaken(true); |
1515 | Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF); |
1516 | EVT VT = Op.getValueType(); |
1517 | SDLoc DL(Op); |
1518 | SDValue FrameAddr = DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: DL, Reg: FrameReg, VT); |
1519 | unsigned Depth = Op.getConstantOperandVal(i: 0); |
1520 | int GRLenInBytes = Subtarget.getGRLen() / 8; |
1521 | |
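// Walk up the frame chain: the loop below assumes the caller's frame pointer
// is spilled at offset -2 * GRLenInBytes from the current frame pointer.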
1522 | while (Depth--) { |
1523 | int Offset = -(GRLenInBytes * 2); |
1524 | SDValue Ptr = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: FrameAddr, |
1525 | N2: DAG.getIntPtrConstant(Val: Offset, DL)); |
1526 | FrameAddr = |
1527 | DAG.getLoad(VT, dl: DL, Chain: DAG.getEntryNode(), Ptr, PtrInfo: MachinePointerInfo()); |
1528 | } |
1529 | return FrameAddr; |
1530 | } |
1531 | |
1532 | SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op, |
1533 | SelectionDAG &DAG) const { |
1534 | if (verifyReturnAddressArgumentIsConstant(Op, DAG)) |
1535 | return SDValue(); |
1536 | |
1537 | // Currently we only support lowering the return address for the current frame.
1538 | if (Op.getConstantOperandVal(i: 0) != 0) { |
1539 | DAG.getContext()->emitError( |
1540 | ErrorStr: "return address can only be determined for the current frame" ); |
1541 | return SDValue(); |
1542 | } |
1543 | |
1544 | MachineFunction &MF = DAG.getMachineFunction(); |
1545 | MF.getFrameInfo().setReturnAddressIsTaken(true); |
1546 | MVT GRLenVT = Subtarget.getGRLenVT(); |
1547 | |
1548 | // Return the value of the return address register, marking it an implicit |
1549 | // live-in. |
1550 | Register Reg = MF.addLiveIn(PReg: Subtarget.getRegisterInfo()->getRARegister(), |
1551 | RC: getRegClassFor(VT: GRLenVT)); |
1552 | return DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: SDLoc(Op), Reg, VT: GRLenVT); |
1553 | } |
1554 | |
1555 | SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op, |
1556 | SelectionDAG &DAG) const { |
1557 | MachineFunction &MF = DAG.getMachineFunction(); |
1558 | auto Size = Subtarget.getGRLen() / 8; |
1559 | auto FI = MF.getFrameInfo().CreateFixedObject(Size, SPOffset: 0, IsImmutable: false); |
1560 | return DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout())); |
1561 | } |
1562 | |
1563 | SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op, |
1564 | SelectionDAG &DAG) const { |
1565 | MachineFunction &MF = DAG.getMachineFunction(); |
1566 | auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>(); |
1567 | |
1568 | SDLoc DL(Op); |
1569 | SDValue FI = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(), |
1570 | VT: getPointerTy(DL: MF.getDataLayout())); |
1571 | |
1572 | // vastart just stores the address of the VarArgsFrameIndex slot into the |
1573 | // memory location argument. |
1574 | const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue(); |
1575 | return DAG.getStore(Chain: Op.getOperand(i: 0), dl: DL, Val: FI, Ptr: Op.getOperand(i: 1), |
1576 | PtrInfo: MachinePointerInfo(SV)); |
1577 | } |
1578 | |
1579 | SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op, |
1580 | SelectionDAG &DAG) const { |
1581 | assert(Subtarget.is64Bit() && Subtarget.hasBasicF() && |
1582 | !Subtarget.hasBasicD() && "unexpected target features" ); |
1583 | |
1584 | SDLoc DL(Op); |
1585 | SDValue Op0 = Op.getOperand(i: 0); |
1586 | if (Op0->getOpcode() == ISD::AND) { |
1587 | auto *C = dyn_cast<ConstantSDNode>(Val: Op0.getOperand(i: 1)); |
1588 | if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF)) |
1589 | return Op; |
1590 | } |
1591 | |
1592 | if (Op0->getOpcode() == LoongArchISD::BSTRPICK && |
1593 | Op0.getConstantOperandVal(i: 1) < UINT64_C(0X1F) && |
1594 | Op0.getConstantOperandVal(i: 2) == UINT64_C(0)) |
1595 | return Op; |
1596 | |
1597 | if (Op0.getOpcode() == ISD::AssertZext && |
1598 | dyn_cast<VTSDNode>(Val: Op0.getOperand(i: 1))->getVT().bitsLT(VT: MVT::i32)) |
1599 | return Op; |
1600 | |
1601 | EVT OpVT = Op0.getValueType(); |
1602 | EVT RetVT = Op.getValueType(); |
1603 | RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT); |
1604 | MakeLibCallOptions CallOptions; |
1605 | CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT, Value: true); |
1606 | SDValue Chain = SDValue(); |
1607 | SDValue Result; |
1608 | std::tie(args&: Result, args&: Chain) = |
1609 | makeLibCall(DAG, LC, RetVT: Op.getValueType(), Ops: Op0, CallOptions, dl: DL, Chain); |
1610 | return Result; |
1611 | } |
1612 | |
1613 | SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op, |
1614 | SelectionDAG &DAG) const { |
1615 | assert(Subtarget.is64Bit() && Subtarget.hasBasicF() && |
1616 | !Subtarget.hasBasicD() && "unexpected target features" ); |
1617 | |
1618 | SDLoc DL(Op); |
1619 | SDValue Op0 = Op.getOperand(i: 0); |
1620 | |
1621 | if ((Op0.getOpcode() == ISD::AssertSext || |
1622 | Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) && |
1623 | dyn_cast<VTSDNode>(Val: Op0.getOperand(i: 1))->getVT().bitsLE(VT: MVT::i32)) |
1624 | return Op; |
1625 | |
1626 | EVT OpVT = Op0.getValueType(); |
1627 | EVT RetVT = Op.getValueType(); |
1628 | RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT); |
1629 | MakeLibCallOptions CallOptions; |
1630 | CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT, Value: true); |
1631 | SDValue Chain = SDValue(); |
1632 | SDValue Result; |
1633 | std::tie(args&: Result, args&: Chain) = |
1634 | makeLibCall(DAG, LC, RetVT: Op.getValueType(), Ops: Op0, CallOptions, dl: DL, Chain); |
1635 | return Result; |
1636 | } |
1637 | |
1638 | SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op, |
1639 | SelectionDAG &DAG) const { |
1640 | |
1641 | SDLoc DL(Op); |
1642 | SDValue Op0 = Op.getOperand(i: 0); |
1643 | |
1644 | if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 && |
1645 | Subtarget.is64Bit() && Subtarget.hasBasicF()) { |
1646 | SDValue NewOp0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op0); |
1647 | return DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64, DL, VT: MVT::f32, Operand: NewOp0); |
1648 | } |
1649 | return Op; |
1650 | } |
1651 | |
1652 | SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op, |
1653 | SelectionDAG &DAG) const { |
1654 | |
1655 | SDLoc DL(Op); |
1656 | |
1657 | if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() && |
1658 | !Subtarget.hasBasicD()) { |
1659 | SDValue Dst = |
1660 | DAG.getNode(Opcode: LoongArchISD::FTINT, DL, VT: MVT::f32, Operand: Op.getOperand(i: 0)); |
1661 | return DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Dst); |
1662 | } |
1663 | |
1664 | EVT FPTy = EVT::getFloatingPointVT(BitWidth: Op.getValueSizeInBits()); |
1665 | SDValue Trunc = DAG.getNode(Opcode: LoongArchISD::FTINT, DL, VT: FPTy, Operand: Op.getOperand(i: 0)); |
1666 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: Op.getValueType(), Operand: Trunc); |
1667 | } |
1668 | |
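// getTargetNode overloads: wrap the various address-carrying SDNodes (global
// addresses, block addresses, constant pools, jump tables) in their
// corresponding target-specific nodes.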
1669 | static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty, |
1670 | SelectionDAG &DAG, unsigned Flags) { |
1671 | return DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: Flags); |
1672 | } |
1673 | |
1674 | static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty, |
1675 | SelectionDAG &DAG, unsigned Flags) { |
1676 | return DAG.getTargetBlockAddress(BA: N->getBlockAddress(), VT: Ty, Offset: N->getOffset(), |
1677 | TargetFlags: Flags); |
1678 | } |
1679 | |
1680 | static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty, |
1681 | SelectionDAG &DAG, unsigned Flags) { |
1682 | return DAG.getTargetConstantPool(C: N->getConstVal(), VT: Ty, Align: N->getAlign(), |
1683 | Offset: N->getOffset(), TargetFlags: Flags); |
1684 | } |
1685 | |
1686 | static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty, |
1687 | SelectionDAG &DAG, unsigned Flags) { |
1688 | return DAG.getTargetJumpTable(JTI: N->getIndex(), VT: Ty, TargetFlags: Flags); |
1689 | } |
1690 | |
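// Return the address of symbol-like node N under code model M. DSO-local
// symbols use a PC-relative pseudo (PseudoLA_PCREL*); other symbols are loaded
// from the GOT (PseudoLA_GOT*), with the *_LARGE variants used for the large
// code model.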
1691 | template <class NodeTy> |
1692 | SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, |
1693 | CodeModel::Model M, |
1694 | bool IsLocal) const { |
1695 | SDLoc DL(N); |
1696 | EVT Ty = getPointerTy(DL: DAG.getDataLayout()); |
1697 | SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); |
1698 | SDValue Load; |
1699 | |
1700 | switch (M) { |
1701 | default: |
1702 | report_fatal_error(reason: "Unsupported code model" ); |
1703 | |
1704 | case CodeModel::Large: { |
1705 | assert(Subtarget.is64Bit() && "Large code model requires LA64" ); |
1706 | |
1707 | // This is not actually used, but is necessary for successfully matching |
1708 | // the PseudoLA_*_LARGE nodes. |
1709 | SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty); |
1710 | if (IsLocal) { |
1711 | // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that |
1712 | // eventually becomes the desired 5-insn code sequence. |
1713 | Load = SDValue(DAG.getMachineNode(Opcode: LoongArch::PseudoLA_PCREL_LARGE, dl: DL, VT: Ty, |
1714 | Op1: Tmp, Op2: Addr), |
1715 | 0); |
1716 | } else { |
1717 | // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that |
1718 | // eventually becomes the desired 5-insn code sequence. |
1719 | Load = SDValue( |
1720 | DAG.getMachineNode(Opcode: LoongArch::PseudoLA_GOT_LARGE, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), |
1721 | 0); |
1722 | } |
1723 | break; |
1724 | } |
1725 | |
1726 | case CodeModel::Small: |
1727 | case CodeModel::Medium: |
1728 | if (IsLocal) { |
1729 | // This generates the pattern (PseudoLA_PCREL sym), which expands to |
1730 | // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)). |
1731 | Load = SDValue( |
1732 | DAG.getMachineNode(Opcode: LoongArch::PseudoLA_PCREL, dl: DL, VT: Ty, Op1: Addr), 0); |
1733 | } else { |
1734 | // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d |
1735 | // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)). |
1736 | Load = |
1737 | SDValue(DAG.getMachineNode(Opcode: LoongArch::PseudoLA_GOT, dl: DL, VT: Ty, Op1: Addr), 0); |
1738 | } |
1739 | } |
1740 | |
1741 | if (!IsLocal) { |
1742 | // Mark the load instruction as invariant to enable hoisting in MachineLICM. |
1743 | MachineFunction &MF = DAG.getMachineFunction(); |
1744 | MachineMemOperand *MemOp = MF.getMachineMemOperand( |
1745 | PtrInfo: MachinePointerInfo::getGOT(MF), |
1746 | f: MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | |
1747 | MachineMemOperand::MOInvariant, |
1748 | MemTy: LLT(Ty.getSimpleVT()), base_alignment: Align(Ty.getFixedSizeInBits() / 8)); |
1749 | DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Load.getNode()), NewMemRefs: {MemOp}); |
1750 | } |
1751 | |
1752 | return Load; |
1753 | } |
1754 | |
1755 | SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op, |
1756 | SelectionDAG &DAG) const { |
1757 | return getAddr(N: cast<BlockAddressSDNode>(Val&: Op), DAG, |
1758 | M: DAG.getTarget().getCodeModel()); |
1759 | } |
1760 | |
1761 | SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op, |
1762 | SelectionDAG &DAG) const { |
1763 | return getAddr(N: cast<JumpTableSDNode>(Val&: Op), DAG, |
1764 | M: DAG.getTarget().getCodeModel()); |
1765 | } |
1766 | |
1767 | SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op, |
1768 | SelectionDAG &DAG) const { |
1769 | return getAddr(N: cast<ConstantPoolSDNode>(Val&: Op), DAG, |
1770 | M: DAG.getTarget().getCodeModel()); |
1771 | } |
1772 | |
1773 | SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op, |
1774 | SelectionDAG &DAG) const { |
1775 | GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Val&: Op); |
1776 | assert(N->getOffset() == 0 && "unexpected offset in global node" ); |
1777 | auto CM = DAG.getTarget().getCodeModel(); |
1778 | const GlobalValue *GV = N->getGlobal(); |
1779 | |
1780 | if (GV->isDSOLocal() && isa<GlobalVariable>(Val: GV)) { |
1781 | if (auto GCM = dyn_cast<GlobalVariable>(Val: GV)->getCodeModel()) |
1782 | CM = *GCM; |
1783 | } |
1784 | |
1785 | return getAddr(N, DAG, M: CM, IsLocal: GV->isDSOLocal()); |
1786 | } |
1787 | |
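// Lower a TLS address in the initial-exec or local-exec model: materialize the
// TLS offset with the given pseudo (via the GOT when UseGOT is set) and add the
// thread pointer ($tp, i.e. R2).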
1788 | SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, |
1789 | SelectionDAG &DAG, |
1790 | unsigned Opc, bool UseGOT, |
1791 | bool Large) const { |
1792 | SDLoc DL(N); |
1793 | EVT Ty = getPointerTy(DL: DAG.getDataLayout()); |
1794 | MVT GRLenVT = Subtarget.getGRLenVT(); |
1795 | |
1796 | // This is not actually used, but is necessary for successfully matching the |
1797 | // PseudoLA_*_LARGE nodes. |
1798 | SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty); |
1799 | SDValue Addr = DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: 0); |
1800 | SDValue Offset = Large |
1801 | ? SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), 0) |
1802 | : SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Addr), 0); |
1803 | if (UseGOT) { |
1804 | // Mark the load instruction as invariant to enable hoisting in MachineLICM. |
1805 | MachineFunction &MF = DAG.getMachineFunction(); |
1806 | MachineMemOperand *MemOp = MF.getMachineMemOperand( |
1807 | PtrInfo: MachinePointerInfo::getGOT(MF), |
1808 | f: MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | |
1809 | MachineMemOperand::MOInvariant, |
1810 | MemTy: LLT(Ty.getSimpleVT()), base_alignment: Align(Ty.getFixedSizeInBits() / 8)); |
1811 | DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Offset.getNode()), NewMemRefs: {MemOp}); |
1812 | } |
1813 | |
1814 | // Add the thread pointer. |
1815 | return DAG.getNode(Opcode: ISD::ADD, DL, VT: Ty, N1: Offset, |
1816 | N2: DAG.getRegister(Reg: LoongArch::R2, VT: GRLenVT)); |
1817 | } |
1818 | |
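// Lower a TLS address in the general-dynamic or local-dynamic model: compute
// the address of the GOT entry with the given pseudo and pass it to a call to
// __tls_get_addr.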
1819 | SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, |
1820 | SelectionDAG &DAG, |
1821 | unsigned Opc, |
1822 | bool Large) const { |
1823 | SDLoc DL(N); |
1824 | EVT Ty = getPointerTy(DL: DAG.getDataLayout()); |
1825 | IntegerType *CallTy = Type::getIntNTy(C&: *DAG.getContext(), N: Ty.getSizeInBits()); |
1826 | |
1827 | // This is not actually used, but is necessary for successfully matching the |
1828 | // PseudoLA_*_LARGE nodes. |
1829 | SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty); |
1830 | |
1831 | // Use a PC-relative addressing mode to access the dynamic GOT address. |
1832 | SDValue Addr = DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: 0); |
1833 | SDValue Load = Large ? SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), 0) |
1834 | : SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Addr), 0); |
1835 | |
1836 | // Prepare argument list to generate call. |
1837 | ArgListTy Args; |
1838 | ArgListEntry Entry; |
1839 | Entry.Node = Load; |
1840 | Entry.Ty = CallTy; |
1841 | Args.push_back(x: Entry); |
1842 | |
1843 | // Setup call to __tls_get_addr. |
1844 | TargetLowering::CallLoweringInfo CLI(DAG); |
1845 | CLI.setDebugLoc(DL) |
1846 | .setChain(DAG.getEntryNode()) |
1847 | .setLibCallee(CC: CallingConv::C, ResultType: CallTy, |
1848 | Target: DAG.getExternalSymbol(Sym: "__tls_get_addr" , VT: Ty), |
1849 | ArgsList: std::move(Args)); |
1850 | |
1851 | return LowerCallTo(CLI).first; |
1852 | } |
1853 | |
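// Lower a TLS address using the TLS descriptor (TLSDESC) mechanism, i.e. the
// PseudoLA_TLS_DESC_PC{,_LARGE} pseudos.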
1854 | SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N, |
1855 | SelectionDAG &DAG, unsigned Opc, |
1856 | bool Large) const { |
1857 | SDLoc DL(N); |
1858 | EVT Ty = getPointerTy(DL: DAG.getDataLayout()); |
1859 | const GlobalValue *GV = N->getGlobal(); |
1860 | |
1861 | // This is not actually used, but is necessary for successfully matching the |
1862 | // PseudoLA_*_LARGE nodes. |
1863 | SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty); |
1864 | |
1865 | // Use a PC-relative addressing mode to access the global dynamic GOT address. |
1866 | // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym). |
1867 | SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: 0); |
1868 | return Large ? SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), 0) |
1869 | : SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Addr), 0); |
1870 | } |
1871 | |
1872 | SDValue |
1873 | LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op, |
1874 | SelectionDAG &DAG) const { |
1875 | if (DAG.getMachineFunction().getFunction().getCallingConv() == |
1876 | CallingConv::GHC) |
1877 | report_fatal_error(reason: "In GHC calling convention TLS is not supported" ); |
1878 | |
1879 | bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large; |
1880 | assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64" ); |
1881 | |
1882 | GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Val&: Op); |
1883 | assert(N->getOffset() == 0 && "unexpected offset in global node" ); |
1884 | |
1885 | if (DAG.getTarget().useEmulatedTLS()) |
1886 | report_fatal_error(reason: "the emulated TLS is prohibited" , |
1887 | /*GenCrashDiag=*/gen_crash_diag: false); |
1888 | |
1889 | bool IsDesc = DAG.getTarget().useTLSDESC(); |
1890 | |
1891 | switch (getTargetMachine().getTLSModel(GV: N->getGlobal())) { |
1892 | case TLSModel::GeneralDynamic: |
1893 | // In this model, application code calls the dynamic linker function |
1894 | // __tls_get_addr to locate TLS offsets into the dynamic thread vector at |
1895 | // runtime. |
1896 | if (!IsDesc) |
1897 | return getDynamicTLSAddr(N, DAG, |
1898 | Opc: Large ? LoongArch::PseudoLA_TLS_GD_LARGE |
1899 | : LoongArch::PseudoLA_TLS_GD, |
1900 | Large); |
1901 | break; |
1902 | case TLSModel::LocalDynamic: |
1903 | // Same as GeneralDynamic, except for assembly modifiers and relocation |
1904 | // records. |
1905 | if (!IsDesc) |
1906 | return getDynamicTLSAddr(N, DAG, |
1907 | Opc: Large ? LoongArch::PseudoLA_TLS_LD_LARGE |
1908 | : LoongArch::PseudoLA_TLS_LD, |
1909 | Large); |
1910 | break; |
1911 | case TLSModel::InitialExec: |
1912 | // This model uses the GOT to resolve TLS offsets. |
1913 | return getStaticTLSAddr(N, DAG, |
1914 | Opc: Large ? LoongArch::PseudoLA_TLS_IE_LARGE |
1915 | : LoongArch::PseudoLA_TLS_IE, |
1916 | /*UseGOT=*/true, Large); |
1917 | case TLSModel::LocalExec: |
1918 | // This model is used when static linking as the TLS offsets are resolved |
1919 | // during program linking. |
1920 | // |
1921 | // This node doesn't need an extra argument for the large code model. |
1922 | return getStaticTLSAddr(N, DAG, Opc: LoongArch::PseudoLA_TLS_LE, |
1923 | /*UseGOT=*/false); |
1924 | } |
1925 | |
1926 | return getTLSDescAddr(N, DAG, |
1927 | Opc: Large ? LoongArch::PseudoLA_TLS_DESC_PC_LARGE |
1928 | : LoongArch::PseudoLA_TLS_DESC_PC, |
1929 | Large); |
1930 | } |
1931 | |
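// Check that the intrinsic operand at index ImmOp is an immediate that fits in
// N bits (signed when IsSigned). On failure, emit a diagnostic and return an
// UNDEF of the result type; on success, return an empty SDValue so lowering can
// continue.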
1932 | template <unsigned N> |
1933 | static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, |
1934 | SelectionDAG &DAG, bool IsSigned = false) { |
1935 | auto *CImm = cast<ConstantSDNode>(Val: Op->getOperand(Num: ImmOp)); |
1936 | // Check the ImmArg. |
1937 | if ((IsSigned && !isInt<N>(CImm->getSExtValue())) || |
1938 | (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) { |
1939 | DAG.getContext()->emitError(ErrorStr: Op->getOperationName(G: 0) + |
1940 | ": argument out of range." ); |
1941 | return DAG.getNode(Opcode: ISD::UNDEF, DL: SDLoc(Op), VT: Op.getValueType()); |
1942 | } |
1943 | return SDValue(); |
1944 | } |
1945 | |
1946 | SDValue |
1947 | LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, |
1948 | SelectionDAG &DAG) const { |
1949 | SDLoc DL(Op); |
1950 | switch (Op.getConstantOperandVal(i: 0)) { |
1951 | default: |
1952 | return SDValue(); // Don't custom lower most intrinsics. |
1953 | case Intrinsic::thread_pointer: { |
1954 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
1955 | return DAG.getRegister(Reg: LoongArch::R2, VT: PtrVT); |
1956 | } |
1957 | case Intrinsic::loongarch_lsx_vpickve2gr_d: |
1958 | case Intrinsic::loongarch_lsx_vpickve2gr_du: |
1959 | case Intrinsic::loongarch_lsx_vreplvei_d: |
1960 | case Intrinsic::loongarch_lasx_xvrepl128vei_d: |
1961 | return checkIntrinsicImmArg<1>(Op, ImmOp: 2, DAG); |
1962 | case Intrinsic::loongarch_lsx_vreplvei_w: |
1963 | case Intrinsic::loongarch_lasx_xvrepl128vei_w: |
1964 | case Intrinsic::loongarch_lasx_xvpickve2gr_d: |
1965 | case Intrinsic::loongarch_lasx_xvpickve2gr_du: |
1966 | case Intrinsic::loongarch_lasx_xvpickve_d: |
1967 | case Intrinsic::loongarch_lasx_xvpickve_d_f: |
1968 | return checkIntrinsicImmArg<2>(Op, ImmOp: 2, DAG); |
1969 | case Intrinsic::loongarch_lasx_xvinsve0_d: |
1970 | return checkIntrinsicImmArg<2>(Op, ImmOp: 3, DAG); |
1971 | case Intrinsic::loongarch_lsx_vsat_b: |
1972 | case Intrinsic::loongarch_lsx_vsat_bu: |
1973 | case Intrinsic::loongarch_lsx_vrotri_b: |
1974 | case Intrinsic::loongarch_lsx_vsllwil_h_b: |
1975 | case Intrinsic::loongarch_lsx_vsllwil_hu_bu: |
1976 | case Intrinsic::loongarch_lsx_vsrlri_b: |
1977 | case Intrinsic::loongarch_lsx_vsrari_b: |
1978 | case Intrinsic::loongarch_lsx_vreplvei_h: |
1979 | case Intrinsic::loongarch_lasx_xvsat_b: |
1980 | case Intrinsic::loongarch_lasx_xvsat_bu: |
1981 | case Intrinsic::loongarch_lasx_xvrotri_b: |
1982 | case Intrinsic::loongarch_lasx_xvsllwil_h_b: |
1983 | case Intrinsic::loongarch_lasx_xvsllwil_hu_bu: |
1984 | case Intrinsic::loongarch_lasx_xvsrlri_b: |
1985 | case Intrinsic::loongarch_lasx_xvsrari_b: |
1986 | case Intrinsic::loongarch_lasx_xvrepl128vei_h: |
1987 | case Intrinsic::loongarch_lasx_xvpickve_w: |
1988 | case Intrinsic::loongarch_lasx_xvpickve_w_f: |
1989 | return checkIntrinsicImmArg<3>(Op, ImmOp: 2, DAG); |
1990 | case Intrinsic::loongarch_lasx_xvinsve0_w: |
1991 | return checkIntrinsicImmArg<3>(Op, ImmOp: 3, DAG); |
1992 | case Intrinsic::loongarch_lsx_vsat_h: |
1993 | case Intrinsic::loongarch_lsx_vsat_hu: |
1994 | case Intrinsic::loongarch_lsx_vrotri_h: |
1995 | case Intrinsic::loongarch_lsx_vsllwil_w_h: |
1996 | case Intrinsic::loongarch_lsx_vsllwil_wu_hu: |
1997 | case Intrinsic::loongarch_lsx_vsrlri_h: |
1998 | case Intrinsic::loongarch_lsx_vsrari_h: |
1999 | case Intrinsic::loongarch_lsx_vreplvei_b: |
2000 | case Intrinsic::loongarch_lasx_xvsat_h: |
2001 | case Intrinsic::loongarch_lasx_xvsat_hu: |
2002 | case Intrinsic::loongarch_lasx_xvrotri_h: |
2003 | case Intrinsic::loongarch_lasx_xvsllwil_w_h: |
2004 | case Intrinsic::loongarch_lasx_xvsllwil_wu_hu: |
2005 | case Intrinsic::loongarch_lasx_xvsrlri_h: |
2006 | case Intrinsic::loongarch_lasx_xvsrari_h: |
2007 | case Intrinsic::loongarch_lasx_xvrepl128vei_b: |
2008 | return checkIntrinsicImmArg<4>(Op, ImmOp: 2, DAG); |
2009 | case Intrinsic::loongarch_lsx_vsrlni_b_h: |
2010 | case Intrinsic::loongarch_lsx_vsrani_b_h: |
2011 | case Intrinsic::loongarch_lsx_vsrlrni_b_h: |
2012 | case Intrinsic::loongarch_lsx_vsrarni_b_h: |
2013 | case Intrinsic::loongarch_lsx_vssrlni_b_h: |
2014 | case Intrinsic::loongarch_lsx_vssrani_b_h: |
2015 | case Intrinsic::loongarch_lsx_vssrlni_bu_h: |
2016 | case Intrinsic::loongarch_lsx_vssrani_bu_h: |
2017 | case Intrinsic::loongarch_lsx_vssrlrni_b_h: |
2018 | case Intrinsic::loongarch_lsx_vssrarni_b_h: |
2019 | case Intrinsic::loongarch_lsx_vssrlrni_bu_h: |
2020 | case Intrinsic::loongarch_lsx_vssrarni_bu_h: |
2021 | case Intrinsic::loongarch_lasx_xvsrlni_b_h: |
2022 | case Intrinsic::loongarch_lasx_xvsrani_b_h: |
2023 | case Intrinsic::loongarch_lasx_xvsrlrni_b_h: |
2024 | case Intrinsic::loongarch_lasx_xvsrarni_b_h: |
2025 | case Intrinsic::loongarch_lasx_xvssrlni_b_h: |
2026 | case Intrinsic::loongarch_lasx_xvssrani_b_h: |
2027 | case Intrinsic::loongarch_lasx_xvssrlni_bu_h: |
2028 | case Intrinsic::loongarch_lasx_xvssrani_bu_h: |
2029 | case Intrinsic::loongarch_lasx_xvssrlrni_b_h: |
2030 | case Intrinsic::loongarch_lasx_xvssrarni_b_h: |
2031 | case Intrinsic::loongarch_lasx_xvssrlrni_bu_h: |
2032 | case Intrinsic::loongarch_lasx_xvssrarni_bu_h: |
2033 | return checkIntrinsicImmArg<4>(Op, ImmOp: 3, DAG); |
2034 | case Intrinsic::loongarch_lsx_vsat_w: |
2035 | case Intrinsic::loongarch_lsx_vsat_wu: |
2036 | case Intrinsic::loongarch_lsx_vrotri_w: |
2037 | case Intrinsic::loongarch_lsx_vsllwil_d_w: |
2038 | case Intrinsic::loongarch_lsx_vsllwil_du_wu: |
2039 | case Intrinsic::loongarch_lsx_vsrlri_w: |
2040 | case Intrinsic::loongarch_lsx_vsrari_w: |
2041 | case Intrinsic::loongarch_lsx_vslei_bu: |
2042 | case Intrinsic::loongarch_lsx_vslei_hu: |
2043 | case Intrinsic::loongarch_lsx_vslei_wu: |
2044 | case Intrinsic::loongarch_lsx_vslei_du: |
2045 | case Intrinsic::loongarch_lsx_vslti_bu: |
2046 | case Intrinsic::loongarch_lsx_vslti_hu: |
2047 | case Intrinsic::loongarch_lsx_vslti_wu: |
2048 | case Intrinsic::loongarch_lsx_vslti_du: |
2049 | case Intrinsic::loongarch_lsx_vbsll_v: |
2050 | case Intrinsic::loongarch_lsx_vbsrl_v: |
2051 | case Intrinsic::loongarch_lasx_xvsat_w: |
2052 | case Intrinsic::loongarch_lasx_xvsat_wu: |
2053 | case Intrinsic::loongarch_lasx_xvrotri_w: |
2054 | case Intrinsic::loongarch_lasx_xvsllwil_d_w: |
2055 | case Intrinsic::loongarch_lasx_xvsllwil_du_wu: |
2056 | case Intrinsic::loongarch_lasx_xvsrlri_w: |
2057 | case Intrinsic::loongarch_lasx_xvsrari_w: |
2058 | case Intrinsic::loongarch_lasx_xvslei_bu: |
2059 | case Intrinsic::loongarch_lasx_xvslei_hu: |
2060 | case Intrinsic::loongarch_lasx_xvslei_wu: |
2061 | case Intrinsic::loongarch_lasx_xvslei_du: |
2062 | case Intrinsic::loongarch_lasx_xvslti_bu: |
2063 | case Intrinsic::loongarch_lasx_xvslti_hu: |
2064 | case Intrinsic::loongarch_lasx_xvslti_wu: |
2065 | case Intrinsic::loongarch_lasx_xvslti_du: |
2066 | case Intrinsic::loongarch_lasx_xvbsll_v: |
2067 | case Intrinsic::loongarch_lasx_xvbsrl_v: |
2068 | return checkIntrinsicImmArg<5>(Op, ImmOp: 2, DAG); |
2069 | case Intrinsic::loongarch_lsx_vseqi_b: |
2070 | case Intrinsic::loongarch_lsx_vseqi_h: |
2071 | case Intrinsic::loongarch_lsx_vseqi_w: |
2072 | case Intrinsic::loongarch_lsx_vseqi_d: |
2073 | case Intrinsic::loongarch_lsx_vslei_b: |
2074 | case Intrinsic::loongarch_lsx_vslei_h: |
2075 | case Intrinsic::loongarch_lsx_vslei_w: |
2076 | case Intrinsic::loongarch_lsx_vslei_d: |
2077 | case Intrinsic::loongarch_lsx_vslti_b: |
2078 | case Intrinsic::loongarch_lsx_vslti_h: |
2079 | case Intrinsic::loongarch_lsx_vslti_w: |
2080 | case Intrinsic::loongarch_lsx_vslti_d: |
2081 | case Intrinsic::loongarch_lasx_xvseqi_b: |
2082 | case Intrinsic::loongarch_lasx_xvseqi_h: |
2083 | case Intrinsic::loongarch_lasx_xvseqi_w: |
2084 | case Intrinsic::loongarch_lasx_xvseqi_d: |
2085 | case Intrinsic::loongarch_lasx_xvslei_b: |
2086 | case Intrinsic::loongarch_lasx_xvslei_h: |
2087 | case Intrinsic::loongarch_lasx_xvslei_w: |
2088 | case Intrinsic::loongarch_lasx_xvslei_d: |
2089 | case Intrinsic::loongarch_lasx_xvslti_b: |
2090 | case Intrinsic::loongarch_lasx_xvslti_h: |
2091 | case Intrinsic::loongarch_lasx_xvslti_w: |
2092 | case Intrinsic::loongarch_lasx_xvslti_d: |
2093 | return checkIntrinsicImmArg<5>(Op, ImmOp: 2, DAG, /*IsSigned=*/true); |
2094 | case Intrinsic::loongarch_lsx_vsrlni_h_w: |
2095 | case Intrinsic::loongarch_lsx_vsrani_h_w: |
2096 | case Intrinsic::loongarch_lsx_vsrlrni_h_w: |
2097 | case Intrinsic::loongarch_lsx_vsrarni_h_w: |
2098 | case Intrinsic::loongarch_lsx_vssrlni_h_w: |
2099 | case Intrinsic::loongarch_lsx_vssrani_h_w: |
2100 | case Intrinsic::loongarch_lsx_vssrlni_hu_w: |
2101 | case Intrinsic::loongarch_lsx_vssrani_hu_w: |
2102 | case Intrinsic::loongarch_lsx_vssrlrni_h_w: |
2103 | case Intrinsic::loongarch_lsx_vssrarni_h_w: |
2104 | case Intrinsic::loongarch_lsx_vssrlrni_hu_w: |
2105 | case Intrinsic::loongarch_lsx_vssrarni_hu_w: |
2106 | case Intrinsic::loongarch_lsx_vfrstpi_b: |
2107 | case Intrinsic::loongarch_lsx_vfrstpi_h: |
2108 | case Intrinsic::loongarch_lasx_xvsrlni_h_w: |
2109 | case Intrinsic::loongarch_lasx_xvsrani_h_w: |
2110 | case Intrinsic::loongarch_lasx_xvsrlrni_h_w: |
2111 | case Intrinsic::loongarch_lasx_xvsrarni_h_w: |
2112 | case Intrinsic::loongarch_lasx_xvssrlni_h_w: |
2113 | case Intrinsic::loongarch_lasx_xvssrani_h_w: |
2114 | case Intrinsic::loongarch_lasx_xvssrlni_hu_w: |
2115 | case Intrinsic::loongarch_lasx_xvssrani_hu_w: |
2116 | case Intrinsic::loongarch_lasx_xvssrlrni_h_w: |
2117 | case Intrinsic::loongarch_lasx_xvssrarni_h_w: |
2118 | case Intrinsic::loongarch_lasx_xvssrlrni_hu_w: |
2119 | case Intrinsic::loongarch_lasx_xvssrarni_hu_w: |
2120 | case Intrinsic::loongarch_lasx_xvfrstpi_b: |
2121 | case Intrinsic::loongarch_lasx_xvfrstpi_h: |
2122 | return checkIntrinsicImmArg<5>(Op, ImmOp: 3, DAG); |
2123 | case Intrinsic::loongarch_lsx_vsat_d: |
2124 | case Intrinsic::loongarch_lsx_vsat_du: |
2125 | case Intrinsic::loongarch_lsx_vrotri_d: |
2126 | case Intrinsic::loongarch_lsx_vsrlri_d: |
2127 | case Intrinsic::loongarch_lsx_vsrari_d: |
2128 | case Intrinsic::loongarch_lasx_xvsat_d: |
2129 | case Intrinsic::loongarch_lasx_xvsat_du: |
2130 | case Intrinsic::loongarch_lasx_xvrotri_d: |
2131 | case Intrinsic::loongarch_lasx_xvsrlri_d: |
2132 | case Intrinsic::loongarch_lasx_xvsrari_d: |
2133 | return checkIntrinsicImmArg<6>(Op, ImmOp: 2, DAG); |
2134 | case Intrinsic::loongarch_lsx_vsrlni_w_d: |
2135 | case Intrinsic::loongarch_lsx_vsrani_w_d: |
2136 | case Intrinsic::loongarch_lsx_vsrlrni_w_d: |
2137 | case Intrinsic::loongarch_lsx_vsrarni_w_d: |
2138 | case Intrinsic::loongarch_lsx_vssrlni_w_d: |
2139 | case Intrinsic::loongarch_lsx_vssrani_w_d: |
2140 | case Intrinsic::loongarch_lsx_vssrlni_wu_d: |
2141 | case Intrinsic::loongarch_lsx_vssrani_wu_d: |
2142 | case Intrinsic::loongarch_lsx_vssrlrni_w_d: |
2143 | case Intrinsic::loongarch_lsx_vssrarni_w_d: |
2144 | case Intrinsic::loongarch_lsx_vssrlrni_wu_d: |
2145 | case Intrinsic::loongarch_lsx_vssrarni_wu_d: |
2146 | case Intrinsic::loongarch_lasx_xvsrlni_w_d: |
2147 | case Intrinsic::loongarch_lasx_xvsrani_w_d: |
2148 | case Intrinsic::loongarch_lasx_xvsrlrni_w_d: |
2149 | case Intrinsic::loongarch_lasx_xvsrarni_w_d: |
2150 | case Intrinsic::loongarch_lasx_xvssrlni_w_d: |
2151 | case Intrinsic::loongarch_lasx_xvssrani_w_d: |
2152 | case Intrinsic::loongarch_lasx_xvssrlni_wu_d: |
2153 | case Intrinsic::loongarch_lasx_xvssrani_wu_d: |
2154 | case Intrinsic::loongarch_lasx_xvssrlrni_w_d: |
2155 | case Intrinsic::loongarch_lasx_xvssrarni_w_d: |
2156 | case Intrinsic::loongarch_lasx_xvssrlrni_wu_d: |
2157 | case Intrinsic::loongarch_lasx_xvssrarni_wu_d: |
2158 | return checkIntrinsicImmArg<6>(Op, ImmOp: 3, DAG); |
2159 | case Intrinsic::loongarch_lsx_vsrlni_d_q: |
2160 | case Intrinsic::loongarch_lsx_vsrani_d_q: |
2161 | case Intrinsic::loongarch_lsx_vsrlrni_d_q: |
2162 | case Intrinsic::loongarch_lsx_vsrarni_d_q: |
2163 | case Intrinsic::loongarch_lsx_vssrlni_d_q: |
2164 | case Intrinsic::loongarch_lsx_vssrani_d_q: |
2165 | case Intrinsic::loongarch_lsx_vssrlni_du_q: |
2166 | case Intrinsic::loongarch_lsx_vssrani_du_q: |
2167 | case Intrinsic::loongarch_lsx_vssrlrni_d_q: |
2168 | case Intrinsic::loongarch_lsx_vssrarni_d_q: |
2169 | case Intrinsic::loongarch_lsx_vssrlrni_du_q: |
2170 | case Intrinsic::loongarch_lsx_vssrarni_du_q: |
2171 | case Intrinsic::loongarch_lasx_xvsrlni_d_q: |
2172 | case Intrinsic::loongarch_lasx_xvsrani_d_q: |
2173 | case Intrinsic::loongarch_lasx_xvsrlrni_d_q: |
2174 | case Intrinsic::loongarch_lasx_xvsrarni_d_q: |
2175 | case Intrinsic::loongarch_lasx_xvssrlni_d_q: |
2176 | case Intrinsic::loongarch_lasx_xvssrani_d_q: |
2177 | case Intrinsic::loongarch_lasx_xvssrlni_du_q: |
2178 | case Intrinsic::loongarch_lasx_xvssrani_du_q: |
2179 | case Intrinsic::loongarch_lasx_xvssrlrni_d_q: |
2180 | case Intrinsic::loongarch_lasx_xvssrarni_d_q: |
2181 | case Intrinsic::loongarch_lasx_xvssrlrni_du_q: |
2182 | case Intrinsic::loongarch_lasx_xvssrarni_du_q: |
2183 | return checkIntrinsicImmArg<7>(Op, ImmOp: 3, DAG); |
2184 | case Intrinsic::loongarch_lsx_vnori_b: |
2185 | case Intrinsic::loongarch_lsx_vshuf4i_b: |
2186 | case Intrinsic::loongarch_lsx_vshuf4i_h: |
2187 | case Intrinsic::loongarch_lsx_vshuf4i_w: |
2188 | case Intrinsic::loongarch_lasx_xvnori_b: |
2189 | case Intrinsic::loongarch_lasx_xvshuf4i_b: |
2190 | case Intrinsic::loongarch_lasx_xvshuf4i_h: |
2191 | case Intrinsic::loongarch_lasx_xvshuf4i_w: |
2192 | case Intrinsic::loongarch_lasx_xvpermi_d: |
2193 | return checkIntrinsicImmArg<8>(Op, ImmOp: 2, DAG); |
2194 | case Intrinsic::loongarch_lsx_vshuf4i_d: |
2195 | case Intrinsic::loongarch_lsx_vpermi_w: |
2196 | case Intrinsic::loongarch_lsx_vbitseli_b: |
2197 | case Intrinsic::loongarch_lsx_vextrins_b: |
2198 | case Intrinsic::loongarch_lsx_vextrins_h: |
2199 | case Intrinsic::loongarch_lsx_vextrins_w: |
2200 | case Intrinsic::loongarch_lsx_vextrins_d: |
2201 | case Intrinsic::loongarch_lasx_xvshuf4i_d: |
2202 | case Intrinsic::loongarch_lasx_xvpermi_w: |
2203 | case Intrinsic::loongarch_lasx_xvpermi_q: |
2204 | case Intrinsic::loongarch_lasx_xvbitseli_b: |
2205 | case Intrinsic::loongarch_lasx_xvextrins_b: |
2206 | case Intrinsic::loongarch_lasx_xvextrins_h: |
2207 | case Intrinsic::loongarch_lasx_xvextrins_w: |
2208 | case Intrinsic::loongarch_lasx_xvextrins_d: |
2209 | return checkIntrinsicImmArg<8>(Op, ImmOp: 3, DAG); |
2210 | case Intrinsic::loongarch_lsx_vrepli_b: |
2211 | case Intrinsic::loongarch_lsx_vrepli_h: |
2212 | case Intrinsic::loongarch_lsx_vrepli_w: |
2213 | case Intrinsic::loongarch_lsx_vrepli_d: |
2214 | case Intrinsic::loongarch_lasx_xvrepli_b: |
2215 | case Intrinsic::loongarch_lasx_xvrepli_h: |
2216 | case Intrinsic::loongarch_lasx_xvrepli_w: |
2217 | case Intrinsic::loongarch_lasx_xvrepli_d: |
2218 | return checkIntrinsicImmArg<10>(Op, ImmOp: 1, DAG, /*IsSigned=*/true); |
2219 | case Intrinsic::loongarch_lsx_vldi: |
2220 | case Intrinsic::loongarch_lasx_xvldi: |
2221 | return checkIntrinsicImmArg<13>(Op, ImmOp: 1, DAG, /*IsSigned=*/true); |
2222 | } |
2223 | } |
2224 | |
2225 | // Helper function that emits an error message for intrinsics with a chain and
2226 | // returns the merge values of an UNDEF and the chain.
2227 | static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, |
2228 | StringRef ErrorMsg, |
2229 | SelectionDAG &DAG) { |
2230 | DAG.getContext()->emitError(ErrorStr: Op->getOperationName(G: 0) + ": " + ErrorMsg + "." ); |
2231 | return DAG.getMergeValues(Ops: {DAG.getUNDEF(VT: Op.getValueType()), Op.getOperand(i: 0)}, |
2232 | dl: SDLoc(Op)); |
2233 | } |
2234 | |
2235 | SDValue |
2236 | LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, |
2237 | SelectionDAG &DAG) const { |
2238 | SDLoc DL(Op); |
2239 | MVT GRLenVT = Subtarget.getGRLenVT(); |
2240 | EVT VT = Op.getValueType(); |
2241 | SDValue Chain = Op.getOperand(i: 0); |
2242 | const StringRef ErrorMsgOOR = "argument out of range" ; |
2243 | const StringRef ErrorMsgReqLA64 = "requires loongarch64" ; |
2244 | const StringRef ErrorMsgReqF = "requires basic 'f' target feature" ; |
2245 | |
2246 | switch (Op.getConstantOperandVal(i: 1)) { |
2247 | default: |
2248 | return Op; |
2249 | case Intrinsic::loongarch_crc_w_b_w: |
2250 | case Intrinsic::loongarch_crc_w_h_w: |
2251 | case Intrinsic::loongarch_crc_w_w_w: |
2252 | case Intrinsic::loongarch_crc_w_d_w: |
2253 | case Intrinsic::loongarch_crcc_w_b_w: |
2254 | case Intrinsic::loongarch_crcc_w_h_w: |
2255 | case Intrinsic::loongarch_crcc_w_w_w: |
2256 | case Intrinsic::loongarch_crcc_w_d_w: |
2257 | return emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG); |
2258 | case Intrinsic::loongarch_csrrd_w: |
2259 | case Intrinsic::loongarch_csrrd_d: { |
2260 | unsigned Imm = Op.getConstantOperandVal(i: 2); |
2261 | return !isUInt<14>(x: Imm) |
2262 | ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2263 | : DAG.getNode(Opcode: LoongArchISD::CSRRD, DL, ResultTys: {GRLenVT, MVT::Other}, |
2264 | Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
2265 | } |
2266 | case Intrinsic::loongarch_csrwr_w: |
2267 | case Intrinsic::loongarch_csrwr_d: { |
2268 | unsigned Imm = Op.getConstantOperandVal(i: 3); |
2269 | return !isUInt<14>(x: Imm) |
2270 | ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2271 | : DAG.getNode(Opcode: LoongArchISD::CSRWR, DL, ResultTys: {GRLenVT, MVT::Other}, |
2272 | Ops: {Chain, Op.getOperand(i: 2), |
2273 | DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
2274 | } |
2275 | case Intrinsic::loongarch_csrxchg_w: |
2276 | case Intrinsic::loongarch_csrxchg_d: { |
2277 | unsigned Imm = Op.getConstantOperandVal(i: 4); |
2278 | return !isUInt<14>(x: Imm) |
2279 | ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2280 | : DAG.getNode(Opcode: LoongArchISD::CSRXCHG, DL, ResultTys: {GRLenVT, MVT::Other}, |
2281 | Ops: {Chain, Op.getOperand(i: 2), Op.getOperand(i: 3), |
2282 | DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
2283 | } |
2284 | case Intrinsic::loongarch_iocsrrd_d: { |
2285 | return DAG.getNode( |
2286 | Opcode: LoongArchISD::IOCSRRD_D, DL, ResultTys: {GRLenVT, MVT::Other}, |
2287 | Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op.getOperand(i: 2))}); |
2288 | } |
2289 | #define IOCSRRD_CASE(NAME, NODE) \ |
2290 | case Intrinsic::loongarch_##NAME: { \ |
2291 | return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \ |
2292 | {Chain, Op.getOperand(2)}); \ |
2293 | } |
2294 | IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B); |
2295 | IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H); |
2296 | IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W); |
2297 | #undef IOCSRRD_CASE |
2298 | case Intrinsic::loongarch_cpucfg: { |
2299 | return DAG.getNode(Opcode: LoongArchISD::CPUCFG, DL, ResultTys: {GRLenVT, MVT::Other}, |
2300 | Ops: {Chain, Op.getOperand(i: 2)}); |
2301 | } |
2302 | case Intrinsic::loongarch_lddir_d: { |
2303 | unsigned Imm = Op.getConstantOperandVal(i: 3); |
2304 | return !isUInt<8>(x: Imm) |
2305 | ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2306 | : Op; |
2307 | } |
2308 | case Intrinsic::loongarch_movfcsr2gr: { |
2309 | if (!Subtarget.hasBasicF()) |
2310 | return emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgReqF, DAG); |
2311 | unsigned Imm = Op.getConstantOperandVal(i: 2); |
2312 | return !isUInt<2>(x: Imm) |
2313 | ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2314 | : DAG.getNode(Opcode: LoongArchISD::MOVFCSR2GR, DL, ResultTys: {VT, MVT::Other}, |
2315 | Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
2316 | } |
2317 | case Intrinsic::loongarch_lsx_vld: |
2318 | case Intrinsic::loongarch_lsx_vldrepl_b: |
2319 | case Intrinsic::loongarch_lasx_xvld: |
2320 | case Intrinsic::loongarch_lasx_xvldrepl_b: |
2321 | return !isInt<12>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue()) |
2322 | ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2323 | : SDValue(); |
2324 | case Intrinsic::loongarch_lsx_vldrepl_h: |
2325 | case Intrinsic::loongarch_lasx_xvldrepl_h: |
2326 | return !isShiftedInt<11, 1>( |
2327 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue()) |
2328 | ? emitIntrinsicWithChainErrorMessage( |
2329 | Op, ErrorMsg: "argument out of range or not a multiple of 2" , DAG) |
2330 | : SDValue(); |
2331 | case Intrinsic::loongarch_lsx_vldrepl_w: |
2332 | case Intrinsic::loongarch_lasx_xvldrepl_w: |
2333 | return !isShiftedInt<10, 2>( |
2334 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue()) |
2335 | ? emitIntrinsicWithChainErrorMessage( |
2336 | Op, ErrorMsg: "argument out of range or not a multiple of 4" , DAG) |
2337 | : SDValue(); |
2338 | case Intrinsic::loongarch_lsx_vldrepl_d: |
2339 | case Intrinsic::loongarch_lasx_xvldrepl_d: |
2340 | return !isShiftedInt<9, 3>( |
2341 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue()) |
2342 | ? emitIntrinsicWithChainErrorMessage( |
2343 | Op, ErrorMsg: "argument out of range or not a multiple of 8" , DAG) |
2344 | : SDValue(); |
2345 | } |
2346 | } |
2347 | |
2348 | // Helper function that emits an error message for intrinsics with a void return
2349 | // value and returns the chain.
2350 | static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, |
2351 | SelectionDAG &DAG) { |
2352 | |
2353 | DAG.getContext()->emitError(ErrorStr: Op->getOperationName(G: 0) + ": " + ErrorMsg + "." ); |
2354 | return Op.getOperand(i: 0); |
2355 | } |
2356 | |
2357 | SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, |
2358 | SelectionDAG &DAG) const { |
2359 | SDLoc DL(Op); |
2360 | MVT GRLenVT = Subtarget.getGRLenVT(); |
2361 | SDValue Chain = Op.getOperand(i: 0); |
2362 | uint64_t IntrinsicEnum = Op.getConstantOperandVal(i: 1); |
2363 | SDValue Op2 = Op.getOperand(i: 2); |
2364 | const StringRef ErrorMsgOOR = "argument out of range" ; |
2365 | const StringRef ErrorMsgReqLA64 = "requires loongarch64" ; |
2366 | const StringRef ErrorMsgReqLA32 = "requires loongarch32" ; |
2367 | const StringRef ErrorMsgReqF = "requires basic 'f' target feature" ; |
2368 | |
2369 | switch (IntrinsicEnum) { |
2370 | default: |
2371 | // TODO: Add more Intrinsics. |
2372 | return SDValue(); |
2373 | case Intrinsic::loongarch_cacop_d: |
2374 | case Intrinsic::loongarch_cacop_w: { |
2375 | if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit()) |
2376 | return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG); |
2377 | if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit()) |
2378 | return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA32, DAG); |
2379 | // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12) |
2380 | unsigned Imm1 = Op2->getAsZExtVal(); |
2381 | int Imm2 = cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue(); |
2382 | if (!isUInt<5>(x: Imm1) || !isInt<12>(x: Imm2)) |
2383 | return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG); |
2384 | return Op; |
2385 | } |
2386 | case Intrinsic::loongarch_dbar: { |
2387 | unsigned Imm = Op2->getAsZExtVal(); |
2388 | return !isUInt<15>(x: Imm) |
2389 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2390 | : DAG.getNode(Opcode: LoongArchISD::DBAR, DL, VT: MVT::Other, N1: Chain, |
2391 | N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT)); |
2392 | } |
2393 | case Intrinsic::loongarch_ibar: { |
2394 | unsigned Imm = Op2->getAsZExtVal(); |
2395 | return !isUInt<15>(x: Imm) |
2396 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2397 | : DAG.getNode(Opcode: LoongArchISD::IBAR, DL, VT: MVT::Other, N1: Chain, |
2398 | N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT)); |
2399 | } |
2400 | case Intrinsic::loongarch_break: { |
2401 | unsigned Imm = Op2->getAsZExtVal(); |
2402 | return !isUInt<15>(x: Imm) |
2403 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2404 | : DAG.getNode(Opcode: LoongArchISD::BREAK, DL, VT: MVT::Other, N1: Chain, |
2405 | N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT)); |
2406 | } |
2407 | case Intrinsic::loongarch_movgr2fcsr: { |
2408 | if (!Subtarget.hasBasicF()) |
2409 | return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqF, DAG); |
2410 | unsigned Imm = Op2->getAsZExtVal(); |
2411 | return !isUInt<2>(x: Imm) |
2412 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2413 | : DAG.getNode(Opcode: LoongArchISD::MOVGR2FCSR, DL, VT: MVT::Other, N1: Chain, |
2414 | N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT), |
2415 | N3: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: GRLenVT, |
2416 | Operand: Op.getOperand(i: 3))); |
2417 | } |
2418 | case Intrinsic::loongarch_syscall: { |
2419 | unsigned Imm = Op2->getAsZExtVal(); |
2420 | return !isUInt<15>(x: Imm) |
2421 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2422 | : DAG.getNode(Opcode: LoongArchISD::SYSCALL, DL, VT: MVT::Other, N1: Chain, |
2423 | N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT)); |
2424 | } |
2425 | #define IOCSRWR_CASE(NAME, NODE) \ |
2426 | case Intrinsic::loongarch_##NAME: { \ |
2427 | SDValue Op3 = Op.getOperand(3); \ |
2428 | return Subtarget.is64Bit() \ |
2429 | ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \ |
2430 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \ |
2431 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \ |
2432 | : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \ |
2433 | Op3); \ |
2434 | } |
2435 | IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B); |
2436 | IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H); |
2437 | IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W); |
2438 | #undef IOCSRWR_CASE |
2439 | case Intrinsic::loongarch_iocsrwr_d: { |
2440 | return !Subtarget.is64Bit() |
2441 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG) |
2442 | : DAG.getNode(Opcode: LoongArchISD::IOCSRWR_D, DL, VT: MVT::Other, N1: Chain, |
2443 | N2: Op2, |
2444 | N3: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, |
2445 | Operand: Op.getOperand(i: 3))); |
2446 | } |
2447 | #define ASRT_LE_GT_CASE(NAME) \ |
2448 | case Intrinsic::loongarch_##NAME: { \ |
2449 | return !Subtarget.is64Bit() \ |
2450 | ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \ |
2451 | : Op; \ |
2452 | } |
2453 | ASRT_LE_GT_CASE(asrtle_d) |
2454 | ASRT_LE_GT_CASE(asrtgt_d) |
2455 | #undef ASRT_LE_GT_CASE |
2456 | case Intrinsic::loongarch_ldpte_d: { |
2457 | unsigned Imm = Op.getConstantOperandVal(i: 3); |
2458 | return !Subtarget.is64Bit() |
2459 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG) |
2460 | : !isUInt<8>(x: Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2461 | : Op; |
2462 | } |
2463 | case Intrinsic::loongarch_lsx_vst: |
2464 | case Intrinsic::loongarch_lasx_xvst: |
2465 | return !isInt<12>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) |
2466 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2467 | : SDValue(); |
2468 | case Intrinsic::loongarch_lasx_xvstelm_b: |
2469 | return (!isInt<8>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
2470 | !isUInt<5>(x: Op.getConstantOperandVal(i: 5))) |
2471 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2472 | : SDValue(); |
2473 | case Intrinsic::loongarch_lsx_vstelm_b: |
2474 | return (!isInt<8>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
2475 | !isUInt<4>(x: Op.getConstantOperandVal(i: 5))) |
2476 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
2477 | : SDValue(); |
2478 | case Intrinsic::loongarch_lasx_xvstelm_h: |
2479 | return (!isShiftedInt<8, 1>( |
2480 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
2481 | !isUInt<4>(x: Op.getConstantOperandVal(i: 5))) |
2482 | ? emitIntrinsicErrorMessage( |
2483 | Op, ErrorMsg: "argument out of range or not a multiple of 2" , DAG) |
2484 | : SDValue(); |
2485 | case Intrinsic::loongarch_lsx_vstelm_h: |
2486 | return (!isShiftedInt<8, 1>( |
2487 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
2488 | !isUInt<3>(x: Op.getConstantOperandVal(i: 5))) |
2489 | ? emitIntrinsicErrorMessage( |
2490 | Op, ErrorMsg: "argument out of range or not a multiple of 2" , DAG) |
2491 | : SDValue(); |
2492 | case Intrinsic::loongarch_lasx_xvstelm_w: |
2493 | return (!isShiftedInt<8, 2>( |
2494 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
2495 | !isUInt<3>(x: Op.getConstantOperandVal(i: 5))) |
2496 | ? emitIntrinsicErrorMessage( |
2497 | Op, ErrorMsg: "argument out of range or not a multiple of 4" , DAG) |
2498 | : SDValue(); |
2499 | case Intrinsic::loongarch_lsx_vstelm_w: |
2500 | return (!isShiftedInt<8, 2>( |
2501 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
2502 | !isUInt<2>(x: Op.getConstantOperandVal(i: 5))) |
2503 | ? emitIntrinsicErrorMessage( |
2504 | Op, ErrorMsg: "argument out of range or not a multiple of 4" , DAG) |
2505 | : SDValue(); |
2506 | case Intrinsic::loongarch_lasx_xvstelm_d: |
2507 | return (!isShiftedInt<8, 3>( |
2508 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
2509 | !isUInt<2>(x: Op.getConstantOperandVal(i: 5))) |
2510 | ? emitIntrinsicErrorMessage( |
2511 | Op, ErrorMsg: "argument out of range or not a multiple of 8" , DAG) |
2512 | : SDValue(); |
2513 | case Intrinsic::loongarch_lsx_vstelm_d: |
2514 | return (!isShiftedInt<8, 3>( |
2515 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
2516 | !isUInt<1>(x: Op.getConstantOperandVal(i: 5))) |
2517 | ? emitIntrinsicErrorMessage( |
2518 | Op, ErrorMsg: "argument out of range or not a multiple of 8" , DAG) |
2519 | : SDValue(); |
2520 | } |
2521 | } |
2522 | |
2523 | SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op, |
2524 | SelectionDAG &DAG) const { |
2525 | SDLoc DL(Op); |
2526 | SDValue Lo = Op.getOperand(i: 0); |
2527 | SDValue Hi = Op.getOperand(i: 1); |
2528 | SDValue Shamt = Op.getOperand(i: 2); |
2529 | EVT VT = Lo.getValueType(); |
2530 | |
2531 | // if Shamt-GRLen < 0: // Shamt < GRLen |
2532 | // Lo = Lo << Shamt |
2533 | // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt)) |
2534 | // else: |
2535 | // Lo = 0 |
2536 | // Hi = Lo << (Shamt-GRLen) |
2537 | |
2538 | SDValue Zero = DAG.getConstant(Val: 0, DL, VT); |
2539 | SDValue One = DAG.getConstant(Val: 1, DL, VT); |
2540 | SDValue MinusGRLen = DAG.getConstant(Val: -(int)Subtarget.getGRLen(), DL, VT); |
2541 | SDValue GRLenMinus1 = DAG.getConstant(Val: Subtarget.getGRLen() - 1, DL, VT); |
2542 | SDValue ShamtMinusGRLen = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shamt, N2: MinusGRLen); |
2543 | SDValue GRLenMinus1Shamt = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Shamt, N2: GRLenMinus1); |
2544 | |
2545 | SDValue LoTrue = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Lo, N2: Shamt); |
2546 | SDValue ShiftRight1Lo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo, N2: One); |
2547 | SDValue ShiftRightLo = |
2548 | DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: ShiftRight1Lo, N2: GRLenMinus1Shamt); |
2549 | SDValue ShiftLeftHi = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi, N2: Shamt); |
2550 | SDValue HiTrue = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShiftLeftHi, N2: ShiftRightLo); |
2551 | SDValue HiFalse = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Lo, N2: ShamtMinusGRLen); |
2552 | |
2553 | SDValue CC = DAG.getSetCC(DL, VT, LHS: ShamtMinusGRLen, RHS: Zero, Cond: ISD::SETLT); |
2554 | |
2555 | Lo = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: LoTrue, N3: Zero); |
2556 | Hi = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: HiTrue, N3: HiFalse); |
2557 | |
2558 | SDValue Parts[2] = {Lo, Hi}; |
2559 | return DAG.getMergeValues(Ops: Parts, dl: DL); |
2560 | } |
2561 | |
2562 | SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op, |
2563 | SelectionDAG &DAG, |
2564 | bool IsSRA) const { |
2565 | SDLoc DL(Op); |
2566 | SDValue Lo = Op.getOperand(i: 0); |
2567 | SDValue Hi = Op.getOperand(i: 1); |
2568 | SDValue Shamt = Op.getOperand(i: 2); |
2569 | EVT VT = Lo.getValueType(); |
2570 | |
2571 | // SRA expansion: |
2572 | // if Shamt-GRLen < 0: // Shamt < GRLen |
2573 | // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1)) |
2574 | // Hi = Hi >>s Shamt |
2575 | // else: |
2576 | // Lo = Hi >>s (Shamt-GRLen); |
2577 | // Hi = Hi >>s (GRLen-1) |
2578 | // |
2579 | // SRL expansion: |
2580 | // if Shamt-GRLen < 0: // Shamt < GRLen |
2581 | // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1)) |
2582 | // Hi = Hi >>u Shamt |
2583 | // else: |
2584 | // Lo = Hi >>u (Shamt-GRLen); |
2585 | // Hi = 0; |
2586 | |
2587 | unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL; |
2588 | |
2589 | SDValue Zero = DAG.getConstant(Val: 0, DL, VT); |
2590 | SDValue One = DAG.getConstant(Val: 1, DL, VT); |
2591 | SDValue MinusGRLen = DAG.getConstant(Val: -(int)Subtarget.getGRLen(), DL, VT); |
2592 | SDValue GRLenMinus1 = DAG.getConstant(Val: Subtarget.getGRLen() - 1, DL, VT); |
2593 | SDValue ShamtMinusGRLen = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shamt, N2: MinusGRLen); |
2594 | SDValue GRLenMinus1Shamt = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Shamt, N2: GRLenMinus1); |
2595 | |
2596 | SDValue ShiftRightLo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo, N2: Shamt); |
2597 | SDValue ShiftLeftHi1 = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi, N2: One); |
2598 | SDValue ShiftLeftHi = |
2599 | DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: ShiftLeftHi1, N2: GRLenMinus1Shamt); |
2600 | SDValue LoTrue = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShiftRightLo, N2: ShiftLeftHi); |
2601 | SDValue HiTrue = DAG.getNode(Opcode: ShiftRightOp, DL, VT, N1: Hi, N2: Shamt); |
2602 | SDValue LoFalse = DAG.getNode(Opcode: ShiftRightOp, DL, VT, N1: Hi, N2: ShamtMinusGRLen); |
2603 | SDValue HiFalse = |
2604 | IsSRA ? DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: Hi, N2: GRLenMinus1) : Zero; |
2605 | |
2606 | SDValue CC = DAG.getSetCC(DL, VT, LHS: ShamtMinusGRLen, RHS: Zero, Cond: ISD::SETLT); |
2607 | |
2608 | Lo = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: LoTrue, N3: LoFalse); |
2609 | Hi = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: HiTrue, N3: HiFalse); |
2610 | |
2611 | SDValue Parts[2] = {Lo, Hi}; |
2612 | return DAG.getMergeValues(Ops: Parts, dl: DL); |
2613 | } |
2614 | |
2615 | // Returns the opcode of the target-specific SDNode that implements the 32-bit |
2616 | // form of the given Opcode. |
2617 | static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) { |
2618 | switch (Opcode) { |
2619 | default: |
2620 | llvm_unreachable("Unexpected opcode" ); |
2621 | case ISD::UDIV: |
2622 | return LoongArchISD::DIV_WU; |
2623 | case ISD::UREM: |
2624 | return LoongArchISD::MOD_WU; |
2625 | case ISD::SHL: |
2626 | return LoongArchISD::SLL_W; |
2627 | case ISD::SRA: |
2628 | return LoongArchISD::SRA_W; |
2629 | case ISD::SRL: |
2630 | return LoongArchISD::SRL_W; |
2631 | case ISD::ROTL: |
2632 | case ISD::ROTR: |
2633 | return LoongArchISD::ROTR_W; |
2634 | case ISD::CTTZ: |
2635 | return LoongArchISD::CTZ_W; |
2636 | case ISD::CTLZ: |
2637 | return LoongArchISD::CLZ_W; |
2638 | } |
2639 | } |
2640 | |
2641 | // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG |
2642 | // node. Because i8/i16/i32 isn't a legal type for LA64, these operations would |
2643 | // otherwise be promoted to i64, making it difficult to select the |
// SLL_W/.../*W later on because the fact that the operation was originally of
2645 | // type i8/i16/i32 is lost. |
2646 | static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, |
2647 | unsigned ExtOpc = ISD::ANY_EXTEND) { |
2648 | SDLoc DL(N); |
2649 | LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(Opcode: N->getOpcode()); |
2650 | SDValue NewOp0, NewRes; |
2651 | |
2652 | switch (NumOp) { |
2653 | default: |
2654 | llvm_unreachable("Unexpected NumOp" ); |
2655 | case 1: { |
2656 | NewOp0 = DAG.getNode(Opcode: ExtOpc, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0)); |
2657 | NewRes = DAG.getNode(Opcode: WOpcode, DL, VT: MVT::i64, Operand: NewOp0); |
2658 | break; |
2659 | } |
2660 | case 2: { |
2661 | NewOp0 = DAG.getNode(Opcode: ExtOpc, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0)); |
2662 | SDValue NewOp1 = DAG.getNode(Opcode: ExtOpc, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1)); |
2663 | if (N->getOpcode() == ISD::ROTL) { |
2664 | SDValue TmpOp = DAG.getConstant(Val: 32, DL, VT: MVT::i64); |
2665 | NewOp1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: MVT::i64, N1: TmpOp, N2: NewOp1); |
2666 | } |
2667 | NewRes = DAG.getNode(Opcode: WOpcode, DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1); |
2668 | break; |
2669 | } |
  // TODO: Handle more NumOp.
2671 | } |
2672 | |
2673 | // ReplaceNodeResults requires we maintain the same type for the return |
2674 | // value. |
2675 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: NewRes); |
2676 | } |
2677 | |
// Converts the given 32-bit operation to an i64 operation with sign-extension
// semantics in order to reduce the number of sign-extension instructions.
2680 | static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) { |
2681 | SDLoc DL(N); |
2682 | SDValue NewOp0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0)); |
2683 | SDValue NewOp1 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1)); |
2684 | SDValue NewWOp = DAG.getNode(Opcode: N->getOpcode(), DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1); |
2685 | SDValue NewRes = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: MVT::i64, N1: NewWOp, |
2686 | N2: DAG.getValueType(MVT::i32)); |
2687 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: NewRes); |
2688 | } |
2689 | |
// Helper function that emits an error message for intrinsics with or without a
// chain and returns UNDEF (plus the incoming chain, if any) as the results.
2692 | static void emitErrorAndReplaceIntrinsicResults( |
2693 | SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG, |
2694 | StringRef ErrorMsg, bool WithChain = true) { |
2695 | DAG.getContext()->emitError(ErrorStr: N->getOperationName(G: 0) + ": " + ErrorMsg + "." ); |
2696 | Results.push_back(Elt: DAG.getUNDEF(VT: N->getValueType(ResNo: 0))); |
2697 | if (!WithChain) |
2698 | return; |
2699 | Results.push_back(Elt: N->getOperand(Num: 0)); |
2700 | } |
2701 | |
2702 | template <unsigned N> |
2703 | static void |
2704 | replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results, |
2705 | SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, |
2706 | unsigned ResOp) { |
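  // Extract element 'Imm' from the vector operand with a VPICK_{SEXT,ZEXT}_ELT
  // node (after checking that Imm fits in uimm<N>) and truncate the picked
  // GRLen-sized value back to the intrinsic's original result type.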
2707 | const StringRef ErrorMsgOOR = "argument out of range" ; |
2708 | unsigned Imm = Node->getConstantOperandVal(Num: 2); |
2709 | if (!isUInt<N>(Imm)) { |
2710 | emitErrorAndReplaceIntrinsicResults(N: Node, Results, DAG, ErrorMsg: ErrorMsgOOR, |
2711 | /*WithChain=*/false); |
2712 | return; |
2713 | } |
2714 | SDLoc DL(Node); |
2715 | SDValue Vec = Node->getOperand(Num: 1); |
2716 | |
2717 | SDValue PickElt = |
2718 | DAG.getNode(Opcode: ResOp, DL, VT: Subtarget.getGRLenVT(), N1: Vec, |
2719 | N2: DAG.getConstant(Val: Imm, DL, VT: Subtarget.getGRLenVT()), |
2720 | N3: DAG.getValueType(Vec.getValueType().getVectorElementType())); |
2721 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: Node->getValueType(ResNo: 0), |
2722 | Operand: PickElt.getValue(R: 0))); |
2723 | } |
2724 | |
2725 | static void replaceVecCondBranchResults(SDNode *N, |
2726 | SmallVectorImpl<SDValue> &Results, |
2727 | SelectionDAG &DAG, |
2728 | const LoongArchSubtarget &Subtarget, |
2729 | unsigned ResOp) { |
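  // Lower an LSX/LASX vector branch-condition intrinsic (bz/bnz family) to the
  // corresponding VALL_*/VANY_* node and truncate the GRLen-sized result back
  // to the intrinsic's original result type.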
2730 | SDLoc DL(N); |
2731 | SDValue Vec = N->getOperand(Num: 1); |
2732 | |
2733 | SDValue CB = DAG.getNode(Opcode: ResOp, DL, VT: Subtarget.getGRLenVT(), Operand: Vec); |
2734 | Results.push_back( |
2735 | Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: CB.getValue(R: 0))); |
2736 | } |
2737 | |
2738 | static void |
2739 | replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results, |
2740 | SelectionDAG &DAG, |
2741 | const LoongArchSubtarget &Subtarget) { |
2742 | switch (N->getConstantOperandVal(Num: 0)) { |
2743 | default: |
2744 | llvm_unreachable("Unexpected Intrinsic." ); |
2745 | case Intrinsic::loongarch_lsx_vpickve2gr_b: |
2746 | replaceVPICKVE2GRResults<4>(Node: N, Results, DAG, Subtarget, |
2747 | ResOp: LoongArchISD::VPICK_SEXT_ELT); |
2748 | break; |
2749 | case Intrinsic::loongarch_lsx_vpickve2gr_h: |
2750 | case Intrinsic::loongarch_lasx_xvpickve2gr_w: |
2751 | replaceVPICKVE2GRResults<3>(Node: N, Results, DAG, Subtarget, |
2752 | ResOp: LoongArchISD::VPICK_SEXT_ELT); |
2753 | break; |
2754 | case Intrinsic::loongarch_lsx_vpickve2gr_w: |
2755 | replaceVPICKVE2GRResults<2>(Node: N, Results, DAG, Subtarget, |
2756 | ResOp: LoongArchISD::VPICK_SEXT_ELT); |
2757 | break; |
2758 | case Intrinsic::loongarch_lsx_vpickve2gr_bu: |
2759 | replaceVPICKVE2GRResults<4>(Node: N, Results, DAG, Subtarget, |
2760 | ResOp: LoongArchISD::VPICK_ZEXT_ELT); |
2761 | break; |
2762 | case Intrinsic::loongarch_lsx_vpickve2gr_hu: |
2763 | case Intrinsic::loongarch_lasx_xvpickve2gr_wu: |
2764 | replaceVPICKVE2GRResults<3>(Node: N, Results, DAG, Subtarget, |
2765 | ResOp: LoongArchISD::VPICK_ZEXT_ELT); |
2766 | break; |
2767 | case Intrinsic::loongarch_lsx_vpickve2gr_wu: |
2768 | replaceVPICKVE2GRResults<2>(Node: N, Results, DAG, Subtarget, |
2769 | ResOp: LoongArchISD::VPICK_ZEXT_ELT); |
2770 | break; |
2771 | case Intrinsic::loongarch_lsx_bz_b: |
2772 | case Intrinsic::loongarch_lsx_bz_h: |
2773 | case Intrinsic::loongarch_lsx_bz_w: |
2774 | case Intrinsic::loongarch_lsx_bz_d: |
2775 | case Intrinsic::loongarch_lasx_xbz_b: |
2776 | case Intrinsic::loongarch_lasx_xbz_h: |
2777 | case Intrinsic::loongarch_lasx_xbz_w: |
2778 | case Intrinsic::loongarch_lasx_xbz_d: |
2779 | replaceVecCondBranchResults(N, Results, DAG, Subtarget, |
2780 | ResOp: LoongArchISD::VALL_ZERO); |
2781 | break; |
2782 | case Intrinsic::loongarch_lsx_bz_v: |
2783 | case Intrinsic::loongarch_lasx_xbz_v: |
2784 | replaceVecCondBranchResults(N, Results, DAG, Subtarget, |
2785 | ResOp: LoongArchISD::VANY_ZERO); |
2786 | break; |
2787 | case Intrinsic::loongarch_lsx_bnz_b: |
2788 | case Intrinsic::loongarch_lsx_bnz_h: |
2789 | case Intrinsic::loongarch_lsx_bnz_w: |
2790 | case Intrinsic::loongarch_lsx_bnz_d: |
2791 | case Intrinsic::loongarch_lasx_xbnz_b: |
2792 | case Intrinsic::loongarch_lasx_xbnz_h: |
2793 | case Intrinsic::loongarch_lasx_xbnz_w: |
2794 | case Intrinsic::loongarch_lasx_xbnz_d: |
2795 | replaceVecCondBranchResults(N, Results, DAG, Subtarget, |
2796 | ResOp: LoongArchISD::VALL_NONZERO); |
2797 | break; |
2798 | case Intrinsic::loongarch_lsx_bnz_v: |
2799 | case Intrinsic::loongarch_lasx_xbnz_v: |
2800 | replaceVecCondBranchResults(N, Results, DAG, Subtarget, |
2801 | ResOp: LoongArchISD::VANY_NONZERO); |
2802 | break; |
2803 | } |
2804 | } |
2805 | |
2806 | void LoongArchTargetLowering::ReplaceNodeResults( |
2807 | SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { |
2808 | SDLoc DL(N); |
2809 | EVT VT = N->getValueType(ResNo: 0); |
2810 | switch (N->getOpcode()) { |
2811 | default: |
2812 | llvm_unreachable("Don't know how to legalize this operation" ); |
2813 | case ISD::ADD: |
2814 | case ISD::SUB: |
2815 | assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && |
2816 | "Unexpected custom legalisation" ); |
2817 | Results.push_back(Elt: customLegalizeToWOpWithSExt(N, DAG)); |
2818 | break; |
2819 | case ISD::UDIV: |
2820 | case ISD::UREM: |
2821 | assert(VT == MVT::i32 && Subtarget.is64Bit() && |
2822 | "Unexpected custom legalisation" ); |
2823 | Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 2, ExtOpc: ISD::SIGN_EXTEND)); |
2824 | break; |
2825 | case ISD::SHL: |
2826 | case ISD::SRA: |
2827 | case ISD::SRL: |
2828 | assert(VT == MVT::i32 && Subtarget.is64Bit() && |
2829 | "Unexpected custom legalisation" ); |
2830 | if (N->getOperand(Num: 1).getOpcode() != ISD::Constant) { |
2831 | Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 2)); |
2832 | break; |
2833 | } |
2834 | break; |
2835 | case ISD::ROTL: |
2836 | case ISD::ROTR: |
2837 | assert(VT == MVT::i32 && Subtarget.is64Bit() && |
2838 | "Unexpected custom legalisation" ); |
2839 | Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 2)); |
2840 | break; |
2841 | case ISD::FP_TO_SINT: { |
2842 | assert(VT == MVT::i32 && Subtarget.is64Bit() && |
2843 | "Unexpected custom legalisation" ); |
2844 | SDValue Src = N->getOperand(Num: 0); |
2845 | EVT FVT = EVT::getFloatingPointVT(BitWidth: N->getValueSizeInBits(ResNo: 0)); |
2846 | if (getTypeAction(Context&: *DAG.getContext(), VT: Src.getValueType()) != |
2847 | TargetLowering::TypeSoftenFloat) { |
2848 | SDValue Dst = DAG.getNode(Opcode: LoongArchISD::FTINT, DL, VT: FVT, Operand: Src); |
2849 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Dst)); |
2850 | return; |
2851 | } |
2852 | // If the FP type needs to be softened, emit a library call using the 'si' |
2853 | // version. If we left it to default legalization we'd end up with 'di'. |
2854 | RTLIB::Libcall LC; |
2855 | LC = RTLIB::getFPTOSINT(OpVT: Src.getValueType(), RetVT: VT); |
2856 | MakeLibCallOptions CallOptions; |
2857 | EVT OpVT = Src.getValueType(); |
2858 | CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT: VT, Value: true); |
2859 | SDValue Chain = SDValue(); |
2860 | SDValue Result; |
2861 | std::tie(args&: Result, args&: Chain) = |
2862 | makeLibCall(DAG, LC, RetVT: VT, Ops: Src, CallOptions, dl: DL, Chain); |
2863 | Results.push_back(Elt: Result); |
2864 | break; |
2865 | } |
2866 | case ISD::BITCAST: { |
2867 | SDValue Src = N->getOperand(Num: 0); |
2868 | EVT SrcVT = Src.getValueType(); |
2869 | if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() && |
2870 | Subtarget.hasBasicF()) { |
2871 | SDValue Dst = |
2872 | DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Src); |
2873 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Dst)); |
2874 | } |
2875 | break; |
2876 | } |
2877 | case ISD::FP_TO_UINT: { |
2878 | assert(VT == MVT::i32 && Subtarget.is64Bit() && |
2879 | "Unexpected custom legalisation" ); |
2880 | auto &TLI = DAG.getTargetLoweringInfo(); |
2881 | SDValue Tmp1, Tmp2; |
2882 | TLI.expandFP_TO_UINT(N, Result&: Tmp1, Chain&: Tmp2, DAG); |
2883 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Tmp1)); |
2884 | break; |
2885 | } |
2886 | case ISD::BSWAP: { |
2887 | SDValue Src = N->getOperand(Num: 0); |
2888 | assert((VT == MVT::i16 || VT == MVT::i32) && |
2889 | "Unexpected custom legalization" ); |
2890 | MVT GRLenVT = Subtarget.getGRLenVT(); |
2891 | SDValue NewSrc = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: GRLenVT, Operand: Src); |
2892 | SDValue Tmp; |
2893 | switch (VT.getSizeInBits()) { |
2894 | default: |
2895 | llvm_unreachable("Unexpected operand width" ); |
2896 | case 16: |
2897 | Tmp = DAG.getNode(Opcode: LoongArchISD::REVB_2H, DL, VT: GRLenVT, Operand: NewSrc); |
2898 | break; |
2899 | case 32: |
      // Only LA64 will get here due to the size mismatch between VT and
      // GRLenVT; the LA32 lowering is defined directly in LoongArchInstrInfo.
2902 | Tmp = DAG.getNode(Opcode: LoongArchISD::REVB_2W, DL, VT: GRLenVT, Operand: NewSrc); |
2903 | break; |
2904 | } |
2905 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Tmp)); |
2906 | break; |
2907 | } |
2908 | case ISD::BITREVERSE: { |
2909 | SDValue Src = N->getOperand(Num: 0); |
2910 | assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) && |
2911 | "Unexpected custom legalization" ); |
2912 | MVT GRLenVT = Subtarget.getGRLenVT(); |
2913 | SDValue NewSrc = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: GRLenVT, Operand: Src); |
2914 | SDValue Tmp; |
2915 | switch (VT.getSizeInBits()) { |
2916 | default: |
2917 | llvm_unreachable("Unexpected operand width" ); |
2918 | case 8: |
2919 | Tmp = DAG.getNode(Opcode: LoongArchISD::BITREV_4B, DL, VT: GRLenVT, Operand: NewSrc); |
2920 | break; |
2921 | case 32: |
2922 | Tmp = DAG.getNode(Opcode: LoongArchISD::BITREV_W, DL, VT: GRLenVT, Operand: NewSrc); |
2923 | break; |
2924 | } |
2925 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Tmp)); |
2926 | break; |
2927 | } |
2928 | case ISD::CTLZ: |
2929 | case ISD::CTTZ: { |
2930 | assert(VT == MVT::i32 && Subtarget.is64Bit() && |
2931 | "Unexpected custom legalisation" ); |
2932 | Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 1)); |
2933 | break; |
2934 | } |
2935 | case ISD::INTRINSIC_W_CHAIN: { |
2936 | SDValue Chain = N->getOperand(Num: 0); |
2937 | SDValue Op2 = N->getOperand(Num: 2); |
2938 | MVT GRLenVT = Subtarget.getGRLenVT(); |
2939 | const StringRef ErrorMsgOOR = "argument out of range" ; |
2940 | const StringRef ErrorMsgReqLA64 = "requires loongarch64" ; |
2941 | const StringRef ErrorMsgReqF = "requires basic 'f' target feature" ; |
2942 | |
2943 | switch (N->getConstantOperandVal(Num: 1)) { |
2944 | default: |
2945 | llvm_unreachable("Unexpected Intrinsic." ); |
2946 | case Intrinsic::loongarch_movfcsr2gr: { |
2947 | if (!Subtarget.hasBasicF()) { |
2948 | emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgReqF); |
2949 | return; |
2950 | } |
2951 | unsigned Imm = Op2->getAsZExtVal(); |
2952 | if (!isUInt<2>(x: Imm)) { |
2953 | emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR); |
2954 | return; |
2955 | } |
2956 | SDValue MOVFCSR2GRResults = DAG.getNode( |
2957 | Opcode: LoongArchISD::MOVFCSR2GR, DL: SDLoc(N), ResultTys: {MVT::i64, MVT::Other}, |
2958 | Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
2959 | Results.push_back( |
2960 | Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: MOVFCSR2GRResults.getValue(R: 0))); |
2961 | Results.push_back(Elt: MOVFCSR2GRResults.getValue(R: 1)); |
2962 | break; |
2963 | } |
2964 | #define CRC_CASE_EXT_BINARYOP(NAME, NODE) \ |
2965 | case Intrinsic::loongarch_##NAME: { \ |
2966 | SDValue NODE = DAG.getNode( \ |
2967 | LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \ |
2968 | {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \ |
2969 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \ |
2970 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \ |
2971 | Results.push_back(NODE.getValue(1)); \ |
2972 | break; \ |
2973 | } |
2974 | CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W) |
2975 | CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W) |
2976 | CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W) |
2977 | CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W) |
2978 | CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W) |
2979 | CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W) |
2980 | #undef CRC_CASE_EXT_BINARYOP |
2981 | |
2982 | #define CRC_CASE_EXT_UNARYOP(NAME, NODE) \ |
2983 | case Intrinsic::loongarch_##NAME: { \ |
2984 | SDValue NODE = DAG.getNode( \ |
2985 | LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \ |
2986 | {Chain, Op2, \ |
2987 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \ |
2988 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \ |
2989 | Results.push_back(NODE.getValue(1)); \ |
2990 | break; \ |
2991 | } |
2992 | CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W) |
2993 | CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W) |
2994 | #undef CRC_CASE_EXT_UNARYOP |
2995 | #define CSR_CASE(ID) \ |
2996 | case Intrinsic::loongarch_##ID: { \ |
2997 | if (!Subtarget.is64Bit()) \ |
2998 | emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \ |
2999 | break; \ |
3000 | } |
3001 | CSR_CASE(csrrd_d); |
3002 | CSR_CASE(csrwr_d); |
3003 | CSR_CASE(csrxchg_d); |
3004 | CSR_CASE(iocsrrd_d); |
3005 | #undef CSR_CASE |
3006 | case Intrinsic::loongarch_csrrd_w: { |
3007 | unsigned Imm = Op2->getAsZExtVal(); |
3008 | if (!isUInt<14>(x: Imm)) { |
3009 | emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR); |
3010 | return; |
3011 | } |
3012 | SDValue CSRRDResults = |
3013 | DAG.getNode(Opcode: LoongArchISD::CSRRD, DL, ResultTys: {GRLenVT, MVT::Other}, |
3014 | Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
3015 | Results.push_back( |
3016 | Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRRDResults.getValue(R: 0))); |
3017 | Results.push_back(Elt: CSRRDResults.getValue(R: 1)); |
3018 | break; |
3019 | } |
3020 | case Intrinsic::loongarch_csrwr_w: { |
3021 | unsigned Imm = N->getConstantOperandVal(Num: 3); |
3022 | if (!isUInt<14>(x: Imm)) { |
3023 | emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR); |
3024 | return; |
3025 | } |
3026 | SDValue CSRWRResults = |
3027 | DAG.getNode(Opcode: LoongArchISD::CSRWR, DL, ResultTys: {GRLenVT, MVT::Other}, |
3028 | Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op2), |
3029 | DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
3030 | Results.push_back( |
3031 | Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRWRResults.getValue(R: 0))); |
3032 | Results.push_back(Elt: CSRWRResults.getValue(R: 1)); |
3033 | break; |
3034 | } |
3035 | case Intrinsic::loongarch_csrxchg_w: { |
3036 | unsigned Imm = N->getConstantOperandVal(Num: 4); |
3037 | if (!isUInt<14>(x: Imm)) { |
3038 | emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR); |
3039 | return; |
3040 | } |
3041 | SDValue CSRXCHGResults = DAG.getNode( |
3042 | Opcode: LoongArchISD::CSRXCHG, DL, ResultTys: {GRLenVT, MVT::Other}, |
3043 | Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op2), |
3044 | DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 3)), |
3045 | DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
3046 | Results.push_back( |
3047 | Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRXCHGResults.getValue(R: 0))); |
3048 | Results.push_back(Elt: CSRXCHGResults.getValue(R: 1)); |
3049 | break; |
3050 | } |
3051 | #define IOCSRRD_CASE(NAME, NODE) \ |
3052 | case Intrinsic::loongarch_##NAME: { \ |
3053 | SDValue IOCSRRDResults = \ |
3054 | DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \ |
3055 | {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \ |
3056 | Results.push_back( \ |
3057 | DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \ |
3058 | Results.push_back(IOCSRRDResults.getValue(1)); \ |
3059 | break; \ |
3060 | } |
3061 | IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B); |
3062 | IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H); |
3063 | IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W); |
3064 | #undef IOCSRRD_CASE |
3065 | case Intrinsic::loongarch_cpucfg: { |
3066 | SDValue CPUCFGResults = |
3067 | DAG.getNode(Opcode: LoongArchISD::CPUCFG, DL, ResultTys: {GRLenVT, MVT::Other}, |
3068 | Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op2)}); |
3069 | Results.push_back( |
3070 | Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CPUCFGResults.getValue(R: 0))); |
3071 | Results.push_back(Elt: CPUCFGResults.getValue(R: 1)); |
3072 | break; |
3073 | } |
3074 | case Intrinsic::loongarch_lddir_d: { |
3075 | if (!Subtarget.is64Bit()) { |
3076 | emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgReqLA64); |
3077 | return; |
3078 | } |
3079 | break; |
3080 | } |
3081 | } |
3082 | break; |
3083 | } |
3084 | case ISD::READ_REGISTER: { |
3085 | if (Subtarget.is64Bit()) |
3086 | DAG.getContext()->emitError( |
3087 | ErrorStr: "On LA64, only 64-bit registers can be read." ); |
3088 | else |
3089 | DAG.getContext()->emitError( |
3090 | ErrorStr: "On LA32, only 32-bit registers can be read." ); |
3091 | Results.push_back(Elt: DAG.getUNDEF(VT)); |
3092 | Results.push_back(Elt: N->getOperand(Num: 0)); |
3093 | break; |
3094 | } |
3095 | case ISD::INTRINSIC_WO_CHAIN: { |
3096 | replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget); |
3097 | break; |
3098 | } |
3099 | } |
3100 | } |
3101 | |
3102 | static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, |
3103 | TargetLowering::DAGCombinerInfo &DCI, |
3104 | const LoongArchSubtarget &Subtarget) { |
3105 | if (DCI.isBeforeLegalizeOps()) |
3106 | return SDValue(); |
3107 | |
3108 | SDValue FirstOperand = N->getOperand(Num: 0); |
3109 | SDValue SecondOperand = N->getOperand(Num: 1); |
3110 | unsigned FirstOperandOpc = FirstOperand.getOpcode(); |
3111 | EVT ValTy = N->getValueType(ResNo: 0); |
3112 | SDLoc DL(N); |
3113 | uint64_t lsb, msb; |
3114 | unsigned SMIdx, SMLen; |
3115 | ConstantSDNode *CN; |
3116 | SDValue NewOperand; |
3117 | MVT GRLenVT = Subtarget.getGRLenVT(); |
3118 | |
3119 | // Op's second operand must be a shifted mask. |
3120 | if (!(CN = dyn_cast<ConstantSDNode>(Val&: SecondOperand)) || |
3121 | !isShiftedMask_64(Value: CN->getZExtValue(), MaskIdx&: SMIdx, MaskLen&: SMLen)) |
3122 | return SDValue(); |
3123 | |
3124 | if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) { |
3125 | // Pattern match BSTRPICK. |
3126 | // $dst = and ((sra or srl) $src , lsb), (2**len - 1) |
3127 | // => BSTRPICK $dst, $src, msb, lsb |
3128 | // where msb = lsb + len - 1 |
3129 | |
3130 | // The second operand of the shift must be an immediate. |
3131 | if (!(CN = dyn_cast<ConstantSDNode>(Val: FirstOperand.getOperand(i: 1)))) |
3132 | return SDValue(); |
3133 | |
3134 | lsb = CN->getZExtValue(); |
3135 | |
3136 | // Return if the shifted mask does not start at bit 0 or the sum of its |
3137 | // length and lsb exceeds the word's size. |
3138 | if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits()) |
3139 | return SDValue(); |
3140 | |
3141 | NewOperand = FirstOperand.getOperand(i: 0); |
3142 | } else { |
3143 | // Pattern match BSTRPICK. |
3144 | // $dst = and $src, (2**len- 1) , if len > 12 |
3145 | // => BSTRPICK $dst, $src, msb, lsb |
3146 | // where lsb = 0 and msb = len - 1 |
3147 | |
3148 | // If the mask is <= 0xfff, andi can be used instead. |
3149 | if (CN->getZExtValue() <= 0xfff) |
3150 | return SDValue(); |
3151 | |
    // Return if the mask's MSB exceeds the word's size.
3153 | if (SMIdx + SMLen > ValTy.getSizeInBits()) |
3154 | return SDValue(); |
3155 | |
3156 | if (SMIdx > 0) { |
      // Omit if the constant has more than 2 uses. This is a conservative
      // decision. Whether it is a win depends on the HW microarchitecture.
      // However, it should always be better for 1 and 2 uses.
3160 | if (CN->use_size() > 2) |
3161 | return SDValue(); |
3162 | // Return if the constant can be composed by a single LU12I.W. |
3163 | if ((CN->getZExtValue() & 0xfff) == 0) |
3164 | return SDValue(); |
      // Return if the constant can be composed by a single ADDI with
      // the zero register.
3167 | if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0) |
3168 | return SDValue(); |
3169 | } |
3170 | |
3171 | lsb = SMIdx; |
3172 | NewOperand = FirstOperand; |
3173 | } |
3174 | |
3175 | msb = lsb + SMLen - 1; |
3176 | SDValue NR0 = DAG.getNode(Opcode: LoongArchISD::BSTRPICK, DL, VT: ValTy, N1: NewOperand, |
3177 | N2: DAG.getConstant(Val: msb, DL, VT: GRLenVT), |
3178 | N3: DAG.getConstant(Val: lsb, DL, VT: GRLenVT)); |
3179 | if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0) |
3180 | return NR0; |
3181 | // Try to optimize to |
3182 | // bstrpick $Rd, $Rs, msb, lsb |
3183 | // slli $Rd, $Rd, lsb |
3184 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: ValTy, N1: NR0, |
3185 | N2: DAG.getConstant(Val: lsb, DL, VT: GRLenVT)); |
3186 | } |
3187 | |
3188 | static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, |
3189 | TargetLowering::DAGCombinerInfo &DCI, |
3190 | const LoongArchSubtarget &Subtarget) { |
3191 | if (DCI.isBeforeLegalizeOps()) |
3192 | return SDValue(); |
3193 | |
3194 | // $dst = srl (and $src, Mask), Shamt |
3195 | // => |
3196 | // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt |
3197 | // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1 |
3198 | // |
3199 | |
3200 | SDValue FirstOperand = N->getOperand(Num: 0); |
3201 | ConstantSDNode *CN; |
3202 | EVT ValTy = N->getValueType(ResNo: 0); |
3203 | SDLoc DL(N); |
3204 | MVT GRLenVT = Subtarget.getGRLenVT(); |
3205 | unsigned MaskIdx, MaskLen; |
3206 | uint64_t Shamt; |
3207 | |
3208 | // The first operand must be an AND and the second operand of the AND must be |
3209 | // a shifted mask. |
3210 | if (FirstOperand.getOpcode() != ISD::AND || |
3211 | !(CN = dyn_cast<ConstantSDNode>(Val: FirstOperand.getOperand(i: 1))) || |
3212 | !isShiftedMask_64(Value: CN->getZExtValue(), MaskIdx, MaskLen)) |
3213 | return SDValue(); |
3214 | |
3215 | // The second operand (shift amount) must be an immediate. |
3216 | if (!(CN = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1)))) |
3217 | return SDValue(); |
3218 | |
3219 | Shamt = CN->getZExtValue(); |
3220 | if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1) |
3221 | return DAG.getNode(Opcode: LoongArchISD::BSTRPICK, DL, VT: ValTy, |
3222 | N1: FirstOperand->getOperand(Num: 0), |
3223 | N2: DAG.getConstant(Val: MaskIdx + MaskLen - 1, DL, VT: GRLenVT), |
3224 | N3: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT)); |
3225 | |
3226 | return SDValue(); |
3227 | } |
3228 | |
3229 | static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, |
3230 | TargetLowering::DAGCombinerInfo &DCI, |
3231 | const LoongArchSubtarget &Subtarget) { |
3232 | MVT GRLenVT = Subtarget.getGRLenVT(); |
3233 | EVT ValTy = N->getValueType(ResNo: 0); |
3234 | SDValue N0 = N->getOperand(Num: 0), N1 = N->getOperand(Num: 1); |
3235 | ConstantSDNode *CN0, *CN1; |
3236 | SDLoc DL(N); |
3237 | unsigned ValBits = ValTy.getSizeInBits(); |
3238 | unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1; |
3239 | unsigned Shamt; |
3240 | bool SwapAndRetried = false; |
3241 | |
3242 | if (DCI.isBeforeLegalizeOps()) |
3243 | return SDValue(); |
3244 | |
3245 | if (ValBits != 32 && ValBits != 64) |
3246 | return SDValue(); |
3247 | |
3248 | Retry: |
3249 | // 1st pattern to match BSTRINS: |
3250 | // R = or (and X, mask0), (and (shl Y, lsb), mask1) |
3251 | // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1 |
3252 | // => |
3253 | // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1) |
3254 | if (N0.getOpcode() == ISD::AND && |
3255 | (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) && |
3256 | isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) && |
3257 | N1.getOpcode() == ISD::AND && N1.getOperand(i: 0).getOpcode() == ISD::SHL && |
3258 | (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) && |
3259 | isShiftedMask_64(Value: CN1->getZExtValue(), MaskIdx&: MaskIdx1, MaskLen&: MaskLen1) && |
3260 | MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 && |
3261 | (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) && |
3262 | (Shamt = CN1->getZExtValue()) == MaskIdx0 && |
3263 | (MaskIdx0 + MaskLen0 <= ValBits)) { |
3264 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n" ); |
3265 | return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0), |
3266 | N2: N1.getOperand(i: 0).getOperand(i: 0), |
3267 | N3: DAG.getConstant(Val: (MaskIdx0 + MaskLen0 - 1), DL, VT: GRLenVT), |
3268 | N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT)); |
3269 | } |
3270 | |
3271 | // 2nd pattern to match BSTRINS: |
3272 | // R = or (and X, mask0), (shl (and Y, mask1), lsb) |
3273 | // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb) |
3274 | // => |
3275 | // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1) |
3276 | if (N0.getOpcode() == ISD::AND && |
3277 | (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) && |
3278 | isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) && |
3279 | N1.getOpcode() == ISD::SHL && N1.getOperand(i: 0).getOpcode() == ISD::AND && |
3280 | (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) && |
3281 | (Shamt = CN1->getZExtValue()) == MaskIdx0 && |
3282 | (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) && |
3283 | isShiftedMask_64(Value: CN1->getZExtValue(), MaskIdx&: MaskIdx1, MaskLen&: MaskLen1) && |
3284 | MaskLen0 == MaskLen1 && MaskIdx1 == 0 && |
3285 | (MaskIdx0 + MaskLen0 <= ValBits)) { |
3286 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n" ); |
3287 | return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0), |
3288 | N2: N1.getOperand(i: 0).getOperand(i: 0), |
3289 | N3: DAG.getConstant(Val: (MaskIdx0 + MaskLen0 - 1), DL, VT: GRLenVT), |
3290 | N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT)); |
3291 | } |
3292 | |
3293 | // 3rd pattern to match BSTRINS: |
3294 | // R = or (and X, mask0), (and Y, mask1) |
3295 | // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0 |
3296 | // => |
3297 | // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb |
3298 | // where msb = lsb + size - 1 |
3299 | if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND && |
3300 | (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) && |
3301 | isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) && |
3302 | (MaskIdx0 + MaskLen0 <= 64) && |
3303 | (CN1 = dyn_cast<ConstantSDNode>(Val: N1->getOperand(Num: 1))) && |
3304 | (CN1->getSExtValue() & CN0->getSExtValue()) == 0) { |
3305 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n" ); |
3306 | return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0), |
3307 | N2: DAG.getNode(Opcode: ISD::SRL, DL, VT: N1->getValueType(ResNo: 0), N1, |
3308 | N2: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT)), |
3309 | N3: DAG.getConstant(Val: ValBits == 32 |
3310 | ? (MaskIdx0 + (MaskLen0 & 31) - 1) |
3311 | : (MaskIdx0 + MaskLen0 - 1), |
3312 | DL, VT: GRLenVT), |
3313 | N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT)); |
3314 | } |
3315 | |
3316 | // 4th pattern to match BSTRINS: |
3317 | // R = or (and X, mask), (shl Y, shamt) |
3318 | // where mask = (2**shamt - 1) |
3319 | // => |
3320 | // R = BSTRINS X, Y, ValBits - 1, shamt |
3321 | // where ValBits = 32 or 64 |
3322 | if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL && |
3323 | (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) && |
3324 | isShiftedMask_64(Value: CN0->getZExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) && |
3325 | MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) && |
3326 | (Shamt = CN1->getZExtValue()) == MaskLen0 && |
3327 | (MaskIdx0 + MaskLen0 <= ValBits)) { |
3328 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n" ); |
3329 | return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0), |
3330 | N2: N1.getOperand(i: 0), |
3331 | N3: DAG.getConstant(Val: (ValBits - 1), DL, VT: GRLenVT), |
3332 | N4: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT)); |
3333 | } |
3334 | |
3335 | // 5th pattern to match BSTRINS: |
3336 | // R = or (and X, mask), const |
3337 | // where ~mask = (2**size - 1) << lsb, mask & const = 0 |
3338 | // => |
3339 | // R = BSTRINS X, (const >> lsb), msb, lsb |
3340 | // where msb = lsb + size - 1 |
3341 | if (N0.getOpcode() == ISD::AND && |
3342 | (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) && |
3343 | isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) && |
3344 | (CN1 = dyn_cast<ConstantSDNode>(Val&: N1)) && |
3345 | (CN1->getSExtValue() & CN0->getSExtValue()) == 0) { |
3346 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n" ); |
3347 | return DAG.getNode( |
3348 | Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0), |
3349 | N2: DAG.getConstant(Val: CN1->getSExtValue() >> MaskIdx0, DL, VT: ValTy), |
3350 | N3: DAG.getConstant(Val: ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1) |
3351 | : (MaskIdx0 + MaskLen0 - 1), |
3352 | DL, VT: GRLenVT), |
3353 | N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT)); |
3354 | } |
3355 | |
3356 | // 6th pattern. |
3357 | // a = b | ((c & mask) << shamt), where all positions in b to be overwritten |
3358 | // by the incoming bits are known to be zero. |
3359 | // => |
3360 | // a = BSTRINS b, c, shamt + MaskLen - 1, shamt |
3361 | // |
3362 | // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th |
3363 | // pattern is more common than the 1st. So we put the 1st before the 6th in |
3364 | // order to match as many nodes as possible. |
3365 | ConstantSDNode *CNMask, *CNShamt; |
3366 | unsigned MaskIdx, MaskLen; |
3367 | if (N1.getOpcode() == ISD::SHL && N1.getOperand(i: 0).getOpcode() == ISD::AND && |
3368 | (CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) && |
3369 | isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen) && |
3370 | MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) && |
3371 | CNShamt->getZExtValue() + MaskLen <= ValBits) { |
3372 | Shamt = CNShamt->getZExtValue(); |
3373 | APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt); |
3374 | if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) { |
3375 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n" ); |
3376 | return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0, |
3377 | N2: N1.getOperand(i: 0).getOperand(i: 0), |
3378 | N3: DAG.getConstant(Val: Shamt + MaskLen - 1, DL, VT: GRLenVT), |
3379 | N4: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT)); |
3380 | } |
3381 | } |
3382 | |
3383 | // 7th pattern. |
3384 | // a = b | ((c << shamt) & shifted_mask), where all positions in b to be |
3385 | // overwritten by the incoming bits are known to be zero. |
3386 | // => |
3387 | // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx |
3388 | // |
3389 | // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd |
3390 | // before the 7th in order to match as many nodes as possible. |
3391 | if (N1.getOpcode() == ISD::AND && |
3392 | (CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) && |
3393 | isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen) && |
3394 | N1.getOperand(i: 0).getOpcode() == ISD::SHL && |
3395 | (CNShamt = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) && |
3396 | CNShamt->getZExtValue() == MaskIdx) { |
3397 | APInt ShMask(ValBits, CNMask->getZExtValue()); |
3398 | if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) { |
3399 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n" ); |
3400 | return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0, |
3401 | N2: N1.getOperand(i: 0).getOperand(i: 0), |
3402 | N3: DAG.getConstant(Val: MaskIdx + MaskLen - 1, DL, VT: GRLenVT), |
3403 | N4: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT)); |
3404 | } |
3405 | } |
3406 | |
3407 | // (or a, b) and (or b, a) are equivalent, so swap the operands and retry. |
3408 | if (!SwapAndRetried) { |
3409 | std::swap(a&: N0, b&: N1); |
3410 | SwapAndRetried = true; |
3411 | goto Retry; |
3412 | } |
3413 | |
3414 | SwapAndRetried = false; |
3415 | Retry2: |
3416 | // 8th pattern. |
3417 | // a = b | (c & shifted_mask), where all positions in b to be overwritten by |
3418 | // the incoming bits are known to be zero. |
3419 | // => |
3420 | // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx |
3421 | // |
// Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
// we put it here in order to match as many nodes as possible or generate fewer
// instructions.
3425 | if (N1.getOpcode() == ISD::AND && |
3426 | (CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) && |
3427 | isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen)) { |
3428 | APInt ShMask(ValBits, CNMask->getZExtValue()); |
3429 | if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) { |
3430 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n" ); |
3431 | return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0, |
3432 | N2: DAG.getNode(Opcode: ISD::SRL, DL, VT: N1->getValueType(ResNo: 0), |
3433 | N1: N1->getOperand(Num: 0), |
3434 | N2: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT)), |
3435 | N3: DAG.getConstant(Val: MaskIdx + MaskLen - 1, DL, VT: GRLenVT), |
3436 | N4: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT)); |
3437 | } |
3438 | } |
3439 | // Swap N0/N1 and retry. |
3440 | if (!SwapAndRetried) { |
3441 | std::swap(a&: N0, b&: N1); |
3442 | SwapAndRetried = true; |
3443 | goto Retry2; |
3444 | } |
3445 | |
3446 | return SDValue(); |
3447 | } |
3448 | |
3449 | static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) { |
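  // Returns true if V is an i8/i16-sized load or an AssertSext/AssertZext of
  // an i8/i16 value, and reports the corresponding extension type via ExtType.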
3450 | ExtType = ISD::NON_EXTLOAD; |
3451 | |
3452 | switch (V.getNode()->getOpcode()) { |
3453 | case ISD::LOAD: { |
3454 | LoadSDNode *LoadNode = cast<LoadSDNode>(Val: V.getNode()); |
3455 | if ((LoadNode->getMemoryVT() == MVT::i8) || |
3456 | (LoadNode->getMemoryVT() == MVT::i16)) { |
3457 | ExtType = LoadNode->getExtensionType(); |
3458 | return true; |
3459 | } |
3460 | return false; |
3461 | } |
3462 | case ISD::AssertSext: { |
3463 | VTSDNode *TypeNode = cast<VTSDNode>(Val: V.getNode()->getOperand(Num: 1)); |
3464 | if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) { |
3465 | ExtType = ISD::SEXTLOAD; |
3466 | return true; |
3467 | } |
3468 | return false; |
3469 | } |
3470 | case ISD::AssertZext: { |
3471 | VTSDNode *TypeNode = cast<VTSDNode>(Val: V.getNode()->getOperand(Num: 1)); |
3472 | if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) { |
3473 | ExtType = ISD::ZEXTLOAD; |
3474 | return true; |
3475 | } |
3476 | return false; |
3477 | } |
3478 | default: |
3479 | return false; |
3480 | } |
3481 | |
3482 | return false; |
3483 | } |
3484 | |
3485 | // Eliminate redundant truncation and zero-extension nodes. |
3486 | // * Case 1: |
3487 | // +------------+ +------------+ +------------+ |
3488 | // | Input1 | | Input2 | | CC | |
3489 | // +------------+ +------------+ +------------+ |
3490 | // | | | |
3491 | // V V +----+ |
3492 | // +------------+ +------------+ | |
3493 | // | TRUNCATE | | TRUNCATE | | |
3494 | // +------------+ +------------+ | |
3495 | // | | | |
3496 | // V V | |
3497 | // +------------+ +------------+ | |
3498 | // | ZERO_EXT | | ZERO_EXT | | |
3499 | // +------------+ +------------+ | |
3500 | // | | | |
3501 | // | +-------------+ | |
3502 | // V V | | |
3503 | // +----------------+ | | |
3504 | // | AND | | | |
3505 | // +----------------+ | | |
3506 | // | | | |
3507 | // +---------------+ | | |
3508 | // | | | |
3509 | // V V V |
3510 | // +-------------+ |
3511 | // | CMP | |
3512 | // +-------------+ |
3513 | // * Case 2: |
3514 | // +------------+ +------------+ +-------------+ +------------+ +------------+ |
3515 | // | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC | |
3516 | // +------------+ +------------+ +-------------+ +------------+ +------------+ |
3517 | // | | | | | |
3518 | // V | | | | |
3519 | // +------------+ | | | | |
3520 | // | XOR |<---------------------+ | | |
3521 | // +------------+ | | | |
3522 | // | | | | |
3523 | // V V +---------------+ | |
3524 | // +------------+ +------------+ | | |
3525 | // | TRUNCATE | | TRUNCATE | | +-------------------------+ |
3526 | // +------------+ +------------+ | | |
3527 | // | | | | |
3528 | // V V | | |
3529 | // +------------+ +------------+ | | |
3530 | // | ZERO_EXT | | ZERO_EXT | | | |
3531 | // +------------+ +------------+ | | |
3532 | // | | | | |
3533 | // V V | | |
3534 | // +----------------+ | | |
3535 | // | AND | | | |
3536 | // +----------------+ | | |
3537 | // | | | |
3538 | // +---------------+ | | |
3539 | // | | | |
3540 | // V V V |
3541 | // +-------------+ |
3542 | // | CMP | |
3543 | // +-------------+ |
3544 | static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG, |
3545 | TargetLowering::DAGCombinerInfo &DCI, |
3546 | const LoongArchSubtarget &Subtarget) { |
3547 | ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get(); |
3548 | |
3549 | SDNode *AndNode = N->getOperand(Num: 0).getNode(); |
3550 | if (AndNode->getOpcode() != ISD::AND) |
3551 | return SDValue(); |
3552 | |
3553 | SDValue AndInputValue2 = AndNode->getOperand(Num: 1); |
3554 | if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND) |
3555 | return SDValue(); |
3556 | |
3557 | SDValue CmpInputValue = N->getOperand(Num: 1); |
3558 | SDValue AndInputValue1 = AndNode->getOperand(Num: 0); |
3559 | if (AndInputValue1.getOpcode() == ISD::XOR) { |
3560 | if (CC != ISD::SETEQ && CC != ISD::SETNE) |
3561 | return SDValue(); |
3562 | ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val: AndInputValue1.getOperand(i: 1)); |
3563 | if (!CN || CN->getSExtValue() != -1) |
3564 | return SDValue(); |
3565 | CN = dyn_cast<ConstantSDNode>(Val&: CmpInputValue); |
3566 | if (!CN || CN->getSExtValue() != 0) |
3567 | return SDValue(); |
3568 | AndInputValue1 = AndInputValue1.getOperand(i: 0); |
3569 | if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND) |
3570 | return SDValue(); |
3571 | } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) { |
3572 | if (AndInputValue2 != CmpInputValue) |
3573 | return SDValue(); |
3574 | } else { |
3575 | return SDValue(); |
3576 | } |
3577 | |
3578 | SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(Num: 0); |
3579 | if (TruncValue1.getOpcode() != ISD::TRUNCATE) |
3580 | return SDValue(); |
3581 | |
3582 | SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(Num: 0); |
3583 | if (TruncValue2.getOpcode() != ISD::TRUNCATE) |
3584 | return SDValue(); |
3585 | |
3586 | SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(Num: 0); |
3587 | SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(Num: 0); |
3588 | ISD::LoadExtType ExtType1; |
3589 | ISD::LoadExtType ExtType2; |
3590 | |
3591 | if (!checkValueWidth(V: TruncInputValue1, ExtType&: ExtType1) || |
3592 | !checkValueWidth(V: TruncInputValue2, ExtType&: ExtType2)) |
3593 | return SDValue(); |
3594 | |
3595 | if (TruncInputValue1->getValueType(ResNo: 0) != TruncInputValue2->getValueType(ResNo: 0) || |
3596 | AndNode->getValueType(ResNo: 0) != TruncInputValue1->getValueType(ResNo: 0)) |
3597 | return SDValue(); |
3598 | |
3599 | if ((ExtType2 != ISD::ZEXTLOAD) && |
3600 | ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD))) |
3601 | return SDValue(); |
3602 | |
  // These truncation and zero-extension nodes are not necessary; remove them.
3604 | SDValue NewAnd = DAG.getNode(Opcode: ISD::AND, DL: SDLoc(N), VT: AndNode->getValueType(ResNo: 0), |
3605 | N1: TruncInputValue1, N2: TruncInputValue2); |
3606 | SDValue NewSetCC = |
3607 | DAG.getSetCC(DL: SDLoc(N), VT: N->getValueType(ResNo: 0), LHS: NewAnd, RHS: TruncInputValue2, Cond: CC); |
3608 | DAG.ReplaceAllUsesWith(From: N, To: NewSetCC.getNode()); |
3609 | return SDValue(N, 0); |
3610 | } |
3611 | |
3612 | // Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b. |
3613 | static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, |
3614 | TargetLowering::DAGCombinerInfo &DCI, |
3615 | const LoongArchSubtarget &Subtarget) { |
3616 | if (DCI.isBeforeLegalizeOps()) |
3617 | return SDValue(); |
3618 | |
3619 | SDValue Src = N->getOperand(Num: 0); |
3620 | if (Src.getOpcode() != LoongArchISD::REVB_2W) |
3621 | return SDValue(); |
3622 | |
3623 | return DAG.getNode(Opcode: LoongArchISD::BITREV_4B, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), |
3624 | Operand: Src.getOperand(i: 0)); |
3625 | } |
3626 | |
3627 | template <unsigned N> |
3628 | static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, |
3629 | SelectionDAG &DAG, |
3630 | const LoongArchSubtarget &Subtarget, |
3631 | bool IsSigned = false) { |
3632 | SDLoc DL(Node); |
3633 | auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: ImmOp)); |
3634 | // Check the ImmArg. |
3635 | if ((IsSigned && !isInt<N>(CImm->getSExtValue())) || |
3636 | (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) { |
3637 | DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) + |
3638 | ": argument out of range." ); |
3639 | return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: Subtarget.getGRLenVT()); |
3640 | } |
3641 | return DAG.getConstant(Val: CImm->getZExtValue(), DL, VT: Subtarget.getGRLenVT()); |
3642 | } |
3643 | |
3644 | template <unsigned N> |
3645 | static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, |
3646 | SelectionDAG &DAG, bool IsSigned = false) { |
3647 | SDLoc DL(Node); |
3648 | EVT ResTy = Node->getValueType(ResNo: 0); |
3649 | auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: ImmOp)); |
3650 | |
3651 | // Check the ImmArg. |
3652 | if ((IsSigned && !isInt<N>(CImm->getSExtValue())) || |
3653 | (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) { |
3654 | DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) + |
3655 | ": argument out of range." ); |
3656 | return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy); |
3657 | } |
3658 | return DAG.getConstant( |
3659 | Val: APInt(ResTy.getScalarType().getSizeInBits(), |
3660 | IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned), |
3661 | DL, VT: ResTy); |
3662 | } |
3663 | |
3664 | static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) { |
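  // Mask each element of the bit-index vector (operand 2) down to the range
  // [0, EltSizeInBits - 1], i.e. compute (Vec & (EltSizeInBits - 1)).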
3665 | SDLoc DL(Node); |
3666 | EVT ResTy = Node->getValueType(ResNo: 0); |
3667 | SDValue Vec = Node->getOperand(Num: 2); |
3668 | SDValue Mask = DAG.getConstant(Val: Vec.getScalarValueSizeInBits() - 1, DL, VT: ResTy); |
3669 | return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Vec, N2: Mask); |
3670 | } |
3671 | |
3672 | static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) { |
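  // Lower a vector bit-clear operation to generic nodes:
  //   Res = Op1 & ~(1 << (Op2 & (EltBits - 1)))
  // clearing, in each element of Op1, the bit selected by the corresponding
  // element of Op2.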
3673 | SDLoc DL(Node); |
3674 | EVT ResTy = Node->getValueType(ResNo: 0); |
3675 | SDValue One = DAG.getConstant(Val: 1, DL, VT: ResTy); |
3676 | SDValue Bit = |
3677 | DAG.getNode(Opcode: ISD::SHL, DL, VT: ResTy, N1: One, N2: truncateVecElts(Node, DAG)); |
3678 | |
3679 | return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Node->getOperand(Num: 1), |
3680 | N2: DAG.getNOT(DL, Val: Bit, VT: ResTy)); |
3681 | } |
3682 | |
3683 | template <unsigned N> |
3684 | static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) { |
3685 | SDLoc DL(Node); |
3686 | EVT ResTy = Node->getValueType(ResNo: 0); |
3687 | auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2)); |
3688 | // Check the unsigned ImmArg. |
3689 | if (!isUInt<N>(CImm->getZExtValue())) { |
3690 | DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) + |
3691 | ": argument out of range." ); |
3692 | return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy); |
3693 | } |
3694 | |
3695 | APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); |
3696 | SDValue Mask = DAG.getConstant(Val: ~BitImm, DL, VT: ResTy); |
3697 | |
3698 | return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Node->getOperand(Num: 1), N2: Mask); |
3699 | } |
3700 | |
3701 | template <unsigned N> |
3702 | static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) { |
3703 | SDLoc DL(Node); |
3704 | EVT ResTy = Node->getValueType(ResNo: 0); |
3705 | auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2)); |
3706 | // Check the unsigned ImmArg. |
3707 | if (!isUInt<N>(CImm->getZExtValue())) { |
3708 | DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) + |
3709 | ": argument out of range." ); |
3710 | return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy); |
3711 | } |
3712 | |
3713 | APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); |
3714 | SDValue BitImm = DAG.getConstant(Val: Imm, DL, VT: ResTy); |
3715 | return DAG.getNode(Opcode: ISD::OR, DL, VT: ResTy, N1: Node->getOperand(Num: 1), N2: BitImm); |
3716 | } |
3717 | |
3718 | template <unsigned N> |
3719 | static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) { |
3720 | SDLoc DL(Node); |
3721 | EVT ResTy = Node->getValueType(ResNo: 0); |
3722 | auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2)); |
3723 | // Check the unsigned ImmArg. |
3724 | if (!isUInt<N>(CImm->getZExtValue())) { |
3725 | DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) + |
3726 | ": argument out of range." ); |
3727 | return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy); |
3728 | } |
3729 | |
3730 | APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); |
3731 | SDValue BitImm = DAG.getConstant(Val: Imm, DL, VT: ResTy); |
3732 | return DAG.getNode(Opcode: ISD::XOR, DL, VT: ResTy, N1: Node->getOperand(Num: 1), N2: BitImm); |
3733 | } |
3734 | |
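// Combine side-effect-free LoongArch SIMD intrinsics into generic DAG nodes
// so the target-independent combines and legalization apply. For example,
// @llvm.loongarch.lsx.vadd.w becomes a plain ISD::ADD on v4i32, and the
// immediate forms first go through lowerVectorSplatImm to range-check and
// splat their immediate operand.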
3735 | static SDValue |
3736 | performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, |
3737 | TargetLowering::DAGCombinerInfo &DCI, |
3738 | const LoongArchSubtarget &Subtarget) { |
3739 | SDLoc DL(N); |
3740 | switch (N->getConstantOperandVal(Num: 0)) { |
3741 | default: |
3742 | break; |
3743 | case Intrinsic::loongarch_lsx_vadd_b: |
3744 | case Intrinsic::loongarch_lsx_vadd_h: |
3745 | case Intrinsic::loongarch_lsx_vadd_w: |
3746 | case Intrinsic::loongarch_lsx_vadd_d: |
3747 | case Intrinsic::loongarch_lasx_xvadd_b: |
3748 | case Intrinsic::loongarch_lasx_xvadd_h: |
3749 | case Intrinsic::loongarch_lasx_xvadd_w: |
3750 | case Intrinsic::loongarch_lasx_xvadd_d: |
3751 | return DAG.getNode(Opcode: ISD::ADD, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3752 | N2: N->getOperand(Num: 2)); |
3753 | case Intrinsic::loongarch_lsx_vaddi_bu: |
3754 | case Intrinsic::loongarch_lsx_vaddi_hu: |
3755 | case Intrinsic::loongarch_lsx_vaddi_wu: |
3756 | case Intrinsic::loongarch_lsx_vaddi_du: |
3757 | case Intrinsic::loongarch_lasx_xvaddi_bu: |
3758 | case Intrinsic::loongarch_lasx_xvaddi_hu: |
3759 | case Intrinsic::loongarch_lasx_xvaddi_wu: |
3760 | case Intrinsic::loongarch_lasx_xvaddi_du: |
3761 | return DAG.getNode(Opcode: ISD::ADD, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3762 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
3763 | case Intrinsic::loongarch_lsx_vsub_b: |
3764 | case Intrinsic::loongarch_lsx_vsub_h: |
3765 | case Intrinsic::loongarch_lsx_vsub_w: |
3766 | case Intrinsic::loongarch_lsx_vsub_d: |
3767 | case Intrinsic::loongarch_lasx_xvsub_b: |
3768 | case Intrinsic::loongarch_lasx_xvsub_h: |
3769 | case Intrinsic::loongarch_lasx_xvsub_w: |
3770 | case Intrinsic::loongarch_lasx_xvsub_d: |
3771 | return DAG.getNode(Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3772 | N2: N->getOperand(Num: 2)); |
3773 | case Intrinsic::loongarch_lsx_vsubi_bu: |
3774 | case Intrinsic::loongarch_lsx_vsubi_hu: |
3775 | case Intrinsic::loongarch_lsx_vsubi_wu: |
3776 | case Intrinsic::loongarch_lsx_vsubi_du: |
3777 | case Intrinsic::loongarch_lasx_xvsubi_bu: |
3778 | case Intrinsic::loongarch_lasx_xvsubi_hu: |
3779 | case Intrinsic::loongarch_lasx_xvsubi_wu: |
3780 | case Intrinsic::loongarch_lasx_xvsubi_du: |
3781 | return DAG.getNode(Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3782 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
3783 | case Intrinsic::loongarch_lsx_vneg_b: |
3784 | case Intrinsic::loongarch_lsx_vneg_h: |
3785 | case Intrinsic::loongarch_lsx_vneg_w: |
3786 | case Intrinsic::loongarch_lsx_vneg_d: |
3787 | case Intrinsic::loongarch_lasx_xvneg_b: |
3788 | case Intrinsic::loongarch_lasx_xvneg_h: |
3789 | case Intrinsic::loongarch_lasx_xvneg_w: |
3790 | case Intrinsic::loongarch_lasx_xvneg_d: |
3791 | return DAG.getNode( |
3792 | Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: 0), |
3793 | N1: DAG.getConstant( |
3794 | Val: APInt(N->getValueType(ResNo: 0).getScalarType().getSizeInBits(), 0, |
3795 | /*isSigned=*/true), |
3796 | DL: SDLoc(N), VT: N->getValueType(ResNo: 0)), |
3797 | N2: N->getOperand(Num: 1)); |
3798 | case Intrinsic::loongarch_lsx_vmax_b: |
3799 | case Intrinsic::loongarch_lsx_vmax_h: |
3800 | case Intrinsic::loongarch_lsx_vmax_w: |
3801 | case Intrinsic::loongarch_lsx_vmax_d: |
3802 | case Intrinsic::loongarch_lasx_xvmax_b: |
3803 | case Intrinsic::loongarch_lasx_xvmax_h: |
3804 | case Intrinsic::loongarch_lasx_xvmax_w: |
3805 | case Intrinsic::loongarch_lasx_xvmax_d: |
3806 | return DAG.getNode(Opcode: ISD::SMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3807 | N2: N->getOperand(Num: 2)); |
3808 | case Intrinsic::loongarch_lsx_vmax_bu: |
3809 | case Intrinsic::loongarch_lsx_vmax_hu: |
3810 | case Intrinsic::loongarch_lsx_vmax_wu: |
3811 | case Intrinsic::loongarch_lsx_vmax_du: |
3812 | case Intrinsic::loongarch_lasx_xvmax_bu: |
3813 | case Intrinsic::loongarch_lasx_xvmax_hu: |
3814 | case Intrinsic::loongarch_lasx_xvmax_wu: |
3815 | case Intrinsic::loongarch_lasx_xvmax_du: |
3816 | return DAG.getNode(Opcode: ISD::UMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3817 | N2: N->getOperand(Num: 2)); |
3818 | case Intrinsic::loongarch_lsx_vmaxi_b: |
3819 | case Intrinsic::loongarch_lsx_vmaxi_h: |
3820 | case Intrinsic::loongarch_lsx_vmaxi_w: |
3821 | case Intrinsic::loongarch_lsx_vmaxi_d: |
3822 | case Intrinsic::loongarch_lasx_xvmaxi_b: |
3823 | case Intrinsic::loongarch_lasx_xvmaxi_h: |
3824 | case Intrinsic::loongarch_lasx_xvmaxi_w: |
3825 | case Intrinsic::loongarch_lasx_xvmaxi_d: |
3826 | return DAG.getNode(Opcode: ISD::SMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3827 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG, /*IsSigned=*/true)); |
3828 | case Intrinsic::loongarch_lsx_vmaxi_bu: |
3829 | case Intrinsic::loongarch_lsx_vmaxi_hu: |
3830 | case Intrinsic::loongarch_lsx_vmaxi_wu: |
3831 | case Intrinsic::loongarch_lsx_vmaxi_du: |
3832 | case Intrinsic::loongarch_lasx_xvmaxi_bu: |
3833 | case Intrinsic::loongarch_lasx_xvmaxi_hu: |
3834 | case Intrinsic::loongarch_lasx_xvmaxi_wu: |
3835 | case Intrinsic::loongarch_lasx_xvmaxi_du: |
3836 | return DAG.getNode(Opcode: ISD::UMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3837 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
3838 | case Intrinsic::loongarch_lsx_vmin_b: |
3839 | case Intrinsic::loongarch_lsx_vmin_h: |
3840 | case Intrinsic::loongarch_lsx_vmin_w: |
3841 | case Intrinsic::loongarch_lsx_vmin_d: |
3842 | case Intrinsic::loongarch_lasx_xvmin_b: |
3843 | case Intrinsic::loongarch_lasx_xvmin_h: |
3844 | case Intrinsic::loongarch_lasx_xvmin_w: |
3845 | case Intrinsic::loongarch_lasx_xvmin_d: |
3846 | return DAG.getNode(Opcode: ISD::SMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3847 | N2: N->getOperand(Num: 2)); |
3848 | case Intrinsic::loongarch_lsx_vmin_bu: |
3849 | case Intrinsic::loongarch_lsx_vmin_hu: |
3850 | case Intrinsic::loongarch_lsx_vmin_wu: |
3851 | case Intrinsic::loongarch_lsx_vmin_du: |
3852 | case Intrinsic::loongarch_lasx_xvmin_bu: |
3853 | case Intrinsic::loongarch_lasx_xvmin_hu: |
3854 | case Intrinsic::loongarch_lasx_xvmin_wu: |
3855 | case Intrinsic::loongarch_lasx_xvmin_du: |
3856 | return DAG.getNode(Opcode: ISD::UMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3857 | N2: N->getOperand(Num: 2)); |
3858 | case Intrinsic::loongarch_lsx_vmini_b: |
3859 | case Intrinsic::loongarch_lsx_vmini_h: |
3860 | case Intrinsic::loongarch_lsx_vmini_w: |
3861 | case Intrinsic::loongarch_lsx_vmini_d: |
3862 | case Intrinsic::loongarch_lasx_xvmini_b: |
3863 | case Intrinsic::loongarch_lasx_xvmini_h: |
3864 | case Intrinsic::loongarch_lasx_xvmini_w: |
3865 | case Intrinsic::loongarch_lasx_xvmini_d: |
3866 | return DAG.getNode(Opcode: ISD::SMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3867 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG, /*IsSigned=*/true)); |
3868 | case Intrinsic::loongarch_lsx_vmini_bu: |
3869 | case Intrinsic::loongarch_lsx_vmini_hu: |
3870 | case Intrinsic::loongarch_lsx_vmini_wu: |
3871 | case Intrinsic::loongarch_lsx_vmini_du: |
3872 | case Intrinsic::loongarch_lasx_xvmini_bu: |
3873 | case Intrinsic::loongarch_lasx_xvmini_hu: |
3874 | case Intrinsic::loongarch_lasx_xvmini_wu: |
3875 | case Intrinsic::loongarch_lasx_xvmini_du: |
3876 | return DAG.getNode(Opcode: ISD::UMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3877 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
3878 | case Intrinsic::loongarch_lsx_vmul_b: |
3879 | case Intrinsic::loongarch_lsx_vmul_h: |
3880 | case Intrinsic::loongarch_lsx_vmul_w: |
3881 | case Intrinsic::loongarch_lsx_vmul_d: |
3882 | case Intrinsic::loongarch_lasx_xvmul_b: |
3883 | case Intrinsic::loongarch_lasx_xvmul_h: |
3884 | case Intrinsic::loongarch_lasx_xvmul_w: |
3885 | case Intrinsic::loongarch_lasx_xvmul_d: |
3886 | return DAG.getNode(Opcode: ISD::MUL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3887 | N2: N->getOperand(Num: 2)); |
3888 | case Intrinsic::loongarch_lsx_vmadd_b: |
3889 | case Intrinsic::loongarch_lsx_vmadd_h: |
3890 | case Intrinsic::loongarch_lsx_vmadd_w: |
3891 | case Intrinsic::loongarch_lsx_vmadd_d: |
3892 | case Intrinsic::loongarch_lasx_xvmadd_b: |
3893 | case Intrinsic::loongarch_lasx_xvmadd_h: |
3894 | case Intrinsic::loongarch_lasx_xvmadd_w: |
3895 | case Intrinsic::loongarch_lasx_xvmadd_d: { |
3896 | EVT ResTy = N->getValueType(ResNo: 0); |
3897 | return DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 1), |
3898 | N2: DAG.getNode(Opcode: ISD::MUL, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 2), |
3899 | N2: N->getOperand(Num: 3))); |
3900 | } |
3901 | case Intrinsic::loongarch_lsx_vmsub_b: |
3902 | case Intrinsic::loongarch_lsx_vmsub_h: |
3903 | case Intrinsic::loongarch_lsx_vmsub_w: |
3904 | case Intrinsic::loongarch_lsx_vmsub_d: |
3905 | case Intrinsic::loongarch_lasx_xvmsub_b: |
3906 | case Intrinsic::loongarch_lasx_xvmsub_h: |
3907 | case Intrinsic::loongarch_lasx_xvmsub_w: |
3908 | case Intrinsic::loongarch_lasx_xvmsub_d: { |
3909 | EVT ResTy = N->getValueType(ResNo: 0); |
3910 | return DAG.getNode(Opcode: ISD::SUB, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 1), |
3911 | N2: DAG.getNode(Opcode: ISD::MUL, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 2), |
3912 | N2: N->getOperand(Num: 3))); |
3913 | } |
3914 | case Intrinsic::loongarch_lsx_vdiv_b: |
3915 | case Intrinsic::loongarch_lsx_vdiv_h: |
3916 | case Intrinsic::loongarch_lsx_vdiv_w: |
3917 | case Intrinsic::loongarch_lsx_vdiv_d: |
3918 | case Intrinsic::loongarch_lasx_xvdiv_b: |
3919 | case Intrinsic::loongarch_lasx_xvdiv_h: |
3920 | case Intrinsic::loongarch_lasx_xvdiv_w: |
3921 | case Intrinsic::loongarch_lasx_xvdiv_d: |
3922 | return DAG.getNode(Opcode: ISD::SDIV, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3923 | N2: N->getOperand(Num: 2)); |
3924 | case Intrinsic::loongarch_lsx_vdiv_bu: |
3925 | case Intrinsic::loongarch_lsx_vdiv_hu: |
3926 | case Intrinsic::loongarch_lsx_vdiv_wu: |
3927 | case Intrinsic::loongarch_lsx_vdiv_du: |
3928 | case Intrinsic::loongarch_lasx_xvdiv_bu: |
3929 | case Intrinsic::loongarch_lasx_xvdiv_hu: |
3930 | case Intrinsic::loongarch_lasx_xvdiv_wu: |
3931 | case Intrinsic::loongarch_lasx_xvdiv_du: |
3932 | return DAG.getNode(Opcode: ISD::UDIV, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3933 | N2: N->getOperand(Num: 2)); |
3934 | case Intrinsic::loongarch_lsx_vmod_b: |
3935 | case Intrinsic::loongarch_lsx_vmod_h: |
3936 | case Intrinsic::loongarch_lsx_vmod_w: |
3937 | case Intrinsic::loongarch_lsx_vmod_d: |
3938 | case Intrinsic::loongarch_lasx_xvmod_b: |
3939 | case Intrinsic::loongarch_lasx_xvmod_h: |
3940 | case Intrinsic::loongarch_lasx_xvmod_w: |
3941 | case Intrinsic::loongarch_lasx_xvmod_d: |
3942 | return DAG.getNode(Opcode: ISD::SREM, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3943 | N2: N->getOperand(Num: 2)); |
3944 | case Intrinsic::loongarch_lsx_vmod_bu: |
3945 | case Intrinsic::loongarch_lsx_vmod_hu: |
3946 | case Intrinsic::loongarch_lsx_vmod_wu: |
3947 | case Intrinsic::loongarch_lsx_vmod_du: |
3948 | case Intrinsic::loongarch_lasx_xvmod_bu: |
3949 | case Intrinsic::loongarch_lasx_xvmod_hu: |
3950 | case Intrinsic::loongarch_lasx_xvmod_wu: |
3951 | case Intrinsic::loongarch_lasx_xvmod_du: |
3952 | return DAG.getNode(Opcode: ISD::UREM, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3953 | N2: N->getOperand(Num: 2)); |
3954 | case Intrinsic::loongarch_lsx_vand_v: |
3955 | case Intrinsic::loongarch_lasx_xvand_v: |
3956 | return DAG.getNode(Opcode: ISD::AND, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3957 | N2: N->getOperand(Num: 2)); |
3958 | case Intrinsic::loongarch_lsx_vor_v: |
3959 | case Intrinsic::loongarch_lasx_xvor_v: |
3960 | return DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3961 | N2: N->getOperand(Num: 2)); |
3962 | case Intrinsic::loongarch_lsx_vxor_v: |
3963 | case Intrinsic::loongarch_lasx_xvxor_v: |
3964 | return DAG.getNode(Opcode: ISD::XOR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3965 | N2: N->getOperand(Num: 2)); |
3966 | case Intrinsic::loongarch_lsx_vnor_v: |
3967 | case Intrinsic::loongarch_lasx_xvnor_v: { |
3968 | SDValue Res = DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3969 | N2: N->getOperand(Num: 2)); |
3970 | return DAG.getNOT(DL, Val: Res, VT: Res->getValueType(ResNo: 0)); |
3971 | } |
3972 | case Intrinsic::loongarch_lsx_vandi_b: |
3973 | case Intrinsic::loongarch_lasx_xvandi_b: |
3974 | return DAG.getNode(Opcode: ISD::AND, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3975 | N2: lowerVectorSplatImm<8>(Node: N, ImmOp: 2, DAG)); |
3976 | case Intrinsic::loongarch_lsx_vori_b: |
3977 | case Intrinsic::loongarch_lasx_xvori_b: |
3978 | return DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3979 | N2: lowerVectorSplatImm<8>(Node: N, ImmOp: 2, DAG)); |
3980 | case Intrinsic::loongarch_lsx_vxori_b: |
3981 | case Intrinsic::loongarch_lasx_xvxori_b: |
3982 | return DAG.getNode(Opcode: ISD::XOR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3983 | N2: lowerVectorSplatImm<8>(Node: N, ImmOp: 2, DAG)); |
3984 | case Intrinsic::loongarch_lsx_vsll_b: |
3985 | case Intrinsic::loongarch_lsx_vsll_h: |
3986 | case Intrinsic::loongarch_lsx_vsll_w: |
3987 | case Intrinsic::loongarch_lsx_vsll_d: |
3988 | case Intrinsic::loongarch_lasx_xvsll_b: |
3989 | case Intrinsic::loongarch_lasx_xvsll_h: |
3990 | case Intrinsic::loongarch_lasx_xvsll_w: |
3991 | case Intrinsic::loongarch_lasx_xvsll_d: |
3992 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3993 | N2: truncateVecElts(Node: N, DAG)); |
3994 | case Intrinsic::loongarch_lsx_vslli_b: |
3995 | case Intrinsic::loongarch_lasx_xvslli_b: |
3996 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
3997 | N2: lowerVectorSplatImm<3>(Node: N, ImmOp: 2, DAG)); |
3998 | case Intrinsic::loongarch_lsx_vslli_h: |
3999 | case Intrinsic::loongarch_lasx_xvslli_h: |
4000 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4001 | N2: lowerVectorSplatImm<4>(Node: N, ImmOp: 2, DAG)); |
4002 | case Intrinsic::loongarch_lsx_vslli_w: |
4003 | case Intrinsic::loongarch_lasx_xvslli_w: |
4004 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4005 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
4006 | case Intrinsic::loongarch_lsx_vslli_d: |
4007 | case Intrinsic::loongarch_lasx_xvslli_d: |
4008 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4009 | N2: lowerVectorSplatImm<6>(Node: N, ImmOp: 2, DAG)); |
4010 | case Intrinsic::loongarch_lsx_vsrl_b: |
4011 | case Intrinsic::loongarch_lsx_vsrl_h: |
4012 | case Intrinsic::loongarch_lsx_vsrl_w: |
4013 | case Intrinsic::loongarch_lsx_vsrl_d: |
4014 | case Intrinsic::loongarch_lasx_xvsrl_b: |
4015 | case Intrinsic::loongarch_lasx_xvsrl_h: |
4016 | case Intrinsic::loongarch_lasx_xvsrl_w: |
4017 | case Intrinsic::loongarch_lasx_xvsrl_d: |
4018 | return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4019 | N2: truncateVecElts(Node: N, DAG)); |
4020 | case Intrinsic::loongarch_lsx_vsrli_b: |
4021 | case Intrinsic::loongarch_lasx_xvsrli_b: |
4022 | return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4023 | N2: lowerVectorSplatImm<3>(Node: N, ImmOp: 2, DAG)); |
4024 | case Intrinsic::loongarch_lsx_vsrli_h: |
4025 | case Intrinsic::loongarch_lasx_xvsrli_h: |
4026 | return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4027 | N2: lowerVectorSplatImm<4>(Node: N, ImmOp: 2, DAG)); |
4028 | case Intrinsic::loongarch_lsx_vsrli_w: |
4029 | case Intrinsic::loongarch_lasx_xvsrli_w: |
4030 | return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4031 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
4032 | case Intrinsic::loongarch_lsx_vsrli_d: |
4033 | case Intrinsic::loongarch_lasx_xvsrli_d: |
4034 | return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4035 | N2: lowerVectorSplatImm<6>(Node: N, ImmOp: 2, DAG)); |
4036 | case Intrinsic::loongarch_lsx_vsra_b: |
4037 | case Intrinsic::loongarch_lsx_vsra_h: |
4038 | case Intrinsic::loongarch_lsx_vsra_w: |
4039 | case Intrinsic::loongarch_lsx_vsra_d: |
4040 | case Intrinsic::loongarch_lasx_xvsra_b: |
4041 | case Intrinsic::loongarch_lasx_xvsra_h: |
4042 | case Intrinsic::loongarch_lasx_xvsra_w: |
4043 | case Intrinsic::loongarch_lasx_xvsra_d: |
4044 | return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4045 | N2: truncateVecElts(Node: N, DAG)); |
4046 | case Intrinsic::loongarch_lsx_vsrai_b: |
4047 | case Intrinsic::loongarch_lasx_xvsrai_b: |
4048 | return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4049 | N2: lowerVectorSplatImm<3>(Node: N, ImmOp: 2, DAG)); |
4050 | case Intrinsic::loongarch_lsx_vsrai_h: |
4051 | case Intrinsic::loongarch_lasx_xvsrai_h: |
4052 | return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4053 | N2: lowerVectorSplatImm<4>(Node: N, ImmOp: 2, DAG)); |
4054 | case Intrinsic::loongarch_lsx_vsrai_w: |
4055 | case Intrinsic::loongarch_lasx_xvsrai_w: |
4056 | return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4057 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
4058 | case Intrinsic::loongarch_lsx_vsrai_d: |
4059 | case Intrinsic::loongarch_lasx_xvsrai_d: |
4060 | return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4061 | N2: lowerVectorSplatImm<6>(Node: N, ImmOp: 2, DAG)); |
4062 | case Intrinsic::loongarch_lsx_vclz_b: |
4063 | case Intrinsic::loongarch_lsx_vclz_h: |
4064 | case Intrinsic::loongarch_lsx_vclz_w: |
4065 | case Intrinsic::loongarch_lsx_vclz_d: |
4066 | case Intrinsic::loongarch_lasx_xvclz_b: |
4067 | case Intrinsic::loongarch_lasx_xvclz_h: |
4068 | case Intrinsic::loongarch_lasx_xvclz_w: |
4069 | case Intrinsic::loongarch_lasx_xvclz_d: |
4070 | return DAG.getNode(Opcode: ISD::CTLZ, DL, VT: N->getValueType(ResNo: 0), Operand: N->getOperand(Num: 1)); |
4071 | case Intrinsic::loongarch_lsx_vpcnt_b: |
4072 | case Intrinsic::loongarch_lsx_vpcnt_h: |
4073 | case Intrinsic::loongarch_lsx_vpcnt_w: |
4074 | case Intrinsic::loongarch_lsx_vpcnt_d: |
4075 | case Intrinsic::loongarch_lasx_xvpcnt_b: |
4076 | case Intrinsic::loongarch_lasx_xvpcnt_h: |
4077 | case Intrinsic::loongarch_lasx_xvpcnt_w: |
4078 | case Intrinsic::loongarch_lasx_xvpcnt_d: |
4079 | return DAG.getNode(Opcode: ISD::CTPOP, DL, VT: N->getValueType(ResNo: 0), Operand: N->getOperand(Num: 1)); |
4080 | case Intrinsic::loongarch_lsx_vbitclr_b: |
4081 | case Intrinsic::loongarch_lsx_vbitclr_h: |
4082 | case Intrinsic::loongarch_lsx_vbitclr_w: |
4083 | case Intrinsic::loongarch_lsx_vbitclr_d: |
4084 | case Intrinsic::loongarch_lasx_xvbitclr_b: |
4085 | case Intrinsic::loongarch_lasx_xvbitclr_h: |
4086 | case Intrinsic::loongarch_lasx_xvbitclr_w: |
4087 | case Intrinsic::loongarch_lasx_xvbitclr_d: |
4088 | return lowerVectorBitClear(Node: N, DAG); |
4089 | case Intrinsic::loongarch_lsx_vbitclri_b: |
4090 | case Intrinsic::loongarch_lasx_xvbitclri_b: |
4091 | return lowerVectorBitClearImm<3>(Node: N, DAG); |
4092 | case Intrinsic::loongarch_lsx_vbitclri_h: |
4093 | case Intrinsic::loongarch_lasx_xvbitclri_h: |
4094 | return lowerVectorBitClearImm<4>(Node: N, DAG); |
4095 | case Intrinsic::loongarch_lsx_vbitclri_w: |
4096 | case Intrinsic::loongarch_lasx_xvbitclri_w: |
4097 | return lowerVectorBitClearImm<5>(Node: N, DAG); |
4098 | case Intrinsic::loongarch_lsx_vbitclri_d: |
4099 | case Intrinsic::loongarch_lasx_xvbitclri_d: |
4100 | return lowerVectorBitClearImm<6>(Node: N, DAG); |
4101 | case Intrinsic::loongarch_lsx_vbitset_b: |
4102 | case Intrinsic::loongarch_lsx_vbitset_h: |
4103 | case Intrinsic::loongarch_lsx_vbitset_w: |
4104 | case Intrinsic::loongarch_lsx_vbitset_d: |
4105 | case Intrinsic::loongarch_lasx_xvbitset_b: |
4106 | case Intrinsic::loongarch_lasx_xvbitset_h: |
4107 | case Intrinsic::loongarch_lasx_xvbitset_w: |
4108 | case Intrinsic::loongarch_lasx_xvbitset_d: { |
4109 | EVT VecTy = N->getValueType(ResNo: 0); |
4110 | SDValue One = DAG.getConstant(Val: 1, DL, VT: VecTy); |
4111 | return DAG.getNode( |
4112 | Opcode: ISD::OR, DL, VT: VecTy, N1: N->getOperand(Num: 1), |
4113 | N2: DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: One, N2: truncateVecElts(Node: N, DAG))); |
4114 | } |
4115 | case Intrinsic::loongarch_lsx_vbitseti_b: |
4116 | case Intrinsic::loongarch_lasx_xvbitseti_b: |
4117 | return lowerVectorBitSetImm<3>(Node: N, DAG); |
4118 | case Intrinsic::loongarch_lsx_vbitseti_h: |
4119 | case Intrinsic::loongarch_lasx_xvbitseti_h: |
4120 | return lowerVectorBitSetImm<4>(Node: N, DAG); |
4121 | case Intrinsic::loongarch_lsx_vbitseti_w: |
4122 | case Intrinsic::loongarch_lasx_xvbitseti_w: |
4123 | return lowerVectorBitSetImm<5>(Node: N, DAG); |
4124 | case Intrinsic::loongarch_lsx_vbitseti_d: |
4125 | case Intrinsic::loongarch_lasx_xvbitseti_d: |
4126 | return lowerVectorBitSetImm<6>(Node: N, DAG); |
4127 | case Intrinsic::loongarch_lsx_vbitrev_b: |
4128 | case Intrinsic::loongarch_lsx_vbitrev_h: |
4129 | case Intrinsic::loongarch_lsx_vbitrev_w: |
4130 | case Intrinsic::loongarch_lsx_vbitrev_d: |
4131 | case Intrinsic::loongarch_lasx_xvbitrev_b: |
4132 | case Intrinsic::loongarch_lasx_xvbitrev_h: |
4133 | case Intrinsic::loongarch_lasx_xvbitrev_w: |
4134 | case Intrinsic::loongarch_lasx_xvbitrev_d: { |
4135 | EVT VecTy = N->getValueType(ResNo: 0); |
4136 | SDValue One = DAG.getConstant(Val: 1, DL, VT: VecTy); |
4137 | return DAG.getNode( |
4138 | Opcode: ISD::XOR, DL, VT: VecTy, N1: N->getOperand(Num: 1), |
4139 | N2: DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: One, N2: truncateVecElts(Node: N, DAG))); |
4140 | } |
4141 | case Intrinsic::loongarch_lsx_vbitrevi_b: |
4142 | case Intrinsic::loongarch_lasx_xvbitrevi_b: |
4143 | return lowerVectorBitRevImm<3>(Node: N, DAG); |
4144 | case Intrinsic::loongarch_lsx_vbitrevi_h: |
4145 | case Intrinsic::loongarch_lasx_xvbitrevi_h: |
4146 | return lowerVectorBitRevImm<4>(Node: N, DAG); |
4147 | case Intrinsic::loongarch_lsx_vbitrevi_w: |
4148 | case Intrinsic::loongarch_lasx_xvbitrevi_w: |
4149 | return lowerVectorBitRevImm<5>(Node: N, DAG); |
4150 | case Intrinsic::loongarch_lsx_vbitrevi_d: |
4151 | case Intrinsic::loongarch_lasx_xvbitrevi_d: |
4152 | return lowerVectorBitRevImm<6>(Node: N, DAG); |
4153 | case Intrinsic::loongarch_lsx_vfadd_s: |
4154 | case Intrinsic::loongarch_lsx_vfadd_d: |
4155 | case Intrinsic::loongarch_lasx_xvfadd_s: |
4156 | case Intrinsic::loongarch_lasx_xvfadd_d: |
4157 | return DAG.getNode(Opcode: ISD::FADD, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4158 | N2: N->getOperand(Num: 2)); |
4159 | case Intrinsic::loongarch_lsx_vfsub_s: |
4160 | case Intrinsic::loongarch_lsx_vfsub_d: |
4161 | case Intrinsic::loongarch_lasx_xvfsub_s: |
4162 | case Intrinsic::loongarch_lasx_xvfsub_d: |
4163 | return DAG.getNode(Opcode: ISD::FSUB, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4164 | N2: N->getOperand(Num: 2)); |
4165 | case Intrinsic::loongarch_lsx_vfmul_s: |
4166 | case Intrinsic::loongarch_lsx_vfmul_d: |
4167 | case Intrinsic::loongarch_lasx_xvfmul_s: |
4168 | case Intrinsic::loongarch_lasx_xvfmul_d: |
4169 | return DAG.getNode(Opcode: ISD::FMUL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4170 | N2: N->getOperand(Num: 2)); |
4171 | case Intrinsic::loongarch_lsx_vfdiv_s: |
4172 | case Intrinsic::loongarch_lsx_vfdiv_d: |
4173 | case Intrinsic::loongarch_lasx_xvfdiv_s: |
4174 | case Intrinsic::loongarch_lasx_xvfdiv_d: |
4175 | return DAG.getNode(Opcode: ISD::FDIV, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4176 | N2: N->getOperand(Num: 2)); |
4177 | case Intrinsic::loongarch_lsx_vfmadd_s: |
4178 | case Intrinsic::loongarch_lsx_vfmadd_d: |
4179 | case Intrinsic::loongarch_lasx_xvfmadd_s: |
4180 | case Intrinsic::loongarch_lasx_xvfmadd_d: |
4181 | return DAG.getNode(Opcode: ISD::FMA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
4182 | N2: N->getOperand(Num: 2), N3: N->getOperand(Num: 3)); |
4183 | case Intrinsic::loongarch_lsx_vinsgr2vr_b: |
4184 | return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), |
4185 | N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2), |
4186 | N3: legalizeIntrinsicImmArg<4>(Node: N, ImmOp: 3, DAG, Subtarget)); |
4187 | case Intrinsic::loongarch_lsx_vinsgr2vr_h: |
4188 | case Intrinsic::loongarch_lasx_xvinsgr2vr_w: |
4189 | return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), |
4190 | N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2), |
4191 | N3: legalizeIntrinsicImmArg<3>(Node: N, ImmOp: 3, DAG, Subtarget)); |
4192 | case Intrinsic::loongarch_lsx_vinsgr2vr_w: |
4193 | case Intrinsic::loongarch_lasx_xvinsgr2vr_d: |
4194 | return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), |
4195 | N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2), |
4196 | N3: legalizeIntrinsicImmArg<2>(Node: N, ImmOp: 3, DAG, Subtarget)); |
4197 | case Intrinsic::loongarch_lsx_vinsgr2vr_d: |
4198 | return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), |
4199 | N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2), |
4200 | N3: legalizeIntrinsicImmArg<1>(Node: N, ImmOp: 3, DAG, Subtarget)); |
4201 | case Intrinsic::loongarch_lsx_vreplgr2vr_b: |
4202 | case Intrinsic::loongarch_lsx_vreplgr2vr_h: |
4203 | case Intrinsic::loongarch_lsx_vreplgr2vr_w: |
4204 | case Intrinsic::loongarch_lsx_vreplgr2vr_d: |
4205 | case Intrinsic::loongarch_lasx_xvreplgr2vr_b: |
4206 | case Intrinsic::loongarch_lasx_xvreplgr2vr_h: |
4207 | case Intrinsic::loongarch_lasx_xvreplgr2vr_w: |
4208 | case Intrinsic::loongarch_lasx_xvreplgr2vr_d: { |
4209 | EVT ResTy = N->getValueType(ResNo: 0); |
4210 | SmallVector<SDValue> Ops(ResTy.getVectorNumElements(), N->getOperand(Num: 1)); |
4211 | return DAG.getBuildVector(VT: ResTy, DL, Ops); |
4212 | } |
4213 | case Intrinsic::loongarch_lsx_vreplve_b: |
4214 | case Intrinsic::loongarch_lsx_vreplve_h: |
4215 | case Intrinsic::loongarch_lsx_vreplve_w: |
4216 | case Intrinsic::loongarch_lsx_vreplve_d: |
4217 | case Intrinsic::loongarch_lasx_xvreplve_b: |
4218 | case Intrinsic::loongarch_lasx_xvreplve_h: |
4219 | case Intrinsic::loongarch_lasx_xvreplve_w: |
4220 | case Intrinsic::loongarch_lasx_xvreplve_d: |
4221 | return DAG.getNode(Opcode: LoongArchISD::VREPLVE, DL, VT: N->getValueType(ResNo: 0), |
4222 | N1: N->getOperand(Num: 1), |
4223 | N2: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getGRLenVT(), |
4224 | Operand: N->getOperand(Num: 2))); |
4225 | } |
4226 | return SDValue(); |
4227 | } |
4228 | |
4229 | SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, |
4230 | DAGCombinerInfo &DCI) const { |
4231 | SelectionDAG &DAG = DCI.DAG; |
4232 | switch (N->getOpcode()) { |
4233 | default: |
4234 | break; |
4235 | case ISD::AND: |
4236 | return performANDCombine(N, DAG, DCI, Subtarget); |
4237 | case ISD::OR: |
4238 | return performORCombine(N, DAG, DCI, Subtarget); |
4239 | case ISD::SETCC: |
4240 | return performSETCCCombine(N, DAG, DCI, Subtarget); |
4241 | case ISD::SRL: |
4242 | return performSRLCombine(N, DAG, DCI, Subtarget); |
4243 | case LoongArchISD::BITREV_W: |
4244 | return performBITREV_WCombine(N, DAG, DCI, Subtarget); |
4245 | case ISD::INTRINSIC_WO_CHAIN: |
4246 | return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget); |
4247 | } |
4248 | return SDValue(); |
4249 | } |
4250 | |
4251 | static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI, |
4252 | MachineBasicBlock *MBB) { |
4253 | if (!ZeroDivCheck) |
4254 | return MBB; |
4255 | |
4256 | // Build instructions: |
4257 | // MBB: |
4258 | // div(or mod) $dst, $dividend, $divisor |
4259 | // bnez $divisor, SinkMBB |
4260 | // BreakMBB: |
4261 | // break 7 // BRK_DIVZERO |
4262 | // SinkMBB: |
4263 | // fallthrough |
4264 | const BasicBlock *LLVM_BB = MBB->getBasicBlock(); |
4265 | MachineFunction::iterator It = ++MBB->getIterator(); |
4266 | MachineFunction *MF = MBB->getParent(); |
4267 | auto BreakMBB = MF->CreateMachineBasicBlock(BB: LLVM_BB); |
4268 | auto SinkMBB = MF->CreateMachineBasicBlock(BB: LLVM_BB); |
4269 | MF->insert(MBBI: It, MBB: BreakMBB); |
4270 | MF->insert(MBBI: It, MBB: SinkMBB); |
4271 | |
4272 | // Transfer the remainder of MBB and its successor edges to SinkMBB. |
4273 | SinkMBB->splice(Where: SinkMBB->end(), Other: MBB, From: std::next(x: MI.getIterator()), To: MBB->end()); |
4274 | SinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB); |
4275 | |
4276 | const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo(); |
4277 | DebugLoc DL = MI.getDebugLoc(); |
4278 | MachineOperand &Divisor = MI.getOperand(i: 2); |
4279 | Register DivisorReg = Divisor.getReg(); |
4280 | |
4281 | // MBB: |
4282 | BuildMI(BB: MBB, MIMD: DL, MCID: TII.get(Opcode: LoongArch::BNEZ)) |
4283 | .addReg(RegNo: DivisorReg, flags: getKillRegState(B: Divisor.isKill())) |
4284 | .addMBB(MBB: SinkMBB); |
4285 | MBB->addSuccessor(Succ: BreakMBB); |
4286 | MBB->addSuccessor(Succ: SinkMBB); |
4287 | |
4288 | // BreakMBB: |
// See the Linux header file arch/loongarch/include/uapi/asm/break.h for the
// definition of BRK_DIVZERO.
4291 | BuildMI(BB: BreakMBB, MIMD: DL, MCID: TII.get(Opcode: LoongArch::BREAK)).addImm(Val: 7 /*BRK_DIVZERO*/); |
4292 | BreakMBB->addSuccessor(Succ: SinkMBB); |
4293 | |
4294 | // Clear Divisor's kill flag. |
4295 | Divisor.setIsKill(false); |
4296 | |
4297 | return SinkMBB; |
4298 | } |
4299 | |
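// Expand the PseudoVBZ/PseudoVBNZ (and PseudoXVBZ/PseudoXVBNZ) pseudos into a
// vset* write to a condition-flag register, a bcnez branch and a small
// diamond that materializes 0/1 into a GPR. Roughly:
//   vseteqz.v $fcc, $vj
//   bcnez     $fcc, TrueBB
// FalseBB: addi.w $r1, $zero, 0 ; b SinkBB
// TrueBB:  addi.w $r2, $zero, 1
// SinkBB:  $dst = PHI [$r1, FalseBB], [$r2, TrueBB]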
4300 | static MachineBasicBlock * |
4301 | emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, |
4302 | const LoongArchSubtarget &Subtarget) { |
4303 | unsigned CondOpc; |
4304 | switch (MI.getOpcode()) { |
4305 | default: |
4306 | llvm_unreachable("Unexpected opcode" ); |
4307 | case LoongArch::PseudoVBZ: |
4308 | CondOpc = LoongArch::VSETEQZ_V; |
4309 | break; |
4310 | case LoongArch::PseudoVBZ_B: |
4311 | CondOpc = LoongArch::VSETANYEQZ_B; |
4312 | break; |
4313 | case LoongArch::PseudoVBZ_H: |
4314 | CondOpc = LoongArch::VSETANYEQZ_H; |
4315 | break; |
4316 | case LoongArch::PseudoVBZ_W: |
4317 | CondOpc = LoongArch::VSETANYEQZ_W; |
4318 | break; |
4319 | case LoongArch::PseudoVBZ_D: |
4320 | CondOpc = LoongArch::VSETANYEQZ_D; |
4321 | break; |
4322 | case LoongArch::PseudoVBNZ: |
4323 | CondOpc = LoongArch::VSETNEZ_V; |
4324 | break; |
4325 | case LoongArch::PseudoVBNZ_B: |
4326 | CondOpc = LoongArch::VSETALLNEZ_B; |
4327 | break; |
4328 | case LoongArch::PseudoVBNZ_H: |
4329 | CondOpc = LoongArch::VSETALLNEZ_H; |
4330 | break; |
4331 | case LoongArch::PseudoVBNZ_W: |
4332 | CondOpc = LoongArch::VSETALLNEZ_W; |
4333 | break; |
4334 | case LoongArch::PseudoVBNZ_D: |
4335 | CondOpc = LoongArch::VSETALLNEZ_D; |
4336 | break; |
4337 | case LoongArch::PseudoXVBZ: |
4338 | CondOpc = LoongArch::XVSETEQZ_V; |
4339 | break; |
4340 | case LoongArch::PseudoXVBZ_B: |
4341 | CondOpc = LoongArch::XVSETANYEQZ_B; |
4342 | break; |
4343 | case LoongArch::PseudoXVBZ_H: |
4344 | CondOpc = LoongArch::XVSETANYEQZ_H; |
4345 | break; |
4346 | case LoongArch::PseudoXVBZ_W: |
4347 | CondOpc = LoongArch::XVSETANYEQZ_W; |
4348 | break; |
4349 | case LoongArch::PseudoXVBZ_D: |
4350 | CondOpc = LoongArch::XVSETANYEQZ_D; |
4351 | break; |
4352 | case LoongArch::PseudoXVBNZ: |
4353 | CondOpc = LoongArch::XVSETNEZ_V; |
4354 | break; |
4355 | case LoongArch::PseudoXVBNZ_B: |
4356 | CondOpc = LoongArch::XVSETALLNEZ_B; |
4357 | break; |
4358 | case LoongArch::PseudoXVBNZ_H: |
4359 | CondOpc = LoongArch::XVSETALLNEZ_H; |
4360 | break; |
4361 | case LoongArch::PseudoXVBNZ_W: |
4362 | CondOpc = LoongArch::XVSETALLNEZ_W; |
4363 | break; |
4364 | case LoongArch::PseudoXVBNZ_D: |
4365 | CondOpc = LoongArch::XVSETALLNEZ_D; |
4366 | break; |
4367 | } |
4368 | |
4369 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
4370 | const BasicBlock *LLVM_BB = BB->getBasicBlock(); |
4371 | DebugLoc DL = MI.getDebugLoc(); |
4372 | MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); |
4373 | MachineFunction::iterator It = ++BB->getIterator(); |
4374 | |
4375 | MachineFunction *F = BB->getParent(); |
4376 | MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(BB: LLVM_BB); |
4377 | MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(BB: LLVM_BB); |
4378 | MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(BB: LLVM_BB); |
4379 | |
4380 | F->insert(MBBI: It, MBB: FalseBB); |
4381 | F->insert(MBBI: It, MBB: TrueBB); |
4382 | F->insert(MBBI: It, MBB: SinkBB); |
4383 | |
// Transfer the remainder of BB and its successor edges to SinkBB.
4385 | SinkBB->splice(Where: SinkBB->end(), Other: BB, From: std::next(x: MI.getIterator()), To: BB->end()); |
4386 | SinkBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB); |
4387 | |
// Insert the real instruction into BB.
4389 | Register FCC = MRI.createVirtualRegister(RegClass: &LoongArch::CFRRegClass); |
4390 | BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: CondOpc), DestReg: FCC).addReg(RegNo: MI.getOperand(i: 1).getReg()); |
4391 | |
4392 | // Insert branch. |
4393 | BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::BCNEZ)).addReg(RegNo: FCC).addMBB(MBB: TrueBB); |
4394 | BB->addSuccessor(Succ: FalseBB); |
4395 | BB->addSuccessor(Succ: TrueBB); |
4396 | |
4397 | // FalseBB. |
4398 | Register RD1 = MRI.createVirtualRegister(RegClass: &LoongArch::GPRRegClass); |
4399 | BuildMI(BB: FalseBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::ADDI_W), DestReg: RD1) |
4400 | .addReg(RegNo: LoongArch::R0) |
4401 | .addImm(Val: 0); |
4402 | BuildMI(BB: FalseBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::PseudoBR)).addMBB(MBB: SinkBB); |
4403 | FalseBB->addSuccessor(Succ: SinkBB); |
4404 | |
4405 | // TrueBB. |
4406 | Register RD2 = MRI.createVirtualRegister(RegClass: &LoongArch::GPRRegClass); |
4407 | BuildMI(BB: TrueBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::ADDI_W), DestReg: RD2) |
4408 | .addReg(RegNo: LoongArch::R0) |
4409 | .addImm(Val: 1); |
4410 | TrueBB->addSuccessor(Succ: SinkBB); |
4411 | |
4412 | // SinkBB: merge the results. |
4413 | BuildMI(BB&: *SinkBB, I: SinkBB->begin(), MIMD: DL, MCID: TII->get(Opcode: LoongArch::PHI), |
4414 | DestReg: MI.getOperand(i: 0).getReg()) |
4415 | .addReg(RegNo: RD1) |
4416 | .addMBB(MBB: FalseBB) |
4417 | .addReg(RegNo: RD2) |
4418 | .addMBB(MBB: TrueBB); |
4419 | |
4420 | // The pseudo instruction is gone now. |
4421 | MI.eraseFromParent(); |
4422 | return SinkBB; |
4423 | } |
4424 | |
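// Lower PseudoXVINSGR2VR_{B,H}: LASX has no xvinsgr2vr.{b,h}, so the element
// is inserted via the 128-bit halves. Roughly: if Idx addresses the high
// half, shuffle that half down with xvpermi.q, insert with the 128-bit
// vinsgr2vr on the sub_128 subregister, and (for the high half) merge the
// updated half back into the destination with another xvpermi.q.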
4425 | static MachineBasicBlock * |
4426 | emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, |
4427 | const LoongArchSubtarget &Subtarget) { |
4428 | unsigned InsOp; |
4429 | unsigned HalfSize; |
4430 | switch (MI.getOpcode()) { |
4431 | default: |
4432 | llvm_unreachable("Unexpected opcode" ); |
4433 | case LoongArch::PseudoXVINSGR2VR_B: |
4434 | HalfSize = 16; |
4435 | InsOp = LoongArch::VINSGR2VR_B; |
4436 | break; |
4437 | case LoongArch::PseudoXVINSGR2VR_H: |
4438 | HalfSize = 8; |
4439 | InsOp = LoongArch::VINSGR2VR_H; |
4440 | break; |
4441 | } |
4442 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
4443 | const TargetRegisterClass *RC = &LoongArch::LASX256RegClass; |
4444 | const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass; |
4445 | DebugLoc DL = MI.getDebugLoc(); |
4446 | MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); |
4447 | // XDst = vector_insert XSrc, Elt, Idx |
4448 | Register XDst = MI.getOperand(i: 0).getReg(); |
4449 | Register XSrc = MI.getOperand(i: 1).getReg(); |
4450 | Register Elt = MI.getOperand(i: 2).getReg(); |
4451 | unsigned Idx = MI.getOperand(i: 3).getImm(); |
4452 | |
4453 | Register ScratchReg1 = XSrc; |
4454 | if (Idx >= HalfSize) { |
4455 | ScratchReg1 = MRI.createVirtualRegister(RegClass: RC); |
4456 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::XVPERMI_Q), DestReg: ScratchReg1) |
4457 | .addReg(RegNo: XSrc) |
4458 | .addReg(RegNo: XSrc) |
4459 | .addImm(Val: 1); |
4460 | } |
4461 | |
4462 | Register ScratchSubReg1 = MRI.createVirtualRegister(RegClass: SubRC); |
4463 | Register ScratchSubReg2 = MRI.createVirtualRegister(RegClass: SubRC); |
4464 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::COPY), DestReg: ScratchSubReg1) |
4465 | .addReg(RegNo: ScratchReg1, flags: 0, SubReg: LoongArch::sub_128); |
4466 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: InsOp), DestReg: ScratchSubReg2) |
4467 | .addReg(RegNo: ScratchSubReg1) |
4468 | .addReg(RegNo: Elt) |
4469 | .addImm(Val: Idx >= HalfSize ? Idx - HalfSize : Idx); |
4470 | |
4471 | Register ScratchReg2 = XDst; |
4472 | if (Idx >= HalfSize) |
4473 | ScratchReg2 = MRI.createVirtualRegister(RegClass: RC); |
4474 | |
4475 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::SUBREG_TO_REG), DestReg: ScratchReg2) |
4476 | .addImm(Val: 0) |
4477 | .addReg(RegNo: ScratchSubReg2) |
4478 | .addImm(Val: LoongArch::sub_128); |
4479 | |
4480 | if (Idx >= HalfSize) |
4481 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::XVPERMI_Q), DestReg: XDst) |
4482 | .addReg(RegNo: XSrc) |
4483 | .addReg(RegNo: ScratchReg2) |
4484 | .addImm(Val: 2); |
4485 | |
4486 | MI.eraseFromParent(); |
4487 | return BB; |
4488 | } |
4489 | |
4490 | MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( |
4491 | MachineInstr &MI, MachineBasicBlock *BB) const { |
4492 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
4493 | DebugLoc DL = MI.getDebugLoc(); |
4494 | |
4495 | switch (MI.getOpcode()) { |
4496 | default: |
4497 | llvm_unreachable("Unexpected instr type to insert" ); |
4498 | case LoongArch::DIV_W: |
4499 | case LoongArch::DIV_WU: |
4500 | case LoongArch::MOD_W: |
4501 | case LoongArch::MOD_WU: |
4502 | case LoongArch::DIV_D: |
4503 | case LoongArch::DIV_DU: |
4504 | case LoongArch::MOD_D: |
4505 | case LoongArch::MOD_DU: |
4506 | return insertDivByZeroTrap(MI, MBB: BB); |
4508 | case LoongArch::WRFCSR: { |
4509 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::MOVGR2FCSR), |
4510 | DestReg: LoongArch::FCSR0 + MI.getOperand(i: 0).getImm()) |
4511 | .addReg(RegNo: MI.getOperand(i: 1).getReg()); |
4512 | MI.eraseFromParent(); |
4513 | return BB; |
4514 | } |
4515 | case LoongArch::RDFCSR: { |
4516 | MachineInstr *ReadFCSR = |
4517 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::MOVFCSR2GR), |
4518 | DestReg: MI.getOperand(i: 0).getReg()) |
4519 | .addReg(RegNo: LoongArch::FCSR0 + MI.getOperand(i: 1).getImm()); |
4520 | ReadFCSR->getOperand(i: 1).setIsUndef(); |
4521 | MI.eraseFromParent(); |
4522 | return BB; |
4523 | } |
4524 | case LoongArch::PseudoVBZ: |
4525 | case LoongArch::PseudoVBZ_B: |
4526 | case LoongArch::PseudoVBZ_H: |
4527 | case LoongArch::PseudoVBZ_W: |
4528 | case LoongArch::PseudoVBZ_D: |
4529 | case LoongArch::PseudoVBNZ: |
4530 | case LoongArch::PseudoVBNZ_B: |
4531 | case LoongArch::PseudoVBNZ_H: |
4532 | case LoongArch::PseudoVBNZ_W: |
4533 | case LoongArch::PseudoVBNZ_D: |
4534 | case LoongArch::PseudoXVBZ: |
4535 | case LoongArch::PseudoXVBZ_B: |
4536 | case LoongArch::PseudoXVBZ_H: |
4537 | case LoongArch::PseudoXVBZ_W: |
4538 | case LoongArch::PseudoXVBZ_D: |
4539 | case LoongArch::PseudoXVBNZ: |
4540 | case LoongArch::PseudoXVBNZ_B: |
4541 | case LoongArch::PseudoXVBNZ_H: |
4542 | case LoongArch::PseudoXVBNZ_W: |
4543 | case LoongArch::PseudoXVBNZ_D: |
4544 | return emitVecCondBranchPseudo(MI, BB, Subtarget); |
4545 | case LoongArch::PseudoXVINSGR2VR_B: |
4546 | case LoongArch::PseudoXVINSGR2VR_H: |
4547 | return emitPseudoXVINSGR2VR(MI, BB, Subtarget); |
4548 | } |
4549 | } |
4550 | |
4551 | bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses( |
4552 | EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, |
4553 | unsigned *Fast) const { |
4554 | if (!Subtarget.hasUAL()) |
4555 | return false; |
4556 | |
4557 | // TODO: set reasonable speed number. |
4558 | if (Fast) |
4559 | *Fast = 1; |
4560 | return true; |
4561 | } |
4562 | |
4563 | const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { |
4564 | switch ((LoongArchISD::NodeType)Opcode) { |
4565 | case LoongArchISD::FIRST_NUMBER: |
4566 | break; |
4567 | |
4568 | #define NODE_NAME_CASE(node) \ |
4569 | case LoongArchISD::node: \ |
4570 | return "LoongArchISD::" #node; |
4571 | |
4572 | // TODO: Add more target-dependent nodes later. |
4573 | NODE_NAME_CASE(CALL) |
4574 | NODE_NAME_CASE(CALL_MEDIUM) |
4575 | NODE_NAME_CASE(CALL_LARGE) |
4576 | NODE_NAME_CASE(RET) |
4577 | NODE_NAME_CASE(TAIL) |
4578 | NODE_NAME_CASE(TAIL_MEDIUM) |
4579 | NODE_NAME_CASE(TAIL_LARGE) |
4580 | NODE_NAME_CASE(SLL_W) |
4581 | NODE_NAME_CASE(SRA_W) |
4582 | NODE_NAME_CASE(SRL_W) |
4583 | NODE_NAME_CASE(BSTRINS) |
4584 | NODE_NAME_CASE(BSTRPICK) |
4585 | NODE_NAME_CASE(MOVGR2FR_W_LA64) |
4586 | NODE_NAME_CASE(MOVFR2GR_S_LA64) |
4587 | NODE_NAME_CASE(FTINT) |
4588 | NODE_NAME_CASE(REVB_2H) |
4589 | NODE_NAME_CASE(REVB_2W) |
4590 | NODE_NAME_CASE(BITREV_4B) |
4591 | NODE_NAME_CASE(BITREV_W) |
4592 | NODE_NAME_CASE(ROTR_W) |
4593 | NODE_NAME_CASE(ROTL_W) |
4594 | NODE_NAME_CASE(DIV_WU) |
4595 | NODE_NAME_CASE(MOD_WU) |
4596 | NODE_NAME_CASE(CLZ_W) |
4597 | NODE_NAME_CASE(CTZ_W) |
4598 | NODE_NAME_CASE(DBAR) |
4599 | NODE_NAME_CASE(IBAR) |
4600 | NODE_NAME_CASE(BREAK) |
4601 | NODE_NAME_CASE(SYSCALL) |
4602 | NODE_NAME_CASE(CRC_W_B_W) |
4603 | NODE_NAME_CASE(CRC_W_H_W) |
4604 | NODE_NAME_CASE(CRC_W_W_W) |
4605 | NODE_NAME_CASE(CRC_W_D_W) |
4606 | NODE_NAME_CASE(CRCC_W_B_W) |
4607 | NODE_NAME_CASE(CRCC_W_H_W) |
4608 | NODE_NAME_CASE(CRCC_W_W_W) |
4609 | NODE_NAME_CASE(CRCC_W_D_W) |
4610 | NODE_NAME_CASE(CSRRD) |
4611 | NODE_NAME_CASE(CSRWR) |
4612 | NODE_NAME_CASE(CSRXCHG) |
4613 | NODE_NAME_CASE(IOCSRRD_B) |
4614 | NODE_NAME_CASE(IOCSRRD_H) |
4615 | NODE_NAME_CASE(IOCSRRD_W) |
4616 | NODE_NAME_CASE(IOCSRRD_D) |
4617 | NODE_NAME_CASE(IOCSRWR_B) |
4618 | NODE_NAME_CASE(IOCSRWR_H) |
4619 | NODE_NAME_CASE(IOCSRWR_W) |
4620 | NODE_NAME_CASE(IOCSRWR_D) |
4621 | NODE_NAME_CASE(CPUCFG) |
4622 | NODE_NAME_CASE(MOVGR2FCSR) |
4623 | NODE_NAME_CASE(MOVFCSR2GR) |
4624 | NODE_NAME_CASE(CACOP_D) |
4625 | NODE_NAME_CASE(CACOP_W) |
4626 | NODE_NAME_CASE(VSHUF) |
4627 | NODE_NAME_CASE(VPICKEV) |
4628 | NODE_NAME_CASE(VPICKOD) |
4629 | NODE_NAME_CASE(VPACKEV) |
4630 | NODE_NAME_CASE(VPACKOD) |
4631 | NODE_NAME_CASE(VILVL) |
4632 | NODE_NAME_CASE(VILVH) |
4633 | NODE_NAME_CASE(VSHUF4I) |
4634 | NODE_NAME_CASE(VREPLVEI) |
4635 | NODE_NAME_CASE(XVPERMI) |
4636 | NODE_NAME_CASE(VPICK_SEXT_ELT) |
4637 | NODE_NAME_CASE(VPICK_ZEXT_ELT) |
4638 | NODE_NAME_CASE(VREPLVE) |
4639 | NODE_NAME_CASE(VALL_ZERO) |
4640 | NODE_NAME_CASE(VANY_ZERO) |
4641 | NODE_NAME_CASE(VALL_NONZERO) |
4642 | NODE_NAME_CASE(VANY_NONZERO) |
4643 | } |
4644 | #undef NODE_NAME_CASE |
4645 | return nullptr; |
4646 | } |
4647 | |
4648 | //===----------------------------------------------------------------------===// |
4649 | // Calling Convention Implementation |
4650 | //===----------------------------------------------------------------------===// |
4651 | |
// Eight general-purpose registers a0-a7 are used for passing integer
// arguments, with a0-a1 reused to return values. Generally, the GPRs are used
// to pass fixed-point arguments, and floating-point arguments when no FPR is
// available or with the soft-float ABI.
4656 | const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6, |
4657 | LoongArch::R7, LoongArch::R8, LoongArch::R9, |
4658 | LoongArch::R10, LoongArch::R11}; |
// Eight floating-point registers fa0-fa7 are used for passing floating-point
// arguments, and fa0-fa1 are also used to return values.
4661 | const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2, |
4662 | LoongArch::F3, LoongArch::F4, LoongArch::F5, |
4663 | LoongArch::F6, LoongArch::F7}; |
4664 | // FPR32 and FPR64 alias each other. |
4665 | const MCPhysReg ArgFPR64s[] = { |
4666 | LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64, |
4667 | LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64}; |
4668 | |
4669 | const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2, |
4670 | LoongArch::VR3, LoongArch::VR4, LoongArch::VR5, |
4671 | LoongArch::VR6, LoongArch::VR7}; |
4672 | |
4673 | const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2, |
4674 | LoongArch::XR3, LoongArch::XR4, LoongArch::XR5, |
4675 | LoongArch::XR6, LoongArch::XR7}; |
4676 | |
4677 | // Pass a 2*GRLen argument that has been split into two GRLen values through |
4678 | // registers or the stack as necessary. |
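// For example (ILP32 sketch): an i64 argument is split into two i32 halves.
// The first half takes the next free argument GPR if any (otherwise both
// halves go on the stack, the first slot aligned to the argument's original
// alignment), and the second half takes the following GPR if available or a
// GRLen-aligned stack slot otherwise.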
4679 | static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, |
4680 | CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, |
4681 | unsigned ValNo2, MVT ValVT2, MVT LocVT2, |
4682 | ISD::ArgFlagsTy ArgFlags2) { |
4683 | unsigned GRLenInBytes = GRLen / 8; |
4684 | if (Register Reg = State.AllocateReg(Regs: ArgGPRs)) { |
4685 | // At least one half can be passed via register. |
4686 | State.addLoc(V: CCValAssign::getReg(ValNo: VA1.getValNo(), ValVT: VA1.getValVT(), RegNo: Reg, |
4687 | LocVT: VA1.getLocVT(), HTP: CCValAssign::Full)); |
4688 | } else { |
4689 | // Both halves must be passed on the stack, with proper alignment. |
4690 | Align StackAlign = |
4691 | std::max(a: Align(GRLenInBytes), b: ArgFlags1.getNonZeroOrigAlign()); |
4692 | State.addLoc( |
4693 | V: CCValAssign::getMem(ValNo: VA1.getValNo(), ValVT: VA1.getValVT(), |
4694 | Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: StackAlign), |
4695 | LocVT: VA1.getLocVT(), HTP: CCValAssign::Full)); |
4696 | State.addLoc(V: CCValAssign::getMem( |
4697 | ValNo: ValNo2, ValVT: ValVT2, Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: Align(GRLenInBytes)), |
4698 | LocVT: LocVT2, HTP: CCValAssign::Full)); |
4699 | return false; |
4700 | } |
4701 | if (Register Reg = State.AllocateReg(Regs: ArgGPRs)) { |
4702 | // The second half can also be passed via register. |
4703 | State.addLoc( |
4704 | V: CCValAssign::getReg(ValNo: ValNo2, ValVT: ValVT2, RegNo: Reg, LocVT: LocVT2, HTP: CCValAssign::Full)); |
4705 | } else { |
4706 | // The second half is passed via the stack, without additional alignment. |
4707 | State.addLoc(V: CCValAssign::getMem( |
4708 | ValNo: ValNo2, ValVT: ValVT2, Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: Align(GRLenInBytes)), |
4709 | LocVT: LocVT2, HTP: CCValAssign::Full)); |
4710 | } |
4711 | return false; |
4712 | } |
4713 | |
4714 | // Implements the LoongArch calling convention. Returns true upon failure. |
4715 | static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, |
4716 | unsigned ValNo, MVT ValVT, |
4717 | CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, |
4718 | CCState &State, bool IsFixed, bool IsRet, |
4719 | Type *OrigTy) { |
4720 | unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits(); |
assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen" );
4722 | MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64; |
4723 | MVT LocVT = ValVT; |
4724 | |
4725 | // Any return value split into more than two values can't be returned |
4726 | // directly. |
4727 | if (IsRet && ValNo > 1) |
4728 | return true; |
4729 | |
// Pass floating-point values in GPRs if the argument is variadic, if no FPR
// is available, or under a soft-float ABI.
4731 | bool UseGPRForFloat = true; |
4732 | |
4733 | switch (ABI) { |
4734 | default: |
4735 | llvm_unreachable("Unexpected ABI" ); |
4736 | break; |
4737 | case LoongArchABI::ABI_ILP32F: |
4738 | case LoongArchABI::ABI_LP64F: |
4739 | case LoongArchABI::ABI_ILP32D: |
4740 | case LoongArchABI::ABI_LP64D: |
4741 | UseGPRForFloat = !IsFixed; |
4742 | break; |
4743 | case LoongArchABI::ABI_ILP32S: |
4744 | case LoongArchABI::ABI_LP64S: |
4745 | break; |
4746 | } |
4747 | |
4748 | // FPR32 and FPR64 alias each other. |
4749 | if (State.getFirstUnallocated(Regs: ArgFPR32s) == std::size(ArgFPR32s)) |
4750 | UseGPRForFloat = true; |
4751 | |
4752 | if (UseGPRForFloat && ValVT == MVT::f32) { |
4753 | LocVT = GRLenVT; |
4754 | LocInfo = CCValAssign::BCvt; |
4755 | } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) { |
4756 | LocVT = MVT::i64; |
4757 | LocInfo = CCValAssign::BCvt; |
4758 | } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) { |
4759 | // TODO: Handle passing f64 on LA32 with D feature. |
4760 | report_fatal_error(reason: "Passing f64 with GPR on LA32 is undefined" ); |
4761 | } |
4762 | |
4763 | // If this is a variadic argument, the LoongArch calling convention requires |
4764 | // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8 |
4765 | // byte alignment. An aligned register should be used regardless of whether |
4766 | // the original argument was split during legalisation or not. The argument |
4767 | // will not be passed by registers if the original type is larger than |
4768 | // 2*GRLen, so the register alignment rule does not apply. |
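// For example, on LA32 a variadic i64 (8-byte size and alignment) whose first
// half would otherwise land in an odd-numbered register such as a3 causes a3
// to be skipped, so the pair is passed in a4/a5 instead.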
4769 | unsigned TwoGRLenInBytes = (2 * GRLen) / 8; |
4770 | if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes && |
4771 | DL.getTypeAllocSize(Ty: OrigTy) == TwoGRLenInBytes) { |
4772 | unsigned RegIdx = State.getFirstUnallocated(Regs: ArgGPRs); |
4773 | // Skip 'odd' register if necessary. |
4774 | if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1) |
4775 | State.AllocateReg(Regs: ArgGPRs); |
4776 | } |
4777 | |
4778 | SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs(); |
4779 | SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags = |
4780 | State.getPendingArgFlags(); |
4781 | |
4782 | assert(PendingLocs.size() == PendingArgFlags.size() && |
4783 | "PendingLocs and PendingArgFlags out of sync" ); |
4784 | |
4785 | // Split arguments might be passed indirectly, so keep track of the pending |
4786 | // values. |
4787 | if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) { |
4788 | LocVT = GRLenVT; |
4789 | LocInfo = CCValAssign::Indirect; |
4790 | PendingLocs.push_back( |
4791 | Elt: CCValAssign::getPending(ValNo, ValVT, LocVT, HTP: LocInfo)); |
4792 | PendingArgFlags.push_back(Elt: ArgFlags); |
4793 | if (!ArgFlags.isSplitEnd()) { |
4794 | return false; |
4795 | } |
4796 | } |
4797 | |
4798 | // If the split argument only had two elements, it should be passed directly |
4799 | // in registers or on the stack. |
4800 | if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() && |
4801 | PendingLocs.size() <= 2) { |
4802 | assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()" ); |
4803 | // Apply the normal calling convention rules to the first half of the |
4804 | // split argument. |
4805 | CCValAssign VA = PendingLocs[0]; |
4806 | ISD::ArgFlagsTy AF = PendingArgFlags[0]; |
4807 | PendingLocs.clear(); |
4808 | PendingArgFlags.clear(); |
4809 | return CC_LoongArchAssign2GRLen(GRLen, State, VA1: VA, ArgFlags1: AF, ValNo2: ValNo, ValVT2: ValVT, LocVT2: LocVT, |
4810 | ArgFlags2: ArgFlags); |
4811 | } |
4812 | |
4813 | // Allocate to a register if possible, or else a stack slot. |
4814 | Register Reg; |
4815 | unsigned StoreSizeBytes = GRLen / 8; |
4816 | Align StackAlign = Align(GRLen / 8); |
4817 | |
4818 | if (ValVT == MVT::f32 && !UseGPRForFloat) |
4819 | Reg = State.AllocateReg(Regs: ArgFPR32s); |
4820 | else if (ValVT == MVT::f64 && !UseGPRForFloat) |
4821 | Reg = State.AllocateReg(Regs: ArgFPR64s); |
4822 | else if (ValVT.is128BitVector()) |
4823 | Reg = State.AllocateReg(Regs: ArgVRs); |
4824 | else if (ValVT.is256BitVector()) |
4825 | Reg = State.AllocateReg(Regs: ArgXRs); |
4826 | else |
4827 | Reg = State.AllocateReg(Regs: ArgGPRs); |
4828 | |
4829 | unsigned StackOffset = |
4830 | Reg ? 0 : State.AllocateStack(Size: StoreSizeBytes, Alignment: StackAlign); |
4831 | |
4832 | // If we reach this point and PendingLocs is non-empty, we must be at the |
4833 | // end of a split argument that must be passed indirectly. |
4834 | if (!PendingLocs.empty()) { |
4835 | assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()" ); |
4836 | assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()" ); |
4837 | for (auto &It : PendingLocs) { |
4838 | if (Reg) |
4839 | It.convertToReg(RegNo: Reg); |
4840 | else |
4841 | It.convertToMem(Offset: StackOffset); |
4842 | State.addLoc(V: It); |
4843 | } |
4844 | PendingLocs.clear(); |
4845 | PendingArgFlags.clear(); |
4846 | return false; |
4847 | } |
assert((!UseGPRForFloat || LocVT == GRLenVT) &&
"Expected a GRLenVT at this stage" );
4850 | |
4851 | if (Reg) { |
4852 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
4853 | return false; |
4854 | } |
4855 | |
4856 | // When a floating-point value is passed on the stack, no bit-cast is needed. |
4857 | if (ValVT.isFloatingPoint()) { |
4858 | LocVT = ValVT; |
4859 | LocInfo = CCValAssign::Full; |
4860 | } |
4861 | |
4862 | State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo)); |
4863 | return false; |
4864 | } |
4865 | |
4866 | void LoongArchTargetLowering::analyzeInputArgs( |
4867 | MachineFunction &MF, CCState &CCInfo, |
4868 | const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet, |
4869 | LoongArchCCAssignFn Fn) const { |
4870 | FunctionType *FType = MF.getFunction().getFunctionType(); |
4871 | for (unsigned i = 0, e = Ins.size(); i != e; ++i) { |
4872 | MVT ArgVT = Ins[i].VT; |
4873 | Type *ArgTy = nullptr; |
4874 | if (IsRet) |
4875 | ArgTy = FType->getReturnType(); |
4876 | else if (Ins[i].isOrigArg()) |
4877 | ArgTy = FType->getParamType(i: Ins[i].getOrigArgIndex()); |
4878 | LoongArchABI::ABI ABI = |
4879 | MF.getSubtarget<LoongArchSubtarget>().getTargetABI(); |
4880 | if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags, |
4881 | CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) { |
4882 | LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT |
4883 | << '\n'); |
4884 | llvm_unreachable("" ); |
4885 | } |
4886 | } |
4887 | } |
4888 | |
4889 | void LoongArchTargetLowering::analyzeOutputArgs( |
4890 | MachineFunction &MF, CCState &CCInfo, |
4891 | const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet, |
4892 | CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const { |
4893 | for (unsigned i = 0, e = Outs.size(); i != e; ++i) { |
4894 | MVT ArgVT = Outs[i].VT; |
4895 | Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; |
4896 | LoongArchABI::ABI ABI = |
4897 | MF.getSubtarget<LoongArchSubtarget>().getTargetABI(); |
4898 | if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags, |
4899 | CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) { |
4900 | LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT |
4901 | << "\n" ); |
4902 | llvm_unreachable("" ); |
4903 | } |
4904 | } |
4905 | } |
4906 | |
4907 | // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect |
4908 | // values. |
4909 | static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, |
4910 | const CCValAssign &VA, const SDLoc &DL) { |
4911 | switch (VA.getLocInfo()) { |
4912 | default: |
4913 | llvm_unreachable("Unexpected CCValAssign::LocInfo" ); |
4914 | case CCValAssign::Full: |
4915 | case CCValAssign::Indirect: |
4916 | break; |
4917 | case CCValAssign::BCvt: |
4918 | if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) |
4919 | Val = DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64, DL, VT: MVT::f32, Operand: Val); |
4920 | else |
4921 | Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: VA.getValVT(), Operand: Val); |
4922 | break; |
4923 | } |
4924 | return Val; |
4925 | } |
4926 | |
4927 | static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, |
4928 | const CCValAssign &VA, const SDLoc &DL, |
4929 | const ISD::InputArg &In, |
4930 | const LoongArchTargetLowering &TLI) { |
4931 | MachineFunction &MF = DAG.getMachineFunction(); |
4932 | MachineRegisterInfo &RegInfo = MF.getRegInfo(); |
4933 | EVT LocVT = VA.getLocVT(); |
4934 | SDValue Val; |
4935 | const TargetRegisterClass *RC = TLI.getRegClassFor(VT: LocVT.getSimpleVT()); |
4936 | Register VReg = RegInfo.createVirtualRegister(RegClass: RC); |
4937 | RegInfo.addLiveIn(Reg: VA.getLocReg(), vreg: VReg); |
4938 | Val = DAG.getCopyFromReg(Chain, dl: DL, Reg: VReg, VT: LocVT); |
4939 | |
4940 | // If input is sign extended from 32 bits, note it for the OptW pass. |
4941 | if (In.isOrigArg()) { |
4942 | Argument *OrigArg = MF.getFunction().getArg(i: In.getOrigArgIndex()); |
4943 | if (OrigArg->getType()->isIntegerTy()) { |
4944 | unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth(); |
4945 | // An input zero extended from i31 can also be considered sign extended. |
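// (Bit 31 is then known to be zero, so the value is also properly
// sign-extended from 32 bits.)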
4946 | if ((BitWidth <= 32 && In.Flags.isSExt()) || |
4947 | (BitWidth < 32 && In.Flags.isZExt())) { |
4948 | LoongArchMachineFunctionInfo *LAFI = |
4949 | MF.getInfo<LoongArchMachineFunctionInfo>(); |
4950 | LAFI->addSExt32Register(Reg: VReg); |
4951 | } |
4952 | } |
4953 | } |
4954 | |
4955 | return convertLocVTToValVT(DAG, Val, VA, DL); |
4956 | } |
4957 | |
4958 | // The caller is responsible for loading the full value if the argument is |
4959 | // passed with CCValAssign::Indirect. |
4960 | static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, |
4961 | const CCValAssign &VA, const SDLoc &DL) { |
4962 | MachineFunction &MF = DAG.getMachineFunction(); |
4963 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
4964 | EVT ValVT = VA.getValVT(); |
4965 | int FI = MFI.CreateFixedObject(Size: ValVT.getStoreSize(), SPOffset: VA.getLocMemOffset(), |
4966 | /*IsImmutable=*/true); |
4967 | SDValue FIN = DAG.getFrameIndex( |
4968 | FI, VT: MVT::getIntegerVT(BitWidth: DAG.getDataLayout().getPointerSizeInBits(AS: 0))); |
4969 | |
4970 | ISD::LoadExtType ExtType; |
4971 | switch (VA.getLocInfo()) { |
4972 | default: |
4973 | llvm_unreachable("Unexpected CCValAssign::LocInfo" ); |
4974 | case CCValAssign::Full: |
4975 | case CCValAssign::Indirect: |
4976 | case CCValAssign::BCvt: |
4977 | ExtType = ISD::NON_EXTLOAD; |
4978 | break; |
4979 | } |
4980 | return DAG.getExtLoad( |
4981 | ExtType, dl: DL, VT: VA.getLocVT(), Chain, Ptr: FIN, |
4982 | PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI), MemVT: ValVT); |
4983 | } |
4984 | |
4985 | static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, |
4986 | const CCValAssign &VA, const SDLoc &DL) { |
4987 | EVT LocVT = VA.getLocVT(); |
4988 | |
4989 | switch (VA.getLocInfo()) { |
4990 | default: |
4991 | llvm_unreachable("Unexpected CCValAssign::LocInfo" ); |
4992 | case CCValAssign::Full: |
4993 | break; |
4994 | case CCValAssign::BCvt: |
4995 | if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) |
4996 | Val = DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Val); |
4997 | else |
4998 | Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: LocVT, Operand: Val); |
4999 | break; |
5000 | } |
5001 | return Val; |
5002 | } |
5003 | |
5004 | static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, |
5005 | CCValAssign::LocInfo LocInfo, |
5006 | ISD::ArgFlagsTy ArgFlags, CCState &State) { |
5007 | if (LocVT == MVT::i32 || LocVT == MVT::i64) { |
5008 | // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim |
5009 | // s0 s1 s2 s3 s4 s5 s6 s7 s8 |
5010 | static const MCPhysReg GPRList[] = { |
5011 | LoongArch::R23, LoongArch::R24, LoongArch::R25, |
5012 | LoongArch::R26, LoongArch::R27, LoongArch::R28, |
5013 | LoongArch::R29, LoongArch::R30, LoongArch::R31}; |
5014 | if (unsigned Reg = State.AllocateReg(Regs: GPRList)) { |
5015 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
5016 | return false; |
5017 | } |
5018 | } |
5019 | |
5020 | if (LocVT == MVT::f32) { |
5021 | // Pass in STG registers: F1, F2, F3, F4 |
5022 | // fs0,fs1,fs2,fs3 |
5023 | static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25, |
5024 | LoongArch::F26, LoongArch::F27}; |
5025 | if (unsigned Reg = State.AllocateReg(Regs: FPR32List)) { |
5026 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
5027 | return false; |
5028 | } |
5029 | } |
5030 | |
5031 | if (LocVT == MVT::f64) { |
5032 | // Pass in STG registers: D1, D2, D3, D4 |
5033 | // fs4,fs5,fs6,fs7 |
5034 | static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64, |
5035 | LoongArch::F30_64, LoongArch::F31_64}; |
5036 | if (unsigned Reg = State.AllocateReg(Regs: FPR64List)) { |
5037 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
5038 | return false; |
5039 | } |
5040 | } |
5041 | |
5042 | report_fatal_error(reason: "No registers left in GHC calling convention" ); |
5043 | return true; |
5044 | } |
5045 | |
5046 | // Transform physical registers into virtual registers. |
5047 | SDValue LoongArchTargetLowering::LowerFormalArguments( |
5048 | SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, |
5049 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, |
5050 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { |
5051 | |
5052 | MachineFunction &MF = DAG.getMachineFunction(); |
5053 | |
5054 | switch (CallConv) { |
5055 | default: |
5056 | llvm_unreachable("Unsupported calling convention" ); |
5057 | case CallingConv::C: |
5058 | case CallingConv::Fast: |
5059 | break; |
5060 | case CallingConv::GHC: |
5061 | if (!MF.getSubtarget().hasFeature(Feature: LoongArch::FeatureBasicF) || |
5062 | !MF.getSubtarget().hasFeature(Feature: LoongArch::FeatureBasicD)) |
5063 | report_fatal_error( |
5064 | reason: "GHC calling convention requires the F and D extensions" ); |
5065 | } |
5066 | |
5067 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
5068 | MVT GRLenVT = Subtarget.getGRLenVT(); |
5069 | unsigned GRLenInBytes = Subtarget.getGRLen() / 8; |
// Used with varargs to accumulate store chains.
5071 | std::vector<SDValue> OutChains; |
5072 | |
5073 | // Assign locations to all of the incoming arguments. |
5074 | SmallVector<CCValAssign> ArgLocs; |
5075 | CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); |
5076 | |
5077 | if (CallConv == CallingConv::GHC) |
5078 | CCInfo.AnalyzeFormalArguments(Ins, Fn: CC_LoongArch_GHC); |
5079 | else |
5080 | analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, Fn: CC_LoongArch); |
5081 | |
5082 | for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { |
5083 | CCValAssign &VA = ArgLocs[i]; |
5084 | SDValue ArgValue; |
5085 | if (VA.isRegLoc()) |
5086 | ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, In: Ins[i], TLI: *this); |
5087 | else |
5088 | ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL); |
5089 | if (VA.getLocInfo() == CCValAssign::Indirect) { |
5090 | // If the original argument was split and passed by reference, we need to |
5091 | // load all parts of it here (using the same address). |
5092 | InVals.push_back(Elt: DAG.getLoad(VT: VA.getValVT(), dl: DL, Chain, Ptr: ArgValue, |
5093 | PtrInfo: MachinePointerInfo())); |
5094 | unsigned ArgIndex = Ins[i].OrigArgIndex; |
5095 | unsigned ArgPartOffset = Ins[i].PartOffset; |
5096 | assert(ArgPartOffset == 0); |
5097 | while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) { |
5098 | CCValAssign &PartVA = ArgLocs[i + 1]; |
5099 | unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset; |
5100 | SDValue Offset = DAG.getIntPtrConstant(Val: PartOffset, DL); |
5101 | SDValue Address = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: ArgValue, N2: Offset); |
5102 | InVals.push_back(Elt: DAG.getLoad(VT: PartVA.getValVT(), dl: DL, Chain, Ptr: Address, |
5103 | PtrInfo: MachinePointerInfo())); |
5104 | ++i; |
5105 | } |
5106 | continue; |
5107 | } |
5108 | InVals.push_back(Elt: ArgValue); |
5109 | } |
5110 | |
5111 | if (IsVarArg) { |
5112 | ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs); |
5113 | unsigned Idx = CCInfo.getFirstUnallocated(Regs: ArgRegs); |
5114 | const TargetRegisterClass *RC = &LoongArch::GPRRegClass; |
5115 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
5116 | MachineRegisterInfo &RegInfo = MF.getRegInfo(); |
5117 | auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>(); |
5118 | |
5119 | // Offset of the first variable argument from stack pointer, and size of |
5120 | // the vararg save area. For now, the varargs save area is either zero or |
5121 | // large enough to hold a0-a7. |
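// For example, on LA64 with two named GPR arguments (a0/a1 consumed),
// a2-a7 are saved below the incoming stack pointer: VarArgsSaveSize is 48
// and VaArgOffset is -48, with a2 stored at -48, a3 at -40, ..., a7 at -8.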
5122 | int VaArgOffset, VarArgsSaveSize; |
5123 | |
5124 | // If all registers are allocated, then all varargs must be passed on the |
5125 | // stack and we don't need to save any argregs. |
5126 | if (ArgRegs.size() == Idx) { |
5127 | VaArgOffset = CCInfo.getStackSize(); |
5128 | VarArgsSaveSize = 0; |
5129 | } else { |
5130 | VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx); |
5131 | VaArgOffset = -VarArgsSaveSize; |
5132 | } |
5133 | |
5134 | // Record the frame index of the first variable argument |
// which is needed for lowering VASTART.
5136 | int FI = MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset, IsImmutable: true); |
5137 | LoongArchFI->setVarArgsFrameIndex(FI); |
5138 | |
5139 | // If saving an odd number of registers then create an extra stack slot to |
5140 | // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures |
// offsets to even-numbered registers remain 2*GRLen-aligned.
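// For example, with Idx == 3 on LA64, a3-a7 (40 bytes) are saved and one
// extra 8-byte slot is added below them so the save area stays
// 2*GRLen-aligned.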
5142 | if (Idx % 2) { |
5143 | MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset - (int)GRLenInBytes, |
5144 | IsImmutable: true); |
5145 | VarArgsSaveSize += GRLenInBytes; |
5146 | } |
5147 | |
5148 | // Copy the integer registers that may have been used for passing varargs |
5149 | // to the vararg save area. |
5150 | for (unsigned I = Idx; I < ArgRegs.size(); |
5151 | ++I, VaArgOffset += GRLenInBytes) { |
5152 | const Register Reg = RegInfo.createVirtualRegister(RegClass: RC); |
5153 | RegInfo.addLiveIn(Reg: ArgRegs[I], vreg: Reg); |
5154 | SDValue ArgValue = DAG.getCopyFromReg(Chain, dl: DL, Reg, VT: GRLenVT); |
5155 | FI = MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset, IsImmutable: true); |
5156 | SDValue PtrOff = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout())); |
5157 | SDValue Store = DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: PtrOff, |
5158 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)); |
5159 | cast<StoreSDNode>(Val: Store.getNode()) |
5160 | ->getMemOperand() |
5161 | ->setValue((Value *)nullptr); |
5162 | OutChains.push_back(x: Store); |
5163 | } |
5164 | LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize); |
5165 | } |
5166 | |
5167 | // All stores are grouped in one node to allow the matching between |
5168 | // the size of Ins and InVals. This only happens for vararg functions. |
5169 | if (!OutChains.empty()) { |
5170 | OutChains.push_back(x: Chain); |
5171 | Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains); |
5172 | } |
5173 | |
5174 | return Chain; |
5175 | } |
5176 | |
5177 | bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { |
5178 | return CI->isTailCall(); |
5179 | } |
5180 | |
// Check if the return value is used only as a return value, as otherwise
5182 | // we can't perform a tail-call. |
5183 | bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N, |
5184 | SDValue &Chain) const { |
5185 | if (N->getNumValues() != 1) |
5186 | return false; |
5187 | if (!N->hasNUsesOfValue(NUses: 1, Value: 0)) |
5188 | return false; |
5189 | |
5190 | SDNode *Copy = *N->use_begin(); |
5191 | if (Copy->getOpcode() != ISD::CopyToReg) |
5192 | return false; |
5193 | |
5194 | // If the ISD::CopyToReg has a glue operand, we conservatively assume it |
5195 | // isn't safe to perform a tail call. |
5196 | if (Copy->getGluedNode()) |
5197 | return false; |
5198 | |
5199 | // The copy must be used by a LoongArchISD::RET, and nothing else. |
5200 | bool HasRet = false; |
5201 | for (SDNode *Node : Copy->uses()) { |
5202 | if (Node->getOpcode() != LoongArchISD::RET) |
5203 | return false; |
5204 | HasRet = true; |
5205 | } |
5206 | |
5207 | if (!HasRet) |
5208 | return false; |
5209 | |
5210 | Chain = Copy->getOperand(Num: 0); |
5211 | return true; |
5212 | } |
5213 | |
5214 | // Check whether the call is eligible for tail call optimization. |
5215 | bool LoongArchTargetLowering::isEligibleForTailCallOptimization( |
5216 | CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, |
5217 | const SmallVectorImpl<CCValAssign> &ArgLocs) const { |
5218 | |
5219 | auto CalleeCC = CLI.CallConv; |
5220 | auto &Outs = CLI.Outs; |
5221 | auto &Caller = MF.getFunction(); |
5222 | auto CallerCC = Caller.getCallingConv(); |
5223 | |
5224 | // Do not tail call opt if the stack is used to pass parameters. |
5225 | if (CCInfo.getStackSize() != 0) |
5226 | return false; |
5227 | |
5228 | // Do not tail call opt if any parameters need to be passed indirectly. |
5229 | for (auto &VA : ArgLocs) |
5230 | if (VA.getLocInfo() == CCValAssign::Indirect) |
5231 | return false; |
5232 | |
5233 | // Do not tail call opt if either caller or callee uses struct return |
5234 | // semantics. |
5235 | auto IsCallerStructRet = Caller.hasStructRetAttr(); |
5236 | auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); |
5237 | if (IsCallerStructRet || IsCalleeStructRet) |
5238 | return false; |
5239 | |
5240 | // Do not tail call opt if either the callee or caller has a byval argument. |
5241 | for (auto &Arg : Outs) |
5242 | if (Arg.Flags.isByVal()) |
5243 | return false; |
5244 | |
5245 | // The callee has to preserve all registers the caller needs to preserve. |
5246 | const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
5247 | const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); |
5248 | if (CalleeCC != CallerCC) { |
5249 | const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); |
5250 | if (!TRI->regmaskSubsetEqual(mask0: CallerPreserved, mask1: CalleePreserved)) |
5251 | return false; |
5252 | } |
5253 | return true; |
5254 | } |
5255 | |
5256 | static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) { |
5257 | return DAG.getDataLayout().getPrefTypeAlign( |
5258 | Ty: VT.getTypeForEVT(Context&: *DAG.getContext())); |
5259 | } |
5260 | |
5261 | // Lower a call to a callseq_start + CALL + callseq_end chain, and add input |
5262 | // and output parameter nodes. |
5263 | SDValue |
5264 | LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI, |
5265 | SmallVectorImpl<SDValue> &InVals) const { |
5266 | SelectionDAG &DAG = CLI.DAG; |
5267 | SDLoc &DL = CLI.DL; |
5268 | SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; |
5269 | SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; |
5270 | SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; |
5271 | SDValue Chain = CLI.Chain; |
5272 | SDValue Callee = CLI.Callee; |
5273 | CallingConv::ID CallConv = CLI.CallConv; |
5274 | bool IsVarArg = CLI.IsVarArg; |
5275 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
5276 | MVT GRLenVT = Subtarget.getGRLenVT(); |
5277 | bool &IsTailCall = CLI.IsTailCall; |
5278 | |
5279 | MachineFunction &MF = DAG.getMachineFunction(); |
5280 | |
5281 | // Analyze the operands of the call, assigning locations to each operand. |
5282 | SmallVector<CCValAssign> ArgLocs; |
5283 | CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); |
5284 | |
5285 | if (CallConv == CallingConv::GHC) |
5286 | ArgCCInfo.AnalyzeCallOperands(Outs, Fn: CC_LoongArch_GHC); |
5287 | else |
5288 | analyzeOutputArgs(MF, CCInfo&: ArgCCInfo, Outs, /*IsRet=*/false, CLI: &CLI, Fn: CC_LoongArch); |
5289 | |
5290 | // Check if it's really possible to do a tail call. |
5291 | if (IsTailCall) |
5292 | IsTailCall = isEligibleForTailCallOptimization(CCInfo&: ArgCCInfo, CLI, MF, ArgLocs); |
5293 | |
5294 | if (IsTailCall) |
5295 | ++NumTailCalls; |
5296 | else if (CLI.CB && CLI.CB->isMustTailCall()) |
5297 | report_fatal_error(reason: "failed to perform tail call elimination on a call " |
5298 | "site marked musttail" ); |
5299 | |
5300 | // Get a count of how many bytes are to be pushed on the stack. |
5301 | unsigned NumBytes = ArgCCInfo.getStackSize(); |
5302 | |
5303 | // Create local copies for byval args. |
5304 | SmallVector<SDValue> ByValArgs; |
5305 | for (unsigned i = 0, e = Outs.size(); i != e; ++i) { |
5306 | ISD::ArgFlagsTy Flags = Outs[i].Flags; |
5307 | if (!Flags.isByVal()) |
5308 | continue; |
5309 | |
5310 | SDValue Arg = OutVals[i]; |
5311 | unsigned Size = Flags.getByValSize(); |
5312 | Align Alignment = Flags.getNonZeroByValAlign(); |
5313 | |
5314 | int FI = |
5315 | MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/isSpillSlot: false); |
5316 | SDValue FIPtr = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout())); |
5317 | SDValue SizeNode = DAG.getConstant(Val: Size, DL, VT: GRLenVT); |
5318 | |
5319 | Chain = DAG.getMemcpy(Chain, dl: DL, Dst: FIPtr, Src: Arg, Size: SizeNode, Alignment, |
5320 | /*IsVolatile=*/isVol: false, |
5321 | /*AlwaysInline=*/false, /*CI=*/nullptr, OverrideTailCall: std::nullopt, |
5322 | DstPtrInfo: MachinePointerInfo(), SrcPtrInfo: MachinePointerInfo()); |
5323 | ByValArgs.push_back(Elt: FIPtr); |
5324 | } |
5325 | |
5326 | if (!IsTailCall) |
5327 | Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: 0, DL: CLI.DL); |
5328 | |
5329 | // Copy argument values to their designated locations. |
5330 | SmallVector<std::pair<Register, SDValue>> RegsToPass; |
5331 | SmallVector<SDValue> MemOpChains; |
5332 | SDValue StackPtr; |
5333 | for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { |
5334 | CCValAssign &VA = ArgLocs[i]; |
5335 | SDValue ArgValue = OutVals[i]; |
5336 | ISD::ArgFlagsTy Flags = Outs[i].Flags; |
5337 | |
5338 | // Promote the value if needed. |
5339 | // For now, only handle fully promoted and indirect arguments. |
5340 | if (VA.getLocInfo() == CCValAssign::Indirect) { |
5341 | // Store the argument in a stack slot and pass its address. |
5342 | Align StackAlign = |
5343 | std::max(a: getPrefTypeAlign(VT: Outs[i].ArgVT, DAG), |
5344 | b: getPrefTypeAlign(VT: ArgValue.getValueType(), DAG)); |
5345 | TypeSize StoredSize = ArgValue.getValueType().getStoreSize(); |
5346 | // If the original argument was split and passed by reference, we need to |
5347 | // store the required parts of it here (and pass just one address). |
5348 | unsigned ArgIndex = Outs[i].OrigArgIndex; |
5349 | unsigned ArgPartOffset = Outs[i].PartOffset; |
5350 | assert(ArgPartOffset == 0); |
5351 | // Calculate the total size to store. We don't have access to what we're |
5352 | // actually storing other than performing the loop and collecting the |
5353 | // info. |
5354 | SmallVector<std::pair<SDValue, SDValue>> Parts; |
5355 | while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) { |
5356 | SDValue PartValue = OutVals[i + 1]; |
5357 | unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset; |
5358 | SDValue Offset = DAG.getIntPtrConstant(Val: PartOffset, DL); |
5359 | EVT PartVT = PartValue.getValueType(); |
5360 | |
5361 | StoredSize += PartVT.getStoreSize(); |
5362 | StackAlign = std::max(a: StackAlign, b: getPrefTypeAlign(VT: PartVT, DAG)); |
5363 | Parts.push_back(Elt: std::make_pair(x&: PartValue, y&: Offset)); |
5364 | ++i; |
5365 | } |
5366 | SDValue SpillSlot = DAG.CreateStackTemporary(Bytes: StoredSize, Alignment: StackAlign); |
5367 | int FI = cast<FrameIndexSDNode>(Val&: SpillSlot)->getIndex(); |
5368 | MemOpChains.push_back( |
5369 | Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: SpillSlot, |
5370 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI))); |
5371 | for (const auto &Part : Parts) { |
5372 | SDValue PartValue = Part.first; |
5373 | SDValue PartOffset = Part.second; |
5374 | SDValue Address = |
5375 | DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: SpillSlot, N2: PartOffset); |
5376 | MemOpChains.push_back( |
5377 | Elt: DAG.getStore(Chain, dl: DL, Val: PartValue, Ptr: Address, |
5378 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI))); |
5379 | } |
5380 | ArgValue = SpillSlot; |
5381 | } else { |
5382 | ArgValue = convertValVTToLocVT(DAG, Val: ArgValue, VA, DL); |
5383 | } |
5384 | |
5385 | // Use local copy if it is a byval arg. |
5386 | if (Flags.isByVal()) |
5387 | ArgValue = ByValArgs[j++]; |
5388 | |
5389 | if (VA.isRegLoc()) { |
5390 | // Queue up the argument copies and emit them at the end. |
5391 | RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: ArgValue)); |
5392 | } else { |
5393 | assert(VA.isMemLoc() && "Argument not register or memory" ); |
5394 | assert(!IsTailCall && "Tail call not allowed if stack is used " |
5395 | "for passing parameters" ); |
5396 | |
5397 | // Work out the address of the stack slot. |
5398 | if (!StackPtr.getNode()) |
5399 | StackPtr = DAG.getCopyFromReg(Chain, dl: DL, Reg: LoongArch::R3, VT: PtrVT); |
5400 | SDValue Address = |
5401 | DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr, |
5402 | N2: DAG.getIntPtrConstant(Val: VA.getLocMemOffset(), DL)); |
5403 | |
5404 | // Emit the store. |
5405 | MemOpChains.push_back( |
5406 | Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: Address, PtrInfo: MachinePointerInfo())); |
5407 | } |
5408 | } |
5409 | |
5410 | // Join the stores, which are independent of one another. |
5411 | if (!MemOpChains.empty()) |
5412 | Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: MemOpChains); |
5413 | |
5414 | SDValue Glue; |
5415 | |
5416 | // Build a sequence of copy-to-reg nodes, chained and glued together. |
5417 | for (auto &Reg : RegsToPass) { |
5418 | Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: Reg.first, N: Reg.second, Glue); |
5419 | Glue = Chain.getValue(R: 1); |
5420 | } |
5421 | |
5422 | // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a |
5423 | // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't |
// split it, and the direct call can then be matched by PseudoCALL.
5425 | if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Val&: Callee)) { |
5426 | const GlobalValue *GV = S->getGlobal(); |
5427 | unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV) |
5428 | ? LoongArchII::MO_CALL |
5429 | : LoongArchII::MO_CALL_PLT; |
5430 | Callee = DAG.getTargetGlobalAddress(GV: S->getGlobal(), DL, VT: PtrVT, offset: 0, TargetFlags: OpFlags); |
5431 | } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Val&: Callee)) { |
5432 | unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV: nullptr) |
5433 | ? LoongArchII::MO_CALL |
5434 | : LoongArchII::MO_CALL_PLT; |
5435 | Callee = DAG.getTargetExternalSymbol(Sym: S->getSymbol(), VT: PtrVT, TargetFlags: OpFlags); |
5436 | } |
5437 | |
5438 | // The first call operand is the chain and the second is the target address. |
5439 | SmallVector<SDValue> Ops; |
5440 | Ops.push_back(Elt: Chain); |
5441 | Ops.push_back(Elt: Callee); |
5442 | |
5443 | // Add argument registers to the end of the list so that they are |
5444 | // known live into the call. |
5445 | for (auto &Reg : RegsToPass) |
5446 | Ops.push_back(Elt: DAG.getRegister(Reg: Reg.first, VT: Reg.second.getValueType())); |
5447 | |
5448 | if (!IsTailCall) { |
5449 | // Add a register mask operand representing the call-preserved registers. |
5450 | const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
5451 | const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); |
5452 | assert(Mask && "Missing call preserved mask for calling convention" ); |
5453 | Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask)); |
5454 | } |
5455 | |
5456 | // Glue the call to the argument copies, if any. |
5457 | if (Glue.getNode()) |
5458 | Ops.push_back(Elt: Glue); |
5459 | |
5460 | // Emit the call. |
5461 | SDVTList NodeTys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue); |
5462 | unsigned Op; |
5463 | switch (DAG.getTarget().getCodeModel()) { |
5464 | default: |
5465 | report_fatal_error(reason: "Unsupported code model" ); |
5466 | case CodeModel::Small: |
5467 | Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL; |
5468 | break; |
5469 | case CodeModel::Medium: |
5470 | assert(Subtarget.is64Bit() && "Medium code model requires LA64" ); |
5471 | Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM; |
5472 | break; |
5473 | case CodeModel::Large: |
5474 | assert(Subtarget.is64Bit() && "Large code model requires LA64" ); |
5475 | Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE; |
5476 | break; |
5477 | } |
5478 | |
5479 | if (IsTailCall) { |
5480 | MF.getFrameInfo().setHasTailCall(); |
5481 | SDValue Ret = DAG.getNode(Opcode: Op, DL, VTList: NodeTys, Ops); |
5482 | DAG.addNoMergeSiteInfo(Node: Ret.getNode(), NoMerge: CLI.NoMerge); |
5483 | return Ret; |
5484 | } |
5485 | |
5486 | Chain = DAG.getNode(Opcode: Op, DL, VTList: NodeTys, Ops); |
5487 | DAG.addNoMergeSiteInfo(Node: Chain.getNode(), NoMerge: CLI.NoMerge); |
5488 | Glue = Chain.getValue(R: 1); |
5489 | |
5490 | // Mark the end of the call, which is glued to the call itself. |
5491 | Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytes, Size2: 0, Glue, DL); |
5492 | Glue = Chain.getValue(R: 1); |
5493 | |
5494 | // Assign locations to each value returned by this call. |
5495 | SmallVector<CCValAssign> RVLocs; |
5496 | CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); |
5497 | analyzeInputArgs(MF, CCInfo&: RetCCInfo, Ins, /*IsRet=*/true, Fn: CC_LoongArch); |
5498 | |
5499 | // Copy all of the result registers out of their specified physreg. |
5500 | for (auto &VA : RVLocs) { |
5501 | // Copy the value out. |
5502 | SDValue RetValue = |
5503 | DAG.getCopyFromReg(Chain, dl: DL, Reg: VA.getLocReg(), VT: VA.getLocVT(), Glue); |
5504 | // Glue the RetValue to the end of the call sequence. |
5505 | Chain = RetValue.getValue(R: 1); |
5506 | Glue = RetValue.getValue(R: 2); |
5507 | |
5508 | RetValue = convertLocVTToValVT(DAG, Val: RetValue, VA, DL); |
5509 | |
5510 | InVals.push_back(Elt: RetValue); |
5511 | } |
5512 | |
5513 | return Chain; |
5514 | } |
5515 | |
5516 | bool LoongArchTargetLowering::CanLowerReturn( |
5517 | CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, |
5518 | const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { |
5519 | SmallVector<CCValAssign> RVLocs; |
5520 | CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); |
5521 | |
5522 | for (unsigned i = 0, e = Outs.size(); i != e; ++i) { |
5523 | LoongArchABI::ABI ABI = |
5524 | MF.getSubtarget<LoongArchSubtarget>().getTargetABI(); |
5525 | if (CC_LoongArch(DL: MF.getDataLayout(), ABI, ValNo: i, ValVT: Outs[i].VT, LocInfo: CCValAssign::Full, |
5526 | ArgFlags: Outs[i].Flags, State&: CCInfo, /*IsFixed=*/true, /*IsRet=*/true, |
5527 | OrigTy: nullptr)) |
5528 | return false; |
5529 | } |
5530 | return true; |
5531 | } |
5532 | |
5533 | SDValue LoongArchTargetLowering::LowerReturn( |
5534 | SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, |
5535 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
5536 | const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, |
5537 | SelectionDAG &DAG) const { |
5538 | // Stores the assignment of the return value to a location. |
5539 | SmallVector<CCValAssign> RVLocs; |
5540 | |
5541 | // Info about the registers and stack slot. |
5542 | CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, |
5543 | *DAG.getContext()); |
5544 | |
5545 | analyzeOutputArgs(MF&: DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true, |
5546 | CLI: nullptr, Fn: CC_LoongArch); |
5547 | if (CallConv == CallingConv::GHC && !RVLocs.empty()) |
5548 | report_fatal_error(reason: "GHC functions return void only" ); |
5549 | SDValue Glue; |
5550 | SmallVector<SDValue, 4> RetOps(1, Chain); |
5551 | |
5552 | // Copy the result values into the output registers. |
5553 | for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) { |
5554 | CCValAssign &VA = RVLocs[i]; |
5555 | assert(VA.isRegLoc() && "Can only return in registers!" ); |
5556 | |
5557 | // Handle a 'normal' return. |
5558 | SDValue Val = convertValVTToLocVT(DAG, Val: OutVals[i], VA, DL); |
5559 | Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: VA.getLocReg(), N: Val, Glue); |
5560 | |
5561 | // Guarantee that all emitted copies are stuck together. |
5562 | Glue = Chain.getValue(R: 1); |
5563 | RetOps.push_back(Elt: DAG.getRegister(Reg: VA.getLocReg(), VT: VA.getLocVT())); |
5564 | } |
5565 | |
5566 | RetOps[0] = Chain; // Update chain. |
5567 | |
5568 | // Add the glue node if we have it. |
5569 | if (Glue.getNode()) |
5570 | RetOps.push_back(Elt: Glue); |
5571 | |
5572 | return DAG.getNode(Opcode: LoongArchISD::RET, DL, VT: MVT::Other, Ops: RetOps); |
5573 | } |
5574 | |
5575 | bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, |
5576 | bool ForCodeSize) const { |
5577 | // TODO: Maybe need more checks here after vector extension is supported. |
5578 | if (VT == MVT::f32 && !Subtarget.hasBasicF()) |
5579 | return false; |
5580 | if (VT == MVT::f64 && !Subtarget.hasBasicD()) |
5581 | return false; |
5582 | return (Imm.isZero() || Imm.isExactlyValue(V: +1.0)); |
5583 | } |
5584 | |
5585 | bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const { |
5586 | return true; |
5587 | } |
5588 | |
5589 | bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const { |
5590 | return true; |
5591 | } |
5592 | |
5593 | bool LoongArchTargetLowering::shouldInsertFencesForAtomic( |
5594 | const Instruction *I) const { |
5595 | if (!Subtarget.is64Bit()) |
5596 | return isa<LoadInst>(Val: I) || isa<StoreInst>(Val: I); |
5597 | |
5598 | if (isa<LoadInst>(Val: I)) |
5599 | return true; |
5600 | |
5601 | // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not |
// require fences because we can use amswap_db.[w/d].
5603 | if (isa<StoreInst>(Val: I)) { |
5604 | unsigned Size = I->getOperand(i: 0)->getType()->getIntegerBitWidth(); |
5605 | return (Size == 8 || Size == 16); |
5606 | } |
5607 | |
5608 | return false; |
5609 | } |
5610 | |
5611 | EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL, |
5612 | LLVMContext &Context, |
5613 | EVT VT) const { |
5614 | if (!VT.isVector()) |
5615 | return getPointerTy(DL); |
5616 | return VT.changeVectorElementTypeToInteger(); |
5617 | } |
5618 | |
5619 | bool LoongArchTargetLowering::hasAndNot(SDValue Y) const { |
5620 | // TODO: Support vectors. |
5621 | return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Val: Y); |
5622 | } |
5623 | |
5624 | bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, |
5625 | const CallInst &I, |
5626 | MachineFunction &MF, |
5627 | unsigned Intrinsic) const { |
5628 | switch (Intrinsic) { |
5629 | default: |
5630 | return false; |
5631 | case Intrinsic::loongarch_masked_atomicrmw_xchg_i32: |
5632 | case Intrinsic::loongarch_masked_atomicrmw_add_i32: |
5633 | case Intrinsic::loongarch_masked_atomicrmw_sub_i32: |
5634 | case Intrinsic::loongarch_masked_atomicrmw_nand_i32: |
5635 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
5636 | Info.memVT = MVT::i32; |
5637 | Info.ptrVal = I.getArgOperand(i: 0); |
5638 | Info.offset = 0; |
5639 | Info.align = Align(4); |
5640 | Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore | |
5641 | MachineMemOperand::MOVolatile; |
5642 | return true; |
5643 | // TODO: Add more Intrinsics later. |
5644 | } |
5645 | } |
5646 | |
5647 | TargetLowering::AtomicExpansionKind |
5648 | LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { |
5649 | // TODO: Add more AtomicRMWInst that needs to be extended. |
5650 | |
5651 | // Since floating-point operation requires a non-trivial set of data |
5652 | // operations, use CmpXChg to expand. |
5653 | if (AI->isFloatingPointOperation() || |
5654 | AI->getOperation() == AtomicRMWInst::UIncWrap || |
5655 | AI->getOperation() == AtomicRMWInst::UDecWrap) |
5656 | return AtomicExpansionKind::CmpXChg; |
5657 | |
5658 | unsigned Size = AI->getType()->getPrimitiveSizeInBits(); |
5659 | if (Size == 8 || Size == 16) |
5660 | return AtomicExpansionKind::MaskedIntrinsic; |
5661 | return AtomicExpansionKind::None; |
5662 | } |
5663 | |
5664 | static Intrinsic::ID |
5665 | getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, |
5666 | AtomicRMWInst::BinOp BinOp) { |
5667 | if (GRLen == 64) { |
5668 | switch (BinOp) { |
5669 | default: |
5670 | llvm_unreachable("Unexpected AtomicRMW BinOp" ); |
5671 | case AtomicRMWInst::Xchg: |
5672 | return Intrinsic::loongarch_masked_atomicrmw_xchg_i64; |
5673 | case AtomicRMWInst::Add: |
5674 | return Intrinsic::loongarch_masked_atomicrmw_add_i64; |
5675 | case AtomicRMWInst::Sub: |
5676 | return Intrinsic::loongarch_masked_atomicrmw_sub_i64; |
5677 | case AtomicRMWInst::Nand: |
5678 | return Intrinsic::loongarch_masked_atomicrmw_nand_i64; |
5679 | case AtomicRMWInst::UMax: |
5680 | return Intrinsic::loongarch_masked_atomicrmw_umax_i64; |
5681 | case AtomicRMWInst::UMin: |
5682 | return Intrinsic::loongarch_masked_atomicrmw_umin_i64; |
5683 | case AtomicRMWInst::Max: |
5684 | return Intrinsic::loongarch_masked_atomicrmw_max_i64; |
5685 | case AtomicRMWInst::Min: |
5686 | return Intrinsic::loongarch_masked_atomicrmw_min_i64; |
5687 | // TODO: support other AtomicRMWInst. |
5688 | } |
5689 | } |
5690 | |
5691 | if (GRLen == 32) { |
5692 | switch (BinOp) { |
5693 | default: |
5694 | llvm_unreachable("Unexpected AtomicRMW BinOp" ); |
5695 | case AtomicRMWInst::Xchg: |
5696 | return Intrinsic::loongarch_masked_atomicrmw_xchg_i32; |
5697 | case AtomicRMWInst::Add: |
5698 | return Intrinsic::loongarch_masked_atomicrmw_add_i32; |
5699 | case AtomicRMWInst::Sub: |
5700 | return Intrinsic::loongarch_masked_atomicrmw_sub_i32; |
5701 | case AtomicRMWInst::Nand: |
5702 | return Intrinsic::loongarch_masked_atomicrmw_nand_i32; |
5703 | // TODO: support other AtomicRMWInst. |
5704 | } |
5705 | } |
5706 | |
5707 | llvm_unreachable("Unexpected GRLen\n" ); |
5708 | } |
5709 | |
5710 | TargetLowering::AtomicExpansionKind |
5711 | LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR( |
5712 | AtomicCmpXchgInst *CI) const { |
5713 | unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits(); |
5714 | if (Size == 8 || Size == 16) |
5715 | return AtomicExpansionKind::MaskedIntrinsic; |
5716 | return AtomicExpansionKind::None; |
5717 | } |
5718 | |
5719 | Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( |
5720 | IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, |
5721 | Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { |
5722 | AtomicOrdering FailOrd = CI->getFailureOrdering(); |
5723 | Value *FailureOrdering = |
5724 | Builder.getIntN(N: Subtarget.getGRLen(), C: static_cast<uint64_t>(FailOrd)); |
5725 | |
5726 | // TODO: Support cmpxchg on LA32. |
5727 | Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64; |
5728 | CmpVal = Builder.CreateSExt(V: CmpVal, DestTy: Builder.getInt64Ty()); |
5729 | NewVal = Builder.CreateSExt(V: NewVal, DestTy: Builder.getInt64Ty()); |
5730 | Mask = Builder.CreateSExt(V: Mask, DestTy: Builder.getInt64Ty()); |
5731 | Type *Tys[] = {AlignedAddr->getType()}; |
5732 | Function *MaskedCmpXchg = |
5733 | Intrinsic::getDeclaration(M: CI->getModule(), id: CmpXchgIntrID, Tys); |
5734 | Value *Result = Builder.CreateCall( |
5735 | Callee: MaskedCmpXchg, Args: {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering}); |
5736 | Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty()); |
5737 | return Result; |
5738 | } |
5739 | |
5740 | Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic( |
5741 | IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, |
5742 | Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const { |
5743 | // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace |
5744 | // the atomic instruction with an AtomicRMWInst::And/Or with appropriate |
5745 | // mask, as this produces better code than the LL/SC loop emitted by |
5746 | // int_loongarch_masked_atomicrmw_xchg. |
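// For example, exchanging in 0 only needs to clear the masked lane, which
// the And with the inverted mask achieves without an LL/SC loop; likewise
// exchanging in -1 only needs to set it, which the Or with the mask achieves.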
5747 | if (AI->getOperation() == AtomicRMWInst::Xchg && |
5748 | isa<ConstantInt>(Val: AI->getValOperand())) { |
5749 | ConstantInt *CVal = cast<ConstantInt>(Val: AI->getValOperand()); |
5750 | if (CVal->isZero()) |
5751 | return Builder.CreateAtomicRMW(Op: AtomicRMWInst::And, Ptr: AlignedAddr, |
5752 | Val: Builder.CreateNot(V: Mask, Name: "Inv_Mask" ), |
5753 | Align: AI->getAlign(), Ordering: Ord); |
5754 | if (CVal->isMinusOne()) |
5755 | return Builder.CreateAtomicRMW(Op: AtomicRMWInst::Or, Ptr: AlignedAddr, Val: Mask, |
5756 | Align: AI->getAlign(), Ordering: Ord); |
5757 | } |
5758 | |
5759 | unsigned GRLen = Subtarget.getGRLen(); |
5760 | Value *Ordering = |
5761 | Builder.getIntN(N: GRLen, C: static_cast<uint64_t>(AI->getOrdering())); |
5762 | Type *Tys[] = {AlignedAddr->getType()}; |
5763 | Function *LlwOpScwLoop = Intrinsic::getDeclaration( |
5764 | M: AI->getModule(), |
5765 | id: getIntrinsicForMaskedAtomicRMWBinOp(GRLen, BinOp: AI->getOperation()), Tys); |
5766 | |
5767 | if (GRLen == 64) { |
5768 | Incr = Builder.CreateSExt(V: Incr, DestTy: Builder.getInt64Ty()); |
5769 | Mask = Builder.CreateSExt(V: Mask, DestTy: Builder.getInt64Ty()); |
5770 | ShiftAmt = Builder.CreateSExt(V: ShiftAmt, DestTy: Builder.getInt64Ty()); |
5771 | } |
5772 | |
5773 | Value *Result; |
5774 | |
5775 | // Must pass the shift amount needed to sign extend the loaded value prior |
5776 | // to performing a signed comparison for min/max. ShiftAmt is the number of |
5777 | // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which |
5778 | // is the number of bits to left+right shift the value in order to |
5779 | // sign-extend. |
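// For example, an i8 field whose low bit sits at bit 16 of the aligned word
// on LA64 gets SextShamt = 64 - 8 - 16 = 40: shifting left and then
// arithmetically right by 40 sign-extends the byte before the comparison.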
5780 | if (AI->getOperation() == AtomicRMWInst::Min || |
5781 | AI->getOperation() == AtomicRMWInst::Max) { |
5782 | const DataLayout &DL = AI->getDataLayout(); |
5783 | unsigned ValWidth = |
5784 | DL.getTypeStoreSizeInBits(Ty: AI->getValOperand()->getType()); |
5785 | Value *SextShamt = |
5786 | Builder.CreateSub(LHS: Builder.getIntN(N: GRLen, C: GRLen - ValWidth), RHS: ShiftAmt); |
5787 | Result = Builder.CreateCall(Callee: LlwOpScwLoop, |
5788 | Args: {AlignedAddr, Incr, Mask, SextShamt, Ordering}); |
5789 | } else { |
5790 | Result = |
5791 | Builder.CreateCall(Callee: LlwOpScwLoop, Args: {AlignedAddr, Incr, Mask, Ordering}); |
5792 | } |
5793 | |
5794 | if (GRLen == 64) |
5795 | Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty()); |
5796 | return Result; |
5797 | } |
5798 | |
5799 | bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd( |
5800 | const MachineFunction &MF, EVT VT) const { |
5801 | VT = VT.getScalarType(); |
5802 | |
5803 | if (!VT.isSimple()) |
5804 | return false; |
5805 | |
5806 | switch (VT.getSimpleVT().SimpleTy) { |
5807 | case MVT::f32: |
5808 | case MVT::f64: |
5809 | return true; |
5810 | default: |
5811 | break; |
5812 | } |
5813 | |
5814 | return false; |
5815 | } |
5816 | |
5817 | Register LoongArchTargetLowering::getExceptionPointerRegister( |
5818 | const Constant *PersonalityFn) const { |
5819 | return LoongArch::R4; |
5820 | } |
5821 | |
5822 | Register LoongArchTargetLowering::getExceptionSelectorRegister( |
5823 | const Constant *PersonalityFn) const { |
5824 | return LoongArch::R5; |
5825 | } |
5826 | |
5827 | //===----------------------------------------------------------------------===// |
5828 | // LoongArch Inline Assembly Support |
5829 | //===----------------------------------------------------------------------===// |
5830 | |
5831 | LoongArchTargetLowering::ConstraintType |
5832 | LoongArchTargetLowering::getConstraintType(StringRef Constraint) const { |
5833 | // LoongArch specific constraints in GCC: config/loongarch/constraints.md |
5834 | // |
5835 | // 'f': A floating-point register (if available). |
5836 | // 'k': A memory operand whose address is formed by a base register and |
5837 | // (optionally scaled) index register. |
5838 | // 'l': A signed 16-bit constant. |
5839 | // 'm': A memory operand whose address is formed by a base register and |
5840 | // offset that is suitable for use in instructions with the same |
5841 | // addressing mode as st.w and ld.w. |
5842 | // 'I': A signed 12-bit constant (for arithmetic instructions). |
5843 | // 'J': Integer zero. |
5844 | // 'K': An unsigned 12-bit constant (for logic instructions). |
5845 | // "ZB": An address that is held in a general-purpose register. The offset is |
5846 | // zero. |
5847 | // "ZC": A memory operand whose address is formed by a base register and |
5848 | // offset that is suitable for use in instructions with the same |
5849 | // addressing mode as ll.w and sc.w. |
5850 | if (Constraint.size() == 1) { |
5851 | switch (Constraint[0]) { |
5852 | default: |
5853 | break; |
5854 | case 'f': |
5855 | return C_RegisterClass; |
5856 | case 'l': |
5857 | case 'I': |
5858 | case 'J': |
5859 | case 'K': |
5860 | return C_Immediate; |
5861 | case 'k': |
5862 | return C_Memory; |
5863 | } |
5864 | } |
5865 | |
5866 | if (Constraint == "ZC" || Constraint == "ZB" ) |
5867 | return C_Memory; |
5868 | |
5869 | // 'm' is handled here. |
5870 | return TargetLowering::getConstraintType(Constraint); |
5871 | } |
5872 | |
5873 | InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint( |
5874 | StringRef ConstraintCode) const { |
5875 | return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode) |
5876 | .Case(S: "k" , Value: InlineAsm::ConstraintCode::k) |
5877 | .Case(S: "ZB" , Value: InlineAsm::ConstraintCode::ZB) |
5878 | .Case(S: "ZC" , Value: InlineAsm::ConstraintCode::ZC) |
5879 | .Default(Value: TargetLowering::getInlineAsmMemConstraint(ConstraintCode)); |
5880 | } |
5881 | |
5882 | std::pair<unsigned, const TargetRegisterClass *> |
5883 | LoongArchTargetLowering::getRegForInlineAsmConstraint( |
5884 | const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { |
5885 | // First, see if this is a constraint that directly corresponds to a LoongArch |
5886 | // register class. |
5887 | if (Constraint.size() == 1) { |
5888 | switch (Constraint[0]) { |
5889 | case 'r': |
5890 | // TODO: Support fixed vectors up to GRLen? |
5891 | if (VT.isVector()) |
5892 | break; |
5893 | return std::make_pair(x: 0U, y: &LoongArch::GPRRegClass); |
5894 | case 'f': |
5895 | if (Subtarget.hasBasicF() && VT == MVT::f32) |
5896 | return std::make_pair(x: 0U, y: &LoongArch::FPR32RegClass); |
5897 | if (Subtarget.hasBasicD() && VT == MVT::f64) |
5898 | return std::make_pair(x: 0U, y: &LoongArch::FPR64RegClass); |
5899 | if (Subtarget.hasExtLSX() && |
5900 | TRI->isTypeLegalForClass(RC: LoongArch::LSX128RegClass, T: VT)) |
5901 | return std::make_pair(x: 0U, y: &LoongArch::LSX128RegClass); |
5902 | if (Subtarget.hasExtLASX() && |
5903 | TRI->isTypeLegalForClass(RC: LoongArch::LASX256RegClass, T: VT)) |
5904 | return std::make_pair(x: 0U, y: &LoongArch::LASX256RegClass); |
5905 | break; |
5906 | default: |
5907 | break; |
5908 | } |
5909 | } |
5910 | |
5911 | // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen |
5912 | // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm |
5913 | // constraints while the official register name is prefixed with a '$'. So we |
5914 | // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.) |
// before it is parsed. And TargetLowering::getRegForInlineAsmConstraint is
5916 | // case insensitive, so no need to convert the constraint to upper case here. |
5917 | // |
5918 | // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly |
5919 | // decode the usage of register name aliases into their official names. And |
5920 | // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use |
5921 | // official register names. |
5922 | if (Constraint.starts_with(Prefix: "{$r" ) || Constraint.starts_with(Prefix: "{$f" ) || |
5923 | Constraint.starts_with(Prefix: "{$vr" ) || Constraint.starts_with(Prefix: "{$xr" )) { |
5924 | bool IsFP = Constraint[2] == 'f'; |
5925 | std::pair<StringRef, StringRef> Temp = Constraint.split(Separator: '$'); |
5926 | std::pair<unsigned, const TargetRegisterClass *> R; |
5927 | R = TargetLowering::getRegForInlineAsmConstraint( |
5928 | TRI, Constraint: join_items(Separator: "" , Items&: Temp.first, Items&: Temp.second), VT); |
5929 | // Match those names to the widest floating point register type available. |
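// For example, {$f0} first matches F0 and is then widened to F0_64 when
// basic D is available and the requested type is f64 (or unspecified).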
5930 | if (IsFP) { |
5931 | unsigned RegNo = R.first; |
5932 | if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) { |
5933 | if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) { |
5934 | unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64; |
5935 | return std::make_pair(x&: DReg, y: &LoongArch::FPR64RegClass); |
5936 | } |
5937 | } |
5938 | } |
5939 | return R; |
5940 | } |
5941 | |
5942 | return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); |
5943 | } |
5944 | |
5945 | void LoongArchTargetLowering::LowerAsmOperandForConstraint( |
5946 | SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops, |
5947 | SelectionDAG &DAG) const { |
5948 | // Currently only support length 1 constraints. |
5949 | if (Constraint.size() == 1) { |
5950 | switch (Constraint[0]) { |
5951 | case 'l': |
5952 | // Validate & create a 16-bit signed immediate operand. |
5953 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) { |
5954 | uint64_t CVal = C->getSExtValue(); |
5955 | if (isInt<16>(x: CVal)) |
5956 | Ops.push_back( |
5957 | x: DAG.getTargetConstant(Val: CVal, DL: SDLoc(Op), VT: Subtarget.getGRLenVT())); |
5958 | } |
5959 | return; |
5960 | case 'I': |
5961 | // Validate & create a 12-bit signed immediate operand. |
5962 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) { |
5963 | uint64_t CVal = C->getSExtValue(); |
5964 | if (isInt<12>(x: CVal)) |
5965 | Ops.push_back( |
5966 | x: DAG.getTargetConstant(Val: CVal, DL: SDLoc(Op), VT: Subtarget.getGRLenVT())); |
5967 | } |
5968 | return; |
5969 | case 'J': |
5970 | // Validate & create an integer zero operand. |
5971 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) |
5972 | if (C->getZExtValue() == 0) |
5973 | Ops.push_back( |
5974 | x: DAG.getTargetConstant(Val: 0, DL: SDLoc(Op), VT: Subtarget.getGRLenVT())); |
5975 | return; |
5976 | case 'K': |
5977 | // Validate & create a 12-bit unsigned immediate operand. |
5978 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) { |
5979 | uint64_t CVal = C->getZExtValue(); |
5980 | if (isUInt<12>(x: CVal)) |
5981 | Ops.push_back( |
5982 | x: DAG.getTargetConstant(Val: CVal, DL: SDLoc(Op), VT: Subtarget.getGRLenVT())); |
5983 | } |
5984 | return; |
5985 | default: |
5986 | break; |
5987 | } |
5988 | } |
5989 | TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); |
5990 | } |
5991 | |
5992 | #define GET_REGISTER_MATCHER |
5993 | #include "LoongArchGenAsmMatcher.inc" |
5994 | |
5995 | Register |
5996 | LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT, |
5997 | const MachineFunction &MF) const { |
5998 | std::pair<StringRef, StringRef> Name = StringRef(RegName).split(Separator: '$'); |
5999 | std::string NewRegName = Name.second.str(); |
6000 | Register Reg = MatchRegisterAltName(Name: NewRegName); |
6001 | if (Reg == LoongArch::NoRegister) |
6002 | Reg = MatchRegisterName(Name: NewRegName); |
6003 | if (Reg == LoongArch::NoRegister) |
6004 | report_fatal_error( |
6005 | reason: Twine("Invalid register name \"" + StringRef(RegName) + "\"." )); |
6006 | BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF); |
6007 | if (!ReservedRegs.test(Idx: Reg)) |
6008 | report_fatal_error(reason: Twine("Trying to obtain non-reserved register \"" + |
6009 | StringRef(RegName) + "\"." )); |
6010 | return Reg; |
6011 | } |
6012 | |
6013 | bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context, |
6014 | EVT VT, SDValue C) const { |
6015 | // TODO: Support vectors. |
6016 | if (!VT.isScalarInteger()) |
6017 | return false; |
6018 | |
6019 | // Omit the optimization if the data size exceeds GRLen. |
6020 | if (VT.getSizeInBits() > Subtarget.getGRLen()) |
6021 | return false; |
6022 | |
6023 | if (auto *ConstNode = dyn_cast<ConstantSDNode>(Val: C.getNode())) { |
6024 | const APInt &Imm = ConstNode->getAPIntValue(); |
6025 | // Break MUL into (SLLI + ADD/SUB) or ALSL. |
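// For example, x * 17 becomes (SLLI x, 4) + x (or a single ALSL) and
// x * 15 becomes (SLLI x, 4) - x.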
6026 | if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() || |
6027 | (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2()) |
6028 | return true; |
6029 | // Break MUL into (ALSL x, (SLLI x, imm0), imm1). |
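// For example, x * 10 becomes (ALSL x, (SLLI x, 3), 1), i.e.
// (x << 1) + (x << 3), since 10 - 2 is a power of two.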
6030 | if (ConstNode->hasOneUse() && |
6031 | ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() || |
6032 | (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2())) |
6033 | return true; |
6034 | // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)), |
// in which the immediate has two set bits. Or break (MUL x, imm)
// into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
// equals (1 << s0) - (1 << s1).
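// For example, x * 4160 (0x1040) becomes (SLLI x, 12) + (SLLI x, 6).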
6038 | if (ConstNode->hasOneUse() && !(Imm.sge(RHS: -2048) && Imm.sle(RHS: 4095))) { |
6039 | unsigned Shifts = Imm.countr_zero(); |
6040 | // Reject immediates which can be composed via a single LUI. |
6041 | if (Shifts >= 12) |
6042 | return false; |
// Reject multiplications that can be optimized to
6044 | // (SLLI (ALSL x, x, 1/2/3/4), s). |
6045 | APInt ImmPop = Imm.ashr(ShiftAmt: Shifts); |
6046 | if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17) |
6047 | return false; |
6048 | // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`, |
// since it needs one more instruction than the other 3 cases.
6050 | APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true); |
6051 | if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() || |
6052 | (ImmSmall - Imm).isPowerOf2()) |
6053 | return true; |
6054 | } |
6055 | } |
6056 | |
6057 | return false; |
6058 | } |
6059 | |
6060 | bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL, |
6061 | const AddrMode &AM, |
6062 | Type *Ty, unsigned AS, |
6063 | Instruction *I) const { |
6064 | // LoongArch has four basic addressing modes: |
6065 | // 1. reg |
6066 | // 2. reg + 12-bit signed offset |
6067 | // 3. reg + 14-bit signed offset left-shifted by 2 |
6068 | // 4. reg1 + reg2 |
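// These correspond to, e.g., ld.w/st.w (reg + si12), ldptr.w/stptr.w
// (reg + si14 << 2) and ldx.w/stx.w (reg + reg).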
// TODO: Add more checks once the vector extension is supported.
6070 | |
6071 | // No global is ever allowed as a base. |
6072 | if (AM.BaseGV) |
6073 | return false; |
6074 | |
6075 | // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2 |
6076 | // with `UAL` feature. |
6077 | if (!isInt<12>(x: AM.BaseOffs) && |
6078 | !(isShiftedInt<14, 2>(x: AM.BaseOffs) && Subtarget.hasUAL())) |
6079 | return false; |
6080 | |
6081 | switch (AM.Scale) { |
6082 | case 0: |
6083 | // "r+i" or just "i", depending on HasBaseReg. |
6084 | break; |
6085 | case 1: |
6086 | // "r+r+i" is not allowed. |
6087 | if (AM.HasBaseReg && AM.BaseOffs) |
6088 | return false; |
6089 | // Otherwise we have "r+r" or "r+i". |
6090 | break; |
6091 | case 2: |
6092 | // "2*r+r" or "2*r+i" is not allowed. |
6093 | if (AM.HasBaseReg || AM.BaseOffs) |
6094 | return false; |
6095 | // Allow "2*r" as "r+r". |
6096 | break; |
6097 | default: |
6098 | return false; |
6099 | } |
6100 | |
6101 | return true; |
6102 | } |
6103 | |
6104 | bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const { |
6105 | return isInt<12>(x: Imm); |
6106 | } |
6107 | |
6108 | bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const { |
6109 | return isInt<12>(x: Imm); |
6110 | } |
6111 | |
6112 | bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { |
6113 | // Zexts are free if they can be combined with a load. |
6114 | // Don't advertise i32->i64 zextload as being free for LA64. It interacts |
6115 | // poorly with type legalization of compares preferring sext. |
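// For example, an i8 or i16 zero-extending load can be selected as a single
// ld.bu/ld.hu, so the separate zext costs nothing.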
6116 | if (auto *LD = dyn_cast<LoadSDNode>(Val)) { |
6117 | EVT MemVT = LD->getMemoryVT(); |
6118 | if ((MemVT == MVT::i8 || MemVT == MVT::i16) && |
6119 | (LD->getExtensionType() == ISD::NON_EXTLOAD || |
6120 | LD->getExtensionType() == ISD::ZEXTLOAD)) |
6121 | return true; |
6122 | } |
6123 | |
6124 | return TargetLowering::isZExtFree(Val, VT2); |
6125 | } |
6126 | |
6127 | bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, |
6128 | EVT DstVT) const { |
6129 | return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64; |
6130 | } |
6131 | |
6132 | bool LoongArchTargetLowering::signExtendConstant(const ConstantInt *CI) const { |
6133 | return Subtarget.is64Bit() && CI->getType()->isIntegerTy(Bitwidth: 32); |
6134 | } |
6135 | |
6136 | bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const { |
6137 | // TODO: Support vectors. |
6138 | if (Y.getValueType().isVector()) |
6139 | return false; |
6140 | |
6141 | return !isa<ConstantSDNode>(Val: Y); |
6142 | } |
6143 | |
6144 | ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const { |
6145 | // TODO: LAMCAS will use amcas{_DB,}.[bhwd] which does not require extension. |
6146 | return ISD::SIGN_EXTEND; |
6147 | } |
6148 | |
6149 | bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall( |
6150 | EVT Type, bool IsSigned) const { |
6151 | if (Subtarget.is64Bit() && Type == MVT::i32) |
6152 | return true; |
6153 | |
6154 | return IsSigned; |
6155 | } |
6156 | |
6157 | bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const { |
6158 | // Return false to suppress the unnecessary extensions if the LibCall |
6159 | // arguments or return value is a float narrower than GRLEN on a soft FP ABI. |
6160 | if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() && |
6161 | Type.getSizeInBits() < Subtarget.getGRLen())) |
6162 | return false; |
6163 | return true; |
6164 | } |
6165 | |