//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that LoongArch uses to lower LLVM code into
// a selection DAG.
//
//===----------------------------------------------------------------------===//

#include "LoongArchISelLowering.h"
#include "LoongArch.h"
#include "LoongArchMachineFunctionInfo.h"
#include "LoongArchRegisterInfo.h"
#include "LoongArchSubtarget.h"
#include "MCTargetDesc/LoongArchBaseInfo.h"
#include "MCTargetDesc/LoongArchMCTargetDesc.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/RuntimeLibcallUtil.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsLoongArch.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"

using namespace llvm;

#define DEBUG_TYPE "loongarch-isel-lowering"

STATISTIC(NumTailCalls, "Number of tail calls");

static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
                                  cl::desc("Trap on integer division by zero."),
                                  cl::init(false));

LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
                                                 const LoongArchSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  MVT GRLenVT = Subtarget.getGRLenVT();

  // Set up the register classes.

  addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
  if (Subtarget.hasBasicF())
    addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
  if (Subtarget.hasBasicD())
    addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);

  static const MVT::SimpleValueType LSXVTs[] = {
      MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
  static const MVT::SimpleValueType LASXVTs[] = {
      MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};

  if (Subtarget.hasExtLSX())
    for (MVT VT : LSXVTs)
      addRegisterClass(VT, &LoongArch::LSX128RegClass);

  if (Subtarget.hasExtLASX())
    for (MVT VT : LASXVTs)
      addRegisterClass(VT, &LoongArch::LASX256RegClass);

  // Set operations for LA32 and LA64.

  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
                   MVT::i1, Promote);

  setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom);
  setOperationAction(ISD::ROTL, GRLenVT, Expand);
  setOperationAction(ISD::CTPOP, GRLenVT, Expand);

  setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
                      ISD::JumpTable, ISD::GlobalTLSAddress},
                     GRLenVT, Custom);

  setOperationAction(ISD::EH_DWARF_CFA, GRLenVT, Custom);

  setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
  setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);

  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setOperationAction(ISD::PREFETCH, MVT::Other, Custom);

  // BITREV/REVB requires the 32S feature.
  if (STI.has32S()) {
    // Expand bitreverse.i16 with native-width bitrev and shift for now, before
    // we get to know which of sll and revb.2h is faster.
    setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
    setOperationAction(ISD::BITREVERSE, GRLenVT, Legal);

    // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
    // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
    // and i32 could still be byte-swapped relatively cheaply.
    setOperationAction(ISD::BSWAP, MVT::i16, Custom);
  } else {
    setOperationAction(ISD::BSWAP, GRLenVT, Expand);
    setOperationAction(ISD::CTTZ, GRLenVT, Expand);
    setOperationAction(ISD::CTLZ, GRLenVT, Expand);
    setOperationAction(ISD::ROTR, GRLenVT, Expand);
    setOperationAction(ISD::SELECT, GRLenVT, Custom);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  }

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, GRLenVT, Expand);
  setOperationAction(ISD::SELECT_CC, GRLenVT, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand);

  setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom);
  setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand);

  // Set operations for LA64 only.

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::ADD, MVT::i32, Custom);
    setOperationAction(ISD::SUB, MVT::i32, Custom);
    setOperationAction(ISD::SHL, MVT::i32, Custom);
    setOperationAction(ISD::SRA, MVT::i32, Custom);
    setOperationAction(ISD::SRL, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);
    setOperationAction(ISD::ROTR, MVT::i32, Custom);
    setOperationAction(ISD::ROTL, MVT::i32, Custom);
    setOperationAction(ISD::CTTZ, MVT::i32, Custom);
    setOperationAction(ISD::CTLZ, MVT::i32, Custom);
    setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
    setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom);
    setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);

    setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
    setOperationAction(ISD::BSWAP, MVT::i32, Custom);
    setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, MVT::i32,
                       Custom);
    setOperationAction(ISD::LROUND, MVT::i32, Custom);
  }

  // Set operations for LA32 only.

  if (!Subtarget.is64Bit()) {
    setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
    setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
    if (Subtarget.hasBasicD())
      setOperationAction(ISD::BITCAST, MVT::i64, Custom);
  }

  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  static const ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
      ISD::SETGE,  ISD::SETNE,  ISD::SETGT};

  // Set operations for 'F' feature.

  if (Subtarget.hasBasicF()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
    setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
    setCondCodeAction(FPCCToExpand, MVT::f32, Expand);

    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    setOperationAction(ISD::FMA, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
    setOperationAction(ISD::IS_FPCLASS, MVT::f32, Legal);
    setOperationAction(ISD::FSIN, MVT::f32, Expand);
    setOperationAction(ISD::FCOS, MVT::f32, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
    setOperationAction(ISD::FPOW, MVT::f32, Expand);
    setOperationAction(ISD::FREM, MVT::f32, Expand);
    setOperationAction(ISD::FP16_TO_FP, MVT::f32,
                       Subtarget.isSoftFPABI() ? LibCall : Custom);
    setOperationAction(ISD::FP_TO_FP16, MVT::f32,
                       Subtarget.isSoftFPABI() ? LibCall : Custom);
    setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
    setOperationAction(ISD::FP_TO_BF16, MVT::f32,
                       Subtarget.isSoftFPABI() ? LibCall : Custom);

    if (Subtarget.is64Bit())
      setOperationAction(ISD::FRINT, MVT::f32, Legal);

    if (!Subtarget.hasBasicD()) {
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
      if (Subtarget.is64Bit()) {
        setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
        setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
      }
    }
  }

  // Set operations for 'D' feature.

  if (Subtarget.hasBasicD()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
    setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    setCondCodeAction(FPCCToExpand, MVT::f64, Expand);

    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
    setOperationAction(ISD::FMA, MVT::f64, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    setOperationAction(ISD::IS_FPCLASS, MVT::f64, Legal);
    setOperationAction(ISD::FSIN, MVT::f64, Expand);
    setOperationAction(ISD::FCOS, MVT::f64, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
    setOperationAction(ISD::FPOW, MVT::f64, Expand);
    setOperationAction(ISD::FREM, MVT::f64, Expand);
    setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
    setOperationAction(ISD::FP_TO_FP16, MVT::f64,
                       Subtarget.isSoftFPABI() ? LibCall : Custom);
    setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
    setOperationAction(ISD::FP_TO_BF16, MVT::f64,
                       Subtarget.isSoftFPABI() ? LibCall : Custom);

    if (Subtarget.is64Bit())
      setOperationAction(ISD::FRINT, MVT::f64, Legal);
  }

  // Set operations for 'LSX' feature.

  if (Subtarget.hasExtLSX()) {
    for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
      // Expand all truncating stores and extending loads.
      for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
      // By default everything must be expanded. Then we will selectively turn
      // on ones that can be effectively codegen'd.
      for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
        setOperationAction(Op, VT, Expand);
    }

    for (MVT VT : LSXVTs) {
      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);

      setOperationAction(ISD::SETCC, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    }
    for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
      setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
      setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
                         Legal);
      setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
                         VT, Legal);
      setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
      setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
      setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
      setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
      setCondCodeAction(
          {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
          Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
      setOperationAction(ISD::ABDS, VT, Legal);
      setOperationAction(ISD::ABDU, VT, Legal);
    }
    for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
      setOperationAction(ISD::BITREVERSE, VT, Custom);
    for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
      setOperationAction(ISD::BSWAP, VT, Legal);
    for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
      setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
    }
    for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
      setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
      setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
      setOperationAction(ISD::FMA, VT, Legal);
      setOperationAction(ISD::FSQRT, VT, Legal);
      setOperationAction(ISD::FNEG, VT, Legal);
      setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
                         ISD::SETUGE, ISD::SETUGT},
                        VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
    }
    setOperationAction(ISD::CTPOP, GRLenVT, Legal);
    setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
    setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
    setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
    setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);

    for (MVT VT :
         {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
          MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
      setOperationAction(ISD::TRUNCATE, VT, Custom);
    }
  }

  // Set operations for 'LASX' feature.

  if (Subtarget.hasExtLASX()) {
    for (MVT VT : LASXVTs) {
      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);

      setOperationAction(ISD::SETCC, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    }
    for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
      setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
      setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
                         Legal);
      setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
                         VT, Legal);
      setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
      setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
      setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
      setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
      setCondCodeAction(
          {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
          Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
      setOperationAction(ISD::ABDS, VT, Legal);
      setOperationAction(ISD::ABDU, VT, Legal);
    }
    for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
      setOperationAction(ISD::BITREVERSE, VT, Custom);
    for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
      setOperationAction(ISD::BSWAP, VT, Legal);
    for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
      setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
    }
    for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
      setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
      setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
      setOperationAction(ISD::FMA, VT, Legal);
      setOperationAction(ISD::FSQRT, VT, Legal);
      setOperationAction(ISD::FNEG, VT, Legal);
      setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
                         ISD::SETUGE, ISD::SETUGT},
                        VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
    }
  }

  // Set DAG combine for LA32 and LA64.

  setTargetDAGCombine(ISD::AND);
  setTargetDAGCombine(ISD::OR);
  setTargetDAGCombine(ISD::SRL);
  setTargetDAGCombine(ISD::SETCC);

  // Set DAG combine for 'LSX' feature.

  if (Subtarget.hasExtLSX()) {
    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
    setTargetDAGCombine(ISD::BITCAST);
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(LoongArch::R3);

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());

  setMinCmpXchgSizeInBits(32);

  // Function alignments.
  setMinFunctionAlignment(Align(4));
  // Set preferred alignments.
  setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
  setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
  setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());

  // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
  if (Subtarget.hasLAMCAS())
    setMinCmpXchgSizeInBits(8);

  if (Subtarget.hasSCQ()) {
    setMaxAtomicSizeInBitsSupported(128);
    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
  }
}

bool LoongArchTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  // In order to maximise the opportunity for common subexpression elimination,
  // keep a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  return false;
}

SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
                                                SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::ATOMIC_FENCE:
    return lowerATOMIC_FENCE(Op, DAG);
  case ISD::EH_DWARF_CFA:
    return lowerEH_DWARF_CFA(Op, DAG);
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN:
    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID:
    return lowerINTRINSIC_VOID(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, /*IsSRA=*/true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, /*IsSRA=*/false);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::FP_TO_SINT:
    return lowerFP_TO_SINT(Op, DAG);
  case ISD::BITCAST:
    return lowerBITCAST(Op, DAG);
  case ISD::UINT_TO_FP:
    return lowerUINT_TO_FP(Op, DAG);
  case ISD::SINT_TO_FP:
    return lowerSINT_TO_FP(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::WRITE_REGISTER:
    return lowerWRITE_REGISTER(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::BUILD_VECTOR:
    return lowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return lowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::BITREVERSE:
    return lowerBITREVERSE(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return lowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::PREFETCH:
    return lowerPREFETCH(Op, DAG);
  case ISD::SELECT:
    return lowerSELECT(Op, DAG);
  case ISD::FP_TO_FP16:
    return lowerFP_TO_FP16(Op, DAG);
  case ISD::FP16_TO_FP:
    return lowerFP16_TO_FP(Op, DAG);
  case ISD::FP_TO_BF16:
    return lowerFP_TO_BF16(Op, DAG);
  case ISD::BF16_TO_FP:
    return lowerBF16_TO_FP(Op, DAG);
  }
  return SDValue();
}

SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
                                               SelectionDAG &DAG) const {
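  // ISD::PREFETCH operands are (chain, address, rw, locality, is-data), so
  // operand 4 distinguishes data prefetches from instruction prefetches.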
  unsigned IsData = Op.getConstantOperandVal(4);

  // We don't support non-data prefetch.
  // Just preserve the chain.
  if (!IsData)
    return Op.getOperand(0);

  return Op;
}

// Return true if Val is equal to (setcc LHS, RHS, CC).
// Return false if Val is the inverse of (setcc LHS, RHS, CC).
// Otherwise, return std::nullopt.
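// For example, with LHS = a, RHS = b and CC = setlt:
//   Val = (setcc a, b, setlt) -> true
//   Val = (setcc a, b, setge) -> false (inverse condition)
//   Val = (setcc b, a, setgt) -> true  (swapped operands)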
static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
                                      ISD::CondCode CC, SDValue Val) {
  assert(Val->getOpcode() == ISD::SETCC);
  SDValue LHS2 = Val.getOperand(0);
  SDValue RHS2 = Val.getOperand(1);
  ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();

  if (LHS == LHS2 && RHS == RHS2) {
    if (CC == CC2)
      return true;
    if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
      return false;
  } else if (LHS == RHS2 && RHS == LHS2) {
    CC2 = ISD::getSetCCSwappedOperands(CC2);
    if (CC == CC2)
      return true;
    if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
      return false;
  }

  return std::nullopt;
}

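// Fold (select cond, truev, falsev) into plain bitwise or arithmetic ops when
// one arm is a 0/-1 constant or when both arms are setcc results, so that no
// conditional-move sequence is needed.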
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
                                    const LoongArchSubtarget &Subtarget) {
  SDValue CondV = N->getOperand(0);
  SDValue TrueV = N->getOperand(1);
  SDValue FalseV = N->getOperand(2);
  MVT VT = N->getSimpleValueType(0);
  SDLoc DL(N);

  // (select c, -1, y) -> -c | y
  if (isAllOnesConstant(TrueV)) {
    SDValue Neg = DAG.getNegative(CondV, DL, VT);
    return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
  }
  // (select c, y, -1) -> (c-1) | y
  if (isAllOnesConstant(FalseV)) {
    SDValue Neg =
        DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
    return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
  }

  // (select c, 0, y) -> (c-1) & y
  if (isNullConstant(TrueV)) {
    SDValue Neg =
        DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
    return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
  }
  // (select c, y, 0) -> -c & y
  if (isNullConstant(FalseV)) {
    SDValue Neg = DAG.getNegative(CondV, DL, VT);
    return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
  }

  // select c, ~x, x --> xor -c, x
  if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
    const APInt &TrueVal = TrueV->getAsAPIntVal();
    const APInt &FalseVal = FalseV->getAsAPIntVal();
    if (~TrueVal == FalseVal) {
      SDValue Neg = DAG.getNegative(CondV, DL, VT);
      return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
    }
  }

  // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
  // when both truev and falsev are also setcc.
  if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
      FalseV.getOpcode() == ISD::SETCC) {
    SDValue LHS = CondV.getOperand(0);
    SDValue RHS = CondV.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();

    // (select x, x, y) -> x | y
    // (select !x, x, y) -> x & y
    if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
      return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
                         DAG.getFreeze(FalseV));
    }
    // (select x, y, x) -> x & y
    // (select !x, y, x) -> x | y
    if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
      return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
                         DAG.getFreeze(TrueV), FalseV);
    }
  }

  return SDValue();
}

// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
// For now we only consider the transformation profitable if `binOp(c0, c1)`
// ends up being `0` or `-1`. In such cases we can replace `select` with `and`.
// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
// than `c0`?
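// For example, with c0 = 0 and an AND mask c1:
//   (and (select cond, x, 0), c1) -> (select cond, (and x, c1), 0)
// since `binOp(c0, c1)` = `0 & c1` folds to zero.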
static SDValue
foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
                                const LoongArchSubtarget &Subtarget) {
  unsigned SelOpNo = 0;
  SDValue Sel = BO->getOperand(0);
  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
    SelOpNo = 1;
    Sel = BO->getOperand(1);
  }

  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
    return SDValue();

  unsigned ConstSelOpNo = 1;
  unsigned OtherSelOpNo = 2;
  if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
    ConstSelOpNo = 2;
    OtherSelOpNo = 1;
  }
  SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
  ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
  if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
    return SDValue();

  SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
  ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
  if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
    return SDValue();

  SDLoc DL(Sel);
  EVT VT = BO->getValueType(0);

  SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
  if (SelOpNo == 1)
    std::swap(NewConstOps[0], NewConstOps[1]);

  SDValue NewConstOp =
      DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
  if (!NewConstOp)
    return SDValue();

  const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
  if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
    return SDValue();

  SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
  SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
  if (SelOpNo == 1)
    std::swap(NewNonConstOps[0], NewNonConstOps[1]);
  SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);

  SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
  SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
  return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
}

// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly by branches
// in the LoongArch ISA. May adjust compares to favor compare with 0 over
// compare with 1/-1.
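// For example, (setlt x, 1) becomes (setge 0, x), and a single-bit test such
// as (seteq (and x, 0x1000), 0) becomes a sign test on (shl x, GRLen - 13).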
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
                                    ISD::CondCode &CC, SelectionDAG &DAG) {
  // If this is a single bit test that can't be handled by ANDI, shift the
  // bit to be tested to the MSB and perform a signed compare with 0.
  if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
      LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
      isa<ConstantSDNode>(LHS.getOperand(1))) {
    uint64_t Mask = LHS.getConstantOperandVal(1);
    if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
      unsigned ShAmt = 0;
      if (isPowerOf2_64(Mask)) {
        CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
        ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
      } else {
        ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
      }

      LHS = LHS.getOperand(0);
      if (ShAmt != 0)
        LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
                          DAG.getConstant(ShAmt, DL, LHS.getValueType()));
      return;
    }
  }

  if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
    int64_t C = RHSC->getSExtValue();
    switch (CC) {
    default:
      break;
    case ISD::SETGT:
      // Convert X > -1 to X >= 0.
      if (C == -1) {
        RHS = DAG.getConstant(0, DL, RHS.getValueType());
        CC = ISD::SETGE;
        return;
      }
      break;
    case ISD::SETLT:
      // Convert X < 1 to 0 >= X.
      if (C == 1) {
        RHS = LHS;
        LHS = DAG.getConstant(0, DL, RHS.getValueType());
        CC = ISD::SETGE;
        return;
      }
      break;
    }
  }

  switch (CC) {
  default:
    break;
  case ISD::SETGT:
  case ISD::SETLE:
  case ISD::SETUGT:
  case ISD::SETULE:
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
    break;
  }
}

SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(0);
  SDValue TrueV = Op.getOperand(1);
  SDValue FalseV = Op.getOperand(2);
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  MVT GRLenVT = Subtarget.getGRLenVT();

  if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
    return V;

  if (Op.hasOneUse()) {
    unsigned UseOpc = Op->user_begin()->getOpcode();
    if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
      SDNode *BinOp = *Op->user_begin();
      if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
                                                           DAG, Subtarget)) {
        DAG.ReplaceAllUsesWith(BinOp, &NewSel);
        // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
        // may return a constant node and cause a crash in lowerSELECT.
        if (NewSel.getOpcode() == ISD::SELECT)
          return lowerSELECT(NewSel, DAG);
        return NewSel;
      }
    }
  }

  // If the condition is not an integer SETCC which operates on GRLenVT, we
  // need to emit a LoongArchISD::SELECT_CC comparing the condition to zero.
  // i.e.:
  // (select condv, truev, falsev)
  // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
  if (CondV.getOpcode() != ISD::SETCC ||
      CondV.getOperand(0).getSimpleValueType() != GRLenVT) {
    SDValue Zero = DAG.getConstant(0, DL, GRLenVT);
    SDValue SetNE = DAG.getCondCode(ISD::SETNE);

    SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};

    return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
  }

  // If the CondV is the output of a SETCC node which operates on GRLenVT
  // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
  // to take advantage of the integer compare+branch instructions. i.e.:
  // (select (setcc lhs, rhs, cc), truev, falsev)
  // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
  SDValue LHS = CondV.getOperand(0);
  SDValue RHS = CondV.getOperand(1);
  ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();

  // Special case for a select of 2 constants that have a difference of 1.
  // Normally this is done by DAGCombine, but if the select is introduced by
  // type legalization or op legalization, we miss it. Restricting to SETLT
  // case for now because that is what signed saturating add/sub need.
  // FIXME: We don't need the condition to be SETLT or even a SETCC,
  // but we would probably want to swap the true/false values if the condition
  // is SETGE/SETLE to avoid an XORI.
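  // For example, (select (setlt x, y), 10, 9) becomes
  // (add (setlt x, y), 9).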
  if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
      CCVal == ISD::SETLT) {
    const APInt &TrueVal = TrueV->getAsAPIntVal();
    const APInt &FalseVal = FalseV->getAsAPIntVal();
    if (TrueVal - 1 == FalseVal)
      return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
    if (TrueVal + 1 == FalseVal)
      return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
  }

  translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
  // 1 < x ? x : 1 -> 0 < x ? x : 1
  if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
      RHS == TrueV && LHS == FalseV) {
    LHS = DAG.getConstant(0, DL, VT);
    // 0 <u x is the same as x != 0.
    if (CCVal == ISD::SETULT) {
      std::swap(LHS, RHS);
      CCVal = ISD::SETNE;
    }
  }

  // x <s -1 ? x : -1 -> x <s 0 ? x : -1
  if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
      RHS == FalseV) {
    RHS = DAG.getConstant(0, DL, VT);
  }

  SDValue TargetCC = DAG.getCondCode(CCVal);

  if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
    // (select (setcc lhs, rhs, CC), constant, falsev)
    // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
    std::swap(TrueV, FalseV);
    TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
  }

  SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
  return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
}

SDValue
LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT OpVT = Op.getSimpleValueType();

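  // Lower as an insert of the scalar into lane 0 of an undef vector; later
  // instruction selection can match the insert to a single move-to-element.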
  SDValue Vector = DAG.getUNDEF(OpVT);
  SDValue Val = Op.getOperand(0);
  SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());

  return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
}

SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  EVT ResTy = Op->getValueType(0);
  SDValue Src = Op->getOperand(0);
  SDLoc DL(Op);

  EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
  unsigned int OrigEltNum = ResTy.getVectorNumElements();
  unsigned int NewEltNum = NewVT.getVectorNumElements();

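  // Strategy: bitcast to a vector of i64, bit-reverse each 64-bit element
  // (BITREV_8B only reverses the bits within each byte for byte vectors),
  // then shuffle the narrower elements back into their original positions.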
  SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);

  SmallVector<SDValue, 8> Ops;
  for (unsigned int i = 0; i < NewEltNum; i++) {
    SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
                             DAG.getConstant(i, DL, MVT::i64));
    unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
                         ? (unsigned)LoongArchISD::BITREV_8B
                         : (unsigned)ISD::BITREVERSE;
    Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
  }
  SDValue Res =
      DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));

  switch (ResTy.getSimpleVT().SimpleTy) {
  default:
    return SDValue();
  case MVT::v16i8:
  case MVT::v32i8:
    return Res;
  case MVT::v8i16:
  case MVT::v16i16:
  case MVT::v4i32:
  case MVT::v8i32: {
    SmallVector<int, 32> Mask;
    for (unsigned int i = 0; i < NewEltNum; i++)
      for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
        Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
    return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
  }
  }
}

// Widen element type to get a new mask value (if possible).
// For example:
// shufflevector <4 x i32> %a, <4 x i32> %b,
//               <4 x i32> <i32 6, i32 7, i32 2, i32 3>
// is equivalent to:
// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
// can be lowered to:
// VPACKOD_D vr0, vr0, vr1
static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
                                SDValue V1, SDValue V2, SelectionDAG &DAG) {
  unsigned EltBits = VT.getScalarSizeInBits();

  if (EltBits > 32 || EltBits == 1)
    return SDValue();

  SmallVector<int, 8> NewMask;
  if (widenShuffleMaskElts(Mask, NewMask)) {
    MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
                                        : MVT::getIntegerVT(EltBits * 2);
    MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
    if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
      SDValue NewV1 = DAG.getBitcast(NewVT, V1);
      SDValue NewV2 = DAG.getBitcast(NewVT, V2);
      return DAG.getBitcast(
          VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
    }
  }

  return SDValue();
}

/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
/// instructions.
// The function matches elements from one of the input vectors shuffled to the
// left or right with zeroable elements 'shifted in'. It handles both the
// strictly bit-wise element shifts and the byte shift across an entire 128-bit
// lane.
// Mostly copied from X86.
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
                               unsigned ScalarSizeInBits, ArrayRef<int> Mask,
                               int MaskOffset, const APInt &Zeroable) {
  int Size = Mask.size();
  unsigned SizeInBits = Size * ScalarSizeInBits;

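  // Check that every position which would be 'shifted in' within each
  // Scale-sized group of mask elements is zeroable.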
  auto CheckZeros = [&](int Shift, int Scale, bool Left) {
    for (int i = 0; i < Size; i += Scale)
      for (int j = 0; j < Shift; ++j)
        if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
          return false;

    return true;
  };

  auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
                                        int Step = 1) {
    for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
      if (!(Mask[i] == -1 || Mask[i] == Low))
        return false;
    return true;
  };

  auto MatchShift = [&](int Shift, int Scale, bool Left) {
    for (int i = 0; i != Size; i += Scale) {
      unsigned Pos = Left ? i + Shift : i;
      unsigned Low = Left ? i : i + Shift;
      unsigned Len = Scale - Shift;
      if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
        return -1;
    }

    int ShiftEltBits = ScalarSizeInBits * Scale;
    bool ByteShift = ShiftEltBits > 64;
    Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
                  : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
    int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);

    // Normalize the scale for byte shifts to still produce an i64 element
    // type.
    Scale = ByteShift ? Scale / 2 : Scale;

    // We need to round trip through the appropriate type for the shift.
    MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
    ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
                        : MVT::getVectorVT(ShiftSVT, Size / Scale);
    return (int)ShiftAmt;
  };

  unsigned MaxWidth = 128;
  for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
    for (int Shift = 1; Shift != Scale; ++Shift)
      for (bool Left : {true, false})
        if (CheckZeros(Shift, Scale, Left)) {
          int ShiftAmt = MatchShift(Shift, Scale, Left);
          if (0 < ShiftAmt)
            return ShiftAmt;
        }

  // no match
  return -1;
}

/// Lower VECTOR_SHUFFLE as shift (if possible).
///
/// For example:
/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
///                    <4 x i32> <i32 4, i32 0, i32 1, i32 2>
/// is lowered to:
/// (VBSLL_V $v0, $v0, 4)
///
/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
///                    <4 x i32> <i32 4, i32 0, i32 4, i32 2>
/// is lowered to:
/// (VSLLI_D $v0, $v0, 32)
static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef<int> Mask,
                                          MVT VT, SDValue V1, SDValue V2,
                                          SelectionDAG &DAG,
                                          const APInt &Zeroable) {
  int Size = Mask.size();
  assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");

  MVT ShiftVT;
  SDValue V = V1;
  unsigned Opcode;

  // Try to match shuffle against V1 shift.
  int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
                                     Mask, 0, Zeroable);

  // If V1 failed, try to match shuffle against V2 shift.
  if (ShiftAmt < 0) {
    ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
                                   Mask, Size, Zeroable);
    V = V2;
  }

  if (ShiftAmt < 0)
    return SDValue();

  assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
         "Illegal integer vector type");
  V = DAG.getBitcast(ShiftVT, V);
  V = DAG.getNode(Opcode, DL, ShiftVT, V,
                  DAG.getConstant(ShiftAmt, DL, MVT::i64));
  return DAG.getBitcast(VT, V);
}

/// Determine whether a range fits a regular pattern of values.
/// This function accounts for the possibility of jumping over the End iterator.
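/// For example, the mask range <0, -1, 2, -1> with CheckStride = 1,
/// ExpectedIndex = 0 and ExpectedIndexStride = 1 fits, since undef (-1)
/// entries act as wildcards.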
template <typename ValType>
static bool
fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
                   unsigned CheckStride,
                   typename SmallVectorImpl<ValType>::const_iterator End,
                   ValType ExpectedIndex, unsigned ExpectedIndexStride) {
  auto &I = Begin;

  while (I != End) {
    if (*I != -1 && *I != ExpectedIndex)
      return false;
    ExpectedIndex += ExpectedIndexStride;

    // Incrementing past End is undefined behaviour so we must increment one
    // step at a time and check for End at each step.
    for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
      ; // Empty loop body.
  }
  return true;
}

/// Compute whether each element of a shuffle is zeroable.
///
/// A "zeroable" vector shuffle element is one which can be lowered to zero.
static void computeZeroableShuffleElements(ArrayRef<int> Mask, SDValue V1,
                                           SDValue V2, APInt &KnownUndef,
                                           APInt &KnownZero) {
  int Size = Mask.size();
  KnownUndef = KnownZero = APInt::getZero(Size);

  V1 = peekThroughBitcasts(V1);
  V2 = peekThroughBitcasts(V2);

  bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
  bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());

  int VectorSizeInBits = V1.getValueSizeInBits();
  int ScalarSizeInBits = VectorSizeInBits / Size;
  assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
  (void)ScalarSizeInBits;

  for (int i = 0; i < Size; ++i) {
    int M = Mask[i];
    if (M < 0) {
      KnownUndef.setBit(i);
      continue;
    }
    if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
      KnownZero.setBit(i);
      continue;
    }
  }
}

/// Test whether a shuffle mask is equivalent within each sub-lane.
///
/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
/// non-trivial to compute in the face of undef lanes. The representation is
/// suitable for use with existing 128-bit shuffles as entries from the second
/// vector have been remapped to [LaneSize, 2*LaneSize).
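/// For example, the v8i32 mask <0, 9, 2, 11, 4, 13, 6, 15> repeats the
/// per-128-bit-lane mask <0, 5, 2, 7>.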
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
                                  ArrayRef<int> Mask,
                                  SmallVectorImpl<int> &RepeatedMask) {
  auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
  RepeatedMask.assign(LaneSize, -1);
  int Size = Mask.size();
  for (int i = 0; i < Size; ++i) {
    assert(Mask[i] == -1 || Mask[i] >= 0);
    if (Mask[i] < 0)
      continue;
    if ((Mask[i] % Size) / LaneSize != i / LaneSize)
      // This entry crosses lanes, so there is no way to model this shuffle.
      return false;

    // Ok, handle the in-lane shuffles by detecting if and when they repeat.
    // Adjust second vector indices to start at LaneSize instead of Size.
    int LocalM =
        Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
    if (RepeatedMask[i % LaneSize] < 0)
      // This is the first non-undef entry in this slot of a 128-bit lane.
      RepeatedMask[i % LaneSize] = LocalM;
    else if (RepeatedMask[i % LaneSize] != LocalM)
      // Found a mismatch with the repeated mask.
      return false;
  }
  return true;
}

/// Attempts to match vector shuffle as byte rotation.
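/// For example, the v2i64 mask <3, 0> matches a byte rotation of 8, with V1
/// supplying the low half of the result and V2 the high half.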
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
                                    ArrayRef<int> Mask) {

  SDValue Lo, Hi;
  SmallVector<int, 16> RepeatedMask;

  if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
    return -1;

  int NumElts = RepeatedMask.size();
  int Rotation = 0;
  int Scale = 16 / NumElts;

  for (int i = 0; i < NumElts; ++i) {
    int M = RepeatedMask[i];
    assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
           "Unexpected mask index.");
    if (M < 0)
      continue;

    // Determine where a rotated vector would have started.
    int StartIdx = i - (M % NumElts);
    if (StartIdx == 0)
      return -1;

    // If we found the tail of a vector the rotation must be the missing
    // front. If we found the head of a vector, it must be how much of the
    // head.
    int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;

    if (Rotation == 0)
      Rotation = CandidateRotation;
    else if (Rotation != CandidateRotation)
      return -1;

    // Compute which value this mask is pointing at.
    SDValue MaskV = M < NumElts ? V1 : V2;

    // Compute which of the two target values this index should be assigned
    // to. This reflects whether the high elements are remaining or the low
    // elements are remaining.
    SDValue &TargetV = StartIdx < 0 ? Hi : Lo;

    // Either set up this value if we've not encountered it before, or check
    // that it remains consistent.
    if (!TargetV)
      TargetV = MaskV;
    else if (TargetV != MaskV)
      return -1;
  }

  // Check that we successfully analyzed the mask, and normalize the results.
  assert(Rotation != 0 && "Failed to locate a viable rotation!");
  assert((Lo || Hi) && "Failed to find a rotated input vector!");
  if (!Lo)
    Lo = Hi;
  else if (!Hi)
    Hi = Lo;

  V1 = Lo;
  V2 = Hi;

  return Rotation * Scale;
}

/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
///
/// For example:
/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
///                          <2 x i32> <i32 3, i32 0>
/// is lowered to:
/// (VBSRL_V $v1, $v1, 8)
/// (VBSLL_V $v0, $v0, 8)
/// (VOR_V $v0, $v0, $v1)
static SDValue lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL,
                                               ArrayRef<int> Mask, MVT VT,
                                               SDValue V1, SDValue V2,
                                               SelectionDAG &DAG) {

  SDValue Lo = V1, Hi = V2;
  int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
  if (ByteRotation <= 0)
    return SDValue();

  MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
  Lo = DAG.getBitcast(ByteVT, Lo);
  Hi = DAG.getBitcast(ByteVT, Hi);

  int LoByteShift = 16 - ByteRotation;
  int HiByteShift = ByteRotation;

  SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
                                DAG.getConstant(LoByteShift, DL, MVT::i64));
  SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
                                DAG.getConstant(HiByteShift, DL, MVT::i64));
  return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
}

1246 | /// Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible). |
1247 | /// |
1248 | /// For example: |
1249 | /// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer, |
1250 | /// <4 x i32> <i32 0, i32 4, i32 1, i32 4> |
1251 | /// %3 = bitcast <4 x i32> %2 to <2 x i64> |
1252 | /// is lowered to: |
1253 | /// (VREPLI $v1, 0) |
1254 | /// (VILVL $v0, $v1, $v0) |
1255 | static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL, |
1256 | ArrayRef<int> Mask, MVT VT, |
1257 | SDValue V1, SDValue V2, |
1258 | SelectionDAG &DAG, |
1259 | const APInt &Zeroable) { |
1260 | int Bits = VT.getSizeInBits(); |
1261 | int EltBits = VT.getScalarSizeInBits(); |
1262 | int NumElements = VT.getVectorNumElements(); |
1263 | |
1264 | if (Zeroable.isAllOnes()) |
1265 | return DAG.getConstant(Val: 0, DL, VT); |
1266 | |
1267 | // Define a helper function to check a particular ext-scale and lower to it if |
1268 | // valid. |
1269 | auto Lower = [&](int Scale) -> SDValue { |
1270 | SDValue InputV; |
1271 | bool AnyExt = true; |
1272 | int Offset = 0; |
1273 | for (int i = 0; i < NumElements; i++) { |
1274 | int M = Mask[i]; |
1275 | if (M < 0) |
1276 | continue; |
1277 | if (i % Scale != 0) { |
        // Each of the extended elements needs to be zeroable.
1279 | if (!Zeroable[i]) |
1280 | return SDValue(); |
1281 | |
1282 | AnyExt = false; |
1283 | continue; |
1284 | } |
1285 | |
      // The base elements need to be consecutive indices into the same
      // input vector.
1288 | SDValue V = M < NumElements ? V1 : V2; |
1289 | M = M % NumElements; |
1290 | if (!InputV) { |
1291 | InputV = V; |
1292 | Offset = M - (i / Scale); |
1293 | |
        // These offsets can't be handled.
1295 | if (Offset % (NumElements / Scale)) |
1296 | return SDValue(); |
1297 | } else if (InputV != V) |
1298 | return SDValue(); |
1299 | |
1300 | if (M != (Offset + (i / Scale))) |
1301 | return SDValue(); // Non-consecutive strided elements. |
1302 | } |
1303 | |
1304 | // If we fail to find an input, we have a zero-shuffle which should always |
1305 | // have already been handled. |
1306 | if (!InputV) |
1307 | return SDValue(); |
1308 | |
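    // Repeatedly interleave with a zero vector (or a frozen copy of the
    // input for any-extend), doubling the element width each step until the
    // requested extension scale is reached.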
1309 | do { |
1310 | unsigned VilVLoHi = LoongArchISD::VILVL; |
1311 | if (Offset >= (NumElements / 2)) { |
1312 | VilVLoHi = LoongArchISD::VILVH; |
1313 | Offset -= (NumElements / 2); |
1314 | } |
1315 | |
1316 | MVT InputVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: EltBits), NumElements); |
1317 | SDValue Ext = |
1318 | AnyExt ? DAG.getFreeze(V: InputV) : DAG.getConstant(Val: 0, DL, VT: InputVT); |
1319 | InputV = DAG.getBitcast(VT: InputVT, V: InputV); |
1320 | InputV = DAG.getNode(Opcode: VilVLoHi, DL, VT: InputVT, N1: Ext, N2: InputV); |
1321 | Scale /= 2; |
1322 | EltBits *= 2; |
1323 | NumElements /= 2; |
1324 | } while (Scale > 1); |
1325 | return DAG.getBitcast(VT, V: InputV); |
1326 | }; |
1327 | |
1328 | // Each iteration, try extending the elements half as much, but into twice as |
1329 | // many elements. |
1330 | for (int NumExtElements = Bits / 64; NumExtElements < NumElements; |
1331 | NumExtElements *= 2) { |
1332 | if (SDValue V = Lower(NumElements / NumExtElements)) |
1333 | return V; |
1334 | } |
1335 | return SDValue(); |
1336 | } |
1337 | |
1338 | /// Lower VECTOR_SHUFFLE into VREPLVEI (if possible). |
1339 | /// |
1340 | /// VREPLVEI performs vector broadcast based on an element specified by an |
1341 | /// integer immediate, with its mask being similar to: |
1342 | /// <x, x, x, ...> |
1343 | /// where x is any valid index. |
1344 | /// |
/// When undefs appear in the mask, they are treated as if they were whatever
/// value is necessary in order to fit the above form.
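///
/// For example:
///   %2 = shufflevector <4 x i32> %0, <4 x i32> undef,
///                      <4 x i32> <i32 1, i32 1, i32 1, i32 1>
/// is lowered to:
///   (VREPLVEI_W $v0, $v0, 1)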
1347 | static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, |
1348 | MVT VT, SDValue V1, SDValue V2, |
1349 | SelectionDAG &DAG) { |
1350 | int SplatIndex = -1; |
1351 | for (const auto &M : Mask) { |
1352 | if (M != -1) { |
1353 | SplatIndex = M; |
1354 | break; |
1355 | } |
1356 | } |
1357 | |
1358 | if (SplatIndex == -1) |
1359 | return DAG.getUNDEF(VT); |
1360 | |
1361 | assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index" ); |
1362 | if (fitsRegularPattern<int>(Begin: Mask.begin(), CheckStride: 1, End: Mask.end(), ExpectedIndex: SplatIndex, ExpectedIndexStride: 0)) { |
1363 | APInt Imm(64, SplatIndex); |
1364 | return DAG.getNode(Opcode: LoongArchISD::VREPLVEI, DL, VT, N1: V1, |
1365 | N2: DAG.getConstant(Val: Imm, DL, VT: MVT::i64)); |
1366 | } |
1367 | |
1368 | return SDValue(); |
1369 | } |
1370 | |
1371 | /// Lower VECTOR_SHUFFLE into VSHUF4I (if possible). |
1372 | /// |
1373 | /// VSHUF4I splits the vector into blocks of four elements, then shuffles these |
1374 | /// elements according to a <4 x i2> constant (encoded as an integer immediate). |
1375 | /// |
1376 | /// It is therefore possible to lower into VSHUF4I when the mask takes the form: |
1377 | /// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...> |
/// When undefs appear, they are treated as if they were whatever value is
/// necessary in order to fit the above forms.
1380 | /// |
1381 | /// For example: |
1382 | /// %2 = shufflevector <8 x i16> %0, <8 x i16> undef, |
1383 | /// <8 x i32> <i32 3, i32 2, i32 1, i32 0, |
1384 | /// i32 7, i32 6, i32 5, i32 4> |
1385 | /// is lowered to: |
1386 | /// (VSHUF4I_H $v0, $v1, 27) |
1387 | /// where the 27 comes from: |
1388 | /// 3 + (2 << 2) + (1 << 4) + (0 << 6) |
1389 | static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, |
1390 | MVT VT, SDValue V1, SDValue V2, |
1391 | SelectionDAG &DAG) { |
1392 | |
1393 | unsigned SubVecSize = 4; |
1394 | if (VT == MVT::v2f64 || VT == MVT::v2i64) |
1395 | SubVecSize = 2; |
1396 | |
1397 | int SubMask[4] = {-1, -1, -1, -1}; |
1398 | for (unsigned i = 0; i < SubVecSize; ++i) { |
1399 | for (unsigned j = i; j < Mask.size(); j += SubVecSize) { |
1400 | int M = Mask[j]; |
1401 | |
      // Convert from vector index to 4-element subvector index.
      // If an index refers to an element outside of the subvector, give up.
1404 | if (M != -1) { |
1405 | M -= 4 * (j / SubVecSize); |
1406 | if (M < 0 || M >= 4) |
1407 | return SDValue(); |
1408 | } |
1409 | |
1410 | // If the mask has an undef, replace it with the current index. |
      // Note that it might still be undef if the current index is also undef.
1412 | if (SubMask[i] == -1) |
1413 | SubMask[i] = M; |
1414 | // Check that non-undef values are the same as in the mask. If they |
1415 | // aren't then give up |
1416 | else if (M != -1 && M != SubMask[i]) |
1417 | return SDValue(); |
1418 | } |
1419 | } |
1420 | |
  // Calculate the immediate. Replace any remaining undefs with zero.
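  // For example, SubMask <3, 2, 1, 0> yields Imm = 0b00011011 = 27.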
1422 | APInt Imm(64, 0); |
1423 | for (int i = SubVecSize - 1; i >= 0; --i) { |
1424 | int M = SubMask[i]; |
1425 | |
1426 | if (M == -1) |
1427 | M = 0; |
1428 | |
1429 | Imm <<= 2; |
1430 | Imm |= M & 0x3; |
1431 | } |
1432 | |
  // For v2i64/v2f64, return vshuf4i.d, which shuffles both input vectors.
1434 | if (VT == MVT::v2f64 || VT == MVT::v2i64) |
1435 | return DAG.getNode(Opcode: LoongArchISD::VSHUF4I, DL, VT, N1: V1, N2: V2, |
1436 | N3: DAG.getConstant(Val: Imm, DL, VT: MVT::i64)); |
1437 | |
1438 | return DAG.getNode(Opcode: LoongArchISD::VSHUF4I, DL, VT, N1: V1, |
1439 | N2: DAG.getConstant(Val: Imm, DL, VT: MVT::i64)); |
1440 | } |
1441 | |
1442 | /// Lower VECTOR_SHUFFLE into VPACKEV (if possible). |
1443 | /// |
1444 | /// VPACKEV interleaves the even elements from each vector. |
1445 | /// |
1446 | /// It is possible to lower into VPACKEV when the mask consists of two of the |
1447 | /// following forms interleaved: |
1448 | /// <0, 2, 4, ...> |
1449 | /// <n, n+2, n+4, ...> |
1450 | /// where n is the number of elements in the vector. |
1451 | /// For example: |
1452 | /// <0, 0, 2, 2, 4, 4, ...> |
1453 | /// <0, n, 2, n+2, 4, n+4, ...> |
1454 | /// |
/// When undefs appear in the mask, they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
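///
/// For example:
///   %2 = shufflevector <4 x i32> %0, <4 x i32> %1,
///                      <4 x i32> <i32 0, i32 4, i32 2, i32 6>
/// is lowered to:
///   (VPACKEV_W $v0, $v1, $v0)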
1457 | static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef<int> Mask, |
1458 | MVT VT, SDValue V1, SDValue V2, |
1459 | SelectionDAG &DAG) { |
1460 | |
1461 | const auto &Begin = Mask.begin(); |
1462 | const auto &End = Mask.end(); |
1463 | SDValue OriV1 = V1, OriV2 = V2; |
1464 | |
1465 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 2)) |
1466 | V1 = OriV1; |
1467 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2)) |
1468 | V1 = OriV2; |
1469 | else |
1470 | return SDValue(); |
1471 | |
1472 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 2)) |
1473 | V2 = OriV1; |
1474 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2)) |
1475 | V2 = OriV2; |
1476 | else |
1477 | return SDValue(); |
1478 | |
1479 | return DAG.getNode(Opcode: LoongArchISD::VPACKEV, DL, VT, N1: V2, N2: V1); |
1480 | } |
1481 | |
1482 | /// Lower VECTOR_SHUFFLE into VPACKOD (if possible). |
1483 | /// |
1484 | /// VPACKOD interleaves the odd elements from each vector. |
1485 | /// |
1486 | /// It is possible to lower into VPACKOD when the mask consists of two of the |
1487 | /// following forms interleaved: |
1488 | /// <1, 3, 5, ...> |
1489 | /// <n+1, n+3, n+5, ...> |
1490 | /// where n is the number of elements in the vector. |
1491 | /// For example: |
1492 | /// <1, 1, 3, 3, 5, 5, ...> |
1493 | /// <1, n+1, 3, n+3, 5, n+5, ...> |
1494 | /// |
/// When undefs appear in the mask, they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
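///
/// For example:
///   %2 = shufflevector <4 x i32> %0, <4 x i32> %1,
///                      <4 x i32> <i32 1, i32 5, i32 3, i32 7>
/// is lowered to:
///   (VPACKOD_W $v0, $v1, $v0)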
1497 | static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef<int> Mask, |
1498 | MVT VT, SDValue V1, SDValue V2, |
1499 | SelectionDAG &DAG) { |
1500 | |
1501 | const auto &Begin = Mask.begin(); |
1502 | const auto &End = Mask.end(); |
1503 | SDValue OriV1 = V1, OriV2 = V2; |
1504 | |
1505 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 1, ExpectedIndexStride: 2)) |
1506 | V1 = OriV1; |
1507 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2)) |
1508 | V1 = OriV2; |
1509 | else |
1510 | return SDValue(); |
1511 | |
1512 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 1, ExpectedIndexStride: 2)) |
1513 | V2 = OriV1; |
1514 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2)) |
1515 | V2 = OriV2; |
1516 | else |
1517 | return SDValue(); |
1518 | |
1519 | return DAG.getNode(Opcode: LoongArchISD::VPACKOD, DL, VT, N1: V2, N2: V1); |
1520 | } |
1521 | |
1522 | /// Lower VECTOR_SHUFFLE into VILVH (if possible). |
1523 | /// |
1524 | /// VILVH interleaves consecutive elements from the left (highest-indexed) half |
1525 | /// of each vector. |
1526 | /// |
1527 | /// It is possible to lower into VILVH when the mask consists of two of the |
1528 | /// following forms interleaved: |
1529 | /// <x, x+1, x+2, ...> |
1530 | /// <n+x, n+x+1, n+x+2, ...> |
1531 | /// where n is the number of elements in the vector and x is half n. |
1532 | /// For example: |
1533 | /// <x, x, x+1, x+1, x+2, x+2, ...> |
1534 | /// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...> |
1535 | /// |
/// When undefs appear in the mask, they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
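///
/// For example:
///   %2 = shufflevector <4 x i32> %0, <4 x i32> %1,
///                      <4 x i32> <i32 2, i32 6, i32 3, i32 7>
/// is lowered to:
///   (VILVH_W $v0, $v1, $v0)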
1538 | static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef<int> Mask, |
1539 | MVT VT, SDValue V1, SDValue V2, |
1540 | SelectionDAG &DAG) { |
1541 | |
1542 | const auto &Begin = Mask.begin(); |
1543 | const auto &End = Mask.end(); |
1544 | unsigned HalfSize = Mask.size() / 2; |
1545 | SDValue OriV1 = V1, OriV2 = V2; |
1546 | |
1547 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1)) |
1548 | V1 = OriV1; |
1549 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 1)) |
1550 | V1 = OriV2; |
1551 | else |
1552 | return SDValue(); |
1553 | |
1554 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1)) |
1555 | V2 = OriV1; |
1556 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size() + HalfSize, |
1557 | ExpectedIndexStride: 1)) |
1558 | V2 = OriV2; |
1559 | else |
1560 | return SDValue(); |
1561 | |
1562 | return DAG.getNode(Opcode: LoongArchISD::VILVH, DL, VT, N1: V2, N2: V1); |
1563 | } |
1564 | |
1565 | /// Lower VECTOR_SHUFFLE into VILVL (if possible). |
1566 | /// |
1567 | /// VILVL interleaves consecutive elements from the right (lowest-indexed) half |
1568 | /// of each vector. |
1569 | /// |
1570 | /// It is possible to lower into VILVL when the mask consists of two of the |
1571 | /// following forms interleaved: |
1572 | /// <0, 1, 2, ...> |
1573 | /// <n, n+1, n+2, ...> |
1574 | /// where n is the number of elements in the vector. |
1575 | /// For example: |
1576 | /// <0, 0, 1, 1, 2, 2, ...> |
1577 | /// <0, n, 1, n+1, 2, n+2, ...> |
1578 | /// |
/// When undefs appear in the mask, they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
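///
/// For example:
///   %2 = shufflevector <4 x i32> %0, <4 x i32> %1,
///                      <4 x i32> <i32 0, i32 4, i32 1, i32 5>
/// is lowered to:
///   (VILVL_W $v0, $v1, $v0)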
1581 | static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask, |
1582 | MVT VT, SDValue V1, SDValue V2, |
1583 | SelectionDAG &DAG) { |
1584 | |
1585 | const auto &Begin = Mask.begin(); |
1586 | const auto &End = Mask.end(); |
1587 | SDValue OriV1 = V1, OriV2 = V2; |
1588 | |
1589 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 1)) |
1590 | V1 = OriV1; |
1591 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 1)) |
1592 | V1 = OriV2; |
1593 | else |
1594 | return SDValue(); |
1595 | |
1596 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 1)) |
1597 | V2 = OriV1; |
1598 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 1)) |
1599 | V2 = OriV2; |
1600 | else |
1601 | return SDValue(); |
1602 | |
1603 | return DAG.getNode(Opcode: LoongArchISD::VILVL, DL, VT, N1: V2, N2: V1); |
1604 | } |
1605 | |
1606 | /// Lower VECTOR_SHUFFLE into VPICKEV (if possible). |
1607 | /// |
1608 | /// VPICKEV copies the even elements of each vector into the result vector. |
1609 | /// |
1610 | /// It is possible to lower into VPICKEV when the mask consists of two of the |
1611 | /// following forms concatenated: |
1612 | /// <0, 2, 4, ...> |
1613 | /// <n, n+2, n+4, ...> |
1614 | /// where n is the number of elements in the vector. |
1615 | /// For example: |
1616 | /// <0, 2, 4, ..., 0, 2, 4, ...> |
1617 | /// <0, 2, 4, ..., n, n+2, n+4, ...> |
1618 | /// |
/// When undefs appear in the mask, they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
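///
/// For example:
///   %2 = shufflevector <4 x i32> %0, <4 x i32> %1,
///                      <4 x i32> <i32 0, i32 2, i32 4, i32 6>
/// is lowered to:
///   (VPICKEV_W $v0, $v1, $v0)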
1621 | static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask, |
1622 | MVT VT, SDValue V1, SDValue V2, |
1623 | SelectionDAG &DAG) { |
1624 | |
1625 | const auto &Begin = Mask.begin(); |
1626 | const auto &Mid = Mask.begin() + Mask.size() / 2; |
1627 | const auto &End = Mask.end(); |
1628 | SDValue OriV1 = V1, OriV2 = V2; |
1629 | |
1630 | if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: 0, ExpectedIndexStride: 2)) |
1631 | V1 = OriV1; |
1632 | else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2)) |
1633 | V1 = OriV2; |
1634 | else |
1635 | return SDValue(); |
1636 | |
1637 | if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: 0, ExpectedIndexStride: 2)) |
1638 | V2 = OriV1; |
1639 | else if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2)) |
1640 | V2 = OriV2; |
1641 | |
1642 | else |
1643 | return SDValue(); |
1644 | |
1645 | return DAG.getNode(Opcode: LoongArchISD::VPICKEV, DL, VT, N1: V2, N2: V1); |
1646 | } |
1647 | |
1648 | /// Lower VECTOR_SHUFFLE into VPICKOD (if possible). |
1649 | /// |
1650 | /// VPICKOD copies the odd elements of each vector into the result vector. |
1651 | /// |
1652 | /// It is possible to lower into VPICKOD when the mask consists of two of the |
1653 | /// following forms concatenated: |
1654 | /// <1, 3, 5, ...> |
1655 | /// <n+1, n+3, n+5, ...> |
1656 | /// where n is the number of elements in the vector. |
1657 | /// For example: |
1658 | /// <1, 3, 5, ..., 1, 3, 5, ...> |
1659 | /// <1, 3, 5, ..., n+1, n+3, n+5, ...> |
1660 | /// |
/// When undefs appear in the mask, they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
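///
/// For example:
///   %2 = shufflevector <4 x i32> %0, <4 x i32> %1,
///                      <4 x i32> <i32 1, i32 3, i32 5, i32 7>
/// is lowered to:
///   (VPICKOD_W $v0, $v1, $v0)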
1663 | static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask, |
1664 | MVT VT, SDValue V1, SDValue V2, |
1665 | SelectionDAG &DAG) { |
1666 | |
1667 | const auto &Begin = Mask.begin(); |
1668 | const auto &Mid = Mask.begin() + Mask.size() / 2; |
1669 | const auto &End = Mask.end(); |
1670 | SDValue OriV1 = V1, OriV2 = V2; |
1671 | |
1672 | if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: 1, ExpectedIndexStride: 2)) |
1673 | V1 = OriV1; |
1674 | else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2)) |
1675 | V1 = OriV2; |
1676 | else |
1677 | return SDValue(); |
1678 | |
1679 | if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: 1, ExpectedIndexStride: 2)) |
1680 | V2 = OriV1; |
1681 | else if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2)) |
1682 | V2 = OriV2; |
1683 | else |
1684 | return SDValue(); |
1685 | |
1686 | return DAG.getNode(Opcode: LoongArchISD::VPICKOD, DL, VT, N1: V2, N2: V1); |
1687 | } |
1688 | |
1689 | /// Lower VECTOR_SHUFFLE into VSHUF. |
1690 | /// |
1691 | /// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and |
1692 | /// adding it as an operand to the resulting VSHUF. |
1693 | static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask, |
1694 | MVT VT, SDValue V1, SDValue V2, |
1695 | SelectionDAG &DAG) { |
1696 | |
1697 | SmallVector<SDValue, 16> Ops; |
1698 | for (auto M : Mask) |
1699 | Ops.push_back(Elt: DAG.getConstant(Val: M, DL, VT: MVT::i64)); |
1700 | |
1701 | EVT MaskVecTy = VT.changeVectorElementTypeToInteger(); |
1702 | SDValue MaskVec = DAG.getBuildVector(VT: MaskVecTy, DL, Ops); |
1703 | |
  // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
1705 | // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11> |
1706 | // VSHF concatenates the vectors in a bitwise fashion: |
1707 | // <0b00, 0b01> + <0b10, 0b11> -> |
1708 | // 0b0100 + 0b1110 -> 0b01001110 |
1709 | // <0b10, 0b11, 0b00, 0b01> |
1710 | // We must therefore swap the operands to get the correct result. |
1711 | return DAG.getNode(Opcode: LoongArchISD::VSHUF, DL, VT, N1: MaskVec, N2: V2, N3: V1); |
1712 | } |
1713 | |
1714 | /// Dispatching routine to lower various 128-bit LoongArch vector shuffles. |
1715 | /// |
1716 | /// This routine breaks down the specific type of 128-bit shuffle and |
1717 | /// dispatches to the lowering routines accordingly. |
1718 | static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT, |
1719 | SDValue V1, SDValue V2, SelectionDAG &DAG) { |
1720 | assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 || |
1721 | VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 || |
1722 | VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) && |
1723 | "Vector type is unsupported for lsx!" ); |
1724 | assert(V1.getSimpleValueType() == V2.getSimpleValueType() && |
1725 | "Two operands have different types!" ); |
1726 | assert(VT.getVectorNumElements() == Mask.size() && |
1727 | "Unexpected mask size for shuffle!" ); |
1728 | assert(Mask.size() % 2 == 0 && "Expected even mask size." ); |
1729 | |
1730 | APInt KnownUndef, KnownZero; |
1731 | computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero); |
1732 | APInt Zeroable = KnownUndef | KnownZero; |
1733 | |
1734 | SDValue Result; |
1735 | // TODO: Add more comparison patterns. |
1736 | if (V2.isUndef()) { |
1737 | if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG))) |
1738 | return Result; |
1739 | if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG))) |
1740 | return Result; |
1741 | |
    // TODO: The commented-out code below may be enabled in the future to
    // better match the pattern for instruction selection.
    /* V2 = V1; */
1745 | } |
1746 | |
  // The order of these pattern comparisons is tuned for performance;
  // it is best left unchanged.
1749 | if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG))) |
1750 | return Result; |
1751 | if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG))) |
1752 | return Result; |
1753 | if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG))) |
1754 | return Result; |
1755 | if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG))) |
1756 | return Result; |
1757 | if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG))) |
1758 | return Result; |
1759 | if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG))) |
1760 | return Result; |
1761 | if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) && |
1762 | (Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG))) |
1763 | return Result; |
1764 | if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG, |
1765 | Zeroable))) |
1766 | return Result; |
1767 | if ((Result = |
1768 | lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Zeroable))) |
1769 | return Result; |
1770 | if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG))) |
1771 | return Result; |
1772 | if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG)) |
1773 | return NewShuffle; |
1774 | if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG))) |
1775 | return Result; |
1776 | return SDValue(); |
1777 | } |
1778 | |
1779 | /// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible). |
1780 | /// |
/// It is an XVREPLVEI when the mask is:
///   <x, x, x, ..., x+n, x+n, x+n, ...>
/// where the number of x's is equal to n and n is half the length of the
/// vector.
1784 | /// |
/// When undefs appear in the mask, they are treated as if they were whatever
/// value is necessary in order to fit the above form.
1787 | static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, |
1788 | ArrayRef<int> Mask, MVT VT, |
1789 | SDValue V1, SDValue V2, |
1790 | SelectionDAG &DAG) { |
1791 | int SplatIndex = -1; |
1792 | for (const auto &M : Mask) { |
1793 | if (M != -1) { |
1794 | SplatIndex = M; |
1795 | break; |
1796 | } |
1797 | } |
1798 | |
1799 | if (SplatIndex == -1) |
1800 | return DAG.getUNDEF(VT); |
1801 | |
1802 | const auto &Begin = Mask.begin(); |
1803 | const auto &End = Mask.end(); |
1804 | unsigned HalfSize = Mask.size() / 2; |
1805 | |
1806 | assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index" ); |
1807 | if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: End - HalfSize, ExpectedIndex: SplatIndex, ExpectedIndexStride: 0) && |
1808 | fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 1, End, ExpectedIndex: SplatIndex + HalfSize, |
1809 | ExpectedIndexStride: 0)) { |
1810 | APInt Imm(64, SplatIndex); |
1811 | return DAG.getNode(Opcode: LoongArchISD::VREPLVEI, DL, VT, N1: V1, |
1812 | N2: DAG.getConstant(Val: Imm, DL, VT: MVT::i64)); |
1813 | } |
1814 | |
1815 | return SDValue(); |
1816 | } |
1817 | |
1818 | /// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible). |
1819 | static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, |
1820 | MVT VT, SDValue V1, SDValue V2, |
1821 | SelectionDAG &DAG) { |
  // When the size is less than or equal to 4, lower-cost instructions may be
  // used.
1824 | if (Mask.size() <= 4) |
1825 | return SDValue(); |
1826 | return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG); |
1827 | } |
1828 | |
1829 | /// Lower VECTOR_SHUFFLE into XVPACKEV (if possible). |
1830 | static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask, |
1831 | MVT VT, SDValue V1, SDValue V2, |
1832 | SelectionDAG &DAG) { |
1833 | return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG); |
1834 | } |
1835 | |
1836 | /// Lower VECTOR_SHUFFLE into XVPACKOD (if possible). |
1837 | static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask, |
1838 | MVT VT, SDValue V1, SDValue V2, |
1839 | SelectionDAG &DAG) { |
1840 | return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG); |
1841 | } |
1842 | |
1843 | /// Lower VECTOR_SHUFFLE into XVILVH (if possible). |
1844 | static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask, |
1845 | MVT VT, SDValue V1, SDValue V2, |
1846 | SelectionDAG &DAG) { |
1847 | |
1848 | const auto &Begin = Mask.begin(); |
1849 | const auto &End = Mask.end(); |
1850 | unsigned HalfSize = Mask.size() / 2; |
1851 | unsigned LeftSize = HalfSize / 2; |
1852 | SDValue OriV1 = V1, OriV2 = V2; |
1853 | |
1854 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize, ExpectedIndex: HalfSize - LeftSize, |
1855 | ExpectedIndexStride: 1) && |
1856 | fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize + LeftSize, ExpectedIndexStride: 1)) |
1857 | V1 = OriV1; |
1858 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize, |
1859 | ExpectedIndex: Mask.size() + HalfSize - LeftSize, ExpectedIndexStride: 1) && |
1860 | fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End, |
1861 | ExpectedIndex: Mask.size() + HalfSize + LeftSize, ExpectedIndexStride: 1)) |
1862 | V1 = OriV2; |
1863 | else |
1864 | return SDValue(); |
1865 | |
1866 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize, ExpectedIndex: HalfSize - LeftSize, |
1867 | ExpectedIndexStride: 1) && |
1868 | fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize + LeftSize, |
1869 | ExpectedIndexStride: 1)) |
1870 | V2 = OriV1; |
1871 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize, |
1872 | ExpectedIndex: Mask.size() + HalfSize - LeftSize, ExpectedIndexStride: 1) && |
1873 | fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End, |
1874 | ExpectedIndex: Mask.size() + HalfSize + LeftSize, ExpectedIndexStride: 1)) |
1875 | V2 = OriV2; |
1876 | else |
1877 | return SDValue(); |
1878 | |
1879 | return DAG.getNode(Opcode: LoongArchISD::VILVH, DL, VT, N1: V2, N2: V1); |
1880 | } |
1881 | |
1882 | /// Lower VECTOR_SHUFFLE into XVILVL (if possible). |
1883 | static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask, |
1884 | MVT VT, SDValue V1, SDValue V2, |
1885 | SelectionDAG &DAG) { |
1886 | |
1887 | const auto &Begin = Mask.begin(); |
1888 | const auto &End = Mask.end(); |
1889 | unsigned HalfSize = Mask.size() / 2; |
1890 | SDValue OriV1 = V1, OriV2 = V2; |
1891 | |
1892 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize, ExpectedIndex: 0, ExpectedIndexStride: 1) && |
1893 | fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1)) |
1894 | V1 = OriV1; |
1895 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End: End - HalfSize, ExpectedIndex: Mask.size(), ExpectedIndexStride: 1) && |
1896 | fitsRegularPattern<int>(Begin: Begin + HalfSize, CheckStride: 2, End, |
1897 | ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 1)) |
1898 | V1 = OriV2; |
1899 | else |
1900 | return SDValue(); |
1901 | |
1902 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize, ExpectedIndex: 0, ExpectedIndexStride: 1) && |
1903 | fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1)) |
1904 | V2 = OriV1; |
1905 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End: End - HalfSize, ExpectedIndex: Mask.size(), |
1906 | ExpectedIndexStride: 1) && |
1907 | fitsRegularPattern<int>(Begin: Begin + 1 + HalfSize, CheckStride: 2, End, |
1908 | ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 1)) |
1909 | V2 = OriV2; |
1910 | else |
1911 | return SDValue(); |
1912 | |
1913 | return DAG.getNode(Opcode: LoongArchISD::VILVL, DL, VT, N1: V2, N2: V1); |
1914 | } |
1915 | |
1916 | /// Lower VECTOR_SHUFFLE into XVPICKEV (if possible). |
1917 | static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask, |
1918 | MVT VT, SDValue V1, SDValue V2, |
1919 | SelectionDAG &DAG) { |
1920 | |
1921 | const auto &Begin = Mask.begin(); |
1922 | const auto &LeftMid = Mask.begin() + Mask.size() / 4; |
1923 | const auto &Mid = Mask.begin() + Mask.size() / 2; |
1924 | const auto &RightMid = Mask.end() - Mask.size() / 4; |
1925 | const auto &End = Mask.end(); |
1926 | unsigned HalfSize = Mask.size() / 2; |
1927 | SDValue OriV1 = V1, OriV2 = V2; |
1928 | |
1929 | if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: 0, ExpectedIndexStride: 2) && |
1930 | fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: HalfSize, ExpectedIndexStride: 2)) |
1931 | V1 = OriV1; |
1932 | else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2) && |
1933 | fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 2)) |
1934 | V1 = OriV2; |
1935 | else |
1936 | return SDValue(); |
1937 | |
1938 | if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: 0, ExpectedIndexStride: 2) && |
1939 | fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 2)) |
1940 | V2 = OriV1; |
1941 | else if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size(), ExpectedIndexStride: 2) && |
1942 | fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: Mask.size() + HalfSize, ExpectedIndexStride: 2)) |
1943 | V2 = OriV2; |
1944 | |
1945 | else |
1946 | return SDValue(); |
1947 | |
1948 | return DAG.getNode(Opcode: LoongArchISD::VPICKEV, DL, VT, N1: V2, N2: V1); |
1949 | } |
1950 | |
1951 | /// Lower VECTOR_SHUFFLE into XVPICKOD (if possible). |
1952 | static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask, |
1953 | MVT VT, SDValue V1, SDValue V2, |
1954 | SelectionDAG &DAG) { |
1955 | |
1956 | const auto &Begin = Mask.begin(); |
1957 | const auto &LeftMid = Mask.begin() + Mask.size() / 4; |
1958 | const auto &Mid = Mask.begin() + Mask.size() / 2; |
1959 | const auto &RightMid = Mask.end() - Mask.size() / 4; |
1960 | const auto &End = Mask.end(); |
1961 | unsigned HalfSize = Mask.size() / 2; |
1962 | SDValue OriV1 = V1, OriV2 = V2; |
1963 | |
1964 | if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: 1, ExpectedIndexStride: 2) && |
1965 | fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: HalfSize + 1, ExpectedIndexStride: 2)) |
1966 | V1 = OriV1; |
1967 | else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: LeftMid, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2) && |
1968 | fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End: RightMid, ExpectedIndex: Mask.size() + HalfSize + 1, |
1969 | ExpectedIndexStride: 2)) |
1970 | V1 = OriV2; |
1971 | else |
1972 | return SDValue(); |
1973 | |
1974 | if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: 1, ExpectedIndexStride: 2) && |
1975 | fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: HalfSize + 1, ExpectedIndexStride: 2)) |
1976 | V2 = OriV1; |
1977 | else if (fitsRegularPattern<int>(Begin: LeftMid, CheckStride: 1, End: Mid, ExpectedIndex: Mask.size() + 1, ExpectedIndexStride: 2) && |
1978 | fitsRegularPattern<int>(Begin: RightMid, CheckStride: 1, End, ExpectedIndex: Mask.size() + HalfSize + 1, |
1979 | ExpectedIndexStride: 2)) |
1980 | V2 = OriV2; |
1981 | else |
1982 | return SDValue(); |
1983 | |
1984 | return DAG.getNode(Opcode: LoongArchISD::VPICKOD, DL, VT, N1: V2, N2: V1); |
1985 | } |
1986 | |
1987 | /// Lower VECTOR_SHUFFLE into XVSHUF (if possible). |
1988 | static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask, |
1989 | MVT VT, SDValue V1, SDValue V2, |
1990 | SelectionDAG &DAG) { |
1991 | |
1992 | int MaskSize = Mask.size(); |
1993 | int HalfSize = Mask.size() / 2; |
1994 | const auto &Begin = Mask.begin(); |
1995 | const auto &Mid = Mask.begin() + HalfSize; |
1996 | const auto &End = Mask.end(); |
1997 | |
1998 | // VECTOR_SHUFFLE concatenates the vectors: |
1999 | // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15> |
2000 | // shuffling -> |
2001 | // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15> |
2002 | // |
2003 | // XVSHUF concatenates the vectors: |
2004 | // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7> |
2005 | // shuffling -> |
2006 | // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7> |
2007 | SmallVector<SDValue, 8> MaskAlloc; |
2008 | for (auto it = Begin; it < Mid; it++) { |
2009 | if (*it < 0) // UNDEF |
2010 | MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: 0, DL, VT: MVT::i64)); |
2011 | else if ((*it >= 0 && *it < HalfSize) || |
2012 | (*it >= MaskSize && *it < MaskSize + HalfSize)) { |
2013 | int M = *it < HalfSize ? *it : *it - HalfSize; |
2014 | MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: M, DL, VT: MVT::i64)); |
2015 | } else |
2016 | return SDValue(); |
2017 | } |
2018 | assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!" ); |
2019 | |
2020 | for (auto it = Mid; it < End; it++) { |
2021 | if (*it < 0) // UNDEF |
2022 | MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: 0, DL, VT: MVT::i64)); |
2023 | else if ((*it >= HalfSize && *it < MaskSize) || |
2024 | (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) { |
2025 | int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize; |
2026 | MaskAlloc.push_back(Elt: DAG.getTargetConstant(Val: M, DL, VT: MVT::i64)); |
2027 | } else |
2028 | return SDValue(); |
2029 | } |
2030 | assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!" ); |
2031 | |
2032 | EVT MaskVecTy = VT.changeVectorElementTypeToInteger(); |
2033 | SDValue MaskVec = DAG.getBuildVector(VT: MaskVecTy, DL, Ops: MaskAlloc); |
2034 | return DAG.getNode(Opcode: LoongArchISD::VSHUF, DL, VT, N1: MaskVec, N2: V2, N3: V1); |
2035 | } |
2036 | |
2037 | /// Shuffle vectors by lane to generate more optimized instructions. |
/// 256-bit shuffles are always treated as 2-lane 128-bit shuffles.
2039 | /// |
/// Therefore, all cases other than the following four are regarded as
/// cross-lane shuffles, for which optimization is relatively limited.
2042 | /// |
/// - Shuffle high, low lanes of the two input vectors
///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
/// - Shuffle low, high lanes of the two input vectors
///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
/// - Shuffle low, low lanes of the two input vectors
///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
/// - Shuffle high, high lanes of the two input vectors
///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
2051 | /// |
2052 | /// The first case is the closest to LoongArch instructions and the other |
2053 | /// cases need to be converted to it for processing. |
2054 | /// |
/// This function may modify V1, V2 and Mask.
2056 | static void canonicalizeShuffleVectorByLane(const SDLoc &DL, |
2057 | MutableArrayRef<int> Mask, MVT VT, |
2058 | SDValue &V1, SDValue &V2, |
2059 | SelectionDAG &DAG) { |
2060 | |
2061 | enum HalfMaskType { HighLaneTy, LowLaneTy, None }; |
2062 | |
2063 | int MaskSize = Mask.size(); |
2064 | int HalfSize = Mask.size() / 2; |
2065 | |
2066 | HalfMaskType preMask = None, postMask = None; |
2067 | |
2068 | if (std::all_of(first: Mask.begin(), last: Mask.begin() + HalfSize, pred: [&](int M) { |
2069 | return M < 0 || (M >= 0 && M < HalfSize) || |
2070 | (M >= MaskSize && M < MaskSize + HalfSize); |
2071 | })) |
2072 | preMask = HighLaneTy; |
2073 | else if (std::all_of(first: Mask.begin(), last: Mask.begin() + HalfSize, pred: [&](int M) { |
2074 | return M < 0 || (M >= HalfSize && M < MaskSize) || |
2075 | (M >= MaskSize + HalfSize && M < MaskSize * 2); |
2076 | })) |
2077 | preMask = LowLaneTy; |
2078 | |
2079 | if (std::all_of(first: Mask.begin() + HalfSize, last: Mask.end(), pred: [&](int M) { |
2080 | return M < 0 || (M >= 0 && M < HalfSize) || |
2081 | (M >= MaskSize && M < MaskSize + HalfSize); |
2082 | })) |
2083 | postMask = HighLaneTy; |
2084 | else if (std::all_of(first: Mask.begin() + HalfSize, last: Mask.end(), pred: [&](int M) { |
2085 | return M < 0 || (M >= HalfSize && M < MaskSize) || |
2086 | (M >= MaskSize + HalfSize && M < MaskSize * 2); |
2087 | })) |
2088 | postMask = LowLaneTy; |
2089 | |
  // The first half of the mask is high-lane type and the second half is
  // low-lane type, which is the form closest to the LoongArch instructions.
  //
  // Note: In the LoongArch architecture, the high lane of the mask
  // corresponds to the lower 128 bits of the vector register, and the low
  // lane of the mask corresponds to the higher 128 bits.
2096 | if (preMask == HighLaneTy && postMask == LowLaneTy) { |
2097 | return; |
2098 | } |
2099 | if (preMask == LowLaneTy && postMask == HighLaneTy) { |
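    // Swap the two 128-bit lanes: xvpermi.d with immediate 0b01001110
    // selects the 64-bit elements <2, 3, 0, 1>.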
2100 | V1 = DAG.getBitcast(VT: MVT::v4i64, V: V1); |
2101 | V1 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V1, |
2102 | N2: DAG.getConstant(Val: 0b01001110, DL, VT: MVT::i64)); |
2103 | V1 = DAG.getBitcast(VT, V: V1); |
2104 | |
2105 | if (!V2.isUndef()) { |
2106 | V2 = DAG.getBitcast(VT: MVT::v4i64, V: V2); |
2107 | V2 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V2, |
2108 | N2: DAG.getConstant(Val: 0b01001110, DL, VT: MVT::i64)); |
2109 | V2 = DAG.getBitcast(VT, V: V2); |
2110 | } |
2111 | |
2112 | for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) { |
2113 | *it = *it < 0 ? *it : *it - HalfSize; |
2114 | } |
2115 | for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) { |
2116 | *it = *it < 0 ? *it : *it + HalfSize; |
2117 | } |
2118 | } else if (preMask == LowLaneTy && postMask == LowLaneTy) { |
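    // Broadcast the high 128-bit lane: xvpermi.d with immediate 0b11101110
    // selects the 64-bit elements <2, 3, 2, 3>.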
2119 | V1 = DAG.getBitcast(VT: MVT::v4i64, V: V1); |
2120 | V1 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V1, |
2121 | N2: DAG.getConstant(Val: 0b11101110, DL, VT: MVT::i64)); |
2122 | V1 = DAG.getBitcast(VT, V: V1); |
2123 | |
2124 | if (!V2.isUndef()) { |
2125 | V2 = DAG.getBitcast(VT: MVT::v4i64, V: V2); |
2126 | V2 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V2, |
2127 | N2: DAG.getConstant(Val: 0b11101110, DL, VT: MVT::i64)); |
2128 | V2 = DAG.getBitcast(VT, V: V2); |
2129 | } |
2130 | |
2131 | for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) { |
2132 | *it = *it < 0 ? *it : *it - HalfSize; |
2133 | } |
2134 | } else if (preMask == HighLaneTy && postMask == HighLaneTy) { |
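    // Broadcast the low 128-bit lane: xvpermi.d with immediate 0b01000100
    // selects the 64-bit elements <0, 1, 0, 1>.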
2135 | V1 = DAG.getBitcast(VT: MVT::v4i64, V: V1); |
2136 | V1 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V1, |
2137 | N2: DAG.getConstant(Val: 0b01000100, DL, VT: MVT::i64)); |
2138 | V1 = DAG.getBitcast(VT, V: V1); |
2139 | |
2140 | if (!V2.isUndef()) { |
2141 | V2 = DAG.getBitcast(VT: MVT::v4i64, V: V2); |
2142 | V2 = DAG.getNode(Opcode: LoongArchISD::XVPERMI, DL, VT: MVT::v4i64, N1: V2, |
2143 | N2: DAG.getConstant(Val: 0b01000100, DL, VT: MVT::i64)); |
2144 | V2 = DAG.getBitcast(VT, V: V2); |
2145 | } |
2146 | |
2147 | for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) { |
2148 | *it = *it < 0 ? *it : *it + HalfSize; |
2149 | } |
2150 | } else { // cross-lane |
2151 | return; |
2152 | } |
2153 | } |
2154 | |
2155 | /// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible). |
2156 | /// Only for 256-bit vector. |
2157 | /// |
2158 | /// For example: |
///   %2 = shufflevector <4 x i64> %0, <4 x i64> poison,
///                      <4 x i32> <i32 0, i32 3, i32 2, i32 0>
/// is lowered to:
2162 | /// (XVPERMI $xr2, $xr0, 78) |
2163 | /// (XVSHUF $xr1, $xr2, $xr0) |
2164 | /// (XVORI $xr0, $xr1, 0) |
2165 | static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL, |
2166 | ArrayRef<int> Mask, |
2167 | MVT VT, SDValue V1, |
2168 | SDValue V2, |
2169 | SelectionDAG &DAG) { |
2170 | assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!" ); |
2171 | int Size = Mask.size(); |
2172 | int LaneSize = Size / 2; |
2173 | |
2174 | bool LaneCrossing[2] = {false, false}; |
2175 | for (int i = 0; i < Size; ++i) |
2176 | if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize)) |
2177 | LaneCrossing[(Mask[i] % Size) / LaneSize] = true; |
2178 | |
  // Bail out early if no lane crossing occurs.
2180 | if (!LaneCrossing[0] && !LaneCrossing[1]) |
2181 | return SDValue(); |
2182 | |
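  // Rewrite the mask so that any element taken from the other lane is taken
  // instead from the lane-swapped copy of V1 (the second shuffle operand,
  // hence the +Size offset).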
2183 | SmallVector<int> InLaneMask; |
2184 | InLaneMask.assign(in_start: Mask.begin(), in_end: Mask.end()); |
2185 | for (int i = 0; i < Size; ++i) { |
2186 | int &M = InLaneMask[i]; |
2187 | if (M < 0) |
2188 | continue; |
2189 | if (((M % Size) / LaneSize) != (i / LaneSize)) |
2190 | M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size; |
2191 | } |
2192 | |
2193 | SDValue Flipped = DAG.getBitcast(VT: MVT::v4i64, V: V1); |
2194 | Flipped = DAG.getVectorShuffle(VT: MVT::v4i64, dl: DL, N1: Flipped, |
2195 | N2: DAG.getUNDEF(VT: MVT::v4i64), Mask: {2, 3, 0, 1}); |
2196 | Flipped = DAG.getBitcast(VT, V: Flipped); |
2197 | return DAG.getVectorShuffle(VT, dl: DL, N1: V1, N2: Flipped, Mask: InLaneMask); |
2198 | } |
2199 | |
2200 | /// Dispatching routine to lower various 256-bit LoongArch vector shuffles. |
2201 | /// |
2202 | /// This routine breaks down the specific type of 256-bit shuffle and |
2203 | /// dispatches to the lowering routines accordingly. |
2204 | static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT, |
2205 | SDValue V1, SDValue V2, SelectionDAG &DAG) { |
2206 | assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 || |
2207 | VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 || |
2208 | VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) && |
2209 | "Vector type is unsupported for lasx!" ); |
2210 | assert(V1.getSimpleValueType() == V2.getSimpleValueType() && |
2211 | "Two operands have different types!" ); |
2212 | assert(VT.getVectorNumElements() == Mask.size() && |
2213 | "Unexpected mask size for shuffle!" ); |
2214 | assert(Mask.size() % 2 == 0 && "Expected even mask size." ); |
2215 | assert(Mask.size() >= 4 && "Mask size is less than 4." ); |
2216 | |
  // Canonicalize non-cross-lane shuffle vectors.
2218 | SmallVector<int> NewMask(Mask); |
2219 | canonicalizeShuffleVectorByLane(DL, Mask: NewMask, VT, V1, V2, DAG); |
2220 | |
2221 | APInt KnownUndef, KnownZero; |
2222 | computeZeroableShuffleElements(Mask: NewMask, V1, V2, KnownUndef, KnownZero); |
2223 | APInt Zeroable = KnownUndef | KnownZero; |
2224 | |
2225 | SDValue Result; |
2226 | // TODO: Add more comparison patterns. |
2227 | if (V2.isUndef()) { |
2228 | if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask: NewMask, VT, V1, V2, DAG))) |
2229 | return Result; |
2230 | if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask: NewMask, VT, V1, V2, DAG))) |
2231 | return Result; |
2232 | if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, Mask: NewMask, VT, |
2233 | V1, V2, DAG))) |
2234 | return Result; |
2235 | |
    // TODO: The commented-out code below may be enabled in the future to
    // better match the pattern for instruction selection.
    /* V2 = V1; */
2239 | } |
2240 | |
  // The order of these pattern comparisons is tuned for performance;
  // it is best left unchanged.
2243 | if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask: NewMask, VT, V1, V2, DAG))) |
2244 | return Result; |
2245 | if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask: NewMask, VT, V1, V2, DAG))) |
2246 | return Result; |
2247 | if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask: NewMask, VT, V1, V2, DAG))) |
2248 | return Result; |
2249 | if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask: NewMask, VT, V1, V2, DAG))) |
2250 | return Result; |
2251 | if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask: NewMask, VT, V1, V2, DAG))) |
2252 | return Result; |
2253 | if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask: NewMask, VT, V1, V2, DAG))) |
2254 | return Result; |
2255 | if ((Result = |
2256 | lowerVECTOR_SHUFFLEAsShift(DL, Mask: NewMask, VT, V1, V2, DAG, Zeroable))) |
2257 | return Result; |
2258 | if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask: NewMask, VT, V1, V2, DAG))) |
2259 | return Result; |
2260 | if (SDValue NewShuffle = widenShuffleMask(DL, Mask: NewMask, VT, V1, V2, DAG)) |
2261 | return NewShuffle; |
2262 | if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, Mask: NewMask, VT, V1, V2, DAG))) |
2263 | return Result; |
2264 | |
2265 | return SDValue(); |
2266 | } |
2267 | |
2268 | SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, |
2269 | SelectionDAG &DAG) const { |
2270 | ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Val&: Op); |
2271 | ArrayRef<int> OrigMask = SVOp->getMask(); |
2272 | SDValue V1 = Op.getOperand(i: 0); |
2273 | SDValue V2 = Op.getOperand(i: 1); |
2274 | MVT VT = Op.getSimpleValueType(); |
2275 | int NumElements = VT.getVectorNumElements(); |
2276 | SDLoc DL(Op); |
2277 | |
2278 | bool V1IsUndef = V1.isUndef(); |
2279 | bool V2IsUndef = V2.isUndef(); |
2280 | if (V1IsUndef && V2IsUndef) |
2281 | return DAG.getUNDEF(VT); |
2282 | |
  // When we create a shuffle node we put the UNDEF node as the second
  // operand, but in some cases the first operand may be transformed to
  // UNDEF. In this case we should just commute the node.
2286 | if (V1IsUndef) |
2287 | return DAG.getCommutedVectorShuffle(SV: *SVOp); |
2288 | |
2289 | // Check for non-undef masks pointing at an undef vector and make the masks |
2290 | // undef as well. This makes it easier to match the shuffle based solely on |
2291 | // the mask. |
2292 | if (V2IsUndef && |
2293 | any_of(Range&: OrigMask, P: [NumElements](int M) { return M >= NumElements; })) { |
2294 | SmallVector<int, 8> NewMask(OrigMask); |
2295 | for (int &M : NewMask) |
2296 | if (M >= NumElements) |
2297 | M = -1; |
2298 | return DAG.getVectorShuffle(VT, dl: DL, N1: V1, N2: V2, Mask: NewMask); |
2299 | } |
2300 | |
2301 | // Check for illegal shuffle mask element index values. |
2302 | int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2); |
2303 | (void)MaskUpperLimit; |
2304 | assert(llvm::all_of(OrigMask, |
2305 | [&](int M) { return -1 <= M && M < MaskUpperLimit; }) && |
2306 | "Out of bounds shuffle index" ); |
2307 | |
2308 | // For each vector width, delegate to a specialized lowering routine. |
2309 | if (VT.is128BitVector()) |
2310 | return lower128BitShuffle(DL, Mask: OrigMask, VT, V1, V2, DAG); |
2311 | |
2312 | if (VT.is256BitVector()) |
2313 | return lower256BitShuffle(DL, Mask: OrigMask, VT, V1, V2, DAG); |
2314 | |
2315 | return SDValue(); |
2316 | } |
2317 | |
2318 | SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op, |
2319 | SelectionDAG &DAG) const { |
2320 | // Custom lower to ensure the libcall return is passed in an FPR on hard |
2321 | // float ABIs. |
2322 | SDLoc DL(Op); |
2323 | MakeLibCallOptions CallOptions; |
2324 | SDValue Op0 = Op.getOperand(i: 0); |
2325 | SDValue Chain = SDValue(); |
2326 | RTLIB::Libcall LC = RTLIB::getFPROUND(OpVT: Op0.getValueType(), RetVT: MVT::f16); |
2327 | SDValue Res; |
2328 | std::tie(args&: Res, args&: Chain) = |
2329 | makeLibCall(DAG, LC, RetVT: MVT::f32, Ops: Op0, CallOptions, dl: DL, Chain); |
2330 | if (Subtarget.is64Bit()) |
2331 | return DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Res); |
2332 | return DAG.getBitcast(VT: MVT::i32, V: Res); |
2333 | } |
2334 | |
2335 | SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op, |
2336 | SelectionDAG &DAG) const { |
2337 | // Custom lower to ensure the libcall argument is passed in an FPR on hard |
2338 | // float ABIs. |
2339 | SDLoc DL(Op); |
2340 | MakeLibCallOptions CallOptions; |
2341 | SDValue Op0 = Op.getOperand(i: 0); |
2342 | SDValue Chain = SDValue(); |
2343 | SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64, |
2344 | DL, VT: MVT::f32, Operand: Op0) |
2345 | : DAG.getBitcast(VT: MVT::f32, V: Op0); |
2346 | SDValue Res; |
2347 | std::tie(args&: Res, args&: Chain) = makeLibCall(DAG, LC: RTLIB::FPEXT_F16_F32, RetVT: MVT::f32, Ops: Arg, |
2348 | CallOptions, dl: DL, Chain); |
2349 | return Res; |
2350 | } |
2351 | |
2352 | SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op, |
2353 | SelectionDAG &DAG) const { |
2354 | assert(Subtarget.hasBasicF() && "Unexpected custom legalization" ); |
2355 | SDLoc DL(Op); |
2356 | MakeLibCallOptions CallOptions; |
2357 | RTLIB::Libcall LC = |
2358 | RTLIB::getFPROUND(OpVT: Op.getOperand(i: 0).getValueType(), RetVT: MVT::bf16); |
2359 | SDValue Res = |
2360 | makeLibCall(DAG, LC, RetVT: MVT::f32, Ops: Op.getOperand(i: 0), CallOptions, dl: DL).first; |
2361 | if (Subtarget.is64Bit()) |
2362 | return DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Res); |
2363 | return DAG.getBitcast(VT: MVT::i32, V: Res); |
2364 | } |
2365 | |
2366 | SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op, |
2367 | SelectionDAG &DAG) const { |
2368 | assert(Subtarget.hasBasicF() && "Unexpected custom legalization" ); |
2369 | MVT VT = Op.getSimpleValueType(); |
2370 | SDLoc DL(Op); |
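  // A bf16 value occupies the high 16 bits of an f32, so extending is a
  // 16-bit left shift of the raw bits followed by a move into an FPR.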
2371 | Op = DAG.getNode( |
2372 | Opcode: ISD::SHL, DL, VT: Op.getOperand(i: 0).getValueType(), N1: Op.getOperand(i: 0), |
2373 | N2: DAG.getShiftAmountConstant(Val: 16, VT: Op.getOperand(i: 0).getValueType(), DL)); |
2374 | SDValue Res = Subtarget.is64Bit() ? DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64, |
2375 | DL, VT: MVT::f32, Operand: Op) |
2376 | : DAG.getBitcast(VT: MVT::f32, V: Op); |
2377 | if (VT != MVT::f32) |
2378 | return DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT, Operand: Res); |
2379 | return Res; |
2380 | } |
2381 | |
2382 | static bool isConstantOrUndef(const SDValue Op) { |
2383 | if (Op->isUndef()) |
2384 | return true; |
2385 | if (isa<ConstantSDNode>(Val: Op)) |
2386 | return true; |
2387 | if (isa<ConstantFPSDNode>(Val: Op)) |
2388 | return true; |
2389 | return false; |
2390 | } |
2391 | |
2392 | static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) { |
2393 | for (unsigned i = 0; i < Op->getNumOperands(); ++i) |
2394 | if (isConstantOrUndef(Op: Op->getOperand(Num: i))) |
2395 | return true; |
2396 | return false; |
2397 | } |
2398 | |
2399 | // Lower BUILD_VECTOR as broadcast load (if possible). |
2400 | // For example: |
2401 | // %a = load i8, ptr %ptr |
2402 | // %b = build_vector %a, %a, %a, %a |
// is lowered to:
2404 | // (VLDREPL_B $a0, 0) |
2405 | static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp, |
2406 | const SDLoc &DL, |
2407 | SelectionDAG &DAG) { |
2408 | MVT VT = BVOp->getSimpleValueType(ResNo: 0); |
2409 | int NumOps = BVOp->getNumOperands(); |
2410 | |
2411 | assert((VT.is128BitVector() || VT.is256BitVector()) && |
2412 | "Unsupported vector type for broadcast." ); |
2413 | |
2414 | SDValue IdentitySrc; |
  bool IsIdentity = true;
2416 | |
2417 | for (int i = 0; i != NumOps; i++) { |
2418 | SDValue Op = BVOp->getOperand(Num: i); |
2419 | if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) { |
      IsIdentity = false;
2421 | break; |
2422 | } |
2423 | IdentitySrc = BVOp->getOperand(Num: 0); |
2424 | } |
2425 | |
  // Make sure that this load is valid and only has one user.
2427 | if (!IdentitySrc || !BVOp->isOnlyUserOf(N: IdentitySrc.getNode())) |
2428 | return SDValue(); |
2429 | |
  if (IsIdentity) {
2431 | auto *LN = cast<LoadSDNode>(Val&: IdentitySrc); |
2432 | SDVTList Tys = |
2433 | LN->isIndexed() |
2434 | ? DAG.getVTList(VT1: VT, VT2: LN->getBasePtr().getValueType(), VT3: MVT::Other) |
2435 | : DAG.getVTList(VT1: VT, VT2: MVT::Other); |
2436 | SDValue Ops[] = {LN->getChain(), LN->getBasePtr(), LN->getOffset()}; |
2437 | SDValue BCast = DAG.getNode(Opcode: LoongArchISD::VLDREPL, DL, VTList: Tys, Ops); |
2438 | DAG.ReplaceAllUsesOfValueWith(From: SDValue(LN, 1), To: BCast.getValue(R: 1)); |
2439 | return BCast; |
2440 | } |
2441 | return SDValue(); |
2442 | } |
2443 | |
2444 | SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op, |
2445 | SelectionDAG &DAG) const { |
2446 | BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Val&: Op); |
2447 | EVT ResTy = Op->getValueType(ResNo: 0); |
2448 | SDLoc DL(Op); |
2449 | APInt SplatValue, SplatUndef; |
2450 | unsigned SplatBitSize; |
2451 | bool HasAnyUndefs; |
2452 | bool Is128Vec = ResTy.is128BitVector(); |
2453 | bool Is256Vec = ResTy.is256BitVector(); |
2454 | |
2455 | if ((!Subtarget.hasExtLSX() || !Is128Vec) && |
2456 | (!Subtarget.hasExtLASX() || !Is256Vec)) |
2457 | return SDValue(); |
2458 | |
2459 | if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(BVOp: Node, DL, DAG)) |
2460 | return Result; |
2461 | |
2462 | if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, |
2463 | /*MinSplatBits=*/8) && |
2464 | SplatBitSize <= 64) { |
2465 | // We can only cope with 8, 16, 32, or 64-bit elements. |
2466 | if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 && |
2467 | SplatBitSize != 64) |
2468 | return SDValue(); |
2469 | |
2470 | EVT ViaVecTy; |
2471 | |
2472 | switch (SplatBitSize) { |
2473 | default: |
2474 | return SDValue(); |
2475 | case 8: |
2476 | ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8; |
2477 | break; |
2478 | case 16: |
2479 | ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16; |
2480 | break; |
2481 | case 32: |
2482 | ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32; |
2483 | break; |
2484 | case 64: |
2485 | ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64; |
2486 | break; |
2487 | } |
2488 | |
2489 | // SelectionDAG::getConstant will promote SplatValue appropriately. |
2490 | SDValue Result = DAG.getConstant(Val: SplatValue, DL, VT: ViaVecTy); |
2491 | |
2492 | // Bitcast to the type we originally wanted. |
2493 | if (ViaVecTy != ResTy) |
2494 | Result = DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(Node), VT: ResTy, Operand: Result); |
2495 | |
2496 | return Result; |
2497 | } |
2498 | |
2499 | if (DAG.isSplatValue(V: Op, /*AllowUndefs=*/false)) |
2500 | return Op; |
2501 | |
2502 | if (!isConstantOrUndefBUILD_VECTOR(Op: Node)) { |
2503 | // Use INSERT_VECTOR_ELT operations rather than expand to stores. |
2504 | // The resulting code is the same length as the expansion, but it doesn't |
2505 | // use memory operations. |
2506 | EVT ResTy = Node->getValueType(ResNo: 0); |
2507 | |
2508 | assert(ResTy.isVector()); |
2509 | |
2510 | unsigned NumElts = ResTy.getVectorNumElements(); |
2511 | SDValue Vector = DAG.getUNDEF(VT: ResTy); |
2512 | for (unsigned i = 0; i < NumElts; ++i) { |
2513 | Vector = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: ResTy, N1: Vector, |
2514 | N2: Node->getOperand(Num: i), |
2515 | N3: DAG.getConstant(Val: i, DL, VT: Subtarget.getGRLenVT())); |
2516 | } |
2517 | return Vector; |
2518 | } |
2519 | |
2520 | return SDValue(); |
2521 | } |
2522 | |
2523 | SDValue |
LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
2525 | SelectionDAG &DAG) const { |
2526 | EVT VecTy = Op->getOperand(Num: 0)->getValueType(ResNo: 0); |
2527 | SDValue Idx = Op->getOperand(Num: 1); |
2528 | EVT EltTy = VecTy.getVectorElementType(); |
2529 | unsigned NumElts = VecTy.getVectorNumElements(); |
2530 | |
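  // Keep the extraction legal when the index is constant and either the
  // element is 32/64-bit or the index falls in the lower half of the
  // vector; other cases fall back to default expansion.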
2531 | if (isa<ConstantSDNode>(Val: Idx) && |
2532 | (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 || |
2533 | EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2)) |
2534 | return Op; |
2535 | |
2536 | return SDValue(); |
2537 | } |
2538 | |
2539 | SDValue |
2540 | LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, |
2541 | SelectionDAG &DAG) const { |
2542 | if (isa<ConstantSDNode>(Val: Op->getOperand(Num: 2))) |
2543 | return Op; |
2544 | return SDValue(); |
2545 | } |
2546 | |
2547 | SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op, |
2548 | SelectionDAG &DAG) const { |
2549 | SDLoc DL(Op); |
2550 | SyncScope::ID FenceSSID = |
2551 | static_cast<SyncScope::ID>(Op.getConstantOperandVal(i: 2)); |
2552 | |
2553 | // singlethread fences only synchronize with signal handlers on the same |
2554 | // thread and thus only need to preserve instruction order, not actually |
2555 | // enforce memory ordering. |
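  // For example, an IR `fence syncscope("singlethread") seq_cst` reaches here
  // with FenceSSID == SyncScope::SingleThread and is lowered to MEMBARRIER,
  // which emits no machine instruction.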
2556 | if (FenceSSID == SyncScope::SingleThread) |
2557 | // MEMBARRIER is a compiler barrier; it codegens to a no-op. |
2558 | return DAG.getNode(Opcode: ISD::MEMBARRIER, DL, VT: MVT::Other, Operand: Op.getOperand(i: 0)); |
2559 | |
2560 | return Op; |
2561 | } |
2562 | |
2563 | SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op, |
2564 | SelectionDAG &DAG) const { |
2565 | |
2566 | if (Subtarget.is64Bit() && Op.getOperand(i: 2).getValueType() == MVT::i32) { |
2567 | DAG.getContext()->emitError( |
2568 | ErrorStr: "On LA64, only 64-bit registers can be written." ); |
2569 | return Op.getOperand(i: 0); |
2570 | } |
2571 | |
2572 | if (!Subtarget.is64Bit() && Op.getOperand(i: 2).getValueType() == MVT::i64) { |
2573 | DAG.getContext()->emitError( |
2574 | ErrorStr: "On LA32, only 32-bit registers can be written." ); |
2575 | return Op.getOperand(i: 0); |
2576 | } |
2577 | |
2578 | return Op; |
2579 | } |
2580 | |
2581 | SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op, |
2582 | SelectionDAG &DAG) const { |
2583 | if (!isa<ConstantSDNode>(Val: Op.getOperand(i: 0))) { |
2584 | DAG.getContext()->emitError(ErrorStr: "argument to '__builtin_frame_address' must " |
2585 | "be a constant integer" ); |
2586 | return SDValue(); |
2587 | } |
2588 | |
2589 | MachineFunction &MF = DAG.getMachineFunction(); |
2590 | MF.getFrameInfo().setFrameAddressIsTaken(true); |
2591 | Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF); |
2592 | EVT VT = Op.getValueType(); |
2593 | SDLoc DL(Op); |
2594 | SDValue FrameAddr = DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: DL, Reg: FrameReg, VT); |
2595 | unsigned Depth = Op.getConstantOperandVal(i: 0); |
2596 | int GRLenInBytes = Subtarget.getGRLen() / 8; |
2597 | |
2598 | while (Depth--) { |
2599 | int Offset = -(GRLenInBytes * 2); |
2600 | SDValue Ptr = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: FrameAddr, |
2601 | N2: DAG.getSignedConstant(Val: Offset, DL, VT)); |
2602 | FrameAddr = |
2603 | DAG.getLoad(VT, dl: DL, Chain: DAG.getEntryNode(), Ptr, PtrInfo: MachinePointerInfo()); |
2604 | } |
2605 | return FrameAddr; |
2606 | } |
2607 | |
2608 | SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op, |
2609 | SelectionDAG &DAG) const { |
2610 | if (verifyReturnAddressArgumentIsConstant(Op, DAG)) |
2611 | return SDValue(); |
2612 | |
2613 | // Currently only support lowering return address for current frame. |
2614 | if (Op.getConstantOperandVal(i: 0) != 0) { |
2615 | DAG.getContext()->emitError( |
2616 | ErrorStr: "return address can only be determined for the current frame" ); |
2617 | return SDValue(); |
2618 | } |
2619 | |
2620 | MachineFunction &MF = DAG.getMachineFunction(); |
2621 | MF.getFrameInfo().setReturnAddressIsTaken(true); |
2622 | MVT GRLenVT = Subtarget.getGRLenVT(); |
2623 | |
2624 | // Return the value of the return address register, marking it an implicit |
2625 | // live-in. |
2626 | Register Reg = MF.addLiveIn(PReg: Subtarget.getRegisterInfo()->getRARegister(), |
2627 | RC: getRegClassFor(VT: GRLenVT)); |
2628 | return DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: SDLoc(Op), Reg, VT: GRLenVT); |
2629 | } |
2630 | |
2631 | SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op, |
2632 | SelectionDAG &DAG) const { |
2633 | MachineFunction &MF = DAG.getMachineFunction(); |
2634 | auto Size = Subtarget.getGRLen() / 8; |
2635 | auto FI = MF.getFrameInfo().CreateFixedObject(Size, SPOffset: 0, IsImmutable: false); |
2636 | return DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout())); |
2637 | } |
2638 | |
2639 | SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op, |
2640 | SelectionDAG &DAG) const { |
2641 | MachineFunction &MF = DAG.getMachineFunction(); |
2642 | auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>(); |
2643 | |
2644 | SDLoc DL(Op); |
2645 | SDValue FI = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(), |
2646 | VT: getPointerTy(DL: MF.getDataLayout())); |
2647 | |
2648 | // vastart just stores the address of the VarArgsFrameIndex slot into the |
2649 | // memory location argument. |
2650 | const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue(); |
2651 | return DAG.getStore(Chain: Op.getOperand(i: 0), dl: DL, Val: FI, Ptr: Op.getOperand(i: 1), |
2652 | PtrInfo: MachinePointerInfo(SV)); |
2653 | } |
2654 | |
2655 | SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op, |
2656 | SelectionDAG &DAG) const { |
2657 | assert(Subtarget.is64Bit() && Subtarget.hasBasicF() && |
         !Subtarget.hasBasicD() && "unexpected target features");
2659 | |
2660 | SDLoc DL(Op); |
2661 | SDValue Op0 = Op.getOperand(i: 0); |
2662 | if (Op0->getOpcode() == ISD::AND) { |
2663 | auto *C = dyn_cast<ConstantSDNode>(Val: Op0.getOperand(i: 1)); |
2664 | if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF)) |
2665 | return Op; |
2666 | } |
2667 | |
2668 | if (Op0->getOpcode() == LoongArchISD::BSTRPICK && |
2669 | Op0.getConstantOperandVal(i: 1) < UINT64_C(0X1F) && |
2670 | Op0.getConstantOperandVal(i: 2) == UINT64_C(0)) |
2671 | return Op; |
2672 | |
2673 | if (Op0.getOpcode() == ISD::AssertZext && |
2674 | dyn_cast<VTSDNode>(Val: Op0.getOperand(i: 1))->getVT().bitsLT(VT: MVT::i32)) |
2675 | return Op; |
2676 | |
2677 | EVT OpVT = Op0.getValueType(); |
2678 | EVT RetVT = Op.getValueType(); |
2679 | RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT); |
2680 | MakeLibCallOptions CallOptions; |
2681 | CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT, Value: true); |
2682 | SDValue Chain = SDValue(); |
2683 | SDValue Result; |
2684 | std::tie(args&: Result, args&: Chain) = |
2685 | makeLibCall(DAG, LC, RetVT: Op.getValueType(), Ops: Op0, CallOptions, dl: DL, Chain); |
2686 | return Result; |
2687 | } |
2688 | |
2689 | SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op, |
2690 | SelectionDAG &DAG) const { |
2691 | assert(Subtarget.is64Bit() && Subtarget.hasBasicF() && |
         !Subtarget.hasBasicD() && "unexpected target features");
2693 | |
2694 | SDLoc DL(Op); |
2695 | SDValue Op0 = Op.getOperand(i: 0); |
2696 | |
2697 | if ((Op0.getOpcode() == ISD::AssertSext || |
2698 | Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) && |
2699 | dyn_cast<VTSDNode>(Val: Op0.getOperand(i: 1))->getVT().bitsLE(VT: MVT::i32)) |
2700 | return Op; |
2701 | |
2702 | EVT OpVT = Op0.getValueType(); |
2703 | EVT RetVT = Op.getValueType(); |
2704 | RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT); |
2705 | MakeLibCallOptions CallOptions; |
2706 | CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT, Value: true); |
2707 | SDValue Chain = SDValue(); |
2708 | SDValue Result; |
2709 | std::tie(args&: Result, args&: Chain) = |
2710 | makeLibCall(DAG, LC, RetVT: Op.getValueType(), Ops: Op0, CallOptions, dl: DL, Chain); |
2711 | return Result; |
2712 | } |
2713 | |
2714 | SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op, |
2715 | SelectionDAG &DAG) const { |
2716 | |
2717 | SDLoc DL(Op); |
2718 | EVT VT = Op.getValueType(); |
2719 | SDValue Op0 = Op.getOperand(i: 0); |
2720 | EVT Op0VT = Op0.getValueType(); |
2721 | |
2722 | if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 && |
2723 | Subtarget.is64Bit() && Subtarget.hasBasicF()) { |
2724 | SDValue NewOp0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op0); |
2725 | return DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64, DL, VT: MVT::f32, Operand: NewOp0); |
2726 | } |
2727 | if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) { |
2728 | SDValue Lo, Hi; |
2729 | std::tie(args&: Lo, args&: Hi) = DAG.SplitScalar(N: Op0, DL, LoVT: MVT::i32, HiVT: MVT::i32); |
2730 | return DAG.getNode(Opcode: LoongArchISD::BUILD_PAIR_F64, DL, VT: MVT::f64, N1: Lo, N2: Hi); |
2731 | } |
2732 | return Op; |
2733 | } |
2734 | |
2735 | SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op, |
2736 | SelectionDAG &DAG) const { |
2737 | |
2738 | SDLoc DL(Op); |
2739 | SDValue Op0 = Op.getOperand(i: 0); |
2740 | |
2741 | if (Op0.getValueType() == MVT::f16) |
2742 | Op0 = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: MVT::f32, Operand: Op0); |
2743 | |
2744 | if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() && |
2745 | !Subtarget.hasBasicD()) { |
2746 | SDValue Dst = DAG.getNode(Opcode: LoongArchISD::FTINT, DL, VT: MVT::f32, Operand: Op0); |
2747 | return DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Dst); |
2748 | } |
2749 | |
2750 | EVT FPTy = EVT::getFloatingPointVT(BitWidth: Op.getValueSizeInBits()); |
2751 | SDValue Trunc = DAG.getNode(Opcode: LoongArchISD::FTINT, DL, VT: FPTy, Operand: Op0); |
2752 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: Op.getValueType(), Operand: Trunc); |
2753 | } |
2754 | |
2755 | static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty, |
2756 | SelectionDAG &DAG, unsigned Flags) { |
2757 | return DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: Flags); |
2758 | } |
2759 | |
2760 | static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty, |
2761 | SelectionDAG &DAG, unsigned Flags) { |
2762 | return DAG.getTargetBlockAddress(BA: N->getBlockAddress(), VT: Ty, Offset: N->getOffset(), |
2763 | TargetFlags: Flags); |
2764 | } |
2765 | |
2766 | static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty, |
2767 | SelectionDAG &DAG, unsigned Flags) { |
2768 | return DAG.getTargetConstantPool(C: N->getConstVal(), VT: Ty, Align: N->getAlign(), |
2769 | Offset: N->getOffset(), TargetFlags: Flags); |
2770 | } |
2771 | |
2772 | static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty, |
2773 | SelectionDAG &DAG, unsigned Flags) { |
2774 | return DAG.getTargetJumpTable(JTI: N->getIndex(), VT: Ty, TargetFlags: Flags); |
2775 | } |
2776 | |
2777 | template <class NodeTy> |
2778 | SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, |
2779 | CodeModel::Model M, |
2780 | bool IsLocal) const { |
2781 | SDLoc DL(N); |
2782 | EVT Ty = getPointerTy(DL: DAG.getDataLayout()); |
2783 | SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); |
2784 | SDValue Load; |
2785 | |
2786 | switch (M) { |
2787 | default: |
2788 | report_fatal_error(reason: "Unsupported code model" ); |
2789 | |
2790 | case CodeModel::Large: { |
    assert(Subtarget.is64Bit() && "Large code model requires LA64");
2792 | |
2793 | // This is not actually used, but is necessary for successfully matching |
2794 | // the PseudoLA_*_LARGE nodes. |
2795 | SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty); |
2796 | if (IsLocal) { |
2797 | // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that |
2798 | // eventually becomes the desired 5-insn code sequence. |
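      // Roughly, the expanded 5-insn sequence is:
      //   pcalau12i $rd, %pc_hi20(sym)
      //   addi.d    $rt, $zero, %pc_lo12(sym)
      //   lu32i.d   $rt, %pc64_lo20(sym)
      //   lu52i.d   $rt, %pc64_hi12(sym)
      //   add.d     $rd, $rd, $rt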
2799 | Load = SDValue(DAG.getMachineNode(Opcode: LoongArch::PseudoLA_PCREL_LARGE, dl: DL, VT: Ty, |
2800 | Op1: Tmp, Op2: Addr), |
2801 | 0); |
2802 | } else { |
2803 | // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that |
2804 | // eventually becomes the desired 5-insn code sequence. |
2805 | Load = SDValue( |
2806 | DAG.getMachineNode(Opcode: LoongArch::PseudoLA_GOT_LARGE, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), |
2807 | 0); |
2808 | } |
2809 | break; |
2810 | } |
2811 | |
2812 | case CodeModel::Small: |
2813 | case CodeModel::Medium: |
2814 | if (IsLocal) { |
2815 | // This generates the pattern (PseudoLA_PCREL sym), which expands to |
2816 | // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)). |
2817 | Load = SDValue( |
2818 | DAG.getMachineNode(Opcode: LoongArch::PseudoLA_PCREL, dl: DL, VT: Ty, Op1: Addr), 0); |
2819 | } else { |
2820 | // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d |
2821 | // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)). |
2822 | Load = |
2823 | SDValue(DAG.getMachineNode(Opcode: LoongArch::PseudoLA_GOT, dl: DL, VT: Ty, Op1: Addr), 0); |
2824 | } |
2825 | } |
2826 | |
2827 | if (!IsLocal) { |
2828 | // Mark the load instruction as invariant to enable hoisting in MachineLICM. |
2829 | MachineFunction &MF = DAG.getMachineFunction(); |
2830 | MachineMemOperand *MemOp = MF.getMachineMemOperand( |
2831 | PtrInfo: MachinePointerInfo::getGOT(MF), |
2832 | f: MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | |
2833 | MachineMemOperand::MOInvariant, |
2834 | MemTy: LLT(Ty.getSimpleVT()), base_alignment: Align(Ty.getFixedSizeInBits() / 8)); |
2835 | DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Load.getNode()), NewMemRefs: {MemOp}); |
2836 | } |
2837 | |
2838 | return Load; |
2839 | } |
2840 | |
2841 | SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op, |
2842 | SelectionDAG &DAG) const { |
2843 | return getAddr(N: cast<BlockAddressSDNode>(Val&: Op), DAG, |
2844 | M: DAG.getTarget().getCodeModel()); |
2845 | } |
2846 | |
2847 | SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op, |
2848 | SelectionDAG &DAG) const { |
2849 | return getAddr(N: cast<JumpTableSDNode>(Val&: Op), DAG, |
2850 | M: DAG.getTarget().getCodeModel()); |
2851 | } |
2852 | |
2853 | SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op, |
2854 | SelectionDAG &DAG) const { |
2855 | return getAddr(N: cast<ConstantPoolSDNode>(Val&: Op), DAG, |
2856 | M: DAG.getTarget().getCodeModel()); |
2857 | } |
2858 | |
2859 | SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op, |
2860 | SelectionDAG &DAG) const { |
2861 | GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Val&: Op); |
  assert(N->getOffset() == 0 && "unexpected offset in global node");
2863 | auto CM = DAG.getTarget().getCodeModel(); |
2864 | const GlobalValue *GV = N->getGlobal(); |
2865 | |
2866 | if (GV->isDSOLocal() && isa<GlobalVariable>(Val: GV)) { |
2867 | if (auto GCM = dyn_cast<GlobalVariable>(Val: GV)->getCodeModel()) |
2868 | CM = *GCM; |
2869 | } |
2870 | |
2871 | return getAddr(N, DAG, M: CM, IsLocal: GV->isDSOLocal()); |
2872 | } |
2873 | |
2874 | SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, |
2875 | SelectionDAG &DAG, |
2876 | unsigned Opc, bool UseGOT, |
2877 | bool Large) const { |
2878 | SDLoc DL(N); |
2879 | EVT Ty = getPointerTy(DL: DAG.getDataLayout()); |
2880 | MVT GRLenVT = Subtarget.getGRLenVT(); |
2881 | |
2882 | // This is not actually used, but is necessary for successfully matching the |
2883 | // PseudoLA_*_LARGE nodes. |
2884 | SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty); |
2885 | SDValue Addr = DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: 0); |
2886 | |
2887 | // Only IE needs an extra argument for large code model. |
2888 | SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE |
2889 | ? SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), 0) |
2890 | : SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Addr), 0); |
2891 | |
  // If this is LE for the normal/medium code model, the add-tp operation is
  // performed later, during pseudo-instruction expansion.
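  // For reference, the LE expansion for the normal code model is roughly:
  //   lu12i.w $rd, %le_hi20(sym)
  //   ori     $rd, $rd, %le_lo12(sym)
  //   add.w/d $rd, $rd, $tp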
2894 | if (Opc == LoongArch::PseudoLA_TLS_LE && !Large) |
2895 | return Offset; |
2896 | |
2897 | if (UseGOT) { |
2898 | // Mark the load instruction as invariant to enable hoisting in MachineLICM. |
2899 | MachineFunction &MF = DAG.getMachineFunction(); |
2900 | MachineMemOperand *MemOp = MF.getMachineMemOperand( |
2901 | PtrInfo: MachinePointerInfo::getGOT(MF), |
2902 | f: MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | |
2903 | MachineMemOperand::MOInvariant, |
2904 | MemTy: LLT(Ty.getSimpleVT()), base_alignment: Align(Ty.getFixedSizeInBits() / 8)); |
2905 | DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Offset.getNode()), NewMemRefs: {MemOp}); |
2906 | } |
2907 | |
2908 | // Add the thread pointer. |
2909 | return DAG.getNode(Opcode: ISD::ADD, DL, VT: Ty, N1: Offset, |
2910 | N2: DAG.getRegister(Reg: LoongArch::R2, VT: GRLenVT)); |
2911 | } |
2912 | |
2913 | SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, |
2914 | SelectionDAG &DAG, |
2915 | unsigned Opc, |
2916 | bool Large) const { |
2917 | SDLoc DL(N); |
2918 | EVT Ty = getPointerTy(DL: DAG.getDataLayout()); |
2919 | IntegerType *CallTy = Type::getIntNTy(C&: *DAG.getContext(), N: Ty.getSizeInBits()); |
2920 | |
2921 | // This is not actually used, but is necessary for successfully matching the |
2922 | // PseudoLA_*_LARGE nodes. |
2923 | SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty); |
2924 | |
2925 | // Use a PC-relative addressing mode to access the dynamic GOT address. |
2926 | SDValue Addr = DAG.getTargetGlobalAddress(GV: N->getGlobal(), DL, VT: Ty, offset: 0, TargetFlags: 0); |
2927 | SDValue Load = Large ? SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), 0) |
2928 | : SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Addr), 0); |
2929 | |
2930 | // Prepare argument list to generate call. |
2931 | ArgListTy Args; |
2932 | ArgListEntry Entry; |
2933 | Entry.Node = Load; |
2934 | Entry.Ty = CallTy; |
2935 | Args.push_back(x: Entry); |
2936 | |
2937 | // Setup call to __tls_get_addr. |
2938 | TargetLowering::CallLoweringInfo CLI(DAG); |
2939 | CLI.setDebugLoc(DL) |
2940 | .setChain(DAG.getEntryNode()) |
2941 | .setLibCallee(CC: CallingConv::C, ResultType: CallTy, |
                    Target: DAG.getExternalSymbol(Sym: "__tls_get_addr", VT: Ty),
2943 | ArgsList: std::move(Args)); |
2944 | |
2945 | return LowerCallTo(CLI).first; |
2946 | } |
2947 | |
2948 | SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N, |
2949 | SelectionDAG &DAG, unsigned Opc, |
2950 | bool Large) const { |
2951 | SDLoc DL(N); |
2952 | EVT Ty = getPointerTy(DL: DAG.getDataLayout()); |
2953 | const GlobalValue *GV = N->getGlobal(); |
2954 | |
2955 | // This is not actually used, but is necessary for successfully matching the |
2956 | // PseudoLA_*_LARGE nodes. |
2957 | SDValue Tmp = DAG.getConstant(Val: 0, DL, VT: Ty); |
2958 | |
2959 | // Use a PC-relative addressing mode to access the global dynamic GOT address. |
2960 | // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym). |
2961 | SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: 0, TargetFlags: 0); |
2962 | return Large ? SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Tmp, Op2: Addr), 0) |
2963 | : SDValue(DAG.getMachineNode(Opcode: Opc, dl: DL, VT: Ty, Op1: Addr), 0); |
2964 | } |
2965 | |
2966 | SDValue |
2967 | LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op, |
2968 | SelectionDAG &DAG) const { |
2969 | if (DAG.getMachineFunction().getFunction().getCallingConv() == |
2970 | CallingConv::GHC) |
2971 | report_fatal_error(reason: "In GHC calling convention TLS is not supported" ); |
2972 | |
2973 | bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large; |
  assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
2975 | |
2976 | GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Val&: Op); |
  assert(N->getOffset() == 0 && "unexpected offset in global node");
2978 | |
2979 | if (DAG.getTarget().useEmulatedTLS()) |
    reportFatalUsageError(reason: "the emulated TLS is prohibited");
2981 | |
2982 | bool IsDesc = DAG.getTarget().useTLSDESC(); |
2983 | |
2984 | switch (getTargetMachine().getTLSModel(GV: N->getGlobal())) { |
2985 | case TLSModel::GeneralDynamic: |
2986 | // In this model, application code calls the dynamic linker function |
2987 | // __tls_get_addr to locate TLS offsets into the dynamic thread vector at |
2988 | // runtime. |
2989 | if (!IsDesc) |
2990 | return getDynamicTLSAddr(N, DAG, |
2991 | Opc: Large ? LoongArch::PseudoLA_TLS_GD_LARGE |
2992 | : LoongArch::PseudoLA_TLS_GD, |
2993 | Large); |
2994 | break; |
2995 | case TLSModel::LocalDynamic: |
2996 | // Same as GeneralDynamic, except for assembly modifiers and relocation |
2997 | // records. |
2998 | if (!IsDesc) |
2999 | return getDynamicTLSAddr(N, DAG, |
3000 | Opc: Large ? LoongArch::PseudoLA_TLS_LD_LARGE |
3001 | : LoongArch::PseudoLA_TLS_LD, |
3002 | Large); |
3003 | break; |
3004 | case TLSModel::InitialExec: |
3005 | // This model uses the GOT to resolve TLS offsets. |
3006 | return getStaticTLSAddr(N, DAG, |
3007 | Opc: Large ? LoongArch::PseudoLA_TLS_IE_LARGE |
3008 | : LoongArch::PseudoLA_TLS_IE, |
3009 | /*UseGOT=*/true, Large); |
3010 | case TLSModel::LocalExec: |
3011 | // This model is used when static linking as the TLS offsets are resolved |
3012 | // during program linking. |
3013 | // |
3014 | // This node doesn't need an extra argument for the large code model. |
3015 | return getStaticTLSAddr(N, DAG, Opc: LoongArch::PseudoLA_TLS_LE, |
3016 | /*UseGOT=*/false, Large); |
3017 | } |
3018 | |
3019 | return getTLSDescAddr(N, DAG, |
3020 | Opc: Large ? LoongArch::PseudoLA_TLS_DESC_LARGE |
3021 | : LoongArch::PseudoLA_TLS_DESC, |
3022 | Large); |
3023 | } |
3024 | |
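// Checks that operand ImmOp of Op is a constant fitting in an N-bit immediate
// (signed when IsSigned). On failure this emits a diagnostic and returns an
// UNDEF of the result type; on success it returns an empty SDValue, meaning
// the immediate is in range and the caller can keep the node unchanged.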
3025 | template <unsigned N> |
3026 | static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, |
3027 | SelectionDAG &DAG, bool IsSigned = false) { |
3028 | auto *CImm = cast<ConstantSDNode>(Val: Op->getOperand(Num: ImmOp)); |
3029 | // Check the ImmArg. |
3030 | if ((IsSigned && !isInt<N>(CImm->getSExtValue())) || |
3031 | (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) { |
3032 | DAG.getContext()->emitError(ErrorStr: Op->getOperationName(G: 0) + |
3033 | ": argument out of range." ); |
3034 | return DAG.getNode(Opcode: ISD::UNDEF, DL: SDLoc(Op), VT: Op.getValueType()); |
3035 | } |
3036 | return SDValue(); |
3037 | } |
3038 | |
3039 | SDValue |
3040 | LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, |
3041 | SelectionDAG &DAG) const { |
3042 | switch (Op.getConstantOperandVal(i: 0)) { |
3043 | default: |
3044 | return SDValue(); // Don't custom lower most intrinsics. |
3045 | case Intrinsic::thread_pointer: { |
3046 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
3047 | return DAG.getRegister(Reg: LoongArch::R2, VT: PtrVT); |
3048 | } |
3049 | case Intrinsic::loongarch_lsx_vpickve2gr_d: |
3050 | case Intrinsic::loongarch_lsx_vpickve2gr_du: |
3051 | case Intrinsic::loongarch_lsx_vreplvei_d: |
3052 | case Intrinsic::loongarch_lasx_xvrepl128vei_d: |
3053 | return checkIntrinsicImmArg<1>(Op, ImmOp: 2, DAG); |
3054 | case Intrinsic::loongarch_lsx_vreplvei_w: |
3055 | case Intrinsic::loongarch_lasx_xvrepl128vei_w: |
3056 | case Intrinsic::loongarch_lasx_xvpickve2gr_d: |
3057 | case Intrinsic::loongarch_lasx_xvpickve2gr_du: |
3058 | case Intrinsic::loongarch_lasx_xvpickve_d: |
3059 | case Intrinsic::loongarch_lasx_xvpickve_d_f: |
3060 | return checkIntrinsicImmArg<2>(Op, ImmOp: 2, DAG); |
3061 | case Intrinsic::loongarch_lasx_xvinsve0_d: |
3062 | return checkIntrinsicImmArg<2>(Op, ImmOp: 3, DAG); |
3063 | case Intrinsic::loongarch_lsx_vsat_b: |
3064 | case Intrinsic::loongarch_lsx_vsat_bu: |
3065 | case Intrinsic::loongarch_lsx_vrotri_b: |
3066 | case Intrinsic::loongarch_lsx_vsllwil_h_b: |
3067 | case Intrinsic::loongarch_lsx_vsllwil_hu_bu: |
3068 | case Intrinsic::loongarch_lsx_vsrlri_b: |
3069 | case Intrinsic::loongarch_lsx_vsrari_b: |
3070 | case Intrinsic::loongarch_lsx_vreplvei_h: |
3071 | case Intrinsic::loongarch_lasx_xvsat_b: |
3072 | case Intrinsic::loongarch_lasx_xvsat_bu: |
3073 | case Intrinsic::loongarch_lasx_xvrotri_b: |
3074 | case Intrinsic::loongarch_lasx_xvsllwil_h_b: |
3075 | case Intrinsic::loongarch_lasx_xvsllwil_hu_bu: |
3076 | case Intrinsic::loongarch_lasx_xvsrlri_b: |
3077 | case Intrinsic::loongarch_lasx_xvsrari_b: |
3078 | case Intrinsic::loongarch_lasx_xvrepl128vei_h: |
3079 | case Intrinsic::loongarch_lasx_xvpickve_w: |
3080 | case Intrinsic::loongarch_lasx_xvpickve_w_f: |
3081 | return checkIntrinsicImmArg<3>(Op, ImmOp: 2, DAG); |
3082 | case Intrinsic::loongarch_lasx_xvinsve0_w: |
3083 | return checkIntrinsicImmArg<3>(Op, ImmOp: 3, DAG); |
3084 | case Intrinsic::loongarch_lsx_vsat_h: |
3085 | case Intrinsic::loongarch_lsx_vsat_hu: |
3086 | case Intrinsic::loongarch_lsx_vrotri_h: |
3087 | case Intrinsic::loongarch_lsx_vsllwil_w_h: |
3088 | case Intrinsic::loongarch_lsx_vsllwil_wu_hu: |
3089 | case Intrinsic::loongarch_lsx_vsrlri_h: |
3090 | case Intrinsic::loongarch_lsx_vsrari_h: |
3091 | case Intrinsic::loongarch_lsx_vreplvei_b: |
3092 | case Intrinsic::loongarch_lasx_xvsat_h: |
3093 | case Intrinsic::loongarch_lasx_xvsat_hu: |
3094 | case Intrinsic::loongarch_lasx_xvrotri_h: |
3095 | case Intrinsic::loongarch_lasx_xvsllwil_w_h: |
3096 | case Intrinsic::loongarch_lasx_xvsllwil_wu_hu: |
3097 | case Intrinsic::loongarch_lasx_xvsrlri_h: |
3098 | case Intrinsic::loongarch_lasx_xvsrari_h: |
3099 | case Intrinsic::loongarch_lasx_xvrepl128vei_b: |
3100 | return checkIntrinsicImmArg<4>(Op, ImmOp: 2, DAG); |
3101 | case Intrinsic::loongarch_lsx_vsrlni_b_h: |
3102 | case Intrinsic::loongarch_lsx_vsrani_b_h: |
3103 | case Intrinsic::loongarch_lsx_vsrlrni_b_h: |
3104 | case Intrinsic::loongarch_lsx_vsrarni_b_h: |
3105 | case Intrinsic::loongarch_lsx_vssrlni_b_h: |
3106 | case Intrinsic::loongarch_lsx_vssrani_b_h: |
3107 | case Intrinsic::loongarch_lsx_vssrlni_bu_h: |
3108 | case Intrinsic::loongarch_lsx_vssrani_bu_h: |
3109 | case Intrinsic::loongarch_lsx_vssrlrni_b_h: |
3110 | case Intrinsic::loongarch_lsx_vssrarni_b_h: |
3111 | case Intrinsic::loongarch_lsx_vssrlrni_bu_h: |
3112 | case Intrinsic::loongarch_lsx_vssrarni_bu_h: |
3113 | case Intrinsic::loongarch_lasx_xvsrlni_b_h: |
3114 | case Intrinsic::loongarch_lasx_xvsrani_b_h: |
3115 | case Intrinsic::loongarch_lasx_xvsrlrni_b_h: |
3116 | case Intrinsic::loongarch_lasx_xvsrarni_b_h: |
3117 | case Intrinsic::loongarch_lasx_xvssrlni_b_h: |
3118 | case Intrinsic::loongarch_lasx_xvssrani_b_h: |
3119 | case Intrinsic::loongarch_lasx_xvssrlni_bu_h: |
3120 | case Intrinsic::loongarch_lasx_xvssrani_bu_h: |
3121 | case Intrinsic::loongarch_lasx_xvssrlrni_b_h: |
3122 | case Intrinsic::loongarch_lasx_xvssrarni_b_h: |
3123 | case Intrinsic::loongarch_lasx_xvssrlrni_bu_h: |
3124 | case Intrinsic::loongarch_lasx_xvssrarni_bu_h: |
3125 | return checkIntrinsicImmArg<4>(Op, ImmOp: 3, DAG); |
3126 | case Intrinsic::loongarch_lsx_vsat_w: |
3127 | case Intrinsic::loongarch_lsx_vsat_wu: |
3128 | case Intrinsic::loongarch_lsx_vrotri_w: |
3129 | case Intrinsic::loongarch_lsx_vsllwil_d_w: |
3130 | case Intrinsic::loongarch_lsx_vsllwil_du_wu: |
3131 | case Intrinsic::loongarch_lsx_vsrlri_w: |
3132 | case Intrinsic::loongarch_lsx_vsrari_w: |
3133 | case Intrinsic::loongarch_lsx_vslei_bu: |
3134 | case Intrinsic::loongarch_lsx_vslei_hu: |
3135 | case Intrinsic::loongarch_lsx_vslei_wu: |
3136 | case Intrinsic::loongarch_lsx_vslei_du: |
3137 | case Intrinsic::loongarch_lsx_vslti_bu: |
3138 | case Intrinsic::loongarch_lsx_vslti_hu: |
3139 | case Intrinsic::loongarch_lsx_vslti_wu: |
3140 | case Intrinsic::loongarch_lsx_vslti_du: |
3141 | case Intrinsic::loongarch_lsx_vbsll_v: |
3142 | case Intrinsic::loongarch_lsx_vbsrl_v: |
3143 | case Intrinsic::loongarch_lasx_xvsat_w: |
3144 | case Intrinsic::loongarch_lasx_xvsat_wu: |
3145 | case Intrinsic::loongarch_lasx_xvrotri_w: |
3146 | case Intrinsic::loongarch_lasx_xvsllwil_d_w: |
3147 | case Intrinsic::loongarch_lasx_xvsllwil_du_wu: |
3148 | case Intrinsic::loongarch_lasx_xvsrlri_w: |
3149 | case Intrinsic::loongarch_lasx_xvsrari_w: |
3150 | case Intrinsic::loongarch_lasx_xvslei_bu: |
3151 | case Intrinsic::loongarch_lasx_xvslei_hu: |
3152 | case Intrinsic::loongarch_lasx_xvslei_wu: |
3153 | case Intrinsic::loongarch_lasx_xvslei_du: |
3154 | case Intrinsic::loongarch_lasx_xvslti_bu: |
3155 | case Intrinsic::loongarch_lasx_xvslti_hu: |
3156 | case Intrinsic::loongarch_lasx_xvslti_wu: |
3157 | case Intrinsic::loongarch_lasx_xvslti_du: |
3158 | case Intrinsic::loongarch_lasx_xvbsll_v: |
3159 | case Intrinsic::loongarch_lasx_xvbsrl_v: |
3160 | return checkIntrinsicImmArg<5>(Op, ImmOp: 2, DAG); |
3161 | case Intrinsic::loongarch_lsx_vseqi_b: |
3162 | case Intrinsic::loongarch_lsx_vseqi_h: |
3163 | case Intrinsic::loongarch_lsx_vseqi_w: |
3164 | case Intrinsic::loongarch_lsx_vseqi_d: |
3165 | case Intrinsic::loongarch_lsx_vslei_b: |
3166 | case Intrinsic::loongarch_lsx_vslei_h: |
3167 | case Intrinsic::loongarch_lsx_vslei_w: |
3168 | case Intrinsic::loongarch_lsx_vslei_d: |
3169 | case Intrinsic::loongarch_lsx_vslti_b: |
3170 | case Intrinsic::loongarch_lsx_vslti_h: |
3171 | case Intrinsic::loongarch_lsx_vslti_w: |
3172 | case Intrinsic::loongarch_lsx_vslti_d: |
3173 | case Intrinsic::loongarch_lasx_xvseqi_b: |
3174 | case Intrinsic::loongarch_lasx_xvseqi_h: |
3175 | case Intrinsic::loongarch_lasx_xvseqi_w: |
3176 | case Intrinsic::loongarch_lasx_xvseqi_d: |
3177 | case Intrinsic::loongarch_lasx_xvslei_b: |
3178 | case Intrinsic::loongarch_lasx_xvslei_h: |
3179 | case Intrinsic::loongarch_lasx_xvslei_w: |
3180 | case Intrinsic::loongarch_lasx_xvslei_d: |
3181 | case Intrinsic::loongarch_lasx_xvslti_b: |
3182 | case Intrinsic::loongarch_lasx_xvslti_h: |
3183 | case Intrinsic::loongarch_lasx_xvslti_w: |
3184 | case Intrinsic::loongarch_lasx_xvslti_d: |
3185 | return checkIntrinsicImmArg<5>(Op, ImmOp: 2, DAG, /*IsSigned=*/true); |
3186 | case Intrinsic::loongarch_lsx_vsrlni_h_w: |
3187 | case Intrinsic::loongarch_lsx_vsrani_h_w: |
3188 | case Intrinsic::loongarch_lsx_vsrlrni_h_w: |
3189 | case Intrinsic::loongarch_lsx_vsrarni_h_w: |
3190 | case Intrinsic::loongarch_lsx_vssrlni_h_w: |
3191 | case Intrinsic::loongarch_lsx_vssrani_h_w: |
3192 | case Intrinsic::loongarch_lsx_vssrlni_hu_w: |
3193 | case Intrinsic::loongarch_lsx_vssrani_hu_w: |
3194 | case Intrinsic::loongarch_lsx_vssrlrni_h_w: |
3195 | case Intrinsic::loongarch_lsx_vssrarni_h_w: |
3196 | case Intrinsic::loongarch_lsx_vssrlrni_hu_w: |
3197 | case Intrinsic::loongarch_lsx_vssrarni_hu_w: |
3198 | case Intrinsic::loongarch_lsx_vfrstpi_b: |
3199 | case Intrinsic::loongarch_lsx_vfrstpi_h: |
3200 | case Intrinsic::loongarch_lasx_xvsrlni_h_w: |
3201 | case Intrinsic::loongarch_lasx_xvsrani_h_w: |
3202 | case Intrinsic::loongarch_lasx_xvsrlrni_h_w: |
3203 | case Intrinsic::loongarch_lasx_xvsrarni_h_w: |
3204 | case Intrinsic::loongarch_lasx_xvssrlni_h_w: |
3205 | case Intrinsic::loongarch_lasx_xvssrani_h_w: |
3206 | case Intrinsic::loongarch_lasx_xvssrlni_hu_w: |
3207 | case Intrinsic::loongarch_lasx_xvssrani_hu_w: |
3208 | case Intrinsic::loongarch_lasx_xvssrlrni_h_w: |
3209 | case Intrinsic::loongarch_lasx_xvssrarni_h_w: |
3210 | case Intrinsic::loongarch_lasx_xvssrlrni_hu_w: |
3211 | case Intrinsic::loongarch_lasx_xvssrarni_hu_w: |
3212 | case Intrinsic::loongarch_lasx_xvfrstpi_b: |
3213 | case Intrinsic::loongarch_lasx_xvfrstpi_h: |
3214 | return checkIntrinsicImmArg<5>(Op, ImmOp: 3, DAG); |
3215 | case Intrinsic::loongarch_lsx_vsat_d: |
3216 | case Intrinsic::loongarch_lsx_vsat_du: |
3217 | case Intrinsic::loongarch_lsx_vrotri_d: |
3218 | case Intrinsic::loongarch_lsx_vsrlri_d: |
3219 | case Intrinsic::loongarch_lsx_vsrari_d: |
3220 | case Intrinsic::loongarch_lasx_xvsat_d: |
3221 | case Intrinsic::loongarch_lasx_xvsat_du: |
3222 | case Intrinsic::loongarch_lasx_xvrotri_d: |
3223 | case Intrinsic::loongarch_lasx_xvsrlri_d: |
3224 | case Intrinsic::loongarch_lasx_xvsrari_d: |
3225 | return checkIntrinsicImmArg<6>(Op, ImmOp: 2, DAG); |
3226 | case Intrinsic::loongarch_lsx_vsrlni_w_d: |
3227 | case Intrinsic::loongarch_lsx_vsrani_w_d: |
3228 | case Intrinsic::loongarch_lsx_vsrlrni_w_d: |
3229 | case Intrinsic::loongarch_lsx_vsrarni_w_d: |
3230 | case Intrinsic::loongarch_lsx_vssrlni_w_d: |
3231 | case Intrinsic::loongarch_lsx_vssrani_w_d: |
3232 | case Intrinsic::loongarch_lsx_vssrlni_wu_d: |
3233 | case Intrinsic::loongarch_lsx_vssrani_wu_d: |
3234 | case Intrinsic::loongarch_lsx_vssrlrni_w_d: |
3235 | case Intrinsic::loongarch_lsx_vssrarni_w_d: |
3236 | case Intrinsic::loongarch_lsx_vssrlrni_wu_d: |
3237 | case Intrinsic::loongarch_lsx_vssrarni_wu_d: |
3238 | case Intrinsic::loongarch_lasx_xvsrlni_w_d: |
3239 | case Intrinsic::loongarch_lasx_xvsrani_w_d: |
3240 | case Intrinsic::loongarch_lasx_xvsrlrni_w_d: |
3241 | case Intrinsic::loongarch_lasx_xvsrarni_w_d: |
3242 | case Intrinsic::loongarch_lasx_xvssrlni_w_d: |
3243 | case Intrinsic::loongarch_lasx_xvssrani_w_d: |
3244 | case Intrinsic::loongarch_lasx_xvssrlni_wu_d: |
3245 | case Intrinsic::loongarch_lasx_xvssrani_wu_d: |
3246 | case Intrinsic::loongarch_lasx_xvssrlrni_w_d: |
3247 | case Intrinsic::loongarch_lasx_xvssrarni_w_d: |
3248 | case Intrinsic::loongarch_lasx_xvssrlrni_wu_d: |
3249 | case Intrinsic::loongarch_lasx_xvssrarni_wu_d: |
3250 | return checkIntrinsicImmArg<6>(Op, ImmOp: 3, DAG); |
3251 | case Intrinsic::loongarch_lsx_vsrlni_d_q: |
3252 | case Intrinsic::loongarch_lsx_vsrani_d_q: |
3253 | case Intrinsic::loongarch_lsx_vsrlrni_d_q: |
3254 | case Intrinsic::loongarch_lsx_vsrarni_d_q: |
3255 | case Intrinsic::loongarch_lsx_vssrlni_d_q: |
3256 | case Intrinsic::loongarch_lsx_vssrani_d_q: |
3257 | case Intrinsic::loongarch_lsx_vssrlni_du_q: |
3258 | case Intrinsic::loongarch_lsx_vssrani_du_q: |
3259 | case Intrinsic::loongarch_lsx_vssrlrni_d_q: |
3260 | case Intrinsic::loongarch_lsx_vssrarni_d_q: |
3261 | case Intrinsic::loongarch_lsx_vssrlrni_du_q: |
3262 | case Intrinsic::loongarch_lsx_vssrarni_du_q: |
3263 | case Intrinsic::loongarch_lasx_xvsrlni_d_q: |
3264 | case Intrinsic::loongarch_lasx_xvsrani_d_q: |
3265 | case Intrinsic::loongarch_lasx_xvsrlrni_d_q: |
3266 | case Intrinsic::loongarch_lasx_xvsrarni_d_q: |
3267 | case Intrinsic::loongarch_lasx_xvssrlni_d_q: |
3268 | case Intrinsic::loongarch_lasx_xvssrani_d_q: |
3269 | case Intrinsic::loongarch_lasx_xvssrlni_du_q: |
3270 | case Intrinsic::loongarch_lasx_xvssrani_du_q: |
3271 | case Intrinsic::loongarch_lasx_xvssrlrni_d_q: |
3272 | case Intrinsic::loongarch_lasx_xvssrarni_d_q: |
3273 | case Intrinsic::loongarch_lasx_xvssrlrni_du_q: |
3274 | case Intrinsic::loongarch_lasx_xvssrarni_du_q: |
3275 | return checkIntrinsicImmArg<7>(Op, ImmOp: 3, DAG); |
3276 | case Intrinsic::loongarch_lsx_vnori_b: |
3277 | case Intrinsic::loongarch_lsx_vshuf4i_b: |
3278 | case Intrinsic::loongarch_lsx_vshuf4i_h: |
3279 | case Intrinsic::loongarch_lsx_vshuf4i_w: |
3280 | case Intrinsic::loongarch_lasx_xvnori_b: |
3281 | case Intrinsic::loongarch_lasx_xvshuf4i_b: |
3282 | case Intrinsic::loongarch_lasx_xvshuf4i_h: |
3283 | case Intrinsic::loongarch_lasx_xvshuf4i_w: |
3284 | case Intrinsic::loongarch_lasx_xvpermi_d: |
3285 | return checkIntrinsicImmArg<8>(Op, ImmOp: 2, DAG); |
3286 | case Intrinsic::loongarch_lsx_vshuf4i_d: |
3287 | case Intrinsic::loongarch_lsx_vpermi_w: |
3288 | case Intrinsic::loongarch_lsx_vbitseli_b: |
3289 | case Intrinsic::loongarch_lsx_vextrins_b: |
3290 | case Intrinsic::loongarch_lsx_vextrins_h: |
3291 | case Intrinsic::loongarch_lsx_vextrins_w: |
3292 | case Intrinsic::loongarch_lsx_vextrins_d: |
3293 | case Intrinsic::loongarch_lasx_xvshuf4i_d: |
3294 | case Intrinsic::loongarch_lasx_xvpermi_w: |
3295 | case Intrinsic::loongarch_lasx_xvpermi_q: |
3296 | case Intrinsic::loongarch_lasx_xvbitseli_b: |
3297 | case Intrinsic::loongarch_lasx_xvextrins_b: |
3298 | case Intrinsic::loongarch_lasx_xvextrins_h: |
3299 | case Intrinsic::loongarch_lasx_xvextrins_w: |
3300 | case Intrinsic::loongarch_lasx_xvextrins_d: |
3301 | return checkIntrinsicImmArg<8>(Op, ImmOp: 3, DAG); |
3302 | case Intrinsic::loongarch_lsx_vrepli_b: |
3303 | case Intrinsic::loongarch_lsx_vrepli_h: |
3304 | case Intrinsic::loongarch_lsx_vrepli_w: |
3305 | case Intrinsic::loongarch_lsx_vrepli_d: |
3306 | case Intrinsic::loongarch_lasx_xvrepli_b: |
3307 | case Intrinsic::loongarch_lasx_xvrepli_h: |
3308 | case Intrinsic::loongarch_lasx_xvrepli_w: |
3309 | case Intrinsic::loongarch_lasx_xvrepli_d: |
3310 | return checkIntrinsicImmArg<10>(Op, ImmOp: 1, DAG, /*IsSigned=*/true); |
3311 | case Intrinsic::loongarch_lsx_vldi: |
3312 | case Intrinsic::loongarch_lasx_xvldi: |
3313 | return checkIntrinsicImmArg<13>(Op, ImmOp: 1, DAG, /*IsSigned=*/true); |
3314 | } |
3315 | } |
3316 | |
// Helper function that emits an error message for intrinsics with a chain and
// returns the merge values of an UNDEF and the chain.
3319 | static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, |
3320 | StringRef ErrorMsg, |
3321 | SelectionDAG &DAG) { |
  DAG.getContext()->emitError(ErrorStr: Op->getOperationName(G: 0) + ": " + ErrorMsg + ".");
3323 | return DAG.getMergeValues(Ops: {DAG.getUNDEF(VT: Op.getValueType()), Op.getOperand(i: 0)}, |
3324 | dl: SDLoc(Op)); |
3325 | } |
3326 | |
3327 | SDValue |
3328 | LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, |
3329 | SelectionDAG &DAG) const { |
3330 | SDLoc DL(Op); |
3331 | MVT GRLenVT = Subtarget.getGRLenVT(); |
3332 | EVT VT = Op.getValueType(); |
3333 | SDValue Chain = Op.getOperand(i: 0); |
  const StringRef ErrorMsgOOR = "argument out of range";
  const StringRef ErrorMsgReqLA64 = "requires loongarch64";
  const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
3337 | |
3338 | switch (Op.getConstantOperandVal(i: 1)) { |
3339 | default: |
3340 | return Op; |
3341 | case Intrinsic::loongarch_crc_w_b_w: |
3342 | case Intrinsic::loongarch_crc_w_h_w: |
3343 | case Intrinsic::loongarch_crc_w_w_w: |
3344 | case Intrinsic::loongarch_crc_w_d_w: |
3345 | case Intrinsic::loongarch_crcc_w_b_w: |
3346 | case Intrinsic::loongarch_crcc_w_h_w: |
3347 | case Intrinsic::loongarch_crcc_w_w_w: |
3348 | case Intrinsic::loongarch_crcc_w_d_w: |
3349 | return emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG); |
3350 | case Intrinsic::loongarch_csrrd_w: |
3351 | case Intrinsic::loongarch_csrrd_d: { |
3352 | unsigned Imm = Op.getConstantOperandVal(i: 2); |
3353 | return !isUInt<14>(x: Imm) |
3354 | ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
3355 | : DAG.getNode(Opcode: LoongArchISD::CSRRD, DL, ResultTys: {GRLenVT, MVT::Other}, |
3356 | Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
3357 | } |
3358 | case Intrinsic::loongarch_csrwr_w: |
3359 | case Intrinsic::loongarch_csrwr_d: { |
3360 | unsigned Imm = Op.getConstantOperandVal(i: 3); |
3361 | return !isUInt<14>(x: Imm) |
3362 | ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
3363 | : DAG.getNode(Opcode: LoongArchISD::CSRWR, DL, ResultTys: {GRLenVT, MVT::Other}, |
3364 | Ops: {Chain, Op.getOperand(i: 2), |
3365 | DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
3366 | } |
3367 | case Intrinsic::loongarch_csrxchg_w: |
3368 | case Intrinsic::loongarch_csrxchg_d: { |
3369 | unsigned Imm = Op.getConstantOperandVal(i: 4); |
3370 | return !isUInt<14>(x: Imm) |
3371 | ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
3372 | : DAG.getNode(Opcode: LoongArchISD::CSRXCHG, DL, ResultTys: {GRLenVT, MVT::Other}, |
3373 | Ops: {Chain, Op.getOperand(i: 2), Op.getOperand(i: 3), |
3374 | DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
3375 | } |
3376 | case Intrinsic::loongarch_iocsrrd_d: { |
3377 | return DAG.getNode( |
3378 | Opcode: LoongArchISD::IOCSRRD_D, DL, ResultTys: {GRLenVT, MVT::Other}, |
3379 | Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op.getOperand(i: 2))}); |
3380 | } |
3381 | #define IOCSRRD_CASE(NAME, NODE) \ |
3382 | case Intrinsic::loongarch_##NAME: { \ |
3383 | return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \ |
3384 | {Chain, Op.getOperand(2)}); \ |
3385 | } |
3386 | IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B); |
3387 | IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H); |
3388 | IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W); |
3389 | #undef IOCSRRD_CASE |
3390 | case Intrinsic::loongarch_cpucfg: { |
3391 | return DAG.getNode(Opcode: LoongArchISD::CPUCFG, DL, ResultTys: {GRLenVT, MVT::Other}, |
3392 | Ops: {Chain, Op.getOperand(i: 2)}); |
3393 | } |
3394 | case Intrinsic::loongarch_lddir_d: { |
3395 | unsigned Imm = Op.getConstantOperandVal(i: 3); |
3396 | return !isUInt<8>(x: Imm) |
3397 | ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
3398 | : Op; |
3399 | } |
3400 | case Intrinsic::loongarch_movfcsr2gr: { |
3401 | if (!Subtarget.hasBasicF()) |
3402 | return emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgReqF, DAG); |
3403 | unsigned Imm = Op.getConstantOperandVal(i: 2); |
3404 | return !isUInt<2>(x: Imm) |
3405 | ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
3406 | : DAG.getNode(Opcode: LoongArchISD::MOVFCSR2GR, DL, ResultTys: {VT, MVT::Other}, |
3407 | Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
3408 | } |
3409 | case Intrinsic::loongarch_lsx_vld: |
3410 | case Intrinsic::loongarch_lsx_vldrepl_b: |
3411 | case Intrinsic::loongarch_lasx_xvld: |
3412 | case Intrinsic::loongarch_lasx_xvldrepl_b: |
3413 | return !isInt<12>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue()) |
3414 | ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
3415 | : SDValue(); |
3416 | case Intrinsic::loongarch_lsx_vldrepl_h: |
3417 | case Intrinsic::loongarch_lasx_xvldrepl_h: |
3418 | return !isShiftedInt<11, 1>( |
3419 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue()) |
3420 | ? emitIntrinsicWithChainErrorMessage( |
3421 | Op, ErrorMsg: "argument out of range or not a multiple of 2" , DAG) |
3422 | : SDValue(); |
3423 | case Intrinsic::loongarch_lsx_vldrepl_w: |
3424 | case Intrinsic::loongarch_lasx_xvldrepl_w: |
3425 | return !isShiftedInt<10, 2>( |
3426 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue()) |
3427 | ? emitIntrinsicWithChainErrorMessage( |
3428 | Op, ErrorMsg: "argument out of range or not a multiple of 4" , DAG) |
3429 | : SDValue(); |
3430 | case Intrinsic::loongarch_lsx_vldrepl_d: |
3431 | case Intrinsic::loongarch_lasx_xvldrepl_d: |
3432 | return !isShiftedInt<9, 3>( |
3433 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 3))->getSExtValue()) |
3434 | ? emitIntrinsicWithChainErrorMessage( |
3435 | Op, ErrorMsg: "argument out of range or not a multiple of 8" , DAG) |
3436 | : SDValue(); |
3437 | } |
3438 | } |
3439 | |
// Helper function that emits an error message for intrinsics with a void
// return value and returns the chain.
3442 | static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, |
3443 | SelectionDAG &DAG) { |
3444 | |
  DAG.getContext()->emitError(ErrorStr: Op->getOperationName(G: 0) + ": " + ErrorMsg + ".");
3446 | return Op.getOperand(i: 0); |
3447 | } |
3448 | |
3449 | SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, |
3450 | SelectionDAG &DAG) const { |
3451 | SDLoc DL(Op); |
3452 | MVT GRLenVT = Subtarget.getGRLenVT(); |
3453 | SDValue Chain = Op.getOperand(i: 0); |
3454 | uint64_t IntrinsicEnum = Op.getConstantOperandVal(i: 1); |
3455 | SDValue Op2 = Op.getOperand(i: 2); |
  const StringRef ErrorMsgOOR = "argument out of range";
  const StringRef ErrorMsgReqLA64 = "requires loongarch64";
  const StringRef ErrorMsgReqLA32 = "requires loongarch32";
  const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
3460 | |
3461 | switch (IntrinsicEnum) { |
3462 | default: |
3463 | // TODO: Add more Intrinsics. |
3464 | return SDValue(); |
3465 | case Intrinsic::loongarch_cacop_d: |
3466 | case Intrinsic::loongarch_cacop_w: { |
3467 | if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit()) |
3468 | return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG); |
3469 | if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit()) |
3470 | return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA32, DAG); |
3471 | // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12) |
3472 | unsigned Imm1 = Op2->getAsZExtVal(); |
3473 | int Imm2 = cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue(); |
3474 | if (!isUInt<5>(x: Imm1) || !isInt<12>(x: Imm2)) |
3475 | return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG); |
3476 | return Op; |
3477 | } |
3478 | case Intrinsic::loongarch_dbar: { |
3479 | unsigned Imm = Op2->getAsZExtVal(); |
3480 | return !isUInt<15>(x: Imm) |
3481 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
3482 | : DAG.getNode(Opcode: LoongArchISD::DBAR, DL, VT: MVT::Other, N1: Chain, |
3483 | N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT)); |
3484 | } |
3485 | case Intrinsic::loongarch_ibar: { |
3486 | unsigned Imm = Op2->getAsZExtVal(); |
3487 | return !isUInt<15>(x: Imm) |
3488 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
3489 | : DAG.getNode(Opcode: LoongArchISD::IBAR, DL, VT: MVT::Other, N1: Chain, |
3490 | N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT)); |
3491 | } |
3492 | case Intrinsic::loongarch_break: { |
3493 | unsigned Imm = Op2->getAsZExtVal(); |
3494 | return !isUInt<15>(x: Imm) |
3495 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
3496 | : DAG.getNode(Opcode: LoongArchISD::BREAK, DL, VT: MVT::Other, N1: Chain, |
3497 | N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT)); |
3498 | } |
3499 | case Intrinsic::loongarch_movgr2fcsr: { |
3500 | if (!Subtarget.hasBasicF()) |
3501 | return emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqF, DAG); |
3502 | unsigned Imm = Op2->getAsZExtVal(); |
3503 | return !isUInt<2>(x: Imm) |
3504 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
3505 | : DAG.getNode(Opcode: LoongArchISD::MOVGR2FCSR, DL, VT: MVT::Other, N1: Chain, |
3506 | N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT), |
3507 | N3: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: GRLenVT, |
3508 | Operand: Op.getOperand(i: 3))); |
3509 | } |
3510 | case Intrinsic::loongarch_syscall: { |
3511 | unsigned Imm = Op2->getAsZExtVal(); |
3512 | return !isUInt<15>(x: Imm) |
3513 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
3514 | : DAG.getNode(Opcode: LoongArchISD::SYSCALL, DL, VT: MVT::Other, N1: Chain, |
3515 | N2: DAG.getConstant(Val: Imm, DL, VT: GRLenVT)); |
3516 | } |
3517 | #define IOCSRWR_CASE(NAME, NODE) \ |
3518 | case Intrinsic::loongarch_##NAME: { \ |
3519 | SDValue Op3 = Op.getOperand(3); \ |
3520 | return Subtarget.is64Bit() \ |
3521 | ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \ |
3522 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \ |
3523 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \ |
3524 | : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \ |
3525 | Op3); \ |
3526 | } |
3527 | IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B); |
3528 | IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H); |
3529 | IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W); |
3530 | #undef IOCSRWR_CASE |
3531 | case Intrinsic::loongarch_iocsrwr_d: { |
3532 | return !Subtarget.is64Bit() |
3533 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG) |
3534 | : DAG.getNode(Opcode: LoongArchISD::IOCSRWR_D, DL, VT: MVT::Other, N1: Chain, |
3535 | N2: Op2, |
3536 | N3: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, |
3537 | Operand: Op.getOperand(i: 3))); |
3538 | } |
3539 | #define ASRT_LE_GT_CASE(NAME) \ |
3540 | case Intrinsic::loongarch_##NAME: { \ |
3541 | return !Subtarget.is64Bit() \ |
3542 | ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \ |
3543 | : Op; \ |
3544 | } |
3545 | ASRT_LE_GT_CASE(asrtle_d) |
3546 | ASRT_LE_GT_CASE(asrtgt_d) |
3547 | #undef ASRT_LE_GT_CASE |
3548 | case Intrinsic::loongarch_ldpte_d: { |
3549 | unsigned Imm = Op.getConstantOperandVal(i: 3); |
3550 | return !Subtarget.is64Bit() |
3551 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgReqLA64, DAG) |
3552 | : !isUInt<8>(x: Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
3553 | : Op; |
3554 | } |
3555 | case Intrinsic::loongarch_lsx_vst: |
3556 | case Intrinsic::loongarch_lasx_xvst: |
3557 | return !isInt<12>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) |
3558 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
3559 | : SDValue(); |
3560 | case Intrinsic::loongarch_lasx_xvstelm_b: |
3561 | return (!isInt<8>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
3562 | !isUInt<5>(x: Op.getConstantOperandVal(i: 5))) |
3563 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
3564 | : SDValue(); |
3565 | case Intrinsic::loongarch_lsx_vstelm_b: |
3566 | return (!isInt<8>(x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
3567 | !isUInt<4>(x: Op.getConstantOperandVal(i: 5))) |
3568 | ? emitIntrinsicErrorMessage(Op, ErrorMsg: ErrorMsgOOR, DAG) |
3569 | : SDValue(); |
3570 | case Intrinsic::loongarch_lasx_xvstelm_h: |
3571 | return (!isShiftedInt<8, 1>( |
3572 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
3573 | !isUInt<4>(x: Op.getConstantOperandVal(i: 5))) |
3574 | ? emitIntrinsicErrorMessage( |
3575 | Op, ErrorMsg: "argument out of range or not a multiple of 2" , DAG) |
3576 | : SDValue(); |
3577 | case Intrinsic::loongarch_lsx_vstelm_h: |
3578 | return (!isShiftedInt<8, 1>( |
3579 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
3580 | !isUInt<3>(x: Op.getConstantOperandVal(i: 5))) |
3581 | ? emitIntrinsicErrorMessage( |
3582 | Op, ErrorMsg: "argument out of range or not a multiple of 2" , DAG) |
3583 | : SDValue(); |
3584 | case Intrinsic::loongarch_lasx_xvstelm_w: |
3585 | return (!isShiftedInt<8, 2>( |
3586 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
3587 | !isUInt<3>(x: Op.getConstantOperandVal(i: 5))) |
3588 | ? emitIntrinsicErrorMessage( |
3589 | Op, ErrorMsg: "argument out of range or not a multiple of 4" , DAG) |
3590 | : SDValue(); |
3591 | case Intrinsic::loongarch_lsx_vstelm_w: |
3592 | return (!isShiftedInt<8, 2>( |
3593 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
3594 | !isUInt<2>(x: Op.getConstantOperandVal(i: 5))) |
3595 | ? emitIntrinsicErrorMessage( |
3596 | Op, ErrorMsg: "argument out of range or not a multiple of 4" , DAG) |
3597 | : SDValue(); |
3598 | case Intrinsic::loongarch_lasx_xvstelm_d: |
3599 | return (!isShiftedInt<8, 3>( |
3600 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
3601 | !isUInt<2>(x: Op.getConstantOperandVal(i: 5))) |
3602 | ? emitIntrinsicErrorMessage( |
3603 | Op, ErrorMsg: "argument out of range or not a multiple of 8" , DAG) |
3604 | : SDValue(); |
3605 | case Intrinsic::loongarch_lsx_vstelm_d: |
3606 | return (!isShiftedInt<8, 3>( |
3607 | x: cast<ConstantSDNode>(Val: Op.getOperand(i: 4))->getSExtValue()) || |
3608 | !isUInt<1>(x: Op.getConstantOperandVal(i: 5))) |
3609 | ? emitIntrinsicErrorMessage( |
3610 | Op, ErrorMsg: "argument out of range or not a multiple of 8" , DAG) |
3611 | : SDValue(); |
3612 | } |
3613 | } |
3614 | |
3615 | SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op, |
3616 | SelectionDAG &DAG) const { |
3617 | SDLoc DL(Op); |
3618 | SDValue Lo = Op.getOperand(i: 0); |
3619 | SDValue Hi = Op.getOperand(i: 1); |
3620 | SDValue Shamt = Op.getOperand(i: 2); |
3621 | EVT VT = Lo.getValueType(); |
3622 | |
3623 | // if Shamt-GRLen < 0: // Shamt < GRLen |
3624 | // Lo = Lo << Shamt |
3625 | // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt)) |
3626 | // else: |
3627 | // Lo = 0 |
3628 | // Hi = Lo << (Shamt-GRLen) |
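  // For example, on LA32 (GRLen = 32) with Shamt = 4 this computes
  // Lo = Lo << 4 and Hi = (Hi << 4) | ((Lo >>u 1) >>u 27), moving the top
  // four bits of Lo into the low bits of Hi; with Shamt = 40 it computes
  // Lo = 0 and Hi = Lo << 8.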
3629 | |
3630 | SDValue Zero = DAG.getConstant(Val: 0, DL, VT); |
3631 | SDValue One = DAG.getConstant(Val: 1, DL, VT); |
3632 | SDValue MinusGRLen = |
3633 | DAG.getSignedConstant(Val: -(int)Subtarget.getGRLen(), DL, VT); |
3634 | SDValue GRLenMinus1 = DAG.getConstant(Val: Subtarget.getGRLen() - 1, DL, VT); |
3635 | SDValue ShamtMinusGRLen = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shamt, N2: MinusGRLen); |
3636 | SDValue GRLenMinus1Shamt = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Shamt, N2: GRLenMinus1); |
3637 | |
3638 | SDValue LoTrue = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Lo, N2: Shamt); |
3639 | SDValue ShiftRight1Lo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo, N2: One); |
3640 | SDValue ShiftRightLo = |
3641 | DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: ShiftRight1Lo, N2: GRLenMinus1Shamt); |
3642 | SDValue ShiftLeftHi = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi, N2: Shamt); |
3643 | SDValue HiTrue = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShiftLeftHi, N2: ShiftRightLo); |
3644 | SDValue HiFalse = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Lo, N2: ShamtMinusGRLen); |
3645 | |
3646 | SDValue CC = DAG.getSetCC(DL, VT, LHS: ShamtMinusGRLen, RHS: Zero, Cond: ISD::SETLT); |
3647 | |
3648 | Lo = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: LoTrue, N3: Zero); |
3649 | Hi = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: HiTrue, N3: HiFalse); |
3650 | |
3651 | SDValue Parts[2] = {Lo, Hi}; |
3652 | return DAG.getMergeValues(Ops: Parts, dl: DL); |
3653 | } |
3654 | |
3655 | SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op, |
3656 | SelectionDAG &DAG, |
3657 | bool IsSRA) const { |
3658 | SDLoc DL(Op); |
3659 | SDValue Lo = Op.getOperand(i: 0); |
3660 | SDValue Hi = Op.getOperand(i: 1); |
3661 | SDValue Shamt = Op.getOperand(i: 2); |
3662 | EVT VT = Lo.getValueType(); |
3663 | |
3664 | // SRA expansion: |
3665 | // if Shamt-GRLen < 0: // Shamt < GRLen |
3666 | // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1)) |
3667 | // Hi = Hi >>s Shamt |
3668 | // else: |
3669 | // Lo = Hi >>s (Shamt-GRLen); |
3670 | // Hi = Hi >>s (GRLen-1) |
3671 | // |
3672 | // SRL expansion: |
3673 | // if Shamt-GRLen < 0: // Shamt < GRLen |
3674 | // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1)) |
3675 | // Hi = Hi >>u Shamt |
3676 | // else: |
3677 | // Lo = Hi >>u (Shamt-GRLen); |
3678 | // Hi = 0; |
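  // For example, an SRL on LA32 (GRLen = 32) with Shamt = 4 computes
  // Lo = (Lo >>u 4) | ((Hi << 1) << 27) and Hi = Hi >>u 4, moving the low
  // four bits of Hi into the high bits of Lo; with Shamt = 40 it computes
  // Lo = Hi >>u 8 and Hi = 0.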
3679 | |
3680 | unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL; |
3681 | |
3682 | SDValue Zero = DAG.getConstant(Val: 0, DL, VT); |
3683 | SDValue One = DAG.getConstant(Val: 1, DL, VT); |
3684 | SDValue MinusGRLen = |
3685 | DAG.getSignedConstant(Val: -(int)Subtarget.getGRLen(), DL, VT); |
3686 | SDValue GRLenMinus1 = DAG.getConstant(Val: Subtarget.getGRLen() - 1, DL, VT); |
3687 | SDValue ShamtMinusGRLen = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Shamt, N2: MinusGRLen); |
3688 | SDValue GRLenMinus1Shamt = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Shamt, N2: GRLenMinus1); |
3689 | |
3690 | SDValue ShiftRightLo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo, N2: Shamt); |
3691 | SDValue ShiftLeftHi1 = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi, N2: One); |
3692 | SDValue ShiftLeftHi = |
3693 | DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: ShiftLeftHi1, N2: GRLenMinus1Shamt); |
3694 | SDValue LoTrue = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShiftRightLo, N2: ShiftLeftHi); |
3695 | SDValue HiTrue = DAG.getNode(Opcode: ShiftRightOp, DL, VT, N1: Hi, N2: Shamt); |
3696 | SDValue LoFalse = DAG.getNode(Opcode: ShiftRightOp, DL, VT, N1: Hi, N2: ShamtMinusGRLen); |
3697 | SDValue HiFalse = |
3698 | IsSRA ? DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: Hi, N2: GRLenMinus1) : Zero; |
3699 | |
3700 | SDValue CC = DAG.getSetCC(DL, VT, LHS: ShamtMinusGRLen, RHS: Zero, Cond: ISD::SETLT); |
3701 | |
3702 | Lo = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: LoTrue, N3: LoFalse); |
3703 | Hi = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: CC, N2: HiTrue, N3: HiFalse); |
3704 | |
3705 | SDValue Parts[2] = {Lo, Hi}; |
3706 | return DAG.getMergeValues(Ops: Parts, dl: DL); |
3707 | } |
3708 | |
3709 | // Returns the opcode of the target-specific SDNode that implements the 32-bit |
3710 | // form of the given Opcode. |
3711 | static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) { |
3712 | switch (Opcode) { |
3713 | default: |
3714 | llvm_unreachable("Unexpected opcode" ); |
3715 | case ISD::SDIV: |
3716 | return LoongArchISD::DIV_W; |
3717 | case ISD::UDIV: |
3718 | return LoongArchISD::DIV_WU; |
3719 | case ISD::SREM: |
3720 | return LoongArchISD::MOD_W; |
3721 | case ISD::UREM: |
3722 | return LoongArchISD::MOD_WU; |
3723 | case ISD::SHL: |
3724 | return LoongArchISD::SLL_W; |
3725 | case ISD::SRA: |
3726 | return LoongArchISD::SRA_W; |
3727 | case ISD::SRL: |
3728 | return LoongArchISD::SRL_W; |
3729 | case ISD::ROTL: |
3730 | case ISD::ROTR: |
3731 | return LoongArchISD::ROTR_W; |
3732 | case ISD::CTTZ: |
3733 | return LoongArchISD::CTZ_W; |
3734 | case ISD::CTLZ: |
3735 | return LoongArchISD::CLZ_W; |
3736 | } |
3737 | } |
3738 | |
// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
// node. Because i8/i16/i32 isn't a legal type for LA64, these operations
// would otherwise be promoted to i64, making it difficult to select the
// SLL_W/.../*W instructions later on, because the fact that the operation
// was originally of type i8/i16/i32 is lost.
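// For example, an i32 (shl x, y) on LA64 is rewritten here as
// (trunc (SLL_W (any_extend x), (any_extend y))), which later selects sll.w
// instead of the i64 shift that promotion would otherwise produce.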
3744 | static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, |
3745 | unsigned ExtOpc = ISD::ANY_EXTEND) { |
3746 | SDLoc DL(N); |
3747 | LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(Opcode: N->getOpcode()); |
3748 | SDValue NewOp0, NewRes; |
3749 | |
3750 | switch (NumOp) { |
3751 | default: |
3752 | llvm_unreachable("Unexpected NumOp" ); |
3753 | case 1: { |
3754 | NewOp0 = DAG.getNode(Opcode: ExtOpc, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0)); |
3755 | NewRes = DAG.getNode(Opcode: WOpcode, DL, VT: MVT::i64, Operand: NewOp0); |
3756 | break; |
3757 | } |
3758 | case 2: { |
3759 | NewOp0 = DAG.getNode(Opcode: ExtOpc, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0)); |
3760 | SDValue NewOp1 = DAG.getNode(Opcode: ExtOpc, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1)); |
3761 | if (N->getOpcode() == ISD::ROTL) { |
3762 | SDValue TmpOp = DAG.getConstant(Val: 32, DL, VT: MVT::i64); |
3763 | NewOp1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: MVT::i64, N1: TmpOp, N2: NewOp1); |
3764 | } |
3765 | NewRes = DAG.getNode(Opcode: WOpcode, DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1); |
3766 | break; |
3767 | } |
// TODO: Handle more NumOp values.
3769 | } |
3770 | |
3771 | // ReplaceNodeResults requires we maintain the same type for the return |
3772 | // value. |
3773 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: NewRes); |
3774 | } |
3775 | |
// Converts the given 32-bit operation to an i64 operation with sign-extension
// semantics, reducing the number of sign-extension instructions needed.
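//
// For example, (add i32 %a, %b) becomes
//   (trunc i32 (sext_inreg (add i64 (any_ext %a), (any_ext %b)), i32))
// which typically selects to a single ADD.W with no extra extension.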
3778 | static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) { |
3779 | SDLoc DL(N); |
3780 | SDValue NewOp0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 0)); |
3781 | SDValue NewOp1 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 1)); |
3782 | SDValue NewWOp = DAG.getNode(Opcode: N->getOpcode(), DL, VT: MVT::i64, N1: NewOp0, N2: NewOp1); |
3783 | SDValue NewRes = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: MVT::i64, N1: NewWOp, |
3784 | N2: DAG.getValueType(MVT::i32)); |
3785 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: NewRes); |
3786 | } |
3787 | |
// Helper function that emits an error message for intrinsics with/without a
// chain and returns UNDEF (plus the chain, if present) as the results.
3790 | static void emitErrorAndReplaceIntrinsicResults( |
3791 | SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG, |
3792 | StringRef ErrorMsg, bool WithChain = true) { |
3793 | DAG.getContext()->emitError(ErrorStr: N->getOperationName(G: 0) + ": " + ErrorMsg + "." ); |
3794 | Results.push_back(Elt: DAG.getUNDEF(VT: N->getValueType(ResNo: 0))); |
3795 | if (!WithChain) |
3796 | return; |
3797 | Results.push_back(Elt: N->getOperand(Num: 0)); |
3798 | } |
3799 | |
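// Replace a [x]vpickve2gr intrinsic with a VPICK_{SEXT,ZEXT}_ELT node that
// extracts element Imm of the vector and extends it to GRLenVT, then
// truncate back to the intrinsic's original result type. An immediate that
// does not fit in N bits is diagnosed as out of range.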
3800 | template <unsigned N> |
3801 | static void |
3802 | replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results, |
3803 | SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, |
3804 | unsigned ResOp) { |
3805 | const StringRef ErrorMsgOOR = "argument out of range" ; |
3806 | unsigned Imm = Node->getConstantOperandVal(Num: 2); |
3807 | if (!isUInt<N>(Imm)) { |
3808 | emitErrorAndReplaceIntrinsicResults(N: Node, Results, DAG, ErrorMsg: ErrorMsgOOR, |
3809 | /*WithChain=*/false); |
3810 | return; |
3811 | } |
3812 | SDLoc DL(Node); |
3813 | SDValue Vec = Node->getOperand(Num: 1); |
3814 | |
3815 | SDValue PickElt = |
3816 | DAG.getNode(Opcode: ResOp, DL, VT: Subtarget.getGRLenVT(), N1: Vec, |
3817 | N2: DAG.getConstant(Val: Imm, DL, VT: Subtarget.getGRLenVT()), |
3818 | N3: DAG.getValueType(Vec.getValueType().getVectorElementType())); |
3819 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: Node->getValueType(ResNo: 0), |
3820 | Operand: PickElt.getValue(R: 0))); |
3821 | } |
3822 | |
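// Replace a vector branch-condition intrinsic ([x]vb{z,nz}.{b,h,w,d,v}) with
// the corresponding VALL_*/VANY_* node producing a GRLenVT value, truncated
// back to the intrinsic's original result type.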
3823 | static void replaceVecCondBranchResults(SDNode *N, |
3824 | SmallVectorImpl<SDValue> &Results, |
3825 | SelectionDAG &DAG, |
3826 | const LoongArchSubtarget &Subtarget, |
3827 | unsigned ResOp) { |
3828 | SDLoc DL(N); |
3829 | SDValue Vec = N->getOperand(Num: 1); |
3830 | |
3831 | SDValue CB = DAG.getNode(Opcode: ResOp, DL, VT: Subtarget.getGRLenVT(), Operand: Vec); |
3832 | Results.push_back( |
3833 | Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N->getValueType(ResNo: 0), Operand: CB.getValue(R: 0))); |
3834 | } |
3835 | |
3836 | static void |
3837 | replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results, |
3838 | SelectionDAG &DAG, |
3839 | const LoongArchSubtarget &Subtarget) { |
3840 | switch (N->getConstantOperandVal(Num: 0)) { |
3841 | default: |
3842 | llvm_unreachable("Unexpected Intrinsic." ); |
3843 | case Intrinsic::loongarch_lsx_vpickve2gr_b: |
3844 | replaceVPICKVE2GRResults<4>(Node: N, Results, DAG, Subtarget, |
3845 | ResOp: LoongArchISD::VPICK_SEXT_ELT); |
3846 | break; |
3847 | case Intrinsic::loongarch_lsx_vpickve2gr_h: |
3848 | case Intrinsic::loongarch_lasx_xvpickve2gr_w: |
3849 | replaceVPICKVE2GRResults<3>(Node: N, Results, DAG, Subtarget, |
3850 | ResOp: LoongArchISD::VPICK_SEXT_ELT); |
3851 | break; |
3852 | case Intrinsic::loongarch_lsx_vpickve2gr_w: |
3853 | replaceVPICKVE2GRResults<2>(Node: N, Results, DAG, Subtarget, |
3854 | ResOp: LoongArchISD::VPICK_SEXT_ELT); |
3855 | break; |
3856 | case Intrinsic::loongarch_lsx_vpickve2gr_bu: |
3857 | replaceVPICKVE2GRResults<4>(Node: N, Results, DAG, Subtarget, |
3858 | ResOp: LoongArchISD::VPICK_ZEXT_ELT); |
3859 | break; |
3860 | case Intrinsic::loongarch_lsx_vpickve2gr_hu: |
3861 | case Intrinsic::loongarch_lasx_xvpickve2gr_wu: |
3862 | replaceVPICKVE2GRResults<3>(Node: N, Results, DAG, Subtarget, |
3863 | ResOp: LoongArchISD::VPICK_ZEXT_ELT); |
3864 | break; |
3865 | case Intrinsic::loongarch_lsx_vpickve2gr_wu: |
3866 | replaceVPICKVE2GRResults<2>(Node: N, Results, DAG, Subtarget, |
3867 | ResOp: LoongArchISD::VPICK_ZEXT_ELT); |
3868 | break; |
3869 | case Intrinsic::loongarch_lsx_bz_b: |
3870 | case Intrinsic::loongarch_lsx_bz_h: |
3871 | case Intrinsic::loongarch_lsx_bz_w: |
3872 | case Intrinsic::loongarch_lsx_bz_d: |
3873 | case Intrinsic::loongarch_lasx_xbz_b: |
3874 | case Intrinsic::loongarch_lasx_xbz_h: |
3875 | case Intrinsic::loongarch_lasx_xbz_w: |
3876 | case Intrinsic::loongarch_lasx_xbz_d: |
3877 | replaceVecCondBranchResults(N, Results, DAG, Subtarget, |
3878 | ResOp: LoongArchISD::VALL_ZERO); |
3879 | break; |
3880 | case Intrinsic::loongarch_lsx_bz_v: |
3881 | case Intrinsic::loongarch_lasx_xbz_v: |
3882 | replaceVecCondBranchResults(N, Results, DAG, Subtarget, |
3883 | ResOp: LoongArchISD::VANY_ZERO); |
3884 | break; |
3885 | case Intrinsic::loongarch_lsx_bnz_b: |
3886 | case Intrinsic::loongarch_lsx_bnz_h: |
3887 | case Intrinsic::loongarch_lsx_bnz_w: |
3888 | case Intrinsic::loongarch_lsx_bnz_d: |
3889 | case Intrinsic::loongarch_lasx_xbnz_b: |
3890 | case Intrinsic::loongarch_lasx_xbnz_h: |
3891 | case Intrinsic::loongarch_lasx_xbnz_w: |
3892 | case Intrinsic::loongarch_lasx_xbnz_d: |
3893 | replaceVecCondBranchResults(N, Results, DAG, Subtarget, |
3894 | ResOp: LoongArchISD::VALL_NONZERO); |
3895 | break; |
3896 | case Intrinsic::loongarch_lsx_bnz_v: |
3897 | case Intrinsic::loongarch_lasx_xbnz_v: |
3898 | replaceVecCondBranchResults(N, Results, DAG, Subtarget, |
3899 | ResOp: LoongArchISD::VANY_NONZERO); |
3900 | break; |
3901 | } |
3902 | } |
3903 | |
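// Expand a 128-bit atomic compare-and-swap into a PseudoCmpXchg128* machine
// node: the expected and new i128 values are split into i64 halves, and the
// two i64 result halves are recombined with BUILD_PAIR. Acquire-or-stronger
// orderings select the Acquire variant of the pseudo.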
3904 | static void replaceCMP_XCHG_128Results(SDNode *N, |
3905 | SmallVectorImpl<SDValue> &Results, |
3906 | SelectionDAG &DAG) { |
3907 | assert(N->getValueType(0) == MVT::i128 && |
3908 | "AtomicCmpSwap on types less than 128 should be legal" ); |
3909 | MachineMemOperand *MemOp = cast<MemSDNode>(Val: N)->getMemOperand(); |
3910 | |
3911 | unsigned Opcode; |
3912 | switch (MemOp->getMergedOrdering()) { |
3913 | case AtomicOrdering::Acquire: |
3914 | case AtomicOrdering::AcquireRelease: |
3915 | case AtomicOrdering::SequentiallyConsistent: |
3916 | Opcode = LoongArch::PseudoCmpXchg128Acquire; |
3917 | break; |
3918 | case AtomicOrdering::Monotonic: |
3919 | case AtomicOrdering::Release: |
3920 | Opcode = LoongArch::PseudoCmpXchg128; |
3921 | break; |
3922 | default: |
3923 | llvm_unreachable("Unexpected ordering!" ); |
3924 | } |
3925 | |
3926 | SDLoc DL(N); |
3927 | auto CmpVal = DAG.SplitScalar(N: N->getOperand(Num: 2), DL, LoVT: MVT::i64, HiVT: MVT::i64); |
3928 | auto NewVal = DAG.SplitScalar(N: N->getOperand(Num: 3), DL, LoVT: MVT::i64, HiVT: MVT::i64); |
3929 | SDValue Ops[] = {N->getOperand(Num: 1), CmpVal.first, CmpVal.second, |
3930 | NewVal.first, NewVal.second, N->getOperand(Num: 0)}; |
3931 | |
3932 | SDNode *CmpSwap = DAG.getMachineNode( |
3933 | Opcode, dl: SDLoc(N), VTs: DAG.getVTList(VT1: MVT::i64, VT2: MVT::i64, VT3: MVT::i64, VT4: MVT::Other), |
3934 | Ops); |
3935 | DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: CmpSwap), NewMemRefs: {MemOp}); |
3936 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: MVT::i128, |
3937 | N1: SDValue(CmpSwap, 0), N2: SDValue(CmpSwap, 1))); |
3938 | Results.push_back(Elt: SDValue(CmpSwap, 3)); |
3939 | } |
3940 | |
3941 | void LoongArchTargetLowering::ReplaceNodeResults( |
3942 | SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { |
3943 | SDLoc DL(N); |
3944 | EVT VT = N->getValueType(ResNo: 0); |
3945 | switch (N->getOpcode()) { |
3946 | default: |
3947 | llvm_unreachable("Don't know how to legalize this operation" ); |
3948 | case ISD::ADD: |
3949 | case ISD::SUB: |
3950 | assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && |
3951 | "Unexpected custom legalisation" ); |
3952 | Results.push_back(Elt: customLegalizeToWOpWithSExt(N, DAG)); |
3953 | break; |
3954 | case ISD::SDIV: |
3955 | case ISD::UDIV: |
3956 | case ISD::SREM: |
3957 | case ISD::UREM: |
3958 | assert(VT == MVT::i32 && Subtarget.is64Bit() && |
3959 | "Unexpected custom legalisation" ); |
3960 | Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 2, |
3961 | ExtOpc: Subtarget.hasDiv32() && VT == MVT::i32 |
3962 | ? ISD::ANY_EXTEND |
3963 | : ISD::SIGN_EXTEND)); |
3964 | break; |
3965 | case ISD::SHL: |
3966 | case ISD::SRA: |
3967 | case ISD::SRL: |
3968 | assert(VT == MVT::i32 && Subtarget.is64Bit() && |
3969 | "Unexpected custom legalisation" ); |
3970 | if (N->getOperand(Num: 1).getOpcode() != ISD::Constant) { |
3971 | Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 2)); |
3972 | break; |
3973 | } |
3974 | break; |
3975 | case ISD::ROTL: |
3976 | case ISD::ROTR: |
3977 | assert(VT == MVT::i32 && Subtarget.is64Bit() && |
3978 | "Unexpected custom legalisation" ); |
3979 | Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 2)); |
3980 | break; |
3981 | case ISD::FP_TO_SINT: { |
3982 | assert(VT == MVT::i32 && Subtarget.is64Bit() && |
3983 | "Unexpected custom legalisation" ); |
3984 | SDValue Src = N->getOperand(Num: 0); |
3985 | EVT FVT = EVT::getFloatingPointVT(BitWidth: N->getValueSizeInBits(ResNo: 0)); |
3986 | if (getTypeAction(Context&: *DAG.getContext(), VT: Src.getValueType()) != |
3987 | TargetLowering::TypeSoftenFloat) { |
3988 | if (!isTypeLegal(VT: Src.getValueType())) |
3989 | return; |
3990 | if (Src.getValueType() == MVT::f16) |
3991 | Src = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: MVT::f32, Operand: Src); |
3992 | SDValue Dst = DAG.getNode(Opcode: LoongArchISD::FTINT, DL, VT: FVT, Operand: Src); |
3993 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Dst)); |
3994 | return; |
3995 | } |
3996 | // If the FP type needs to be softened, emit a library call using the 'si' |
3997 | // version. If we left it to default legalization we'd end up with 'di'. |
3998 | RTLIB::Libcall LC; |
3999 | LC = RTLIB::getFPTOSINT(OpVT: Src.getValueType(), RetVT: VT); |
4000 | MakeLibCallOptions CallOptions; |
4001 | EVT OpVT = Src.getValueType(); |
4002 | CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT: VT, Value: true); |
4003 | SDValue Chain = SDValue(); |
4004 | SDValue Result; |
4005 | std::tie(args&: Result, args&: Chain) = |
4006 | makeLibCall(DAG, LC, RetVT: VT, Ops: Src, CallOptions, dl: DL, Chain); |
4007 | Results.push_back(Elt: Result); |
4008 | break; |
4009 | } |
4010 | case ISD::BITCAST: { |
4011 | SDValue Src = N->getOperand(Num: 0); |
4012 | EVT SrcVT = Src.getValueType(); |
4013 | if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() && |
4014 | Subtarget.hasBasicF()) { |
4015 | SDValue Dst = |
4016 | DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Src); |
4017 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Dst)); |
4018 | } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) { |
4019 | SDValue NewReg = DAG.getNode(Opcode: LoongArchISD::SPLIT_PAIR_F64, DL, |
4020 | VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32), N: Src); |
4021 | SDValue RetReg = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: MVT::i64, |
4022 | N1: NewReg.getValue(R: 0), N2: NewReg.getValue(R: 1)); |
4023 | Results.push_back(Elt: RetReg); |
4024 | } |
4025 | break; |
4026 | } |
4027 | case ISD::FP_TO_UINT: { |
4028 | assert(VT == MVT::i32 && Subtarget.is64Bit() && |
4029 | "Unexpected custom legalisation" ); |
4030 | auto &TLI = DAG.getTargetLoweringInfo(); |
4031 | SDValue Tmp1, Tmp2; |
4032 | TLI.expandFP_TO_UINT(N, Result&: Tmp1, Chain&: Tmp2, DAG); |
4033 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Tmp1)); |
4034 | break; |
4035 | } |
4036 | case ISD::BSWAP: { |
4037 | SDValue Src = N->getOperand(Num: 0); |
4038 | assert((VT == MVT::i16 || VT == MVT::i32) && |
4039 | "Unexpected custom legalization" ); |
4040 | MVT GRLenVT = Subtarget.getGRLenVT(); |
4041 | SDValue NewSrc = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: GRLenVT, Operand: Src); |
4042 | SDValue Tmp; |
4043 | switch (VT.getSizeInBits()) { |
4044 | default: |
4045 | llvm_unreachable("Unexpected operand width" ); |
4046 | case 16: |
4047 | Tmp = DAG.getNode(Opcode: LoongArchISD::REVB_2H, DL, VT: GRLenVT, Operand: NewSrc); |
4048 | break; |
4049 | case 32: |
// Only LA64 will get here, due to the size mismatch between VT and
// GRLenVT; LA32 lowering is defined directly in LoongArchInstrInfo.
4052 | Tmp = DAG.getNode(Opcode: LoongArchISD::REVB_2W, DL, VT: GRLenVT, Operand: NewSrc); |
4053 | break; |
4054 | } |
4055 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Tmp)); |
4056 | break; |
4057 | } |
4058 | case ISD::BITREVERSE: { |
4059 | SDValue Src = N->getOperand(Num: 0); |
4060 | assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) && |
4061 | "Unexpected custom legalization" ); |
4062 | MVT GRLenVT = Subtarget.getGRLenVT(); |
4063 | SDValue NewSrc = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: GRLenVT, Operand: Src); |
4064 | SDValue Tmp; |
4065 | switch (VT.getSizeInBits()) { |
4066 | default: |
4067 | llvm_unreachable("Unexpected operand width" ); |
4068 | case 8: |
4069 | Tmp = DAG.getNode(Opcode: LoongArchISD::BITREV_4B, DL, VT: GRLenVT, Operand: NewSrc); |
4070 | break; |
4071 | case 32: |
4072 | Tmp = DAG.getNode(Opcode: LoongArchISD::BITREV_W, DL, VT: GRLenVT, Operand: NewSrc); |
4073 | break; |
4074 | } |
4075 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Tmp)); |
4076 | break; |
4077 | } |
4078 | case ISD::CTLZ: |
4079 | case ISD::CTTZ: { |
4080 | assert(VT == MVT::i32 && Subtarget.is64Bit() && |
4081 | "Unexpected custom legalisation" ); |
4082 | Results.push_back(Elt: customLegalizeToWOp(N, DAG, NumOp: 1)); |
4083 | break; |
4084 | } |
4085 | case ISD::INTRINSIC_W_CHAIN: { |
4086 | SDValue Chain = N->getOperand(Num: 0); |
4087 | SDValue Op2 = N->getOperand(Num: 2); |
4088 | MVT GRLenVT = Subtarget.getGRLenVT(); |
4089 | const StringRef ErrorMsgOOR = "argument out of range" ; |
4090 | const StringRef ErrorMsgReqLA64 = "requires loongarch64" ; |
4091 | const StringRef ErrorMsgReqF = "requires basic 'f' target feature" ; |
4092 | |
4093 | switch (N->getConstantOperandVal(Num: 1)) { |
4094 | default: |
4095 | llvm_unreachable("Unexpected Intrinsic." ); |
4096 | case Intrinsic::loongarch_movfcsr2gr: { |
4097 | if (!Subtarget.hasBasicF()) { |
4098 | emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgReqF); |
4099 | return; |
4100 | } |
4101 | unsigned Imm = Op2->getAsZExtVal(); |
4102 | if (!isUInt<2>(x: Imm)) { |
4103 | emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR); |
4104 | return; |
4105 | } |
4106 | SDValue MOVFCSR2GRResults = DAG.getNode( |
4107 | Opcode: LoongArchISD::MOVFCSR2GR, DL: SDLoc(N), ResultTys: {MVT::i64, MVT::Other}, |
4108 | Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
4109 | Results.push_back( |
4110 | Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: MOVFCSR2GRResults.getValue(R: 0))); |
4111 | Results.push_back(Elt: MOVFCSR2GRResults.getValue(R: 1)); |
4112 | break; |
4113 | } |
4114 | #define CRC_CASE_EXT_BINARYOP(NAME, NODE) \ |
4115 | case Intrinsic::loongarch_##NAME: { \ |
4116 | SDValue NODE = DAG.getNode( \ |
4117 | LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \ |
4118 | {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \ |
4119 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \ |
4120 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \ |
4121 | Results.push_back(NODE.getValue(1)); \ |
4122 | break; \ |
4123 | } |
4124 | CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W) |
4125 | CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W) |
4126 | CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W) |
4127 | CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W) |
4128 | CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W) |
4129 | CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W) |
4130 | #undef CRC_CASE_EXT_BINARYOP |
4131 | |
4132 | #define CRC_CASE_EXT_UNARYOP(NAME, NODE) \ |
4133 | case Intrinsic::loongarch_##NAME: { \ |
4134 | SDValue NODE = DAG.getNode( \ |
4135 | LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \ |
4136 | {Chain, Op2, \ |
4137 | DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \ |
4138 | Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \ |
4139 | Results.push_back(NODE.getValue(1)); \ |
4140 | break; \ |
4141 | } |
4142 | CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W) |
4143 | CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W) |
4144 | #undef CRC_CASE_EXT_UNARYOP |
4145 | #define CSR_CASE(ID) \ |
4146 | case Intrinsic::loongarch_##ID: { \ |
4147 | if (!Subtarget.is64Bit()) \ |
4148 | emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \ |
4149 | break; \ |
4150 | } |
4151 | CSR_CASE(csrrd_d); |
4152 | CSR_CASE(csrwr_d); |
4153 | CSR_CASE(csrxchg_d); |
4154 | CSR_CASE(iocsrrd_d); |
4155 | #undef CSR_CASE |
4156 | case Intrinsic::loongarch_csrrd_w: { |
4157 | unsigned Imm = Op2->getAsZExtVal(); |
4158 | if (!isUInt<14>(x: Imm)) { |
4159 | emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR); |
4160 | return; |
4161 | } |
4162 | SDValue CSRRDResults = |
4163 | DAG.getNode(Opcode: LoongArchISD::CSRRD, DL, ResultTys: {GRLenVT, MVT::Other}, |
4164 | Ops: {Chain, DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
4165 | Results.push_back( |
4166 | Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRRDResults.getValue(R: 0))); |
4167 | Results.push_back(Elt: CSRRDResults.getValue(R: 1)); |
4168 | break; |
4169 | } |
4170 | case Intrinsic::loongarch_csrwr_w: { |
4171 | unsigned Imm = N->getConstantOperandVal(Num: 3); |
4172 | if (!isUInt<14>(x: Imm)) { |
4173 | emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR); |
4174 | return; |
4175 | } |
4176 | SDValue CSRWRResults = |
4177 | DAG.getNode(Opcode: LoongArchISD::CSRWR, DL, ResultTys: {GRLenVT, MVT::Other}, |
4178 | Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op2), |
4179 | DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
4180 | Results.push_back( |
4181 | Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRWRResults.getValue(R: 0))); |
4182 | Results.push_back(Elt: CSRWRResults.getValue(R: 1)); |
4183 | break; |
4184 | } |
4185 | case Intrinsic::loongarch_csrxchg_w: { |
4186 | unsigned Imm = N->getConstantOperandVal(Num: 4); |
4187 | if (!isUInt<14>(x: Imm)) { |
4188 | emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgOOR); |
4189 | return; |
4190 | } |
4191 | SDValue CSRXCHGResults = DAG.getNode( |
4192 | Opcode: LoongArchISD::CSRXCHG, DL, ResultTys: {GRLenVT, MVT::Other}, |
4193 | Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op2), |
4194 | DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: N->getOperand(Num: 3)), |
4195 | DAG.getConstant(Val: Imm, DL, VT: GRLenVT)}); |
4196 | Results.push_back( |
4197 | Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CSRXCHGResults.getValue(R: 0))); |
4198 | Results.push_back(Elt: CSRXCHGResults.getValue(R: 1)); |
4199 | break; |
4200 | } |
4201 | #define IOCSRRD_CASE(NAME, NODE) \ |
4202 | case Intrinsic::loongarch_##NAME: { \ |
4203 | SDValue IOCSRRDResults = \ |
4204 | DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \ |
4205 | {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \ |
4206 | Results.push_back( \ |
4207 | DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \ |
4208 | Results.push_back(IOCSRRDResults.getValue(1)); \ |
4209 | break; \ |
4210 | } |
4211 | IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B); |
4212 | IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H); |
4213 | IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W); |
4214 | #undef IOCSRRD_CASE |
4215 | case Intrinsic::loongarch_cpucfg: { |
4216 | SDValue CPUCFGResults = |
4217 | DAG.getNode(Opcode: LoongArchISD::CPUCFG, DL, ResultTys: {GRLenVT, MVT::Other}, |
4218 | Ops: {Chain, DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op2)}); |
4219 | Results.push_back( |
4220 | Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: CPUCFGResults.getValue(R: 0))); |
4221 | Results.push_back(Elt: CPUCFGResults.getValue(R: 1)); |
4222 | break; |
4223 | } |
4224 | case Intrinsic::loongarch_lddir_d: { |
4225 | if (!Subtarget.is64Bit()) { |
4226 | emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsg: ErrorMsgReqLA64); |
4227 | return; |
4228 | } |
4229 | break; |
4230 | } |
4231 | } |
4232 | break; |
4233 | } |
4234 | case ISD::READ_REGISTER: { |
4235 | if (Subtarget.is64Bit()) |
4236 | DAG.getContext()->emitError( |
4237 | ErrorStr: "On LA64, only 64-bit registers can be read." ); |
4238 | else |
4239 | DAG.getContext()->emitError( |
4240 | ErrorStr: "On LA32, only 32-bit registers can be read." ); |
4241 | Results.push_back(Elt: DAG.getUNDEF(VT)); |
4242 | Results.push_back(Elt: N->getOperand(Num: 0)); |
4243 | break; |
4244 | } |
4245 | case ISD::INTRINSIC_WO_CHAIN: { |
4246 | replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget); |
4247 | break; |
4248 | } |
4249 | case ISD::LROUND: { |
4250 | SDValue Op0 = N->getOperand(Num: 0); |
4251 | EVT OpVT = Op0.getValueType(); |
4252 | RTLIB::Libcall LC = |
4253 | OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32; |
4254 | MakeLibCallOptions CallOptions; |
4255 | CallOptions.setTypeListBeforeSoften(OpsVT: OpVT, RetVT: MVT::i64, Value: true); |
4256 | SDValue Result = makeLibCall(DAG, LC, RetVT: MVT::i64, Ops: Op0, CallOptions, dl: DL).first; |
4257 | Result = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Result); |
4258 | Results.push_back(Elt: Result); |
4259 | break; |
4260 | } |
4261 | case ISD::ATOMIC_CMP_SWAP: { |
4262 | replaceCMP_XCHG_128Results(N, Results, DAG); |
4263 | break; |
4264 | } |
4265 | case ISD::TRUNCATE: { |
4266 | MVT VT = N->getSimpleValueType(ResNo: 0); |
4267 | if (getTypeAction(Context&: *DAG.getContext(), VT) != TypeWidenVector) |
4268 | return; |
4269 | |
4270 | MVT WidenVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT).getSimpleVT(); |
4271 | SDValue In = N->getOperand(Num: 0); |
4272 | EVT InVT = In.getValueType(); |
4273 | EVT InEltVT = InVT.getVectorElementType(); |
4274 | EVT EltVT = VT.getVectorElementType(); |
4275 | unsigned MinElts = VT.getVectorNumElements(); |
4276 | unsigned WidenNumElts = WidenVT.getVectorNumElements(); |
4277 | unsigned InBits = InVT.getSizeInBits(); |
4278 | |
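// Widen the input to a 128-bit vector, bitcast it to the widened result
// type, and select every Scale-th element with a shuffle. For example,
// truncating v4i32 to v4i8 (widened to v16i8) shuffles with the byte mask
// {0, 4, 8, 12, -1, ...}, picking the low byte of each i32 element.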
4279 | if ((128 % InBits) == 0 && WidenVT.is128BitVector()) { |
4280 | if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) { |
4281 | int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits(); |
4282 | SmallVector<int, 16> TruncMask(WidenNumElts, -1); |
4283 | for (unsigned I = 0; I < MinElts; ++I) |
4284 | TruncMask[I] = Scale * I; |
4285 | |
unsigned InWidenNumElts = 128 / In.getScalarValueSizeInBits();
MVT SVT = In.getSimpleValueType().getScalarType();
MVT InWidenVT = MVT::getVectorVT(VT: SVT, NumElements: InWidenNumElts);
SDValue WidenIn =
DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: InWidenVT,
N1: DAG.getUNDEF(VT: InWidenVT), N2: In,
N3: DAG.getVectorIdxConstant(Val: 0, DL));
assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
"Illegal vector type in truncation" );
WidenIn = DAG.getBitcast(VT: WidenVT, V: WidenIn);
4295 | Results.push_back( |
4296 | Elt: DAG.getVectorShuffle(VT: WidenVT, dl: DL, N1: WidenIn, N2: WidenIn, Mask: TruncMask)); |
4297 | return; |
4298 | } |
4299 | } |
4300 | |
4301 | break; |
4302 | } |
4303 | } |
4304 | } |
4305 | |
4306 | static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, |
4307 | TargetLowering::DAGCombinerInfo &DCI, |
4308 | const LoongArchSubtarget &Subtarget) { |
4309 | if (DCI.isBeforeLegalizeOps()) |
4310 | return SDValue(); |
4311 | |
4312 | SDValue FirstOperand = N->getOperand(Num: 0); |
4313 | SDValue SecondOperand = N->getOperand(Num: 1); |
4314 | unsigned FirstOperandOpc = FirstOperand.getOpcode(); |
4315 | EVT ValTy = N->getValueType(ResNo: 0); |
4316 | SDLoc DL(N); |
4317 | uint64_t lsb, msb; |
4318 | unsigned SMIdx, SMLen; |
4319 | ConstantSDNode *CN; |
4320 | SDValue NewOperand; |
4321 | MVT GRLenVT = Subtarget.getGRLenVT(); |
4322 | |
4323 | // BSTRPICK requires the 32S feature. |
4324 | if (!Subtarget.has32S()) |
4325 | return SDValue(); |
4326 | |
4327 | // Op's second operand must be a shifted mask. |
4328 | if (!(CN = dyn_cast<ConstantSDNode>(Val&: SecondOperand)) || |
4329 | !isShiftedMask_64(Value: CN->getZExtValue(), MaskIdx&: SMIdx, MaskLen&: SMLen)) |
4330 | return SDValue(); |
4331 | |
4332 | if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) { |
4333 | // Pattern match BSTRPICK. |
4334 | // $dst = and ((sra or srl) $src , lsb), (2**len - 1) |
4335 | // => BSTRPICK $dst, $src, msb, lsb |
4336 | // where msb = lsb + len - 1 |
4337 | |
4338 | // The second operand of the shift must be an immediate. |
4339 | if (!(CN = dyn_cast<ConstantSDNode>(Val: FirstOperand.getOperand(i: 1)))) |
4340 | return SDValue(); |
4341 | |
4342 | lsb = CN->getZExtValue(); |
4343 | |
4344 | // Return if the shifted mask does not start at bit 0 or the sum of its |
4345 | // length and lsb exceeds the word's size. |
4346 | if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits()) |
4347 | return SDValue(); |
4348 | |
4349 | NewOperand = FirstOperand.getOperand(i: 0); |
4350 | } else { |
4351 | // Pattern match BSTRPICK. |
// $dst = and $src, (2**len - 1), if len > 12
4353 | // => BSTRPICK $dst, $src, msb, lsb |
4354 | // where lsb = 0 and msb = len - 1 |
4355 | |
4356 | // If the mask is <= 0xfff, andi can be used instead. |
4357 | if (CN->getZExtValue() <= 0xfff) |
4358 | return SDValue(); |
4359 | |
// Return if the MSB position exceeds the value's width.
4361 | if (SMIdx + SMLen > ValTy.getSizeInBits()) |
4362 | return SDValue(); |
4363 | |
4364 | if (SMIdx > 0) { |
// Omit if the constant has more than 2 uses. This is a conservative
// decision. Whether it is a win depends on the HW microarchitecture.
// However, it should always be better for 1 and 2 uses.
4368 | if (CN->use_size() > 2) |
4369 | return SDValue(); |
4370 | // Return if the constant can be composed by a single LU12I.W. |
4371 | if ((CN->getZExtValue() & 0xfff) == 0) |
4372 | return SDValue(); |
// Return if the constant can be composed by a single ADDI with
4374 | // the zero register. |
4375 | if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0) |
4376 | return SDValue(); |
4377 | } |
4378 | |
4379 | lsb = SMIdx; |
4380 | NewOperand = FirstOperand; |
4381 | } |
4382 | |
4383 | msb = lsb + SMLen - 1; |
4384 | SDValue NR0 = DAG.getNode(Opcode: LoongArchISD::BSTRPICK, DL, VT: ValTy, N1: NewOperand, |
4385 | N2: DAG.getConstant(Val: msb, DL, VT: GRLenVT), |
4386 | N3: DAG.getConstant(Val: lsb, DL, VT: GRLenVT)); |
4387 | if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0) |
4388 | return NR0; |
4389 | // Try to optimize to |
4390 | // bstrpick $Rd, $Rs, msb, lsb |
4391 | // slli $Rd, $Rd, lsb |
4392 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: ValTy, N1: NR0, |
4393 | N2: DAG.getConstant(Val: lsb, DL, VT: GRLenVT)); |
4394 | } |
4395 | |
4396 | static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, |
4397 | TargetLowering::DAGCombinerInfo &DCI, |
4398 | const LoongArchSubtarget &Subtarget) { |
4399 | // BSTRPICK requires the 32S feature. |
4400 | if (!Subtarget.has32S()) |
4401 | return SDValue(); |
4402 | |
4403 | if (DCI.isBeforeLegalizeOps()) |
4404 | return SDValue(); |
4405 | |
4406 | // $dst = srl (and $src, Mask), Shamt |
4407 | // => |
4408 | // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt |
4409 | // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1 |
4410 | // |
4411 | |
4412 | SDValue FirstOperand = N->getOperand(Num: 0); |
4413 | ConstantSDNode *CN; |
4414 | EVT ValTy = N->getValueType(ResNo: 0); |
4415 | SDLoc DL(N); |
4416 | MVT GRLenVT = Subtarget.getGRLenVT(); |
4417 | unsigned MaskIdx, MaskLen; |
4418 | uint64_t Shamt; |
4419 | |
4420 | // The first operand must be an AND and the second operand of the AND must be |
4421 | // a shifted mask. |
4422 | if (FirstOperand.getOpcode() != ISD::AND || |
4423 | !(CN = dyn_cast<ConstantSDNode>(Val: FirstOperand.getOperand(i: 1))) || |
4424 | !isShiftedMask_64(Value: CN->getZExtValue(), MaskIdx, MaskLen)) |
4425 | return SDValue(); |
4426 | |
4427 | // The second operand (shift amount) must be an immediate. |
4428 | if (!(CN = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1)))) |
4429 | return SDValue(); |
4430 | |
4431 | Shamt = CN->getZExtValue(); |
4432 | if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1) |
4433 | return DAG.getNode(Opcode: LoongArchISD::BSTRPICK, DL, VT: ValTy, |
4434 | N1: FirstOperand->getOperand(Num: 0), |
4435 | N2: DAG.getConstant(Val: MaskIdx + MaskLen - 1, DL, VT: GRLenVT), |
4436 | N3: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT)); |
4437 | |
4438 | return SDValue(); |
4439 | } |
4440 | |
// Helper to peek through bitops/trunc/setcc to determine the size of the
// source vector. Allows performBITCASTCombine to determine what size vector
// generated a <X x i1>.
4443 | static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size, |
4444 | unsigned Depth) { |
4445 | // Limit recursion. |
4446 | if (Depth >= SelectionDAG::MaxRecursionDepth) |
4447 | return false; |
4448 | switch (Src.getOpcode()) { |
4449 | case ISD::SETCC: |
4450 | case ISD::TRUNCATE: |
4451 | return Src.getOperand(i: 0).getValueSizeInBits() == Size; |
4452 | case ISD::FREEZE: |
4453 | return checkBitcastSrcVectorSize(Src: Src.getOperand(i: 0), Size, Depth: Depth + 1); |
4454 | case ISD::AND: |
4455 | case ISD::XOR: |
4456 | case ISD::OR: |
4457 | return checkBitcastSrcVectorSize(Src: Src.getOperand(i: 0), Size, Depth: Depth + 1) && |
4458 | checkBitcastSrcVectorSize(Src: Src.getOperand(i: 1), Size, Depth: Depth + 1); |
4459 | case ISD::SELECT: |
4460 | case ISD::VSELECT: |
4461 | return Src.getOperand(i: 0).getScalarValueSizeInBits() == 1 && |
4462 | checkBitcastSrcVectorSize(Src: Src.getOperand(i: 1), Size, Depth: Depth + 1) && |
4463 | checkBitcastSrcVectorSize(Src: Src.getOperand(i: 2), Size, Depth: Depth + 1); |
4464 | case ISD::BUILD_VECTOR: |
4465 | return ISD::isBuildVectorAllZeros(N: Src.getNode()) || |
4466 | ISD::isBuildVectorAllOnes(N: Src.getNode()); |
4467 | } |
4468 | return false; |
4469 | } |
4470 | |
4471 | // Helper to push sign extension of vXi1 SETCC result through bitops. |
4472 | static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, |
4473 | SDValue Src, const SDLoc &DL) { |
4474 | switch (Src.getOpcode()) { |
4475 | case ISD::SETCC: |
4476 | case ISD::FREEZE: |
4477 | case ISD::TRUNCATE: |
4478 | case ISD::BUILD_VECTOR: |
4479 | return DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: SExtVT, Operand: Src); |
4480 | case ISD::AND: |
4481 | case ISD::XOR: |
4482 | case ISD::OR: |
4483 | return DAG.getNode( |
4484 | Opcode: Src.getOpcode(), DL, VT: SExtVT, |
4485 | N1: signExtendBitcastSrcVector(DAG, SExtVT, Src: Src.getOperand(i: 0), DL), |
4486 | N2: signExtendBitcastSrcVector(DAG, SExtVT, Src: Src.getOperand(i: 1), DL)); |
4487 | case ISD::SELECT: |
4488 | case ISD::VSELECT: |
4489 | return DAG.getSelect( |
4490 | DL, VT: SExtVT, Cond: Src.getOperand(i: 0), |
4491 | LHS: signExtendBitcastSrcVector(DAG, SExtVT, Src: Src.getOperand(i: 1), DL), |
4492 | RHS: signExtendBitcastSrcVector(DAG, SExtVT, Src: Src.getOperand(i: 2), DL)); |
4493 | } |
4494 | llvm_unreachable("Unexpected node type for vXi1 sign extension" ); |
4495 | } |
4496 | |
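// Combine a bitcast of vXi1 into an integer mask value. Where possible the
// comparison feeding the bitcast is matched directly to a
// [X]VMSK{EQZ,GEZ,LTZ,NEZ} node; otherwise the vXi1 value is sign-extended
// to a legal vector type and [X]VMSKLTZ collects the sign bits. For example,
// a v8i1 produced by comparing two v8i32 vectors is sign-extended back to
// v8i32 so a single XVMSKLTZ can produce the 8-bit mask.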
4497 | static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG, |
4498 | TargetLowering::DAGCombinerInfo &DCI, |
4499 | const LoongArchSubtarget &Subtarget) { |
4500 | SDLoc DL(N); |
4501 | EVT VT = N->getValueType(ResNo: 0); |
4502 | SDValue Src = N->getOperand(Num: 0); |
4503 | EVT SrcVT = Src.getValueType(); |
4504 | |
4505 | if (!DCI.isBeforeLegalizeOps()) |
4506 | return SDValue(); |
4507 | |
4508 | if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1) |
4509 | return SDValue(); |
4510 | |
4511 | unsigned Opc = ISD::DELETED_NODE; |
// Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible.
4513 | if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) { |
4514 | bool UseLASX; |
4515 | EVT CmpVT = Src.getOperand(i: 0).getValueType(); |
4516 | EVT EltVT = CmpVT.getVectorElementType(); |
4517 | |
4518 | if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() <= 128) |
4519 | UseLASX = false; |
4520 | else if (Subtarget.has32S() && Subtarget.hasExtLASX() && |
4521 | CmpVT.getSizeInBits() <= 256) |
4522 | UseLASX = true; |
4523 | else |
4524 | return SDValue(); |
4525 | |
4526 | SDValue SrcN1 = Src.getOperand(i: 1); |
4527 | switch (cast<CondCodeSDNode>(Val: Src.getOperand(i: 2))->get()) { |
4528 | default: |
4529 | break; |
4530 | case ISD::SETEQ: |
4531 | // x == 0 => not (vmsknez.b x) |
4532 | if (ISD::isBuildVectorAllZeros(N: SrcN1.getNode()) && EltVT == MVT::i8) |
4533 | Opc = UseLASX ? LoongArchISD::XVMSKEQZ : LoongArchISD::VMSKEQZ; |
4534 | break; |
4535 | case ISD::SETGT: |
4536 | // x > -1 => vmskgez.b x |
4537 | if (ISD::isBuildVectorAllOnes(N: SrcN1.getNode()) && EltVT == MVT::i8) |
4538 | Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ; |
4539 | break; |
4540 | case ISD::SETGE: |
4541 | // x >= 0 => vmskgez.b x |
4542 | if (ISD::isBuildVectorAllZeros(N: SrcN1.getNode()) && EltVT == MVT::i8) |
4543 | Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ; |
4544 | break; |
4545 | case ISD::SETLT: |
4546 | // x < 0 => vmskltz.{b,h,w,d} x |
4547 | if (ISD::isBuildVectorAllZeros(N: SrcN1.getNode()) && |
4548 | (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 || |
4549 | EltVT == MVT::i64)) |
4550 | Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ; |
4551 | break; |
4552 | case ISD::SETLE: |
4553 | // x <= -1 => vmskltz.{b,h,w,d} x |
4554 | if (ISD::isBuildVectorAllOnes(N: SrcN1.getNode()) && |
4555 | (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 || |
4556 | EltVT == MVT::i64)) |
4557 | Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ; |
4558 | break; |
4559 | case ISD::SETNE: |
4560 | // x != 0 => vmsknez.b x |
4561 | if (ISD::isBuildVectorAllZeros(N: SrcN1.getNode()) && EltVT == MVT::i8) |
4562 | Opc = UseLASX ? LoongArchISD::XVMSKNEZ : LoongArchISD::VMSKNEZ; |
4563 | break; |
4564 | } |
4565 | } |
4566 | |
4567 | // Generate vXi1 using [X]VMSKLTZ |
4568 | if (Opc == ISD::DELETED_NODE) { |
4569 | MVT SExtVT; |
4570 | bool UseLASX = false; |
4571 | bool PropagateSExt = false; |
4572 | switch (SrcVT.getSimpleVT().SimpleTy) { |
4573 | default: |
4574 | return SDValue(); |
4575 | case MVT::v2i1: |
4576 | SExtVT = MVT::v2i64; |
4577 | break; |
4578 | case MVT::v4i1: |
4579 | SExtVT = MVT::v4i32; |
4580 | if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, Size: 256, Depth: 0)) { |
4581 | SExtVT = MVT::v4i64; |
4582 | UseLASX = true; |
4583 | PropagateSExt = true; |
4584 | } |
4585 | break; |
4586 | case MVT::v8i1: |
4587 | SExtVT = MVT::v8i16; |
4588 | if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, Size: 256, Depth: 0)) { |
4589 | SExtVT = MVT::v8i32; |
4590 | UseLASX = true; |
4591 | PropagateSExt = true; |
4592 | } |
4593 | break; |
4594 | case MVT::v16i1: |
4595 | SExtVT = MVT::v16i8; |
4596 | if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, Size: 256, Depth: 0)) { |
4597 | SExtVT = MVT::v16i16; |
4598 | UseLASX = true; |
4599 | PropagateSExt = true; |
4600 | } |
4601 | break; |
4602 | case MVT::v32i1: |
4603 | SExtVT = MVT::v32i8; |
4604 | UseLASX = true; |
4605 | break; |
4606 | }; |
if (UseLASX && !(Subtarget.has32S() && Subtarget.hasExtLASX()))
4608 | return SDValue(); |
4609 | Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL) |
4610 | : DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: SExtVT, Operand: Src); |
4611 | Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ; |
4612 | } else { |
4613 | Src = Src.getOperand(i: 0); |
4614 | } |
4615 | |
4616 | SDValue V = DAG.getNode(Opcode: Opc, DL, VT: MVT::i64, Operand: Src); |
4617 | EVT T = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: SrcVT.getVectorNumElements()); |
4618 | V = DAG.getZExtOrTrunc(Op: V, DL, VT: T); |
4619 | return DAG.getBitcast(VT, V); |
4620 | } |
4621 | |
4622 | static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, |
4623 | TargetLowering::DAGCombinerInfo &DCI, |
4624 | const LoongArchSubtarget &Subtarget) { |
4625 | MVT GRLenVT = Subtarget.getGRLenVT(); |
4626 | EVT ValTy = N->getValueType(ResNo: 0); |
4627 | SDValue N0 = N->getOperand(Num: 0), N1 = N->getOperand(Num: 1); |
4628 | ConstantSDNode *CN0, *CN1; |
4629 | SDLoc DL(N); |
4630 | unsigned ValBits = ValTy.getSizeInBits(); |
4631 | unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1; |
4632 | unsigned Shamt; |
4633 | bool SwapAndRetried = false; |
4634 | |
4635 | // BSTRPICK requires the 32S feature. |
4636 | if (!Subtarget.has32S()) |
4637 | return SDValue(); |
4638 | |
4639 | if (DCI.isBeforeLegalizeOps()) |
4640 | return SDValue(); |
4641 | |
4642 | if (ValBits != 32 && ValBits != 64) |
4643 | return SDValue(); |
4644 | |
4645 | Retry: |
4646 | // 1st pattern to match BSTRINS: |
4647 | // R = or (and X, mask0), (and (shl Y, lsb), mask1) |
4648 | // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1 |
4649 | // => |
4650 | // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1) |
4651 | if (N0.getOpcode() == ISD::AND && |
4652 | (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) && |
4653 | isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) && |
4654 | N1.getOpcode() == ISD::AND && N1.getOperand(i: 0).getOpcode() == ISD::SHL && |
4655 | (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) && |
4656 | isShiftedMask_64(Value: CN1->getZExtValue(), MaskIdx&: MaskIdx1, MaskLen&: MaskLen1) && |
4657 | MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 && |
4658 | (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) && |
4659 | (Shamt = CN1->getZExtValue()) == MaskIdx0 && |
4660 | (MaskIdx0 + MaskLen0 <= ValBits)) { |
4661 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n" ); |
4662 | return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0), |
4663 | N2: N1.getOperand(i: 0).getOperand(i: 0), |
4664 | N3: DAG.getConstant(Val: (MaskIdx0 + MaskLen0 - 1), DL, VT: GRLenVT), |
4665 | N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT)); |
4666 | } |
4667 | |
4668 | // 2nd pattern to match BSTRINS: |
4669 | // R = or (and X, mask0), (shl (and Y, mask1), lsb) |
4670 | // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb) |
4671 | // => |
4672 | // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1) |
4673 | if (N0.getOpcode() == ISD::AND && |
4674 | (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) && |
4675 | isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) && |
4676 | N1.getOpcode() == ISD::SHL && N1.getOperand(i: 0).getOpcode() == ISD::AND && |
4677 | (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) && |
4678 | (Shamt = CN1->getZExtValue()) == MaskIdx0 && |
4679 | (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) && |
4680 | isShiftedMask_64(Value: CN1->getZExtValue(), MaskIdx&: MaskIdx1, MaskLen&: MaskLen1) && |
4681 | MaskLen0 == MaskLen1 && MaskIdx1 == 0 && |
4682 | (MaskIdx0 + MaskLen0 <= ValBits)) { |
4683 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n" ); |
4684 | return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0), |
4685 | N2: N1.getOperand(i: 0).getOperand(i: 0), |
4686 | N3: DAG.getConstant(Val: (MaskIdx0 + MaskLen0 - 1), DL, VT: GRLenVT), |
4687 | N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT)); |
4688 | } |
4689 | |
4690 | // 3rd pattern to match BSTRINS: |
4691 | // R = or (and X, mask0), (and Y, mask1) |
4692 | // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0 |
4693 | // => |
4694 | // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb |
4695 | // where msb = lsb + size - 1 |
4696 | if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND && |
4697 | (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) && |
4698 | isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) && |
4699 | (MaskIdx0 + MaskLen0 <= 64) && |
4700 | (CN1 = dyn_cast<ConstantSDNode>(Val: N1->getOperand(Num: 1))) && |
4701 | (CN1->getSExtValue() & CN0->getSExtValue()) == 0) { |
4702 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n" ); |
4703 | return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0), |
4704 | N2: DAG.getNode(Opcode: ISD::SRL, DL, VT: N1->getValueType(ResNo: 0), N1, |
4705 | N2: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT)), |
4706 | N3: DAG.getConstant(Val: ValBits == 32 |
4707 | ? (MaskIdx0 + (MaskLen0 & 31) - 1) |
4708 | : (MaskIdx0 + MaskLen0 - 1), |
4709 | DL, VT: GRLenVT), |
4710 | N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT)); |
4711 | } |
4712 | |
4713 | // 4th pattern to match BSTRINS: |
4714 | // R = or (and X, mask), (shl Y, shamt) |
4715 | // where mask = (2**shamt - 1) |
4716 | // => |
4717 | // R = BSTRINS X, Y, ValBits - 1, shamt |
4718 | // where ValBits = 32 or 64 |
4719 | if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL && |
4720 | (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) && |
4721 | isShiftedMask_64(Value: CN0->getZExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) && |
4722 | MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) && |
4723 | (Shamt = CN1->getZExtValue()) == MaskLen0 && |
4724 | (MaskIdx0 + MaskLen0 <= ValBits)) { |
4725 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n" ); |
4726 | return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0), |
4727 | N2: N1.getOperand(i: 0), |
4728 | N3: DAG.getConstant(Val: (ValBits - 1), DL, VT: GRLenVT), |
4729 | N4: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT)); |
4730 | } |
4731 | |
4732 | // 5th pattern to match BSTRINS: |
4733 | // R = or (and X, mask), const |
4734 | // where ~mask = (2**size - 1) << lsb, mask & const = 0 |
4735 | // => |
4736 | // R = BSTRINS X, (const >> lsb), msb, lsb |
4737 | // where msb = lsb + size - 1 |
4738 | if (N0.getOpcode() == ISD::AND && |
4739 | (CN0 = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) && |
4740 | isShiftedMask_64(Value: ~CN0->getSExtValue(), MaskIdx&: MaskIdx0, MaskLen&: MaskLen0) && |
4741 | (CN1 = dyn_cast<ConstantSDNode>(Val&: N1)) && |
4742 | (CN1->getSExtValue() & CN0->getSExtValue()) == 0) { |
4743 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n" ); |
4744 | return DAG.getNode( |
4745 | Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0.getOperand(i: 0), |
4746 | N2: DAG.getSignedConstant(Val: CN1->getSExtValue() >> MaskIdx0, DL, VT: ValTy), |
4747 | N3: DAG.getConstant(Val: ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1) |
4748 | : (MaskIdx0 + MaskLen0 - 1), |
4749 | DL, VT: GRLenVT), |
4750 | N4: DAG.getConstant(Val: MaskIdx0, DL, VT: GRLenVT)); |
4751 | } |
4752 | |
4753 | // 6th pattern. |
4754 | // a = b | ((c & mask) << shamt), where all positions in b to be overwritten |
4755 | // by the incoming bits are known to be zero. |
4756 | // => |
4757 | // a = BSTRINS b, c, shamt + MaskLen - 1, shamt |
4758 | // |
// Note that the 1st pattern is a special case of the 6th, i.e. the 6th
4760 | // pattern is more common than the 1st. So we put the 1st before the 6th in |
4761 | // order to match as many nodes as possible. |
4762 | ConstantSDNode *CNMask, *CNShamt; |
4763 | unsigned MaskIdx, MaskLen; |
4764 | if (N1.getOpcode() == ISD::SHL && N1.getOperand(i: 0).getOpcode() == ISD::AND && |
4765 | (CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) && |
4766 | isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen) && |
4767 | MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) && |
4768 | CNShamt->getZExtValue() + MaskLen <= ValBits) { |
4769 | Shamt = CNShamt->getZExtValue(); |
4770 | APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt); |
4771 | if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) { |
4772 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n" ); |
4773 | return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0, |
4774 | N2: N1.getOperand(i: 0).getOperand(i: 0), |
4775 | N3: DAG.getConstant(Val: Shamt + MaskLen - 1, DL, VT: GRLenVT), |
4776 | N4: DAG.getConstant(Val: Shamt, DL, VT: GRLenVT)); |
4777 | } |
4778 | } |
4779 | |
4780 | // 7th pattern. |
4781 | // a = b | ((c << shamt) & shifted_mask), where all positions in b to be |
4782 | // overwritten by the incoming bits are known to be zero. |
4783 | // => |
4784 | // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx |
4785 | // |
4786 | // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd |
4787 | // before the 7th in order to match as many nodes as possible. |
4788 | if (N1.getOpcode() == ISD::AND && |
4789 | (CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) && |
4790 | isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen) && |
4791 | N1.getOperand(i: 0).getOpcode() == ISD::SHL && |
4792 | (CNShamt = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 0).getOperand(i: 1))) && |
4793 | CNShamt->getZExtValue() == MaskIdx) { |
4794 | APInt ShMask(ValBits, CNMask->getZExtValue()); |
4795 | if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) { |
4796 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n" ); |
4797 | return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0, |
4798 | N2: N1.getOperand(i: 0).getOperand(i: 0), |
4799 | N3: DAG.getConstant(Val: MaskIdx + MaskLen - 1, DL, VT: GRLenVT), |
4800 | N4: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT)); |
4801 | } |
4802 | } |
4803 | |
4804 | // (or a, b) and (or b, a) are equivalent, so swap the operands and retry. |
4805 | if (!SwapAndRetried) { |
4806 | std::swap(a&: N0, b&: N1); |
4807 | SwapAndRetried = true; |
4808 | goto Retry; |
4809 | } |
4810 | |
4811 | SwapAndRetried = false; |
4812 | Retry2: |
4813 | // 8th pattern. |
4814 | // a = b | (c & shifted_mask), where all positions in b to be overwritten by |
4815 | // the incoming bits are known to be zero. |
4816 | // => |
4817 | // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx |
4818 | // |
4819 | // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So |
// we put it here in order to match as many nodes as possible or generate
// fewer instructions.
4822 | if (N1.getOpcode() == ISD::AND && |
4823 | (CNMask = dyn_cast<ConstantSDNode>(Val: N1.getOperand(i: 1))) && |
4824 | isShiftedMask_64(Value: CNMask->getZExtValue(), MaskIdx, MaskLen)) { |
4825 | APInt ShMask(ValBits, CNMask->getZExtValue()); |
4826 | if (ShMask.isSubsetOf(RHS: DAG.computeKnownBits(Op: N0).Zero)) { |
4827 | LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n" ); |
4828 | return DAG.getNode(Opcode: LoongArchISD::BSTRINS, DL, VT: ValTy, N1: N0, |
4829 | N2: DAG.getNode(Opcode: ISD::SRL, DL, VT: N1->getValueType(ResNo: 0), |
4830 | N1: N1->getOperand(Num: 0), |
4831 | N2: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT)), |
4832 | N3: DAG.getConstant(Val: MaskIdx + MaskLen - 1, DL, VT: GRLenVT), |
4833 | N4: DAG.getConstant(Val: MaskIdx, DL, VT: GRLenVT)); |
4834 | } |
4835 | } |
4836 | // Swap N0/N1 and retry. |
4837 | if (!SwapAndRetried) { |
4838 | std::swap(a&: N0, b&: N1); |
4839 | SwapAndRetried = true; |
4840 | goto Retry2; |
4841 | } |
4842 | |
4843 | return SDValue(); |
4844 | } |
4845 | |
4846 | static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) { |
4847 | ExtType = ISD::NON_EXTLOAD; |
4848 | |
4849 | switch (V.getNode()->getOpcode()) { |
4850 | case ISD::LOAD: { |
4851 | LoadSDNode *LoadNode = cast<LoadSDNode>(Val: V.getNode()); |
4852 | if ((LoadNode->getMemoryVT() == MVT::i8) || |
4853 | (LoadNode->getMemoryVT() == MVT::i16)) { |
4854 | ExtType = LoadNode->getExtensionType(); |
4855 | return true; |
4856 | } |
4857 | return false; |
4858 | } |
4859 | case ISD::AssertSext: { |
4860 | VTSDNode *TypeNode = cast<VTSDNode>(Val: V.getNode()->getOperand(Num: 1)); |
4861 | if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) { |
4862 | ExtType = ISD::SEXTLOAD; |
4863 | return true; |
4864 | } |
4865 | return false; |
4866 | } |
4867 | case ISD::AssertZext: { |
4868 | VTSDNode *TypeNode = cast<VTSDNode>(Val: V.getNode()->getOperand(Num: 1)); |
4869 | if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) { |
4870 | ExtType = ISD::ZEXTLOAD; |
4871 | return true; |
4872 | } |
4873 | return false; |
4874 | } |
4875 | default: |
4876 | return false; |
4877 | } |
4878 | |
4879 | return false; |
4880 | } |
4881 | |
4882 | // Eliminate redundant truncation and zero-extension nodes. |
4883 | // * Case 1: |
4884 | // +------------+ +------------+ +------------+ |
4885 | // | Input1 | | Input2 | | CC | |
4886 | // +------------+ +------------+ +------------+ |
4887 | // | | | |
4888 | // V V +----+ |
4889 | // +------------+ +------------+ | |
4890 | // | TRUNCATE | | TRUNCATE | | |
4891 | // +------------+ +------------+ | |
4892 | // | | | |
4893 | // V V | |
4894 | // +------------+ +------------+ | |
4895 | // | ZERO_EXT | | ZERO_EXT | | |
4896 | // +------------+ +------------+ | |
4897 | // | | | |
4898 | // | +-------------+ | |
4899 | // V V | | |
4900 | // +----------------+ | | |
4901 | // | AND | | | |
4902 | // +----------------+ | | |
4903 | // | | | |
4904 | // +---------------+ | | |
4905 | // | | | |
4906 | // V V V |
4907 | // +-------------+ |
4908 | // | CMP | |
4909 | // +-------------+ |
4910 | // * Case 2: |
4911 | // +------------+ +------------+ +-------------+ +------------+ +------------+ |
4912 | // | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC | |
4913 | // +------------+ +------------+ +-------------+ +------------+ +------------+ |
4914 | // | | | | | |
4915 | // V | | | | |
4916 | // +------------+ | | | | |
4917 | // | XOR |<---------------------+ | | |
4918 | // +------------+ | | | |
4919 | // | | | | |
4920 | // V V +---------------+ | |
4921 | // +------------+ +------------+ | | |
4922 | // | TRUNCATE | | TRUNCATE | | +-------------------------+ |
4923 | // +------------+ +------------+ | | |
4924 | // | | | | |
4925 | // V V | | |
4926 | // +------------+ +------------+ | | |
4927 | // | ZERO_EXT | | ZERO_EXT | | | |
4928 | // +------------+ +------------+ | | |
4929 | // | | | | |
4930 | // V V | | |
4931 | // +----------------+ | | |
4932 | // | AND | | | |
4933 | // +----------------+ | | |
4934 | // | | | |
4935 | // +---------------+ | | |
4936 | // | | | |
4937 | // V V V |
4938 | // +-------------+ |
4939 | // | CMP | |
4940 | // +-------------+ |
4941 | static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG, |
4942 | TargetLowering::DAGCombinerInfo &DCI, |
4943 | const LoongArchSubtarget &Subtarget) { |
4944 | ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get(); |
4945 | |
4946 | SDNode *AndNode = N->getOperand(Num: 0).getNode(); |
4947 | if (AndNode->getOpcode() != ISD::AND) |
4948 | return SDValue(); |
4949 | |
4950 | SDValue AndInputValue2 = AndNode->getOperand(Num: 1); |
4951 | if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND) |
4952 | return SDValue(); |
4953 | |
4954 | SDValue CmpInputValue = N->getOperand(Num: 1); |
4955 | SDValue AndInputValue1 = AndNode->getOperand(Num: 0); |
4956 | if (AndInputValue1.getOpcode() == ISD::XOR) { |
4957 | if (CC != ISD::SETEQ && CC != ISD::SETNE) |
4958 | return SDValue(); |
4959 | ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val: AndInputValue1.getOperand(i: 1)); |
4960 | if (!CN || CN->getSExtValue() != -1) |
4961 | return SDValue(); |
4962 | CN = dyn_cast<ConstantSDNode>(Val&: CmpInputValue); |
4963 | if (!CN || CN->getSExtValue() != 0) |
4964 | return SDValue(); |
4965 | AndInputValue1 = AndInputValue1.getOperand(i: 0); |
4966 | if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND) |
4967 | return SDValue(); |
4968 | } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) { |
4969 | if (AndInputValue2 != CmpInputValue) |
4970 | return SDValue(); |
4971 | } else { |
4972 | return SDValue(); |
4973 | } |
4974 | |
4975 | SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(Num: 0); |
4976 | if (TruncValue1.getOpcode() != ISD::TRUNCATE) |
4977 | return SDValue(); |
4978 | |
4979 | SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(Num: 0); |
4980 | if (TruncValue2.getOpcode() != ISD::TRUNCATE) |
4981 | return SDValue(); |
4982 | |
4983 | SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(Num: 0); |
4984 | SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(Num: 0); |
4985 | ISD::LoadExtType ExtType1; |
4986 | ISD::LoadExtType ExtType2; |
4987 | |
4988 | if (!checkValueWidth(V: TruncInputValue1, ExtType&: ExtType1) || |
4989 | !checkValueWidth(V: TruncInputValue2, ExtType&: ExtType2)) |
4990 | return SDValue(); |
4991 | |
4992 | if (TruncInputValue1->getValueType(ResNo: 0) != TruncInputValue2->getValueType(ResNo: 0) || |
4993 | AndNode->getValueType(ResNo: 0) != TruncInputValue1->getValueType(ResNo: 0)) |
4994 | return SDValue(); |
4995 | |
4996 | if ((ExtType2 != ISD::ZEXTLOAD) && |
4997 | ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD))) |
4998 | return SDValue(); |
4999 | |
// These truncation and zero-extension nodes are not necessary; remove them.
5001 | SDValue NewAnd = DAG.getNode(Opcode: ISD::AND, DL: SDLoc(N), VT: AndNode->getValueType(ResNo: 0), |
5002 | N1: TruncInputValue1, N2: TruncInputValue2); |
5003 | SDValue NewSetCC = |
5004 | DAG.getSetCC(DL: SDLoc(N), VT: N->getValueType(ResNo: 0), LHS: NewAnd, RHS: TruncInputValue2, Cond: CC); |
5005 | DAG.ReplaceAllUsesWith(From: N, To: NewSetCC.getNode()); |
5006 | return SDValue(N, 0); |
5007 | } |
5008 | |
5009 | // Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b. |
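// This holds because byte-reversing a 32-bit word and then bit-reversing it
// leaves the byte order unchanged while reversing the bits within each byte,
// which is exactly what bitrev.4b computes in a single operation.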
5010 | static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, |
5011 | TargetLowering::DAGCombinerInfo &DCI, |
5012 | const LoongArchSubtarget &Subtarget) { |
5013 | if (DCI.isBeforeLegalizeOps()) |
5014 | return SDValue(); |
5015 | |
5016 | SDValue Src = N->getOperand(Num: 0); |
5017 | if (Src.getOpcode() != LoongArchISD::REVB_2W) |
5018 | return SDValue(); |
5019 | |
5020 | return DAG.getNode(Opcode: LoongArchISD::BITREV_4B, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), |
5021 | Operand: Src.getOperand(i: 0)); |
5022 | } |
5023 | |
5024 | template <unsigned N> |
5025 | static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, |
5026 | SelectionDAG &DAG, |
5027 | const LoongArchSubtarget &Subtarget, |
5028 | bool IsSigned = false) { |
5029 | SDLoc DL(Node); |
5030 | auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: ImmOp)); |
5031 | // Check the ImmArg. |
5032 | if ((IsSigned && !isInt<N>(CImm->getSExtValue())) || |
5033 | (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) { |
5034 | DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) + |
5035 | ": argument out of range." ); |
5036 | return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: Subtarget.getGRLenVT()); |
5037 | } |
5038 | return DAG.getConstant(Val: CImm->getZExtValue(), DL, VT: Subtarget.getGRLenVT()); |
5039 | } |
5040 | |
5041 | template <unsigned N> |
5042 | static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, |
5043 | SelectionDAG &DAG, bool IsSigned = false) { |
5044 | SDLoc DL(Node); |
5045 | EVT ResTy = Node->getValueType(ResNo: 0); |
5046 | auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: ImmOp)); |
5047 | |
5048 | // Check the ImmArg. |
5049 | if ((IsSigned && !isInt<N>(CImm->getSExtValue())) || |
5050 | (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) { |
5051 | DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) + |
5052 | ": argument out of range." ); |
5053 | return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy); |
5054 | } |
5055 | return DAG.getConstant( |
5056 | Val: APInt(ResTy.getScalarType().getSizeInBits(), |
5057 | IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned), |
5058 | DL, VT: ResTy); |
5059 | } |
5060 | |
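// Mask each element of the shift-amount vector to the element width of the
// result type, i.e. compute (vec & (EltBits - 1)), mirroring the modulo
// semantics of the vsll/vsrl/vsra family of instructions.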
5061 | static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) { |
5062 | SDLoc DL(Node); |
5063 | EVT ResTy = Node->getValueType(ResNo: 0); |
5064 | SDValue Vec = Node->getOperand(Num: 2); |
5065 | SDValue Mask = DAG.getConstant(Val: Vec.getScalarValueSizeInBits() - 1, DL, VT: ResTy); |
5066 | return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Vec, N2: Mask); |
5067 | } |
5068 | |
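// Lower a vbitclr-style intrinsic to generic nodes:
//   x & ~(1 << (n % EltBits))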
5069 | static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) { |
5070 | SDLoc DL(Node); |
5071 | EVT ResTy = Node->getValueType(ResNo: 0); |
5072 | SDValue One = DAG.getConstant(Val: 1, DL, VT: ResTy); |
5073 | SDValue Bit = |
5074 | DAG.getNode(Opcode: ISD::SHL, DL, VT: ResTy, N1: One, N2: truncateVecElts(Node, DAG)); |
5075 | |
5076 | return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Node->getOperand(Num: 1), |
5077 | N2: DAG.getNOT(DL, Val: Bit, VT: ResTy)); |
5078 | } |
5079 | |
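// Lower a vbitclri-style intrinsic (immediate bit index) to generic nodes:
//   x & ~(1 << imm)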
5080 | template <unsigned N> |
5081 | static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) { |
5082 | SDLoc DL(Node); |
5083 | EVT ResTy = Node->getValueType(ResNo: 0); |
5084 | auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2)); |
5085 | // Check the unsigned ImmArg. |
5086 | if (!isUInt<N>(CImm->getZExtValue())) { |
5087 | DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) + |
5088 | ": argument out of range." ); |
5089 | return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy); |
5090 | } |
5091 | |
5092 | APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); |
5093 | SDValue Mask = DAG.getConstant(Val: ~BitImm, DL, VT: ResTy); |
5094 | |
5095 | return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Node->getOperand(Num: 1), N2: Mask); |
5096 | } |
5097 | |
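// Lower a vbitseti-style intrinsic (immediate bit index) to generic nodes:
//   x | (1 << imm)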
5098 | template <unsigned N> |
5099 | static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) { |
5100 | SDLoc DL(Node); |
5101 | EVT ResTy = Node->getValueType(ResNo: 0); |
5102 | auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2)); |
5103 | // Check the unsigned ImmArg. |
5104 | if (!isUInt<N>(CImm->getZExtValue())) { |
5105 | DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) + |
5106 | ": argument out of range." ); |
5107 | return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy); |
5108 | } |
5109 | |
5110 | APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); |
5111 | SDValue BitImm = DAG.getConstant(Val: Imm, DL, VT: ResTy); |
5112 | return DAG.getNode(Opcode: ISD::OR, DL, VT: ResTy, N1: Node->getOperand(Num: 1), N2: BitImm); |
5113 | } |
5114 | |
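// Lower a vbitrevi-style intrinsic (immediate bit index) to generic nodes:
//   x ^ (1 << imm)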
5115 | template <unsigned N> |
5116 | static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) { |
5117 | SDLoc DL(Node); |
5118 | EVT ResTy = Node->getValueType(ResNo: 0); |
5119 | auto *CImm = cast<ConstantSDNode>(Val: Node->getOperand(Num: 2)); |
5120 | // Check the unsigned ImmArg. |
5121 | if (!isUInt<N>(CImm->getZExtValue())) { |
5122 | DAG.getContext()->emitError(ErrorStr: Node->getOperationName(G: 0) + |
5123 | ": argument out of range." ); |
5124 | return DAG.getNode(Opcode: ISD::UNDEF, DL, VT: ResTy); |
5125 | } |
5126 | |
5127 | APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); |
5128 | SDValue BitImm = DAG.getConstant(Val: Imm, DL, VT: ResTy); |
5129 | return DAG.getNode(Opcode: ISD::XOR, DL, VT: ResTy, N1: Node->getOperand(Num: 1), N2: BitImm); |
5130 | } |
5131 | |
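// Replace element-wise LSX/LASX intrinsics with the equivalent generic ISD
// nodes so that later DAG combines and legalization can optimize them
// further.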
5132 | static SDValue |
5133 | performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, |
5134 | TargetLowering::DAGCombinerInfo &DCI, |
5135 | const LoongArchSubtarget &Subtarget) { |
5136 | SDLoc DL(N); |
5137 | switch (N->getConstantOperandVal(Num: 0)) { |
5138 | default: |
5139 | break; |
5140 | case Intrinsic::loongarch_lsx_vadd_b: |
5141 | case Intrinsic::loongarch_lsx_vadd_h: |
5142 | case Intrinsic::loongarch_lsx_vadd_w: |
5143 | case Intrinsic::loongarch_lsx_vadd_d: |
5144 | case Intrinsic::loongarch_lasx_xvadd_b: |
5145 | case Intrinsic::loongarch_lasx_xvadd_h: |
5146 | case Intrinsic::loongarch_lasx_xvadd_w: |
5147 | case Intrinsic::loongarch_lasx_xvadd_d: |
5148 | return DAG.getNode(Opcode: ISD::ADD, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5149 | N2: N->getOperand(Num: 2)); |
5150 | case Intrinsic::loongarch_lsx_vaddi_bu: |
5151 | case Intrinsic::loongarch_lsx_vaddi_hu: |
5152 | case Intrinsic::loongarch_lsx_vaddi_wu: |
5153 | case Intrinsic::loongarch_lsx_vaddi_du: |
5154 | case Intrinsic::loongarch_lasx_xvaddi_bu: |
5155 | case Intrinsic::loongarch_lasx_xvaddi_hu: |
5156 | case Intrinsic::loongarch_lasx_xvaddi_wu: |
5157 | case Intrinsic::loongarch_lasx_xvaddi_du: |
5158 | return DAG.getNode(Opcode: ISD::ADD, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5159 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
5160 | case Intrinsic::loongarch_lsx_vsub_b: |
5161 | case Intrinsic::loongarch_lsx_vsub_h: |
5162 | case Intrinsic::loongarch_lsx_vsub_w: |
5163 | case Intrinsic::loongarch_lsx_vsub_d: |
5164 | case Intrinsic::loongarch_lasx_xvsub_b: |
5165 | case Intrinsic::loongarch_lasx_xvsub_h: |
5166 | case Intrinsic::loongarch_lasx_xvsub_w: |
5167 | case Intrinsic::loongarch_lasx_xvsub_d: |
5168 | return DAG.getNode(Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5169 | N2: N->getOperand(Num: 2)); |
5170 | case Intrinsic::loongarch_lsx_vsubi_bu: |
5171 | case Intrinsic::loongarch_lsx_vsubi_hu: |
5172 | case Intrinsic::loongarch_lsx_vsubi_wu: |
5173 | case Intrinsic::loongarch_lsx_vsubi_du: |
5174 | case Intrinsic::loongarch_lasx_xvsubi_bu: |
5175 | case Intrinsic::loongarch_lasx_xvsubi_hu: |
5176 | case Intrinsic::loongarch_lasx_xvsubi_wu: |
5177 | case Intrinsic::loongarch_lasx_xvsubi_du: |
5178 | return DAG.getNode(Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5179 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
5180 | case Intrinsic::loongarch_lsx_vneg_b: |
5181 | case Intrinsic::loongarch_lsx_vneg_h: |
5182 | case Intrinsic::loongarch_lsx_vneg_w: |
5183 | case Intrinsic::loongarch_lsx_vneg_d: |
5184 | case Intrinsic::loongarch_lasx_xvneg_b: |
5185 | case Intrinsic::loongarch_lasx_xvneg_h: |
5186 | case Intrinsic::loongarch_lasx_xvneg_w: |
5187 | case Intrinsic::loongarch_lasx_xvneg_d: |
5188 | return DAG.getNode( |
5189 | Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: 0), |
5190 | N1: DAG.getConstant( |
5191 | Val: APInt(N->getValueType(ResNo: 0).getScalarType().getSizeInBits(), 0, |
5192 | /*isSigned=*/true), |
5193 | DL: SDLoc(N), VT: N->getValueType(ResNo: 0)), |
5194 | N2: N->getOperand(Num: 1)); |
5195 | case Intrinsic::loongarch_lsx_vmax_b: |
5196 | case Intrinsic::loongarch_lsx_vmax_h: |
5197 | case Intrinsic::loongarch_lsx_vmax_w: |
5198 | case Intrinsic::loongarch_lsx_vmax_d: |
5199 | case Intrinsic::loongarch_lasx_xvmax_b: |
5200 | case Intrinsic::loongarch_lasx_xvmax_h: |
5201 | case Intrinsic::loongarch_lasx_xvmax_w: |
5202 | case Intrinsic::loongarch_lasx_xvmax_d: |
5203 | return DAG.getNode(Opcode: ISD::SMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5204 | N2: N->getOperand(Num: 2)); |
5205 | case Intrinsic::loongarch_lsx_vmax_bu: |
5206 | case Intrinsic::loongarch_lsx_vmax_hu: |
5207 | case Intrinsic::loongarch_lsx_vmax_wu: |
5208 | case Intrinsic::loongarch_lsx_vmax_du: |
5209 | case Intrinsic::loongarch_lasx_xvmax_bu: |
5210 | case Intrinsic::loongarch_lasx_xvmax_hu: |
5211 | case Intrinsic::loongarch_lasx_xvmax_wu: |
5212 | case Intrinsic::loongarch_lasx_xvmax_du: |
5213 | return DAG.getNode(Opcode: ISD::UMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5214 | N2: N->getOperand(Num: 2)); |
5215 | case Intrinsic::loongarch_lsx_vmaxi_b: |
5216 | case Intrinsic::loongarch_lsx_vmaxi_h: |
5217 | case Intrinsic::loongarch_lsx_vmaxi_w: |
5218 | case Intrinsic::loongarch_lsx_vmaxi_d: |
5219 | case Intrinsic::loongarch_lasx_xvmaxi_b: |
5220 | case Intrinsic::loongarch_lasx_xvmaxi_h: |
5221 | case Intrinsic::loongarch_lasx_xvmaxi_w: |
5222 | case Intrinsic::loongarch_lasx_xvmaxi_d: |
5223 | return DAG.getNode(Opcode: ISD::SMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5224 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG, /*IsSigned=*/true)); |
5225 | case Intrinsic::loongarch_lsx_vmaxi_bu: |
5226 | case Intrinsic::loongarch_lsx_vmaxi_hu: |
5227 | case Intrinsic::loongarch_lsx_vmaxi_wu: |
5228 | case Intrinsic::loongarch_lsx_vmaxi_du: |
5229 | case Intrinsic::loongarch_lasx_xvmaxi_bu: |
5230 | case Intrinsic::loongarch_lasx_xvmaxi_hu: |
5231 | case Intrinsic::loongarch_lasx_xvmaxi_wu: |
5232 | case Intrinsic::loongarch_lasx_xvmaxi_du: |
5233 | return DAG.getNode(Opcode: ISD::UMAX, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5234 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
5235 | case Intrinsic::loongarch_lsx_vmin_b: |
5236 | case Intrinsic::loongarch_lsx_vmin_h: |
5237 | case Intrinsic::loongarch_lsx_vmin_w: |
5238 | case Intrinsic::loongarch_lsx_vmin_d: |
5239 | case Intrinsic::loongarch_lasx_xvmin_b: |
5240 | case Intrinsic::loongarch_lasx_xvmin_h: |
5241 | case Intrinsic::loongarch_lasx_xvmin_w: |
5242 | case Intrinsic::loongarch_lasx_xvmin_d: |
5243 | return DAG.getNode(Opcode: ISD::SMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5244 | N2: N->getOperand(Num: 2)); |
5245 | case Intrinsic::loongarch_lsx_vmin_bu: |
5246 | case Intrinsic::loongarch_lsx_vmin_hu: |
5247 | case Intrinsic::loongarch_lsx_vmin_wu: |
5248 | case Intrinsic::loongarch_lsx_vmin_du: |
5249 | case Intrinsic::loongarch_lasx_xvmin_bu: |
5250 | case Intrinsic::loongarch_lasx_xvmin_hu: |
5251 | case Intrinsic::loongarch_lasx_xvmin_wu: |
5252 | case Intrinsic::loongarch_lasx_xvmin_du: |
5253 | return DAG.getNode(Opcode: ISD::UMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5254 | N2: N->getOperand(Num: 2)); |
5255 | case Intrinsic::loongarch_lsx_vmini_b: |
5256 | case Intrinsic::loongarch_lsx_vmini_h: |
5257 | case Intrinsic::loongarch_lsx_vmini_w: |
5258 | case Intrinsic::loongarch_lsx_vmini_d: |
5259 | case Intrinsic::loongarch_lasx_xvmini_b: |
5260 | case Intrinsic::loongarch_lasx_xvmini_h: |
5261 | case Intrinsic::loongarch_lasx_xvmini_w: |
5262 | case Intrinsic::loongarch_lasx_xvmini_d: |
5263 | return DAG.getNode(Opcode: ISD::SMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5264 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG, /*IsSigned=*/true)); |
5265 | case Intrinsic::loongarch_lsx_vmini_bu: |
5266 | case Intrinsic::loongarch_lsx_vmini_hu: |
5267 | case Intrinsic::loongarch_lsx_vmini_wu: |
5268 | case Intrinsic::loongarch_lsx_vmini_du: |
5269 | case Intrinsic::loongarch_lasx_xvmini_bu: |
5270 | case Intrinsic::loongarch_lasx_xvmini_hu: |
5271 | case Intrinsic::loongarch_lasx_xvmini_wu: |
5272 | case Intrinsic::loongarch_lasx_xvmini_du: |
5273 | return DAG.getNode(Opcode: ISD::UMIN, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5274 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
5275 | case Intrinsic::loongarch_lsx_vmul_b: |
5276 | case Intrinsic::loongarch_lsx_vmul_h: |
5277 | case Intrinsic::loongarch_lsx_vmul_w: |
5278 | case Intrinsic::loongarch_lsx_vmul_d: |
5279 | case Intrinsic::loongarch_lasx_xvmul_b: |
5280 | case Intrinsic::loongarch_lasx_xvmul_h: |
5281 | case Intrinsic::loongarch_lasx_xvmul_w: |
5282 | case Intrinsic::loongarch_lasx_xvmul_d: |
5283 | return DAG.getNode(Opcode: ISD::MUL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5284 | N2: N->getOperand(Num: 2)); |
5285 | case Intrinsic::loongarch_lsx_vmadd_b: |
5286 | case Intrinsic::loongarch_lsx_vmadd_h: |
5287 | case Intrinsic::loongarch_lsx_vmadd_w: |
5288 | case Intrinsic::loongarch_lsx_vmadd_d: |
5289 | case Intrinsic::loongarch_lasx_xvmadd_b: |
5290 | case Intrinsic::loongarch_lasx_xvmadd_h: |
5291 | case Intrinsic::loongarch_lasx_xvmadd_w: |
5292 | case Intrinsic::loongarch_lasx_xvmadd_d: { |
5293 | EVT ResTy = N->getValueType(ResNo: 0); |
5294 | return DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 1), |
5295 | N2: DAG.getNode(Opcode: ISD::MUL, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 2), |
5296 | N2: N->getOperand(Num: 3))); |
5297 | } |
5298 | case Intrinsic::loongarch_lsx_vmsub_b: |
5299 | case Intrinsic::loongarch_lsx_vmsub_h: |
5300 | case Intrinsic::loongarch_lsx_vmsub_w: |
5301 | case Intrinsic::loongarch_lsx_vmsub_d: |
5302 | case Intrinsic::loongarch_lasx_xvmsub_b: |
5303 | case Intrinsic::loongarch_lasx_xvmsub_h: |
5304 | case Intrinsic::loongarch_lasx_xvmsub_w: |
5305 | case Intrinsic::loongarch_lasx_xvmsub_d: { |
5306 | EVT ResTy = N->getValueType(ResNo: 0); |
5307 | return DAG.getNode(Opcode: ISD::SUB, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 1), |
5308 | N2: DAG.getNode(Opcode: ISD::MUL, DL: SDLoc(N), VT: ResTy, N1: N->getOperand(Num: 2), |
5309 | N2: N->getOperand(Num: 3))); |
5310 | } |
5311 | case Intrinsic::loongarch_lsx_vdiv_b: |
5312 | case Intrinsic::loongarch_lsx_vdiv_h: |
5313 | case Intrinsic::loongarch_lsx_vdiv_w: |
5314 | case Intrinsic::loongarch_lsx_vdiv_d: |
5315 | case Intrinsic::loongarch_lasx_xvdiv_b: |
5316 | case Intrinsic::loongarch_lasx_xvdiv_h: |
5317 | case Intrinsic::loongarch_lasx_xvdiv_w: |
5318 | case Intrinsic::loongarch_lasx_xvdiv_d: |
5319 | return DAG.getNode(Opcode: ISD::SDIV, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5320 | N2: N->getOperand(Num: 2)); |
5321 | case Intrinsic::loongarch_lsx_vdiv_bu: |
5322 | case Intrinsic::loongarch_lsx_vdiv_hu: |
5323 | case Intrinsic::loongarch_lsx_vdiv_wu: |
5324 | case Intrinsic::loongarch_lsx_vdiv_du: |
5325 | case Intrinsic::loongarch_lasx_xvdiv_bu: |
5326 | case Intrinsic::loongarch_lasx_xvdiv_hu: |
5327 | case Intrinsic::loongarch_lasx_xvdiv_wu: |
5328 | case Intrinsic::loongarch_lasx_xvdiv_du: |
5329 | return DAG.getNode(Opcode: ISD::UDIV, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5330 | N2: N->getOperand(Num: 2)); |
5331 | case Intrinsic::loongarch_lsx_vmod_b: |
5332 | case Intrinsic::loongarch_lsx_vmod_h: |
5333 | case Intrinsic::loongarch_lsx_vmod_w: |
5334 | case Intrinsic::loongarch_lsx_vmod_d: |
5335 | case Intrinsic::loongarch_lasx_xvmod_b: |
5336 | case Intrinsic::loongarch_lasx_xvmod_h: |
5337 | case Intrinsic::loongarch_lasx_xvmod_w: |
5338 | case Intrinsic::loongarch_lasx_xvmod_d: |
5339 | return DAG.getNode(Opcode: ISD::SREM, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5340 | N2: N->getOperand(Num: 2)); |
5341 | case Intrinsic::loongarch_lsx_vmod_bu: |
5342 | case Intrinsic::loongarch_lsx_vmod_hu: |
5343 | case Intrinsic::loongarch_lsx_vmod_wu: |
5344 | case Intrinsic::loongarch_lsx_vmod_du: |
5345 | case Intrinsic::loongarch_lasx_xvmod_bu: |
5346 | case Intrinsic::loongarch_lasx_xvmod_hu: |
5347 | case Intrinsic::loongarch_lasx_xvmod_wu: |
5348 | case Intrinsic::loongarch_lasx_xvmod_du: |
5349 | return DAG.getNode(Opcode: ISD::UREM, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5350 | N2: N->getOperand(Num: 2)); |
5351 | case Intrinsic::loongarch_lsx_vand_v: |
5352 | case Intrinsic::loongarch_lasx_xvand_v: |
5353 | return DAG.getNode(Opcode: ISD::AND, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5354 | N2: N->getOperand(Num: 2)); |
5355 | case Intrinsic::loongarch_lsx_vor_v: |
5356 | case Intrinsic::loongarch_lasx_xvor_v: |
5357 | return DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5358 | N2: N->getOperand(Num: 2)); |
5359 | case Intrinsic::loongarch_lsx_vxor_v: |
5360 | case Intrinsic::loongarch_lasx_xvxor_v: |
5361 | return DAG.getNode(Opcode: ISD::XOR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5362 | N2: N->getOperand(Num: 2)); |
5363 | case Intrinsic::loongarch_lsx_vnor_v: |
5364 | case Intrinsic::loongarch_lasx_xvnor_v: { |
5365 | SDValue Res = DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5366 | N2: N->getOperand(Num: 2)); |
5367 | return DAG.getNOT(DL, Val: Res, VT: Res->getValueType(ResNo: 0)); |
5368 | } |
5369 | case Intrinsic::loongarch_lsx_vandi_b: |
5370 | case Intrinsic::loongarch_lasx_xvandi_b: |
5371 | return DAG.getNode(Opcode: ISD::AND, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5372 | N2: lowerVectorSplatImm<8>(Node: N, ImmOp: 2, DAG)); |
5373 | case Intrinsic::loongarch_lsx_vori_b: |
5374 | case Intrinsic::loongarch_lasx_xvori_b: |
5375 | return DAG.getNode(Opcode: ISD::OR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5376 | N2: lowerVectorSplatImm<8>(Node: N, ImmOp: 2, DAG)); |
5377 | case Intrinsic::loongarch_lsx_vxori_b: |
5378 | case Intrinsic::loongarch_lasx_xvxori_b: |
5379 | return DAG.getNode(Opcode: ISD::XOR, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5380 | N2: lowerVectorSplatImm<8>(Node: N, ImmOp: 2, DAG)); |
5381 | case Intrinsic::loongarch_lsx_vsll_b: |
5382 | case Intrinsic::loongarch_lsx_vsll_h: |
5383 | case Intrinsic::loongarch_lsx_vsll_w: |
5384 | case Intrinsic::loongarch_lsx_vsll_d: |
5385 | case Intrinsic::loongarch_lasx_xvsll_b: |
5386 | case Intrinsic::loongarch_lasx_xvsll_h: |
5387 | case Intrinsic::loongarch_lasx_xvsll_w: |
5388 | case Intrinsic::loongarch_lasx_xvsll_d: |
5389 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5390 | N2: truncateVecElts(Node: N, DAG)); |
5391 | case Intrinsic::loongarch_lsx_vslli_b: |
5392 | case Intrinsic::loongarch_lasx_xvslli_b: |
5393 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5394 | N2: lowerVectorSplatImm<3>(Node: N, ImmOp: 2, DAG)); |
5395 | case Intrinsic::loongarch_lsx_vslli_h: |
5396 | case Intrinsic::loongarch_lasx_xvslli_h: |
5397 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5398 | N2: lowerVectorSplatImm<4>(Node: N, ImmOp: 2, DAG)); |
5399 | case Intrinsic::loongarch_lsx_vslli_w: |
5400 | case Intrinsic::loongarch_lasx_xvslli_w: |
5401 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5402 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
5403 | case Intrinsic::loongarch_lsx_vslli_d: |
5404 | case Intrinsic::loongarch_lasx_xvslli_d: |
5405 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5406 | N2: lowerVectorSplatImm<6>(Node: N, ImmOp: 2, DAG)); |
5407 | case Intrinsic::loongarch_lsx_vsrl_b: |
5408 | case Intrinsic::loongarch_lsx_vsrl_h: |
5409 | case Intrinsic::loongarch_lsx_vsrl_w: |
5410 | case Intrinsic::loongarch_lsx_vsrl_d: |
5411 | case Intrinsic::loongarch_lasx_xvsrl_b: |
5412 | case Intrinsic::loongarch_lasx_xvsrl_h: |
5413 | case Intrinsic::loongarch_lasx_xvsrl_w: |
5414 | case Intrinsic::loongarch_lasx_xvsrl_d: |
5415 | return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5416 | N2: truncateVecElts(Node: N, DAG)); |
5417 | case Intrinsic::loongarch_lsx_vsrli_b: |
5418 | case Intrinsic::loongarch_lasx_xvsrli_b: |
5419 | return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5420 | N2: lowerVectorSplatImm<3>(Node: N, ImmOp: 2, DAG)); |
5421 | case Intrinsic::loongarch_lsx_vsrli_h: |
5422 | case Intrinsic::loongarch_lasx_xvsrli_h: |
5423 | return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5424 | N2: lowerVectorSplatImm<4>(Node: N, ImmOp: 2, DAG)); |
5425 | case Intrinsic::loongarch_lsx_vsrli_w: |
5426 | case Intrinsic::loongarch_lasx_xvsrli_w: |
5427 | return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5428 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
5429 | case Intrinsic::loongarch_lsx_vsrli_d: |
5430 | case Intrinsic::loongarch_lasx_xvsrli_d: |
5431 | return DAG.getNode(Opcode: ISD::SRL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5432 | N2: lowerVectorSplatImm<6>(Node: N, ImmOp: 2, DAG)); |
5433 | case Intrinsic::loongarch_lsx_vsra_b: |
5434 | case Intrinsic::loongarch_lsx_vsra_h: |
5435 | case Intrinsic::loongarch_lsx_vsra_w: |
5436 | case Intrinsic::loongarch_lsx_vsra_d: |
5437 | case Intrinsic::loongarch_lasx_xvsra_b: |
5438 | case Intrinsic::loongarch_lasx_xvsra_h: |
5439 | case Intrinsic::loongarch_lasx_xvsra_w: |
5440 | case Intrinsic::loongarch_lasx_xvsra_d: |
5441 | return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5442 | N2: truncateVecElts(Node: N, DAG)); |
5443 | case Intrinsic::loongarch_lsx_vsrai_b: |
5444 | case Intrinsic::loongarch_lasx_xvsrai_b: |
5445 | return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5446 | N2: lowerVectorSplatImm<3>(Node: N, ImmOp: 2, DAG)); |
5447 | case Intrinsic::loongarch_lsx_vsrai_h: |
5448 | case Intrinsic::loongarch_lasx_xvsrai_h: |
5449 | return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5450 | N2: lowerVectorSplatImm<4>(Node: N, ImmOp: 2, DAG)); |
5451 | case Intrinsic::loongarch_lsx_vsrai_w: |
5452 | case Intrinsic::loongarch_lasx_xvsrai_w: |
5453 | return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5454 | N2: lowerVectorSplatImm<5>(Node: N, ImmOp: 2, DAG)); |
5455 | case Intrinsic::loongarch_lsx_vsrai_d: |
5456 | case Intrinsic::loongarch_lasx_xvsrai_d: |
5457 | return DAG.getNode(Opcode: ISD::SRA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5458 | N2: lowerVectorSplatImm<6>(Node: N, ImmOp: 2, DAG)); |
5459 | case Intrinsic::loongarch_lsx_vclz_b: |
5460 | case Intrinsic::loongarch_lsx_vclz_h: |
5461 | case Intrinsic::loongarch_lsx_vclz_w: |
5462 | case Intrinsic::loongarch_lsx_vclz_d: |
5463 | case Intrinsic::loongarch_lasx_xvclz_b: |
5464 | case Intrinsic::loongarch_lasx_xvclz_h: |
5465 | case Intrinsic::loongarch_lasx_xvclz_w: |
5466 | case Intrinsic::loongarch_lasx_xvclz_d: |
5467 | return DAG.getNode(Opcode: ISD::CTLZ, DL, VT: N->getValueType(ResNo: 0), Operand: N->getOperand(Num: 1)); |
5468 | case Intrinsic::loongarch_lsx_vpcnt_b: |
5469 | case Intrinsic::loongarch_lsx_vpcnt_h: |
5470 | case Intrinsic::loongarch_lsx_vpcnt_w: |
5471 | case Intrinsic::loongarch_lsx_vpcnt_d: |
5472 | case Intrinsic::loongarch_lasx_xvpcnt_b: |
5473 | case Intrinsic::loongarch_lasx_xvpcnt_h: |
5474 | case Intrinsic::loongarch_lasx_xvpcnt_w: |
5475 | case Intrinsic::loongarch_lasx_xvpcnt_d: |
5476 | return DAG.getNode(Opcode: ISD::CTPOP, DL, VT: N->getValueType(ResNo: 0), Operand: N->getOperand(Num: 1)); |
5477 | case Intrinsic::loongarch_lsx_vbitclr_b: |
5478 | case Intrinsic::loongarch_lsx_vbitclr_h: |
5479 | case Intrinsic::loongarch_lsx_vbitclr_w: |
5480 | case Intrinsic::loongarch_lsx_vbitclr_d: |
5481 | case Intrinsic::loongarch_lasx_xvbitclr_b: |
5482 | case Intrinsic::loongarch_lasx_xvbitclr_h: |
5483 | case Intrinsic::loongarch_lasx_xvbitclr_w: |
5484 | case Intrinsic::loongarch_lasx_xvbitclr_d: |
5485 | return lowerVectorBitClear(Node: N, DAG); |
5486 | case Intrinsic::loongarch_lsx_vbitclri_b: |
5487 | case Intrinsic::loongarch_lasx_xvbitclri_b: |
5488 | return lowerVectorBitClearImm<3>(Node: N, DAG); |
5489 | case Intrinsic::loongarch_lsx_vbitclri_h: |
5490 | case Intrinsic::loongarch_lasx_xvbitclri_h: |
5491 | return lowerVectorBitClearImm<4>(Node: N, DAG); |
5492 | case Intrinsic::loongarch_lsx_vbitclri_w: |
5493 | case Intrinsic::loongarch_lasx_xvbitclri_w: |
5494 | return lowerVectorBitClearImm<5>(Node: N, DAG); |
5495 | case Intrinsic::loongarch_lsx_vbitclri_d: |
5496 | case Intrinsic::loongarch_lasx_xvbitclri_d: |
5497 | return lowerVectorBitClearImm<6>(Node: N, DAG); |
5498 | case Intrinsic::loongarch_lsx_vbitset_b: |
5499 | case Intrinsic::loongarch_lsx_vbitset_h: |
5500 | case Intrinsic::loongarch_lsx_vbitset_w: |
5501 | case Intrinsic::loongarch_lsx_vbitset_d: |
5502 | case Intrinsic::loongarch_lasx_xvbitset_b: |
5503 | case Intrinsic::loongarch_lasx_xvbitset_h: |
5504 | case Intrinsic::loongarch_lasx_xvbitset_w: |
5505 | case Intrinsic::loongarch_lasx_xvbitset_d: { |
5506 | EVT VecTy = N->getValueType(ResNo: 0); |
5507 | SDValue One = DAG.getConstant(Val: 1, DL, VT: VecTy); |
5508 | return DAG.getNode( |
5509 | Opcode: ISD::OR, DL, VT: VecTy, N1: N->getOperand(Num: 1), |
5510 | N2: DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: One, N2: truncateVecElts(Node: N, DAG))); |
5511 | } |
5512 | case Intrinsic::loongarch_lsx_vbitseti_b: |
5513 | case Intrinsic::loongarch_lasx_xvbitseti_b: |
5514 | return lowerVectorBitSetImm<3>(Node: N, DAG); |
5515 | case Intrinsic::loongarch_lsx_vbitseti_h: |
5516 | case Intrinsic::loongarch_lasx_xvbitseti_h: |
5517 | return lowerVectorBitSetImm<4>(Node: N, DAG); |
5518 | case Intrinsic::loongarch_lsx_vbitseti_w: |
5519 | case Intrinsic::loongarch_lasx_xvbitseti_w: |
5520 | return lowerVectorBitSetImm<5>(Node: N, DAG); |
5521 | case Intrinsic::loongarch_lsx_vbitseti_d: |
5522 | case Intrinsic::loongarch_lasx_xvbitseti_d: |
5523 | return lowerVectorBitSetImm<6>(Node: N, DAG); |
5524 | case Intrinsic::loongarch_lsx_vbitrev_b: |
5525 | case Intrinsic::loongarch_lsx_vbitrev_h: |
5526 | case Intrinsic::loongarch_lsx_vbitrev_w: |
5527 | case Intrinsic::loongarch_lsx_vbitrev_d: |
5528 | case Intrinsic::loongarch_lasx_xvbitrev_b: |
5529 | case Intrinsic::loongarch_lasx_xvbitrev_h: |
5530 | case Intrinsic::loongarch_lasx_xvbitrev_w: |
5531 | case Intrinsic::loongarch_lasx_xvbitrev_d: { |
5532 | EVT VecTy = N->getValueType(ResNo: 0); |
5533 | SDValue One = DAG.getConstant(Val: 1, DL, VT: VecTy); |
5534 | return DAG.getNode( |
5535 | Opcode: ISD::XOR, DL, VT: VecTy, N1: N->getOperand(Num: 1), |
5536 | N2: DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: One, N2: truncateVecElts(Node: N, DAG))); |
5537 | } |
5538 | case Intrinsic::loongarch_lsx_vbitrevi_b: |
5539 | case Intrinsic::loongarch_lasx_xvbitrevi_b: |
5540 | return lowerVectorBitRevImm<3>(Node: N, DAG); |
5541 | case Intrinsic::loongarch_lsx_vbitrevi_h: |
5542 | case Intrinsic::loongarch_lasx_xvbitrevi_h: |
5543 | return lowerVectorBitRevImm<4>(Node: N, DAG); |
5544 | case Intrinsic::loongarch_lsx_vbitrevi_w: |
5545 | case Intrinsic::loongarch_lasx_xvbitrevi_w: |
5546 | return lowerVectorBitRevImm<5>(Node: N, DAG); |
5547 | case Intrinsic::loongarch_lsx_vbitrevi_d: |
5548 | case Intrinsic::loongarch_lasx_xvbitrevi_d: |
5549 | return lowerVectorBitRevImm<6>(Node: N, DAG); |
5550 | case Intrinsic::loongarch_lsx_vfadd_s: |
5551 | case Intrinsic::loongarch_lsx_vfadd_d: |
5552 | case Intrinsic::loongarch_lasx_xvfadd_s: |
5553 | case Intrinsic::loongarch_lasx_xvfadd_d: |
5554 | return DAG.getNode(Opcode: ISD::FADD, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5555 | N2: N->getOperand(Num: 2)); |
5556 | case Intrinsic::loongarch_lsx_vfsub_s: |
5557 | case Intrinsic::loongarch_lsx_vfsub_d: |
5558 | case Intrinsic::loongarch_lasx_xvfsub_s: |
5559 | case Intrinsic::loongarch_lasx_xvfsub_d: |
5560 | return DAG.getNode(Opcode: ISD::FSUB, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5561 | N2: N->getOperand(Num: 2)); |
5562 | case Intrinsic::loongarch_lsx_vfmul_s: |
5563 | case Intrinsic::loongarch_lsx_vfmul_d: |
5564 | case Intrinsic::loongarch_lasx_xvfmul_s: |
5565 | case Intrinsic::loongarch_lasx_xvfmul_d: |
5566 | return DAG.getNode(Opcode: ISD::FMUL, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5567 | N2: N->getOperand(Num: 2)); |
5568 | case Intrinsic::loongarch_lsx_vfdiv_s: |
5569 | case Intrinsic::loongarch_lsx_vfdiv_d: |
5570 | case Intrinsic::loongarch_lasx_xvfdiv_s: |
5571 | case Intrinsic::loongarch_lasx_xvfdiv_d: |
5572 | return DAG.getNode(Opcode: ISD::FDIV, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5573 | N2: N->getOperand(Num: 2)); |
5574 | case Intrinsic::loongarch_lsx_vfmadd_s: |
5575 | case Intrinsic::loongarch_lsx_vfmadd_d: |
5576 | case Intrinsic::loongarch_lasx_xvfmadd_s: |
5577 | case Intrinsic::loongarch_lasx_xvfmadd_d: |
5578 | return DAG.getNode(Opcode: ISD::FMA, DL, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 1), |
5579 | N2: N->getOperand(Num: 2), N3: N->getOperand(Num: 3)); |
5580 | case Intrinsic::loongarch_lsx_vinsgr2vr_b: |
5581 | return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), |
5582 | N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2), |
5583 | N3: legalizeIntrinsicImmArg<4>(Node: N, ImmOp: 3, DAG, Subtarget)); |
5584 | case Intrinsic::loongarch_lsx_vinsgr2vr_h: |
5585 | case Intrinsic::loongarch_lasx_xvinsgr2vr_w: |
5586 | return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), |
5587 | N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2), |
5588 | N3: legalizeIntrinsicImmArg<3>(Node: N, ImmOp: 3, DAG, Subtarget)); |
5589 | case Intrinsic::loongarch_lsx_vinsgr2vr_w: |
5590 | case Intrinsic::loongarch_lasx_xvinsgr2vr_d: |
5591 | return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), |
5592 | N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2), |
5593 | N3: legalizeIntrinsicImmArg<2>(Node: N, ImmOp: 3, DAG, Subtarget)); |
5594 | case Intrinsic::loongarch_lsx_vinsgr2vr_d: |
5595 | return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), |
5596 | N1: N->getOperand(Num: 1), N2: N->getOperand(Num: 2), |
5597 | N3: legalizeIntrinsicImmArg<1>(Node: N, ImmOp: 3, DAG, Subtarget)); |
5598 | case Intrinsic::loongarch_lsx_vreplgr2vr_b: |
5599 | case Intrinsic::loongarch_lsx_vreplgr2vr_h: |
5600 | case Intrinsic::loongarch_lsx_vreplgr2vr_w: |
5601 | case Intrinsic::loongarch_lsx_vreplgr2vr_d: |
5602 | case Intrinsic::loongarch_lasx_xvreplgr2vr_b: |
5603 | case Intrinsic::loongarch_lasx_xvreplgr2vr_h: |
5604 | case Intrinsic::loongarch_lasx_xvreplgr2vr_w: |
5605 | case Intrinsic::loongarch_lasx_xvreplgr2vr_d: |
5606 | return DAG.getNode(Opcode: LoongArchISD::VREPLGR2VR, DL, VT: N->getValueType(ResNo: 0), |
5607 | Operand: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getGRLenVT(), |
5608 | Operand: N->getOperand(Num: 1))); |
5609 | case Intrinsic::loongarch_lsx_vreplve_b: |
5610 | case Intrinsic::loongarch_lsx_vreplve_h: |
5611 | case Intrinsic::loongarch_lsx_vreplve_w: |
5612 | case Intrinsic::loongarch_lsx_vreplve_d: |
5613 | case Intrinsic::loongarch_lasx_xvreplve_b: |
5614 | case Intrinsic::loongarch_lasx_xvreplve_h: |
5615 | case Intrinsic::loongarch_lasx_xvreplve_w: |
5616 | case Intrinsic::loongarch_lasx_xvreplve_d: |
5617 | return DAG.getNode(Opcode: LoongArchISD::VREPLVE, DL, VT: N->getValueType(ResNo: 0), |
5618 | N1: N->getOperand(Num: 1), |
5619 | N2: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getGRLenVT(), |
5620 | Operand: N->getOperand(Num: 2))); |
5621 | } |
5622 | return SDValue(); |
5623 | } |
5624 | |
5625 | static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG, |
5626 | TargetLowering::DAGCombinerInfo &DCI, |
5627 | const LoongArchSubtarget &Subtarget) { |
// If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 then the
// conversion is unnecessary and can be replaced with the MOVFR2GR_S_LA64
// operand.
5631 | SDValue Op0 = N->getOperand(Num: 0); |
5632 | if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64) |
5633 | return Op0.getOperand(i: 0); |
5634 | return SDValue(); |
5635 | } |
5636 | |
5637 | static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG, |
5638 | TargetLowering::DAGCombinerInfo &DCI, |
5639 | const LoongArchSubtarget &Subtarget) { |
5640 | // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the |
5641 | // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64 |
5642 | // operand. |
5643 | SDValue Op0 = N->getOperand(Num: 0); |
5644 | if (Op0->getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) { |
5645 | assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) && |
5646 | "Unexpected value type!" ); |
5647 | return Op0.getOperand(i: 0); |
5648 | } |
5649 | return SDValue(); |
5650 | } |
5651 | |
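// Try to simplify the operands of a [X]VMSKLTZ node via demanded-bits
// analysis: semantically only the sign bit of each input element affects
// the resulting mask.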
5652 | static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG, |
5653 | TargetLowering::DAGCombinerInfo &DCI, |
5654 | const LoongArchSubtarget &Subtarget) { |
5655 | MVT VT = N->getSimpleValueType(ResNo: 0); |
5656 | unsigned NumBits = VT.getScalarSizeInBits(); |
5657 | |
5658 | // Simplify the inputs. |
5659 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
5660 | APInt DemandedMask(APInt::getAllOnes(numBits: NumBits)); |
5661 | if (TLI.SimplifyDemandedBits(Op: SDValue(N, 0), DemandedBits: DemandedMask, DCI)) |
5662 | return SDValue(N, 0); |
5663 | |
5664 | return SDValue(); |
5665 | } |
5666 | |
5667 | static SDValue |
5668 | performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG, |
5669 | TargetLowering::DAGCombinerInfo &DCI, |
5670 | const LoongArchSubtarget &Subtarget) { |
5671 | SDValue Op0 = N->getOperand(Num: 0); |
5672 | SDLoc DL(N); |
5673 | |
5674 | // If the input to SplitPairF64 is just BuildPairF64 then the operation is |
5675 | // redundant. Instead, use BuildPairF64's operands directly. |
5676 | if (Op0->getOpcode() == LoongArchISD::BUILD_PAIR_F64) |
5677 | return DCI.CombineTo(N, Res0: Op0.getOperand(i: 0), Res1: Op0.getOperand(i: 1)); |
5678 | |
5679 | if (Op0->isUndef()) { |
5680 | SDValue Lo = DAG.getUNDEF(VT: MVT::i32); |
5681 | SDValue Hi = DAG.getUNDEF(VT: MVT::i32); |
5682 | return DCI.CombineTo(N, Res0: Lo, Res1: Hi); |
5683 | } |
5684 | |
5685 | // It's cheaper to materialise two 32-bit integers than to load a double |
5686 | // from the constant pool and transfer it to integer registers through the |
5687 | // stack. |
5688 | if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val&: Op0)) { |
5689 | APInt V = C->getValueAPF().bitcastToAPInt(); |
5690 | SDValue Lo = DAG.getConstant(Val: V.trunc(width: 32), DL, VT: MVT::i32); |
5691 | SDValue Hi = DAG.getConstant(Val: V.lshr(shiftAmt: 32).trunc(width: 32), DL, VT: MVT::i32); |
5692 | return DCI.CombineTo(N, Res0: Lo, Res1: Hi); |
5693 | } |
5694 | |
5695 | return SDValue(); |
5696 | } |
5697 | |
5698 | SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, |
5699 | DAGCombinerInfo &DCI) const { |
5700 | SelectionDAG &DAG = DCI.DAG; |
5701 | switch (N->getOpcode()) { |
5702 | default: |
5703 | break; |
5704 | case ISD::AND: |
5705 | return performANDCombine(N, DAG, DCI, Subtarget); |
5706 | case ISD::OR: |
5707 | return performORCombine(N, DAG, DCI, Subtarget); |
5708 | case ISD::SETCC: |
5709 | return performSETCCCombine(N, DAG, DCI, Subtarget); |
5710 | case ISD::SRL: |
5711 | return performSRLCombine(N, DAG, DCI, Subtarget); |
5712 | case ISD::BITCAST: |
5713 | return performBITCASTCombine(N, DAG, DCI, Subtarget); |
5714 | case LoongArchISD::BITREV_W: |
5715 | return performBITREV_WCombine(N, DAG, DCI, Subtarget); |
5716 | case ISD::INTRINSIC_WO_CHAIN: |
5717 | return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget); |
5718 | case LoongArchISD::MOVGR2FR_W_LA64: |
5719 | return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget); |
5720 | case LoongArchISD::MOVFR2GR_S_LA64: |
5721 | return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget); |
5722 | case LoongArchISD::VMSKLTZ: |
5723 | case LoongArchISD::XVMSKLTZ: |
5724 | return performVMSKLTZCombine(N, DAG, DCI, Subtarget); |
5725 | case LoongArchISD::SPLIT_PAIR_F64: |
5726 | return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget); |
5727 | } |
5728 | return SDValue(); |
5729 | } |
5730 | |
5731 | static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI, |
5732 | MachineBasicBlock *MBB) { |
5733 | if (!ZeroDivCheck) |
5734 | return MBB; |
5735 | |
5736 | // Build instructions: |
5737 | // MBB: |
5738 | // div(or mod) $dst, $dividend, $divisor |
5739 | // bne $divisor, $zero, SinkMBB |
5740 | // BreakMBB: |
5741 | // break 7 // BRK_DIVZERO |
5742 | // SinkMBB: |
5743 | // fallthrough |
5744 | const BasicBlock *LLVM_BB = MBB->getBasicBlock(); |
5745 | MachineFunction::iterator It = ++MBB->getIterator(); |
5746 | MachineFunction *MF = MBB->getParent(); |
5747 | auto BreakMBB = MF->CreateMachineBasicBlock(BB: LLVM_BB); |
5748 | auto SinkMBB = MF->CreateMachineBasicBlock(BB: LLVM_BB); |
5749 | MF->insert(MBBI: It, MBB: BreakMBB); |
5750 | MF->insert(MBBI: It, MBB: SinkMBB); |
5751 | |
5752 | // Transfer the remainder of MBB and its successor edges to SinkMBB. |
5753 | SinkMBB->splice(Where: SinkMBB->end(), Other: MBB, From: std::next(x: MI.getIterator()), To: MBB->end()); |
5754 | SinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB); |
5755 | |
5756 | const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo(); |
5757 | DebugLoc DL = MI.getDebugLoc(); |
5758 | MachineOperand &Divisor = MI.getOperand(i: 2); |
5759 | Register DivisorReg = Divisor.getReg(); |
5760 | |
5761 | // MBB: |
5762 | BuildMI(BB: MBB, MIMD: DL, MCID: TII.get(Opcode: LoongArch::BNE)) |
5763 | .addReg(RegNo: DivisorReg, flags: getKillRegState(B: Divisor.isKill())) |
5764 | .addReg(RegNo: LoongArch::R0) |
5765 | .addMBB(MBB: SinkMBB); |
5766 | MBB->addSuccessor(Succ: BreakMBB); |
5767 | MBB->addSuccessor(Succ: SinkMBB); |
5768 | |
5769 | // BreakMBB: |
5770 | // See linux header file arch/loongarch/include/uapi/asm/break.h for the |
5771 | // definition of BRK_DIVZERO. |
5772 | BuildMI(BB: BreakMBB, MIMD: DL, MCID: TII.get(Opcode: LoongArch::BREAK)).addImm(Val: 7 /*BRK_DIVZERO*/); |
5773 | BreakMBB->addSuccessor(Succ: SinkMBB); |
5774 | |
5775 | // Clear Divisor's kill flag. |
5776 | Divisor.setIsKill(false); |
5777 | |
5778 | return SinkMBB; |
5779 | } |
5780 | |
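// Expand a PseudoVBZ/PseudoVBNZ-style vector branch pseudo into a diamond
// that materializes the boolean result:
// BB:
// vset<cond> $fcc, $vr
// bcnez $fcc, TrueBB
// FalseBB:
// addi.w $rd1, $zero, 0
// b SinkBB
// TrueBB:
// addi.w $rd2, $zero, 1
// SinkBB:
// phi $rd, [$rd1, FalseBB], [$rd2, TrueBB]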
5781 | static MachineBasicBlock * |
5782 | emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, |
5783 | const LoongArchSubtarget &Subtarget) { |
5784 | unsigned CondOpc; |
5785 | switch (MI.getOpcode()) { |
5786 | default: |
5787 | llvm_unreachable("Unexpected opcode" ); |
5788 | case LoongArch::PseudoVBZ: |
5789 | CondOpc = LoongArch::VSETEQZ_V; |
5790 | break; |
5791 | case LoongArch::PseudoVBZ_B: |
5792 | CondOpc = LoongArch::VSETANYEQZ_B; |
5793 | break; |
5794 | case LoongArch::PseudoVBZ_H: |
5795 | CondOpc = LoongArch::VSETANYEQZ_H; |
5796 | break; |
5797 | case LoongArch::PseudoVBZ_W: |
5798 | CondOpc = LoongArch::VSETANYEQZ_W; |
5799 | break; |
5800 | case LoongArch::PseudoVBZ_D: |
5801 | CondOpc = LoongArch::VSETANYEQZ_D; |
5802 | break; |
5803 | case LoongArch::PseudoVBNZ: |
5804 | CondOpc = LoongArch::VSETNEZ_V; |
5805 | break; |
5806 | case LoongArch::PseudoVBNZ_B: |
5807 | CondOpc = LoongArch::VSETALLNEZ_B; |
5808 | break; |
5809 | case LoongArch::PseudoVBNZ_H: |
5810 | CondOpc = LoongArch::VSETALLNEZ_H; |
5811 | break; |
5812 | case LoongArch::PseudoVBNZ_W: |
5813 | CondOpc = LoongArch::VSETALLNEZ_W; |
5814 | break; |
5815 | case LoongArch::PseudoVBNZ_D: |
5816 | CondOpc = LoongArch::VSETALLNEZ_D; |
5817 | break; |
5818 | case LoongArch::PseudoXVBZ: |
5819 | CondOpc = LoongArch::XVSETEQZ_V; |
5820 | break; |
5821 | case LoongArch::PseudoXVBZ_B: |
5822 | CondOpc = LoongArch::XVSETANYEQZ_B; |
5823 | break; |
5824 | case LoongArch::PseudoXVBZ_H: |
5825 | CondOpc = LoongArch::XVSETANYEQZ_H; |
5826 | break; |
5827 | case LoongArch::PseudoXVBZ_W: |
5828 | CondOpc = LoongArch::XVSETANYEQZ_W; |
5829 | break; |
5830 | case LoongArch::PseudoXVBZ_D: |
5831 | CondOpc = LoongArch::XVSETANYEQZ_D; |
5832 | break; |
5833 | case LoongArch::PseudoXVBNZ: |
5834 | CondOpc = LoongArch::XVSETNEZ_V; |
5835 | break; |
5836 | case LoongArch::PseudoXVBNZ_B: |
5837 | CondOpc = LoongArch::XVSETALLNEZ_B; |
5838 | break; |
5839 | case LoongArch::PseudoXVBNZ_H: |
5840 | CondOpc = LoongArch::XVSETALLNEZ_H; |
5841 | break; |
5842 | case LoongArch::PseudoXVBNZ_W: |
5843 | CondOpc = LoongArch::XVSETALLNEZ_W; |
5844 | break; |
5845 | case LoongArch::PseudoXVBNZ_D: |
5846 | CondOpc = LoongArch::XVSETALLNEZ_D; |
5847 | break; |
5848 | } |
5849 | |
5850 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
5851 | const BasicBlock *LLVM_BB = BB->getBasicBlock(); |
5852 | DebugLoc DL = MI.getDebugLoc(); |
5853 | MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); |
5854 | MachineFunction::iterator It = ++BB->getIterator(); |
5855 | |
5856 | MachineFunction *F = BB->getParent(); |
5857 | MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(BB: LLVM_BB); |
5858 | MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(BB: LLVM_BB); |
5859 | MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(BB: LLVM_BB); |
5860 | |
5861 | F->insert(MBBI: It, MBB: FalseBB); |
5862 | F->insert(MBBI: It, MBB: TrueBB); |
5863 | F->insert(MBBI: It, MBB: SinkBB); |
5864 | |
5865 | // Transfer the remainder of MBB and its successor edges to Sink. |
5866 | SinkBB->splice(Where: SinkBB->end(), Other: BB, From: std::next(x: MI.getIterator()), To: BB->end()); |
5867 | SinkBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB); |
5868 | |
5869 | // Insert the real instruction to BB. |
5870 | Register FCC = MRI.createVirtualRegister(RegClass: &LoongArch::CFRRegClass); |
5871 | BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: CondOpc), DestReg: FCC).addReg(RegNo: MI.getOperand(i: 1).getReg()); |
5872 | |
5873 | // Insert branch. |
5874 | BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::BCNEZ)).addReg(RegNo: FCC).addMBB(MBB: TrueBB); |
5875 | BB->addSuccessor(Succ: FalseBB); |
5876 | BB->addSuccessor(Succ: TrueBB); |
5877 | |
5878 | // FalseBB. |
5879 | Register RD1 = MRI.createVirtualRegister(RegClass: &LoongArch::GPRRegClass); |
5880 | BuildMI(BB: FalseBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::ADDI_W), DestReg: RD1) |
5881 | .addReg(RegNo: LoongArch::R0) |
5882 | .addImm(Val: 0); |
5883 | BuildMI(BB: FalseBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::PseudoBR)).addMBB(MBB: SinkBB); |
5884 | FalseBB->addSuccessor(Succ: SinkBB); |
5885 | |
5886 | // TrueBB. |
5887 | Register RD2 = MRI.createVirtualRegister(RegClass: &LoongArch::GPRRegClass); |
5888 | BuildMI(BB: TrueBB, MIMD: DL, MCID: TII->get(Opcode: LoongArch::ADDI_W), DestReg: RD2) |
5889 | .addReg(RegNo: LoongArch::R0) |
5890 | .addImm(Val: 1); |
5891 | TrueBB->addSuccessor(Succ: SinkBB); |
5892 | |
5893 | // SinkBB: merge the results. |
5894 | BuildMI(BB&: *SinkBB, I: SinkBB->begin(), MIMD: DL, MCID: TII->get(Opcode: LoongArch::PHI), |
5895 | DestReg: MI.getOperand(i: 0).getReg()) |
5896 | .addReg(RegNo: RD1) |
5897 | .addMBB(MBB: FalseBB) |
5898 | .addReg(RegNo: RD2) |
5899 | .addMBB(MBB: TrueBB); |
5900 | |
5901 | // The pseudo instruction is gone now. |
5902 | MI.eraseFromParent(); |
5903 | return SinkBB; |
5904 | } |
5905 | |
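// Insert an element into a 256-bit LASX vector by reusing the 128-bit
// VINSGR2VR instruction: if the target lane lies in the high half, first
// move the high 128 bits down with XVPERMI_Q, insert into that half, then
// merge the updated half back into the destination register.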
5906 | static MachineBasicBlock * |
5907 | emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, |
5908 | const LoongArchSubtarget &Subtarget) { |
5909 | unsigned InsOp; |
5910 | unsigned HalfSize; |
5911 | switch (MI.getOpcode()) { |
5912 | default: |
5913 | llvm_unreachable("Unexpected opcode" ); |
5914 | case LoongArch::PseudoXVINSGR2VR_B: |
5915 | HalfSize = 16; |
5916 | InsOp = LoongArch::VINSGR2VR_B; |
5917 | break; |
5918 | case LoongArch::PseudoXVINSGR2VR_H: |
5919 | HalfSize = 8; |
5920 | InsOp = LoongArch::VINSGR2VR_H; |
5921 | break; |
5922 | } |
5923 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
5924 | const TargetRegisterClass *RC = &LoongArch::LASX256RegClass; |
5925 | const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass; |
5926 | DebugLoc DL = MI.getDebugLoc(); |
5927 | MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); |
5928 | // XDst = vector_insert XSrc, Elt, Idx |
5929 | Register XDst = MI.getOperand(i: 0).getReg(); |
5930 | Register XSrc = MI.getOperand(i: 1).getReg(); |
5931 | Register Elt = MI.getOperand(i: 2).getReg(); |
5932 | unsigned Idx = MI.getOperand(i: 3).getImm(); |
5933 | |
5934 | Register ScratchReg1 = XSrc; |
5935 | if (Idx >= HalfSize) { |
5936 | ScratchReg1 = MRI.createVirtualRegister(RegClass: RC); |
5937 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::XVPERMI_Q), DestReg: ScratchReg1) |
5938 | .addReg(RegNo: XSrc) |
5939 | .addReg(RegNo: XSrc) |
5940 | .addImm(Val: 1); |
5941 | } |
5942 | |
5943 | Register ScratchSubReg1 = MRI.createVirtualRegister(RegClass: SubRC); |
5944 | Register ScratchSubReg2 = MRI.createVirtualRegister(RegClass: SubRC); |
5945 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::COPY), DestReg: ScratchSubReg1) |
5946 | .addReg(RegNo: ScratchReg1, flags: 0, SubReg: LoongArch::sub_128); |
5947 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: InsOp), DestReg: ScratchSubReg2) |
5948 | .addReg(RegNo: ScratchSubReg1) |
5949 | .addReg(RegNo: Elt) |
5950 | .addImm(Val: Idx >= HalfSize ? Idx - HalfSize : Idx); |
5951 | |
5952 | Register ScratchReg2 = XDst; |
5953 | if (Idx >= HalfSize) |
5954 | ScratchReg2 = MRI.createVirtualRegister(RegClass: RC); |
5955 | |
5956 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::SUBREG_TO_REG), DestReg: ScratchReg2) |
5957 | .addImm(Val: 0) |
5958 | .addReg(RegNo: ScratchSubReg2) |
5959 | .addImm(Val: LoongArch::sub_128); |
5960 | |
5961 | if (Idx >= HalfSize) |
5962 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::XVPERMI_Q), DestReg: XDst) |
5963 | .addReg(RegNo: XSrc) |
5964 | .addReg(RegNo: ScratchReg2) |
5965 | .addImm(Val: 2); |
5966 | |
5967 | MI.eraseFromParent(); |
5968 | return BB; |
5969 | } |
5970 | |
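// Expand a scalar CTPOP pseudo using LSX: move the GPR into lane 0 of a
// zeroed vector, run VPCNT on it, and move the count back to a GPR.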
5971 | static MachineBasicBlock *emitPseudoCTPOP(MachineInstr &MI, |
5972 | MachineBasicBlock *BB, |
5973 | const LoongArchSubtarget &Subtarget) { |
5974 | assert(Subtarget.hasExtLSX()); |
5975 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
5976 | const TargetRegisterClass *RC = &LoongArch::LSX128RegClass; |
5977 | DebugLoc DL = MI.getDebugLoc(); |
5978 | MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); |
5979 | Register Dst = MI.getOperand(i: 0).getReg(); |
5980 | Register Src = MI.getOperand(i: 1).getReg(); |
5981 | Register ScratchReg1 = MRI.createVirtualRegister(RegClass: RC); |
5982 | Register ScratchReg2 = MRI.createVirtualRegister(RegClass: RC); |
5983 | Register ScratchReg3 = MRI.createVirtualRegister(RegClass: RC); |
5984 | |
5985 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::VLDI), DestReg: ScratchReg1).addImm(Val: 0); |
5986 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, |
5987 | MCID: TII->get(Opcode: Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D |
5988 | : LoongArch::VINSGR2VR_W), |
5989 | DestReg: ScratchReg2) |
5990 | .addReg(RegNo: ScratchReg1) |
5991 | .addReg(RegNo: Src) |
5992 | .addImm(Val: 0); |
5993 | BuildMI( |
5994 | BB&: *BB, I&: MI, MIMD: DL, |
5995 | MCID: TII->get(Opcode: Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W), |
5996 | DestReg: ScratchReg3) |
5997 | .addReg(RegNo: ScratchReg2); |
5998 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, |
5999 | MCID: TII->get(Opcode: Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D |
6000 | : LoongArch::VPICKVE2GR_W), |
6001 | DestReg: Dst) |
6002 | .addReg(RegNo: ScratchReg3) |
6003 | .addImm(Val: 0); |
6004 | |
6005 | MI.eraseFromParent(); |
6006 | return BB; |
6007 | } |
6008 | |
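// Expand the [X]VMSK* condition pseudos: run the mask instruction, invert
// the result with [X]VNOR when the pseudo tests for equality with zero, and
// copy the mask bits into a GPR (for LASX, combining the two 128-bit halves
// with BSTRINS).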
6009 | static MachineBasicBlock * |
6010 | emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB, |
6011 | const LoongArchSubtarget &Subtarget) { |
6012 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
6013 | const TargetRegisterClass *RC = &LoongArch::LSX128RegClass; |
6014 | const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
6015 | MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); |
6016 | Register Dst = MI.getOperand(i: 0).getReg(); |
6017 | Register Src = MI.getOperand(i: 1).getReg(); |
6018 | DebugLoc DL = MI.getDebugLoc(); |
6019 | unsigned EleBits = 8; |
6020 | unsigned NotOpc = 0; |
6021 | unsigned MskOpc; |
6022 | |
6023 | switch (MI.getOpcode()) { |
6024 | default: |
6025 | llvm_unreachable("Unexpected opcode" ); |
6026 | case LoongArch::PseudoVMSKLTZ_B: |
6027 | MskOpc = LoongArch::VMSKLTZ_B; |
6028 | break; |
6029 | case LoongArch::PseudoVMSKLTZ_H: |
6030 | MskOpc = LoongArch::VMSKLTZ_H; |
6031 | EleBits = 16; |
6032 | break; |
6033 | case LoongArch::PseudoVMSKLTZ_W: |
6034 | MskOpc = LoongArch::VMSKLTZ_W; |
6035 | EleBits = 32; |
6036 | break; |
6037 | case LoongArch::PseudoVMSKLTZ_D: |
6038 | MskOpc = LoongArch::VMSKLTZ_D; |
6039 | EleBits = 64; |
6040 | break; |
6041 | case LoongArch::PseudoVMSKGEZ_B: |
6042 | MskOpc = LoongArch::VMSKGEZ_B; |
6043 | break; |
6044 | case LoongArch::PseudoVMSKEQZ_B: |
6045 | MskOpc = LoongArch::VMSKNZ_B; |
6046 | NotOpc = LoongArch::VNOR_V; |
6047 | break; |
6048 | case LoongArch::PseudoVMSKNEZ_B: |
6049 | MskOpc = LoongArch::VMSKNZ_B; |
6050 | break; |
6051 | case LoongArch::PseudoXVMSKLTZ_B: |
6052 | MskOpc = LoongArch::XVMSKLTZ_B; |
6053 | RC = &LoongArch::LASX256RegClass; |
6054 | break; |
6055 | case LoongArch::PseudoXVMSKLTZ_H: |
6056 | MskOpc = LoongArch::XVMSKLTZ_H; |
6057 | RC = &LoongArch::LASX256RegClass; |
6058 | EleBits = 16; |
6059 | break; |
6060 | case LoongArch::PseudoXVMSKLTZ_W: |
6061 | MskOpc = LoongArch::XVMSKLTZ_W; |
6062 | RC = &LoongArch::LASX256RegClass; |
6063 | EleBits = 32; |
6064 | break; |
6065 | case LoongArch::PseudoXVMSKLTZ_D: |
6066 | MskOpc = LoongArch::XVMSKLTZ_D; |
6067 | RC = &LoongArch::LASX256RegClass; |
6068 | EleBits = 64; |
6069 | break; |
6070 | case LoongArch::PseudoXVMSKGEZ_B: |
6071 | MskOpc = LoongArch::XVMSKGEZ_B; |
6072 | RC = &LoongArch::LASX256RegClass; |
6073 | break; |
6074 | case LoongArch::PseudoXVMSKEQZ_B: |
6075 | MskOpc = LoongArch::XVMSKNZ_B; |
6076 | NotOpc = LoongArch::XVNOR_V; |
6077 | RC = &LoongArch::LASX256RegClass; |
6078 | break; |
6079 | case LoongArch::PseudoXVMSKNEZ_B: |
6080 | MskOpc = LoongArch::XVMSKNZ_B; |
6081 | RC = &LoongArch::LASX256RegClass; |
6082 | break; |
6083 | } |
6084 | |
6085 | Register Msk = MRI.createVirtualRegister(RegClass: RC); |
6086 | if (NotOpc) { |
6087 | Register Tmp = MRI.createVirtualRegister(RegClass: RC); |
6088 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: MskOpc), DestReg: Tmp).addReg(RegNo: Src); |
6089 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: NotOpc), DestReg: Msk) |
6090 | .addReg(RegNo: Tmp, flags: RegState::Kill) |
6091 | .addReg(RegNo: Tmp, flags: RegState::Kill); |
6092 | } else { |
6093 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: MskOpc), DestReg: Msk).addReg(RegNo: Src); |
6094 | } |
6095 | |
6096 | if (TRI->getRegSizeInBits(RC: *RC) > 128) { |
6097 | Register Lo = MRI.createVirtualRegister(RegClass: &LoongArch::GPRRegClass); |
6098 | Register Hi = MRI.createVirtualRegister(RegClass: &LoongArch::GPRRegClass); |
6099 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::XVPICKVE2GR_WU), DestReg: Lo) |
6100 | .addReg(RegNo: Msk) |
6101 | .addImm(Val: 0); |
6102 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::XVPICKVE2GR_WU), DestReg: Hi) |
6103 | .addReg(RegNo: Msk, flags: RegState::Kill) |
6104 | .addImm(Val: 4); |
6105 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, |
6106 | MCID: TII->get(Opcode: Subtarget.is64Bit() ? LoongArch::BSTRINS_D |
6107 | : LoongArch::BSTRINS_W), |
6108 | DestReg: Dst) |
6109 | .addReg(RegNo: Lo, flags: RegState::Kill) |
6110 | .addReg(RegNo: Hi, flags: RegState::Kill) |
6111 | .addImm(Val: 256 / EleBits - 1) |
6112 | .addImm(Val: 128 / EleBits); |
6113 | } else { |
6114 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::VPICKVE2GR_HU), DestReg: Dst) |
6115 | .addReg(RegNo: Msk, flags: RegState::Kill) |
6116 | .addImm(Val: 0); |
6117 | } |
6118 | |
6119 | MI.eraseFromParent(); |
6120 | return BB; |
6121 | } |
6122 | |
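// Split an f64 register pair: copy the low 32 bits of the source FPR to
// LoReg with MOVFR2GR_S_64 and the high 32 bits to HiReg with MOVFRH2GR_S.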
6123 | static MachineBasicBlock * |
6124 | emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, |
6125 | const LoongArchSubtarget &Subtarget) { |
6126 | assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo && |
6127 | "Unexpected instruction" ); |
6128 | |
6129 | MachineFunction &MF = *BB->getParent(); |
6130 | DebugLoc DL = MI.getDebugLoc(); |
6131 | const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); |
6132 | Register LoReg = MI.getOperand(i: 0).getReg(); |
6133 | Register HiReg = MI.getOperand(i: 1).getReg(); |
6134 | Register SrcReg = MI.getOperand(i: 2).getReg(); |
6135 | |
6136 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: LoongArch::MOVFR2GR_S_64), DestReg: LoReg).addReg(RegNo: SrcReg); |
6137 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: LoongArch::MOVFRH2GR_S), DestReg: HiReg) |
6138 | .addReg(RegNo: SrcReg, flags: getKillRegState(B: MI.getOperand(i: 2).isKill())); |
6139 | MI.eraseFromParent(); // The pseudo instruction is gone now. |
6140 | return BB; |
6141 | } |
6142 | |
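// Build an f64 from a register pair: write the low GPR half into the FPR
// with MOVGR2FR_W_64, then the high half with MOVGR2FRH_W.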
6143 | static MachineBasicBlock * |
6144 | emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, |
6145 | const LoongArchSubtarget &Subtarget) { |
6146 | assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo && |
6147 | "Unexpected instruction" ); |
6148 | |
6149 | MachineFunction &MF = *BB->getParent(); |
6150 | DebugLoc DL = MI.getDebugLoc(); |
6151 | const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); |
6152 | MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); |
6153 | Register TmpReg = MRI.createVirtualRegister(RegClass: &LoongArch::FPR64RegClass); |
6154 | Register DstReg = MI.getOperand(i: 0).getReg(); |
6155 | Register LoReg = MI.getOperand(i: 1).getReg(); |
6156 | Register HiReg = MI.getOperand(i: 2).getReg(); |
6157 | |
6158 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: LoongArch::MOVGR2FR_W_64), DestReg: TmpReg) |
6159 | .addReg(RegNo: LoReg, flags: getKillRegState(B: MI.getOperand(i: 1).isKill())); |
6160 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: LoongArch::MOVGR2FRH_W), DestReg: DstReg) |
6161 | .addReg(RegNo: TmpReg, flags: RegState::Kill) |
6162 | .addReg(RegNo: HiReg, flags: getKillRegState(B: MI.getOperand(i: 2).isKill())); |
6163 | MI.eraseFromParent(); // The pseudo instruction is gone now. |
6164 | return BB; |
6165 | } |
6166 | |
6167 | static bool isSelectPseudo(MachineInstr &MI) { |
6168 | switch (MI.getOpcode()) { |
6169 | default: |
6170 | return false; |
6171 | case LoongArch::Select_GPR_Using_CC_GPR: |
6172 | return true; |
6173 | } |
6174 | } |
6175 | |
6176 | static MachineBasicBlock * |
6177 | emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, |
6178 | const LoongArchSubtarget &Subtarget) { |
6179 | // To "insert" Select_* instructions, we actually have to insert the triangle |
6180 | // control-flow pattern. The incoming instructions know the destination vreg |
6181 | // to set, the condition code register to branch on, the true/false values to |
6182 | // select between, and the condcode to use to select the appropriate branch. |
6183 | // |
6184 | // We produce the following control flow: |
6185 | // HeadMBB |
6186 | // | \ |
6187 | // | IfFalseMBB |
6188 | // | / |
6189 | // TailMBB |
6190 | // |
6191 | // When we find a sequence of selects we attempt to optimize their emission |
6192 | // by sharing the control flow. Currently we only handle cases where we have |
6193 | // multiple selects with the exact same condition (same LHS, RHS and CC). |
6194 | // The selects may be interleaved with other instructions if the other |
6195 | // instructions meet some requirements we deem safe: |
6196 | // - They are not pseudo instructions. |
// - They are debug instructions, or otherwise they do not have
//   side-effects, do not access memory, and their inputs do not depend on
//   the results of the select pseudo-instructions.
6200 | // The TrueV/FalseV operands of the selects cannot depend on the result of |
6201 | // previous selects in the sequence. |
6202 | // These conditions could be further relaxed. See the X86 target for a |
6203 | // related approach and more information. |
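//
// As a minimal sketch (names illustrative, not authoritative), a single
//   %res = Select_GPR_Using_CC_GPR %lhs, %rhs, cc, %tval, %fval
// is rewritten into:
//   HeadMBB:    Bcc %lhs, %rhs, TailMBB   // branch if the condition holds
//   IfFalseMBB: (empty, falls through)
//   TailMBB:    %res = PHI [ %tval, HeadMBB ], [ %fval, IfFalseMBB ]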
6204 | |
6205 | Register LHS = MI.getOperand(i: 1).getReg(); |
6206 | Register RHS; |
6207 | if (MI.getOperand(i: 2).isReg()) |
6208 | RHS = MI.getOperand(i: 2).getReg(); |
6209 | auto CC = static_cast<unsigned>(MI.getOperand(i: 3).getImm()); |
6210 | |
6211 | SmallVector<MachineInstr *, 4> SelectDebugValues; |
6212 | SmallSet<Register, 4> SelectDests; |
6213 | SelectDests.insert(V: MI.getOperand(i: 0).getReg()); |
6214 | |
6215 | MachineInstr *LastSelectPseudo = &MI; |
6216 | for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI); |
6217 | SequenceMBBI != E; ++SequenceMBBI) { |
6218 | if (SequenceMBBI->isDebugInstr()) |
6219 | continue; |
6220 | if (isSelectPseudo(MI&: *SequenceMBBI)) { |
6221 | if (SequenceMBBI->getOperand(i: 1).getReg() != LHS || |
6222 | !SequenceMBBI->getOperand(i: 2).isReg() || |
6223 | SequenceMBBI->getOperand(i: 2).getReg() != RHS || |
6224 | SequenceMBBI->getOperand(i: 3).getImm() != CC || |
6225 | SelectDests.count(V: SequenceMBBI->getOperand(i: 4).getReg()) || |
6226 | SelectDests.count(V: SequenceMBBI->getOperand(i: 5).getReg())) |
6227 | break; |
6228 | LastSelectPseudo = &*SequenceMBBI; |
6229 | SequenceMBBI->collectDebugValues(DbgValues&: SelectDebugValues); |
6230 | SelectDests.insert(V: SequenceMBBI->getOperand(i: 0).getReg()); |
6231 | continue; |
6232 | } |
6233 | if (SequenceMBBI->hasUnmodeledSideEffects() || |
6234 | SequenceMBBI->mayLoadOrStore() || |
6235 | SequenceMBBI->usesCustomInsertionHook()) |
6236 | break; |
6237 | if (llvm::any_of(Range: SequenceMBBI->operands(), P: [&](MachineOperand &MO) { |
6238 | return MO.isReg() && MO.isUse() && SelectDests.count(V: MO.getReg()); |
6239 | })) |
6240 | break; |
6241 | } |
6242 | |
6243 | const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo(); |
6244 | const BasicBlock *LLVM_BB = BB->getBasicBlock(); |
6245 | DebugLoc DL = MI.getDebugLoc(); |
6246 | MachineFunction::iterator I = ++BB->getIterator(); |
6247 | |
6248 | MachineBasicBlock *HeadMBB = BB; |
6249 | MachineFunction *F = BB->getParent(); |
6250 | MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(BB: LLVM_BB); |
6251 | MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(BB: LLVM_BB); |
6252 | |
6253 | F->insert(MBBI: I, MBB: IfFalseMBB); |
6254 | F->insert(MBBI: I, MBB: TailMBB); |
6255 | |
6256 | // Set the call frame size on entry to the new basic blocks. |
6257 | unsigned CallFrameSize = TII.getCallFrameSizeAt(MI&: *LastSelectPseudo); |
6258 | IfFalseMBB->setCallFrameSize(CallFrameSize); |
6259 | TailMBB->setCallFrameSize(CallFrameSize); |
6260 | |
6261 | // Transfer debug instructions associated with the selects to TailMBB. |
6262 | for (MachineInstr *DebugInstr : SelectDebugValues) { |
6263 | TailMBB->push_back(MI: DebugInstr->removeFromParent()); |
6264 | } |
6265 | |
6266 | // Move all instructions after the sequence to TailMBB. |
6267 | TailMBB->splice(Where: TailMBB->end(), Other: HeadMBB, |
6268 | From: std::next(x: LastSelectPseudo->getIterator()), To: HeadMBB->end()); |
6269 | // Update machine-CFG edges by transferring all successors of the current |
6270 | // block to the new block which will contain the Phi nodes for the selects. |
6271 | TailMBB->transferSuccessorsAndUpdatePHIs(FromMBB: HeadMBB); |
6272 | // Set the successors for HeadMBB. |
6273 | HeadMBB->addSuccessor(Succ: IfFalseMBB); |
6274 | HeadMBB->addSuccessor(Succ: TailMBB); |
6275 | |
6276 | // Insert appropriate branch. |
6277 | if (MI.getOperand(i: 2).isImm()) |
6278 | BuildMI(BB: HeadMBB, MIMD: DL, MCID: TII.get(Opcode: CC)) |
6279 | .addReg(RegNo: LHS) |
6280 | .addImm(Val: MI.getOperand(i: 2).getImm()) |
6281 | .addMBB(MBB: TailMBB); |
6282 | else |
6283 | BuildMI(BB: HeadMBB, MIMD: DL, MCID: TII.get(Opcode: CC)).addReg(RegNo: LHS).addReg(RegNo: RHS).addMBB(MBB: TailMBB); |
6284 | |
6285 | // IfFalseMBB just falls through to TailMBB. |
6286 | IfFalseMBB->addSuccessor(Succ: TailMBB); |
6287 | |
6288 | // Create PHIs for all of the select pseudo-instructions. |
6289 | auto SelectMBBI = MI.getIterator(); |
6290 | auto SelectEnd = std::next(x: LastSelectPseudo->getIterator()); |
6291 | auto InsertionPoint = TailMBB->begin(); |
6292 | while (SelectMBBI != SelectEnd) { |
6293 | auto Next = std::next(x: SelectMBBI); |
6294 | if (isSelectPseudo(MI&: *SelectMBBI)) { |
6295 | // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ] |
6296 | BuildMI(BB&: *TailMBB, I: InsertionPoint, MIMD: SelectMBBI->getDebugLoc(), |
6297 | MCID: TII.get(Opcode: LoongArch::PHI), DestReg: SelectMBBI->getOperand(i: 0).getReg()) |
6298 | .addReg(RegNo: SelectMBBI->getOperand(i: 4).getReg()) |
6299 | .addMBB(MBB: HeadMBB) |
6300 | .addReg(RegNo: SelectMBBI->getOperand(i: 5).getReg()) |
6301 | .addMBB(MBB: IfFalseMBB); |
6302 | SelectMBBI->eraseFromParent(); |
6303 | } |
6304 | SelectMBBI = Next; |
6305 | } |
6306 | |
6307 | F->getProperties().resetNoPHIs(); |
6308 | return TailMBB; |
6309 | } |
6310 | |
6311 | MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( |
6312 | MachineInstr &MI, MachineBasicBlock *BB) const { |
6313 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
6314 | DebugLoc DL = MI.getDebugLoc(); |
6315 | |
6316 | switch (MI.getOpcode()) { |
6317 | default: |
6318 | llvm_unreachable("Unexpected instr type to insert" ); |
6319 | case LoongArch::DIV_W: |
6320 | case LoongArch::DIV_WU: |
6321 | case LoongArch::MOD_W: |
6322 | case LoongArch::MOD_WU: |
6323 | case LoongArch::DIV_D: |
6324 | case LoongArch::DIV_DU: |
6325 | case LoongArch::MOD_D: |
6326 | case LoongArch::MOD_DU: |
6327 | return insertDivByZeroTrap(MI, MBB: BB); |
6329 | case LoongArch::WRFCSR: { |
6330 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::MOVGR2FCSR), |
6331 | DestReg: LoongArch::FCSR0 + MI.getOperand(i: 0).getImm()) |
6332 | .addReg(RegNo: MI.getOperand(i: 1).getReg()); |
6333 | MI.eraseFromParent(); |
6334 | return BB; |
6335 | } |
6336 | case LoongArch::RDFCSR: { |
6337 | MachineInstr *ReadFCSR = |
6338 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::MOVFCSR2GR), |
6339 | DestReg: MI.getOperand(i: 0).getReg()) |
6340 | .addReg(RegNo: LoongArch::FCSR0 + MI.getOperand(i: 1).getImm()); |
6341 | ReadFCSR->getOperand(i: 1).setIsUndef(); |
6342 | MI.eraseFromParent(); |
6343 | return BB; |
6344 | } |
6345 | case LoongArch::Select_GPR_Using_CC_GPR: |
6346 | return emitSelectPseudo(MI, BB, Subtarget); |
6347 | case LoongArch::BuildPairF64Pseudo: |
6348 | return emitBuildPairF64Pseudo(MI, BB, Subtarget); |
6349 | case LoongArch::SplitPairF64Pseudo: |
6350 | return emitSplitPairF64Pseudo(MI, BB, Subtarget); |
6351 | case LoongArch::PseudoVBZ: |
6352 | case LoongArch::PseudoVBZ_B: |
6353 | case LoongArch::PseudoVBZ_H: |
6354 | case LoongArch::PseudoVBZ_W: |
6355 | case LoongArch::PseudoVBZ_D: |
6356 | case LoongArch::PseudoVBNZ: |
6357 | case LoongArch::PseudoVBNZ_B: |
6358 | case LoongArch::PseudoVBNZ_H: |
6359 | case LoongArch::PseudoVBNZ_W: |
6360 | case LoongArch::PseudoVBNZ_D: |
6361 | case LoongArch::PseudoXVBZ: |
6362 | case LoongArch::PseudoXVBZ_B: |
6363 | case LoongArch::PseudoXVBZ_H: |
6364 | case LoongArch::PseudoXVBZ_W: |
6365 | case LoongArch::PseudoXVBZ_D: |
6366 | case LoongArch::PseudoXVBNZ: |
6367 | case LoongArch::PseudoXVBNZ_B: |
6368 | case LoongArch::PseudoXVBNZ_H: |
6369 | case LoongArch::PseudoXVBNZ_W: |
6370 | case LoongArch::PseudoXVBNZ_D: |
6371 | return emitVecCondBranchPseudo(MI, BB, Subtarget); |
6372 | case LoongArch::PseudoXVINSGR2VR_B: |
6373 | case LoongArch::PseudoXVINSGR2VR_H: |
6374 | return emitPseudoXVINSGR2VR(MI, BB, Subtarget); |
6375 | case LoongArch::PseudoCTPOP: |
6376 | return emitPseudoCTPOP(MI, BB, Subtarget); |
6377 | case LoongArch::PseudoVMSKLTZ_B: |
6378 | case LoongArch::PseudoVMSKLTZ_H: |
6379 | case LoongArch::PseudoVMSKLTZ_W: |
6380 | case LoongArch::PseudoVMSKLTZ_D: |
6381 | case LoongArch::PseudoVMSKGEZ_B: |
6382 | case LoongArch::PseudoVMSKEQZ_B: |
6383 | case LoongArch::PseudoVMSKNEZ_B: |
6384 | case LoongArch::PseudoXVMSKLTZ_B: |
6385 | case LoongArch::PseudoXVMSKLTZ_H: |
6386 | case LoongArch::PseudoXVMSKLTZ_W: |
6387 | case LoongArch::PseudoXVMSKLTZ_D: |
6388 | case LoongArch::PseudoXVMSKGEZ_B: |
6389 | case LoongArch::PseudoXVMSKEQZ_B: |
6390 | case LoongArch::PseudoXVMSKNEZ_B: |
6391 | return emitPseudoVMSKCOND(MI, BB, Subtarget); |
6392 | case TargetOpcode::STATEPOINT: |
// STATEPOINT is a pseudo instruction which has no implicit defs/uses,
// while the BL call instruction (to which the statepoint is lowered at
// the end) has an implicit def of R1. This def is early-clobber, as it is
// set at the moment of the call, before any use is read.
// Add this implicit dead def here as a workaround.
6398 | MI.addOperand(MF&: *MI.getMF(), |
6399 | Op: MachineOperand::CreateReg( |
6400 | Reg: LoongArch::R1, /*isDef*/ true, |
6401 | /*isImp*/ true, /*isKill*/ false, /*isDead*/ true, |
6402 | /*isUndef*/ false, /*isEarlyClobber*/ true)); |
6403 | if (!Subtarget.is64Bit()) |
6404 | report_fatal_error(reason: "STATEPOINT is only supported on 64-bit targets" ); |
6405 | return emitPatchPoint(MI, MBB: BB); |
6406 | } |
6407 | } |
6408 | |
6409 | bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses( |
6410 | EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, |
6411 | unsigned *Fast) const { |
6412 | if (!Subtarget.hasUAL()) |
6413 | return false; |
6414 | |
6415 | // TODO: set reasonable speed number. |
6416 | if (Fast) |
6417 | *Fast = 1; |
6418 | return true; |
6419 | } |
6420 | |
6421 | const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { |
6422 | switch ((LoongArchISD::NodeType)Opcode) { |
6423 | case LoongArchISD::FIRST_NUMBER: |
6424 | break; |
6425 | |
6426 | #define NODE_NAME_CASE(node) \ |
6427 | case LoongArchISD::node: \ |
6428 | return "LoongArchISD::" #node; |
6429 | |
6430 | // TODO: Add more target-dependent nodes later. |
6431 | NODE_NAME_CASE(CALL) |
6432 | NODE_NAME_CASE(CALL_MEDIUM) |
6433 | NODE_NAME_CASE(CALL_LARGE) |
6434 | NODE_NAME_CASE(RET) |
6435 | NODE_NAME_CASE(TAIL) |
6436 | NODE_NAME_CASE(TAIL_MEDIUM) |
6437 | NODE_NAME_CASE(TAIL_LARGE) |
6438 | NODE_NAME_CASE(SELECT_CC) |
6439 | NODE_NAME_CASE(SLL_W) |
6440 | NODE_NAME_CASE(SRA_W) |
6441 | NODE_NAME_CASE(SRL_W) |
6442 | NODE_NAME_CASE(BSTRINS) |
6443 | NODE_NAME_CASE(BSTRPICK) |
6444 | NODE_NAME_CASE(MOVGR2FR_W_LA64) |
6445 | NODE_NAME_CASE(MOVFR2GR_S_LA64) |
6446 | NODE_NAME_CASE(FTINT) |
6447 | NODE_NAME_CASE(BUILD_PAIR_F64) |
6448 | NODE_NAME_CASE(SPLIT_PAIR_F64) |
6449 | NODE_NAME_CASE(REVB_2H) |
6450 | NODE_NAME_CASE(REVB_2W) |
6451 | NODE_NAME_CASE(BITREV_4B) |
6452 | NODE_NAME_CASE(BITREV_8B) |
6453 | NODE_NAME_CASE(BITREV_W) |
6454 | NODE_NAME_CASE(ROTR_W) |
6455 | NODE_NAME_CASE(ROTL_W) |
6456 | NODE_NAME_CASE(DIV_W) |
6457 | NODE_NAME_CASE(DIV_WU) |
6458 | NODE_NAME_CASE(MOD_W) |
6459 | NODE_NAME_CASE(MOD_WU) |
6460 | NODE_NAME_CASE(CLZ_W) |
6461 | NODE_NAME_CASE(CTZ_W) |
6462 | NODE_NAME_CASE(DBAR) |
6463 | NODE_NAME_CASE(IBAR) |
6464 | NODE_NAME_CASE(BREAK) |
6465 | NODE_NAME_CASE(SYSCALL) |
6466 | NODE_NAME_CASE(CRC_W_B_W) |
6467 | NODE_NAME_CASE(CRC_W_H_W) |
6468 | NODE_NAME_CASE(CRC_W_W_W) |
6469 | NODE_NAME_CASE(CRC_W_D_W) |
6470 | NODE_NAME_CASE(CRCC_W_B_W) |
6471 | NODE_NAME_CASE(CRCC_W_H_W) |
6472 | NODE_NAME_CASE(CRCC_W_W_W) |
6473 | NODE_NAME_CASE(CRCC_W_D_W) |
6474 | NODE_NAME_CASE(CSRRD) |
6475 | NODE_NAME_CASE(CSRWR) |
6476 | NODE_NAME_CASE(CSRXCHG) |
6477 | NODE_NAME_CASE(IOCSRRD_B) |
6478 | NODE_NAME_CASE(IOCSRRD_H) |
6479 | NODE_NAME_CASE(IOCSRRD_W) |
6480 | NODE_NAME_CASE(IOCSRRD_D) |
6481 | NODE_NAME_CASE(IOCSRWR_B) |
6482 | NODE_NAME_CASE(IOCSRWR_H) |
6483 | NODE_NAME_CASE(IOCSRWR_W) |
6484 | NODE_NAME_CASE(IOCSRWR_D) |
6485 | NODE_NAME_CASE(CPUCFG) |
6486 | NODE_NAME_CASE(MOVGR2FCSR) |
6487 | NODE_NAME_CASE(MOVFCSR2GR) |
6488 | NODE_NAME_CASE(CACOP_D) |
6489 | NODE_NAME_CASE(CACOP_W) |
6490 | NODE_NAME_CASE(VSHUF) |
6491 | NODE_NAME_CASE(VPICKEV) |
6492 | NODE_NAME_CASE(VPICKOD) |
6493 | NODE_NAME_CASE(VPACKEV) |
6494 | NODE_NAME_CASE(VPACKOD) |
6495 | NODE_NAME_CASE(VILVL) |
6496 | NODE_NAME_CASE(VILVH) |
6497 | NODE_NAME_CASE(VSHUF4I) |
6498 | NODE_NAME_CASE(VREPLVEI) |
6499 | NODE_NAME_CASE(VREPLGR2VR) |
6500 | NODE_NAME_CASE(XVPERMI) |
6501 | NODE_NAME_CASE(VPICK_SEXT_ELT) |
6502 | NODE_NAME_CASE(VPICK_ZEXT_ELT) |
6503 | NODE_NAME_CASE(VREPLVE) |
6504 | NODE_NAME_CASE(VALL_ZERO) |
6505 | NODE_NAME_CASE(VANY_ZERO) |
6506 | NODE_NAME_CASE(VALL_NONZERO) |
6507 | NODE_NAME_CASE(VANY_NONZERO) |
6508 | NODE_NAME_CASE(FRECIPE) |
6509 | NODE_NAME_CASE(FRSQRTE) |
6510 | NODE_NAME_CASE(VSLLI) |
6511 | NODE_NAME_CASE(VSRLI) |
6512 | NODE_NAME_CASE(VBSLL) |
6513 | NODE_NAME_CASE(VBSRL) |
6514 | NODE_NAME_CASE(VLDREPL) |
6515 | NODE_NAME_CASE(VMSKLTZ) |
6516 | NODE_NAME_CASE(VMSKGEZ) |
6517 | NODE_NAME_CASE(VMSKEQZ) |
6518 | NODE_NAME_CASE(VMSKNEZ) |
6519 | NODE_NAME_CASE(XVMSKLTZ) |
6520 | NODE_NAME_CASE(XVMSKGEZ) |
6521 | NODE_NAME_CASE(XVMSKEQZ) |
6522 | NODE_NAME_CASE(XVMSKNEZ) |
6523 | } |
6524 | #undef NODE_NAME_CASE |
6525 | return nullptr; |
6526 | } |
6527 | |
6528 | //===----------------------------------------------------------------------===// |
6529 | // Calling Convention Implementation |
6530 | //===----------------------------------------------------------------------===// |
6531 | |
// Eight general-purpose registers a0-a7 are used for passing integer
// arguments, with a0-a1 reused to return values. Generally, the GPRs are used
// to pass fixed-point arguments, and floating-point arguments when no FPR is
// available or with a soft-float ABI.
6536 | const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6, |
6537 | LoongArch::R7, LoongArch::R8, LoongArch::R9, |
6538 | LoongArch::R10, LoongArch::R11}; |
// Eight floating-point registers fa0-fa7 are used for passing floating-point
// arguments, and fa0-fa1 are also used to return values.
6541 | const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2, |
6542 | LoongArch::F3, LoongArch::F4, LoongArch::F5, |
6543 | LoongArch::F6, LoongArch::F7}; |
6544 | // FPR32 and FPR64 alias each other. |
6545 | const MCPhysReg ArgFPR64s[] = { |
6546 | LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64, |
6547 | LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64}; |
6548 | |
6549 | const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2, |
6550 | LoongArch::VR3, LoongArch::VR4, LoongArch::VR5, |
6551 | LoongArch::VR6, LoongArch::VR7}; |
6552 | |
6553 | const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2, |
6554 | LoongArch::XR3, LoongArch::XR4, LoongArch::XR5, |
6555 | LoongArch::XR6, LoongArch::XR7}; |
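
// For example (illustrative, LP64D ABI): for f(i64 %a, double %b, i64 %c),
// %a is assigned a0, %b is assigned fa0 and %c is assigned a1, while LSX and
// LASX vector arguments would start at vr0 and xr0 respectively.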
6556 | |
6557 | // Pass a 2*GRLen argument that has been split into two GRLen values through |
6558 | // registers or the stack as necessary. |
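// For example (illustrative), on LA32 an i64 split into two i32 halves may
// end up in (a0, a1), split between a7 and the stack when only one GPR
// remains, or entirely on the stack.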
6559 | static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, |
6560 | CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, |
6561 | unsigned ValNo2, MVT ValVT2, MVT LocVT2, |
6562 | ISD::ArgFlagsTy ArgFlags2) { |
6563 | unsigned GRLenInBytes = GRLen / 8; |
6564 | if (Register Reg = State.AllocateReg(Regs: ArgGPRs)) { |
6565 | // At least one half can be passed via register. |
6566 | State.addLoc(V: CCValAssign::getReg(ValNo: VA1.getValNo(), ValVT: VA1.getValVT(), Reg, |
6567 | LocVT: VA1.getLocVT(), HTP: CCValAssign::Full)); |
6568 | } else { |
6569 | // Both halves must be passed on the stack, with proper alignment. |
6570 | Align StackAlign = |
6571 | std::max(a: Align(GRLenInBytes), b: ArgFlags1.getNonZeroOrigAlign()); |
6572 | State.addLoc( |
6573 | V: CCValAssign::getMem(ValNo: VA1.getValNo(), ValVT: VA1.getValVT(), |
6574 | Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: StackAlign), |
6575 | LocVT: VA1.getLocVT(), HTP: CCValAssign::Full)); |
6576 | State.addLoc(V: CCValAssign::getMem( |
6577 | ValNo: ValNo2, ValVT: ValVT2, Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: Align(GRLenInBytes)), |
6578 | LocVT: LocVT2, HTP: CCValAssign::Full)); |
6579 | return false; |
6580 | } |
6581 | if (Register Reg = State.AllocateReg(Regs: ArgGPRs)) { |
6582 | // The second half can also be passed via register. |
6583 | State.addLoc( |
6584 | V: CCValAssign::getReg(ValNo: ValNo2, ValVT: ValVT2, Reg, LocVT: LocVT2, HTP: CCValAssign::Full)); |
6585 | } else { |
6586 | // The second half is passed via the stack, without additional alignment. |
6587 | State.addLoc(V: CCValAssign::getMem( |
6588 | ValNo: ValNo2, ValVT: ValVT2, Offset: State.AllocateStack(Size: GRLenInBytes, Alignment: Align(GRLenInBytes)), |
6589 | LocVT: LocVT2, HTP: CCValAssign::Full)); |
6590 | } |
6591 | return false; |
6592 | } |
6593 | |
6594 | // Implements the LoongArch calling convention. Returns true upon failure. |
6595 | static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, |
6596 | unsigned ValNo, MVT ValVT, |
6597 | CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, |
6598 | CCState &State, bool IsFixed, bool IsRet, |
6599 | Type *OrigTy) { |
6600 | unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits(); |
assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen" );
6602 | MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64; |
6603 | MVT LocVT = ValVT; |
6604 | |
6605 | // Any return value split into more than two values can't be returned |
6606 | // directly. |
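// (For example, an i128 return on LA32 would be split into four i32 values;
// reporting failure here is expected to make the generic code demote such a
// return to an sret-style hidden pointer instead.)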
6607 | if (IsRet && ValNo > 1) |
6608 | return true; |
6609 | |
// GPRs are used for floats when targeting a soft-float ABI, when passing a
// variadic argument, or when no FPR is available.
bool UseGPRForFloat = true;
6612 | |
6613 | switch (ABI) { |
6614 | default: |
6615 | llvm_unreachable("Unexpected ABI" ); |
6616 | break; |
6617 | case LoongArchABI::ABI_ILP32F: |
6618 | case LoongArchABI::ABI_LP64F: |
6619 | case LoongArchABI::ABI_ILP32D: |
6620 | case LoongArchABI::ABI_LP64D: |
6621 | UseGPRForFloat = !IsFixed; |
6622 | break; |
6623 | case LoongArchABI::ABI_ILP32S: |
6624 | case LoongArchABI::ABI_LP64S: |
6625 | break; |
6626 | } |
6627 | |
6628 | // If this is a variadic argument, the LoongArch calling convention requires |
6629 | // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8 |
6630 | // byte alignment. An aligned register should be used regardless of whether |
6631 | // the original argument was split during legalisation or not. The argument |
6632 | // will not be passed by registers if the original type is larger than |
6633 | // 2*GRLen, so the register alignment rule does not apply. |
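// For example (illustrative): on LA32, a variadic double (size and alignment
// both 2*GRLen/8 = 8 bytes) that would otherwise start at a5 skips a5 and is
// assigned the aligned pair (a6, a7) instead.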
6634 | unsigned TwoGRLenInBytes = (2 * GRLen) / 8; |
6635 | if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes && |
6636 | DL.getTypeAllocSize(Ty: OrigTy) == TwoGRLenInBytes) { |
6637 | unsigned RegIdx = State.getFirstUnallocated(Regs: ArgGPRs); |
6638 | // Skip 'odd' register if necessary. |
6639 | if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1) |
6640 | State.AllocateReg(Regs: ArgGPRs); |
6641 | } |
6642 | |
6643 | SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs(); |
6644 | SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags = |
6645 | State.getPendingArgFlags(); |
6646 | |
6647 | assert(PendingLocs.size() == PendingArgFlags.size() && |
6648 | "PendingLocs and PendingArgFlags out of sync" ); |
6649 | |
6650 | // FPR32 and FPR64 alias each other. |
6651 | if (State.getFirstUnallocated(Regs: ArgFPR32s) == std::size(ArgFPR32s)) |
6652 | UseGPRForFloat = true; |
6653 | |
6654 | if (UseGPRForFloat && ValVT == MVT::f32) { |
6655 | LocVT = GRLenVT; |
6656 | LocInfo = CCValAssign::BCvt; |
6657 | } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) { |
6658 | LocVT = MVT::i64; |
6659 | LocInfo = CCValAssign::BCvt; |
6660 | } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) { |
6661 | // Handle passing f64 on LA32D with a soft float ABI or when floating point |
6662 | // registers are exhausted. |
6663 | assert(PendingLocs.empty() && "Can't lower f64 if it is split" ); |
// Depending on available argument GPRs, f64 may be passed in a pair of
6665 | // GPRs, split between a GPR and the stack, or passed completely on the |
6666 | // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these |
6667 | // cases. |
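// For example (illustrative): with a0-a6 already allocated, the low half is
// assigned a7 and the high half is placed on the stack.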
6668 | MCRegister Reg = State.AllocateReg(Regs: ArgGPRs); |
6669 | if (!Reg) { |
6670 | int64_t StackOffset = State.AllocateStack(Size: 8, Alignment: Align(8)); |
6671 | State.addLoc( |
6672 | V: CCValAssign::getMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo)); |
6673 | return false; |
6674 | } |
6675 | LocVT = MVT::i32; |
6676 | State.addLoc(V: CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, HTP: LocInfo)); |
6677 | MCRegister HiReg = State.AllocateReg(Regs: ArgGPRs); |
6678 | if (HiReg) { |
6679 | State.addLoc( |
6680 | V: CCValAssign::getCustomReg(ValNo, ValVT, Reg: HiReg, LocVT, HTP: LocInfo)); |
6681 | } else { |
6682 | int64_t StackOffset = State.AllocateStack(Size: 4, Alignment: Align(4)); |
6683 | State.addLoc( |
6684 | V: CCValAssign::getCustomMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo)); |
6685 | } |
6686 | return false; |
6687 | } |
6688 | |
6689 | // Split arguments might be passed indirectly, so keep track of the pending |
6690 | // values. |
6691 | if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) { |
6692 | LocVT = GRLenVT; |
6693 | LocInfo = CCValAssign::Indirect; |
6694 | PendingLocs.push_back( |
6695 | Elt: CCValAssign::getPending(ValNo, ValVT, LocVT, HTP: LocInfo)); |
6696 | PendingArgFlags.push_back(Elt: ArgFlags); |
6697 | if (!ArgFlags.isSplitEnd()) { |
6698 | return false; |
6699 | } |
6700 | } |
6701 | |
6702 | // If the split argument only had two elements, it should be passed directly |
6703 | // in registers or on the stack. |
6704 | if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() && |
6705 | PendingLocs.size() <= 2) { |
6706 | assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()" ); |
6707 | // Apply the normal calling convention rules to the first half of the |
6708 | // split argument. |
6709 | CCValAssign VA = PendingLocs[0]; |
6710 | ISD::ArgFlagsTy AF = PendingArgFlags[0]; |
6711 | PendingLocs.clear(); |
6712 | PendingArgFlags.clear(); |
6713 | return CC_LoongArchAssign2GRLen(GRLen, State, VA1: VA, ArgFlags1: AF, ValNo2: ValNo, ValVT2: ValVT, LocVT2: LocVT, |
6714 | ArgFlags2: ArgFlags); |
6715 | } |
6716 | |
6717 | // Allocate to a register if possible, or else a stack slot. |
6718 | Register Reg; |
6719 | unsigned StoreSizeBytes = GRLen / 8; |
6720 | Align StackAlign = Align(GRLen / 8); |
6721 | |
6722 | if (ValVT == MVT::f32 && !UseGPRForFloat) |
6723 | Reg = State.AllocateReg(Regs: ArgFPR32s); |
6724 | else if (ValVT == MVT::f64 && !UseGPRForFloat) |
6725 | Reg = State.AllocateReg(Regs: ArgFPR64s); |
6726 | else if (ValVT.is128BitVector()) |
6727 | Reg = State.AllocateReg(Regs: ArgVRs); |
6728 | else if (ValVT.is256BitVector()) |
6729 | Reg = State.AllocateReg(Regs: ArgXRs); |
6730 | else |
6731 | Reg = State.AllocateReg(Regs: ArgGPRs); |
6732 | |
6733 | unsigned StackOffset = |
6734 | Reg ? 0 : State.AllocateStack(Size: StoreSizeBytes, Alignment: StackAlign); |
6735 | |
6736 | // If we reach this point and PendingLocs is non-empty, we must be at the |
6737 | // end of a split argument that must be passed indirectly. |
6738 | if (!PendingLocs.empty()) { |
6739 | assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()" ); |
6740 | assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()" ); |
6741 | for (auto &It : PendingLocs) { |
6742 | if (Reg) |
6743 | It.convertToReg(Reg); |
6744 | else |
6745 | It.convertToMem(Offset: StackOffset); |
6746 | State.addLoc(V: It); |
6747 | } |
6748 | PendingLocs.clear(); |
6749 | PendingArgFlags.clear(); |
6750 | return false; |
6751 | } |
6752 | assert((!UseGPRForFloat || LocVT == GRLenVT) && |
6753 | "Expected an GRLenVT at this stage" ); |
6754 | |
6755 | if (Reg) { |
6756 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, HTP: LocInfo)); |
6757 | return false; |
6758 | } |
6759 | |
6760 | // When a floating-point value is passed on the stack, no bit-cast is needed. |
6761 | if (ValVT.isFloatingPoint()) { |
6762 | LocVT = ValVT; |
6763 | LocInfo = CCValAssign::Full; |
6764 | } |
6765 | |
6766 | State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset: StackOffset, LocVT, HTP: LocInfo)); |
6767 | return false; |
6768 | } |
6769 | |
6770 | void LoongArchTargetLowering::analyzeInputArgs( |
6771 | MachineFunction &MF, CCState &CCInfo, |
6772 | const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet, |
6773 | LoongArchCCAssignFn Fn) const { |
6774 | FunctionType *FType = MF.getFunction().getFunctionType(); |
6775 | for (unsigned i = 0, e = Ins.size(); i != e; ++i) { |
6776 | MVT ArgVT = Ins[i].VT; |
6777 | Type *ArgTy = nullptr; |
6778 | if (IsRet) |
6779 | ArgTy = FType->getReturnType(); |
6780 | else if (Ins[i].isOrigArg()) |
6781 | ArgTy = FType->getParamType(i: Ins[i].getOrigArgIndex()); |
6782 | LoongArchABI::ABI ABI = |
6783 | MF.getSubtarget<LoongArchSubtarget>().getTargetABI(); |
6784 | if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags, |
6785 | CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) { |
6786 | LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT |
6787 | << '\n'); |
6788 | llvm_unreachable("" ); |
6789 | } |
6790 | } |
6791 | } |
6792 | |
6793 | void LoongArchTargetLowering::analyzeOutputArgs( |
6794 | MachineFunction &MF, CCState &CCInfo, |
6795 | const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet, |
6796 | CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const { |
6797 | for (unsigned i = 0, e = Outs.size(); i != e; ++i) { |
6798 | MVT ArgVT = Outs[i].VT; |
6799 | Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; |
6800 | LoongArchABI::ABI ABI = |
6801 | MF.getSubtarget<LoongArchSubtarget>().getTargetABI(); |
6802 | if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags, |
6803 | CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) { |
6804 | LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT |
6805 | << "\n" ); |
6806 | llvm_unreachable("" ); |
6807 | } |
6808 | } |
6809 | } |
6810 | |
6811 | // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect |
6812 | // values. |
6813 | static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, |
6814 | const CCValAssign &VA, const SDLoc &DL) { |
6815 | switch (VA.getLocInfo()) { |
6816 | default: |
6817 | llvm_unreachable("Unexpected CCValAssign::LocInfo" ); |
6818 | case CCValAssign::Full: |
6819 | case CCValAssign::Indirect: |
6820 | break; |
6821 | case CCValAssign::BCvt: |
6822 | if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) |
6823 | Val = DAG.getNode(Opcode: LoongArchISD::MOVGR2FR_W_LA64, DL, VT: MVT::f32, Operand: Val); |
6824 | else |
6825 | Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: VA.getValVT(), Operand: Val); |
6826 | break; |
6827 | } |
6828 | return Val; |
6829 | } |
6830 | |
6831 | static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, |
6832 | const CCValAssign &VA, const SDLoc &DL, |
6833 | const ISD::InputArg &In, |
6834 | const LoongArchTargetLowering &TLI) { |
6835 | MachineFunction &MF = DAG.getMachineFunction(); |
6836 | MachineRegisterInfo &RegInfo = MF.getRegInfo(); |
6837 | EVT LocVT = VA.getLocVT(); |
6838 | SDValue Val; |
6839 | const TargetRegisterClass *RC = TLI.getRegClassFor(VT: LocVT.getSimpleVT()); |
6840 | Register VReg = RegInfo.createVirtualRegister(RegClass: RC); |
6841 | RegInfo.addLiveIn(Reg: VA.getLocReg(), vreg: VReg); |
6842 | Val = DAG.getCopyFromReg(Chain, dl: DL, Reg: VReg, VT: LocVT); |
6843 | |
6844 | // If input is sign extended from 32 bits, note it for the OptW pass. |
6845 | if (In.isOrigArg()) { |
6846 | Argument *OrigArg = MF.getFunction().getArg(i: In.getOrigArgIndex()); |
6847 | if (OrigArg->getType()->isIntegerTy()) { |
6848 | unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth(); |
// An input zero extended from a width below 32 (e.g. i31) can also be
// considered sign extended.
6850 | if ((BitWidth <= 32 && In.Flags.isSExt()) || |
6851 | (BitWidth < 32 && In.Flags.isZExt())) { |
6852 | LoongArchMachineFunctionInfo *LAFI = |
6853 | MF.getInfo<LoongArchMachineFunctionInfo>(); |
6854 | LAFI->addSExt32Register(Reg: VReg); |
6855 | } |
6856 | } |
6857 | } |
6858 | |
6859 | return convertLocVTToValVT(DAG, Val, VA, DL); |
6860 | } |
6861 | |
6862 | // The caller is responsible for loading the full value if the argument is |
6863 | // passed with CCValAssign::Indirect. |
6864 | static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, |
6865 | const CCValAssign &VA, const SDLoc &DL) { |
6866 | MachineFunction &MF = DAG.getMachineFunction(); |
6867 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
6868 | EVT ValVT = VA.getValVT(); |
6869 | int FI = MFI.CreateFixedObject(Size: ValVT.getStoreSize(), SPOffset: VA.getLocMemOffset(), |
6870 | /*IsImmutable=*/true); |
6871 | SDValue FIN = DAG.getFrameIndex( |
6872 | FI, VT: MVT::getIntegerVT(BitWidth: DAG.getDataLayout().getPointerSizeInBits(AS: 0))); |
6873 | |
6874 | ISD::LoadExtType ExtType; |
6875 | switch (VA.getLocInfo()) { |
6876 | default: |
6877 | llvm_unreachable("Unexpected CCValAssign::LocInfo" ); |
6878 | case CCValAssign::Full: |
6879 | case CCValAssign::Indirect: |
6880 | case CCValAssign::BCvt: |
6881 | ExtType = ISD::NON_EXTLOAD; |
6882 | break; |
6883 | } |
6884 | return DAG.getExtLoad( |
6885 | ExtType, dl: DL, VT: VA.getLocVT(), Chain, Ptr: FIN, |
6886 | PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI), MemVT: ValVT); |
6887 | } |
6888 | |
6889 | static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain, |
6890 | const CCValAssign &VA, |
6891 | const CCValAssign &HiVA, |
6892 | const SDLoc &DL) { |
6893 | assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 && |
6894 | "Unexpected VA" ); |
6895 | MachineFunction &MF = DAG.getMachineFunction(); |
6896 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
6897 | MachineRegisterInfo &RegInfo = MF.getRegInfo(); |
6898 | |
6899 | assert(VA.isRegLoc() && "Expected register VA assignment" ); |
6900 | |
6901 | Register LoVReg = RegInfo.createVirtualRegister(RegClass: &LoongArch::GPRRegClass); |
6902 | RegInfo.addLiveIn(Reg: VA.getLocReg(), vreg: LoVReg); |
6903 | SDValue Lo = DAG.getCopyFromReg(Chain, dl: DL, Reg: LoVReg, VT: MVT::i32); |
6904 | SDValue Hi; |
6905 | if (HiVA.isMemLoc()) { |
6906 | // Second half of f64 is passed on the stack. |
6907 | int FI = MFI.CreateFixedObject(Size: 4, SPOffset: HiVA.getLocMemOffset(), |
6908 | /*IsImmutable=*/true); |
6909 | SDValue FIN = DAG.getFrameIndex(FI, VT: MVT::i32); |
6910 | Hi = DAG.getLoad(VT: MVT::i32, dl: DL, Chain, Ptr: FIN, |
6911 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)); |
6912 | } else { |
6913 | // Second half of f64 is passed in another GPR. |
6914 | Register HiVReg = RegInfo.createVirtualRegister(RegClass: &LoongArch::GPRRegClass); |
6915 | RegInfo.addLiveIn(Reg: HiVA.getLocReg(), vreg: HiVReg); |
6916 | Hi = DAG.getCopyFromReg(Chain, dl: DL, Reg: HiVReg, VT: MVT::i32); |
6917 | } |
6918 | return DAG.getNode(Opcode: LoongArchISD::BUILD_PAIR_F64, DL, VT: MVT::f64, N1: Lo, N2: Hi); |
6919 | } |
6920 | |
6921 | static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, |
6922 | const CCValAssign &VA, const SDLoc &DL) { |
6923 | EVT LocVT = VA.getLocVT(); |
6924 | |
6925 | switch (VA.getLocInfo()) { |
6926 | default: |
6927 | llvm_unreachable("Unexpected CCValAssign::LocInfo" ); |
6928 | case CCValAssign::Full: |
6929 | break; |
6930 | case CCValAssign::BCvt: |
6931 | if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) |
6932 | Val = DAG.getNode(Opcode: LoongArchISD::MOVFR2GR_S_LA64, DL, VT: MVT::i64, Operand: Val); |
6933 | else |
6934 | Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: LocVT, Operand: Val); |
6935 | break; |
6936 | } |
6937 | return Val; |
6938 | } |
6939 | |
6940 | static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, |
6941 | CCValAssign::LocInfo LocInfo, |
6942 | ISD::ArgFlagsTy ArgFlags, CCState &State) { |
6943 | if (LocVT == MVT::i32 || LocVT == MVT::i64) { |
6944 | // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim |
6945 | // s0 s1 s2 s3 s4 s5 s6 s7 s8 |
6946 | static const MCPhysReg GPRList[] = { |
6947 | LoongArch::R23, LoongArch::R24, LoongArch::R25, |
6948 | LoongArch::R26, LoongArch::R27, LoongArch::R28, |
6949 | LoongArch::R29, LoongArch::R30, LoongArch::R31}; |
6950 | if (MCRegister Reg = State.AllocateReg(Regs: GPRList)) { |
6951 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, HTP: LocInfo)); |
6952 | return false; |
6953 | } |
6954 | } |
6955 | |
6956 | if (LocVT == MVT::f32) { |
6957 | // Pass in STG registers: F1, F2, F3, F4 |
6958 | // fs0,fs1,fs2,fs3 |
6959 | static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25, |
6960 | LoongArch::F26, LoongArch::F27}; |
6961 | if (MCRegister Reg = State.AllocateReg(Regs: FPR32List)) { |
6962 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, HTP: LocInfo)); |
6963 | return false; |
6964 | } |
6965 | } |
6966 | |
6967 | if (LocVT == MVT::f64) { |
6968 | // Pass in STG registers: D1, D2, D3, D4 |
6969 | // fs4,fs5,fs6,fs7 |
6970 | static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64, |
6971 | LoongArch::F30_64, LoongArch::F31_64}; |
6972 | if (MCRegister Reg = State.AllocateReg(Regs: FPR64List)) { |
6973 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, HTP: LocInfo)); |
6974 | return false; |
6975 | } |
6976 | } |
6977 | |
6978 | report_fatal_error(reason: "No registers left in GHC calling convention" ); |
6979 | return true; |
6980 | } |
6981 | |
6982 | // Transform physical registers into virtual registers. |
6983 | SDValue LoongArchTargetLowering::LowerFormalArguments( |
6984 | SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, |
6985 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, |
6986 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { |
6987 | |
6988 | MachineFunction &MF = DAG.getMachineFunction(); |
6989 | |
6990 | switch (CallConv) { |
6991 | default: |
6992 | llvm_unreachable("Unsupported calling convention" ); |
6993 | case CallingConv::C: |
6994 | case CallingConv::Fast: |
6995 | break; |
6996 | case CallingConv::GHC: |
6997 | if (!MF.getSubtarget().hasFeature(Feature: LoongArch::FeatureBasicF) || |
6998 | !MF.getSubtarget().hasFeature(Feature: LoongArch::FeatureBasicD)) |
6999 | report_fatal_error( |
7000 | reason: "GHC calling convention requires the F and D extensions" ); |
7001 | } |
7002 | |
7003 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
7004 | MVT GRLenVT = Subtarget.getGRLenVT(); |
7005 | unsigned GRLenInBytes = Subtarget.getGRLen() / 8; |
// Used with varargs to accumulate store chains.
7007 | std::vector<SDValue> OutChains; |
7008 | |
7009 | // Assign locations to all of the incoming arguments. |
7010 | SmallVector<CCValAssign> ArgLocs; |
7011 | CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); |
7012 | |
7013 | if (CallConv == CallingConv::GHC) |
7014 | CCInfo.AnalyzeFormalArguments(Ins, Fn: CC_LoongArch_GHC); |
7015 | else |
7016 | analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, Fn: CC_LoongArch); |
7017 | |
7018 | for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) { |
7019 | CCValAssign &VA = ArgLocs[i]; |
7020 | SDValue ArgValue; |
7021 | // Passing f64 on LA32D with a soft float ABI must be handled as a special |
7022 | // case. |
7023 | if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { |
7024 | assert(VA.needsCustom()); |
7025 | ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, HiVA: ArgLocs[++i], DL); |
7026 | } else if (VA.isRegLoc()) |
7027 | ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, In: Ins[InsIdx], TLI: *this); |
7028 | else |
7029 | ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL); |
7030 | if (VA.getLocInfo() == CCValAssign::Indirect) { |
7031 | // If the original argument was split and passed by reference, we need to |
7032 | // load all parts of it here (using the same address). |
7033 | InVals.push_back(Elt: DAG.getLoad(VT: VA.getValVT(), dl: DL, Chain, Ptr: ArgValue, |
7034 | PtrInfo: MachinePointerInfo())); |
7035 | unsigned ArgIndex = Ins[InsIdx].OrigArgIndex; |
7036 | unsigned ArgPartOffset = Ins[InsIdx].PartOffset; |
7037 | assert(ArgPartOffset == 0); |
7038 | while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) { |
7039 | CCValAssign &PartVA = ArgLocs[i + 1]; |
7040 | unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset; |
7041 | SDValue Offset = DAG.getIntPtrConstant(Val: PartOffset, DL); |
7042 | SDValue Address = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: ArgValue, N2: Offset); |
7043 | InVals.push_back(Elt: DAG.getLoad(VT: PartVA.getValVT(), dl: DL, Chain, Ptr: Address, |
7044 | PtrInfo: MachinePointerInfo())); |
7045 | ++i; |
7046 | ++InsIdx; |
7047 | } |
7048 | continue; |
7049 | } |
7050 | InVals.push_back(Elt: ArgValue); |
7051 | } |
7052 | |
7053 | if (IsVarArg) { |
7054 | ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs); |
7055 | unsigned Idx = CCInfo.getFirstUnallocated(Regs: ArgRegs); |
7056 | const TargetRegisterClass *RC = &LoongArch::GPRRegClass; |
7057 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
7058 | MachineRegisterInfo &RegInfo = MF.getRegInfo(); |
7059 | auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>(); |
7060 | |
7061 | // Offset of the first variable argument from stack pointer, and size of |
7062 | // the vararg save area. For now, the varargs save area is either zero or |
7063 | // large enough to hold a0-a7. |
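// For example (illustrative): on LA64, a variadic function with two named
// GPR arguments saves a2-a7, so VarArgsSaveSize = 6 * 8 = 48 bytes.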
7064 | int VaArgOffset, VarArgsSaveSize; |
7065 | |
7066 | // If all registers are allocated, then all varargs must be passed on the |
7067 | // stack and we don't need to save any argregs. |
7068 | if (ArgRegs.size() == Idx) { |
7069 | VaArgOffset = CCInfo.getStackSize(); |
7070 | VarArgsSaveSize = 0; |
7071 | } else { |
7072 | VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx); |
7073 | VaArgOffset = -VarArgsSaveSize; |
7074 | } |
7075 | |
// Record the frame index of the first variable argument,
// which is a value needed to lower VASTART.
7078 | int FI = MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset, IsImmutable: true); |
7079 | LoongArchFI->setVarArgsFrameIndex(FI); |
7080 | |
7081 | // If saving an odd number of registers then create an extra stack slot to |
7082 | // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures |
// offsets to even-numbered registers remain 2*GRLen-aligned.
7084 | if (Idx % 2) { |
7085 | MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset - (int)GRLenInBytes, |
7086 | IsImmutable: true); |
7087 | VarArgsSaveSize += GRLenInBytes; |
7088 | } |
7089 | |
7090 | // Copy the integer registers that may have been used for passing varargs |
7091 | // to the vararg save area. |
7092 | for (unsigned I = Idx; I < ArgRegs.size(); |
7093 | ++I, VaArgOffset += GRLenInBytes) { |
7094 | const Register Reg = RegInfo.createVirtualRegister(RegClass: RC); |
7095 | RegInfo.addLiveIn(Reg: ArgRegs[I], vreg: Reg); |
7096 | SDValue ArgValue = DAG.getCopyFromReg(Chain, dl: DL, Reg, VT: GRLenVT); |
7097 | FI = MFI.CreateFixedObject(Size: GRLenInBytes, SPOffset: VaArgOffset, IsImmutable: true); |
7098 | SDValue PtrOff = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout())); |
7099 | SDValue Store = DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: PtrOff, |
7100 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)); |
7101 | cast<StoreSDNode>(Val: Store.getNode()) |
7102 | ->getMemOperand() |
7103 | ->setValue((Value *)nullptr); |
7104 | OutChains.push_back(x: Store); |
7105 | } |
7106 | LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize); |
7107 | } |
7108 | |
7109 | // All stores are grouped in one node to allow the matching between |
7110 | // the size of Ins and InVals. This only happens for vararg functions. |
7111 | if (!OutChains.empty()) { |
7112 | OutChains.push_back(x: Chain); |
7113 | Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains); |
7114 | } |
7115 | |
7116 | return Chain; |
7117 | } |
7118 | |
7119 | bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { |
7120 | return CI->isTailCall(); |
7121 | } |
7122 | |
// Check whether the return value is used only as a return value, as
// otherwise we can't perform a tail call.
7125 | bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N, |
7126 | SDValue &Chain) const { |
7127 | if (N->getNumValues() != 1) |
7128 | return false; |
7129 | if (!N->hasNUsesOfValue(NUses: 1, Value: 0)) |
7130 | return false; |
7131 | |
7132 | SDNode *Copy = *N->user_begin(); |
7133 | if (Copy->getOpcode() != ISD::CopyToReg) |
7134 | return false; |
7135 | |
7136 | // If the ISD::CopyToReg has a glue operand, we conservatively assume it |
7137 | // isn't safe to perform a tail call. |
7138 | if (Copy->getGluedNode()) |
7139 | return false; |
7140 | |
7141 | // The copy must be used by a LoongArchISD::RET, and nothing else. |
7142 | bool HasRet = false; |
7143 | for (SDNode *Node : Copy->users()) { |
7144 | if (Node->getOpcode() != LoongArchISD::RET) |
7145 | return false; |
7146 | HasRet = true; |
7147 | } |
7148 | |
7149 | if (!HasRet) |
7150 | return false; |
7151 | |
7152 | Chain = Copy->getOperand(Num: 0); |
7153 | return true; |
7154 | } |
7155 | |
7156 | // Check whether the call is eligible for tail call optimization. |
7157 | bool LoongArchTargetLowering::isEligibleForTailCallOptimization( |
7158 | CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, |
7159 | const SmallVectorImpl<CCValAssign> &ArgLocs) const { |
7160 | |
7161 | auto CalleeCC = CLI.CallConv; |
7162 | auto &Outs = CLI.Outs; |
7163 | auto &Caller = MF.getFunction(); |
7164 | auto CallerCC = Caller.getCallingConv(); |
7165 | |
7166 | // Do not tail call opt if the stack is used to pass parameters. |
7167 | if (CCInfo.getStackSize() != 0) |
7168 | return false; |
7169 | |
7170 | // Do not tail call opt if any parameters need to be passed indirectly. |
7171 | for (auto &VA : ArgLocs) |
7172 | if (VA.getLocInfo() == CCValAssign::Indirect) |
7173 | return false; |
7174 | |
7175 | // Do not tail call opt if either caller or callee uses struct return |
7176 | // semantics. |
7177 | auto IsCallerStructRet = Caller.hasStructRetAttr(); |
7178 | auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); |
7179 | if (IsCallerStructRet || IsCalleeStructRet) |
7180 | return false; |
7181 | |
7182 | // Do not tail call opt if either the callee or caller has a byval argument. |
7183 | for (auto &Arg : Outs) |
7184 | if (Arg.Flags.isByVal()) |
7185 | return false; |
7186 | |
7187 | // The callee has to preserve all registers the caller needs to preserve. |
7188 | const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
7189 | const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); |
7190 | if (CalleeCC != CallerCC) { |
7191 | const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); |
7192 | if (!TRI->regmaskSubsetEqual(mask0: CallerPreserved, mask1: CalleePreserved)) |
7193 | return false; |
7194 | } |
7195 | return true; |
7196 | } |
7197 | |
7198 | static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) { |
7199 | return DAG.getDataLayout().getPrefTypeAlign( |
7200 | Ty: VT.getTypeForEVT(Context&: *DAG.getContext())); |
7201 | } |
7202 | |
7203 | // Lower a call to a callseq_start + CALL + callseq_end chain, and add input |
7204 | // and output parameter nodes. |
7205 | SDValue |
7206 | LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI, |
7207 | SmallVectorImpl<SDValue> &InVals) const { |
7208 | SelectionDAG &DAG = CLI.DAG; |
7209 | SDLoc &DL = CLI.DL; |
7210 | SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; |
7211 | SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; |
7212 | SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; |
7213 | SDValue Chain = CLI.Chain; |
7214 | SDValue Callee = CLI.Callee; |
7215 | CallingConv::ID CallConv = CLI.CallConv; |
7216 | bool IsVarArg = CLI.IsVarArg; |
7217 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
7218 | MVT GRLenVT = Subtarget.getGRLenVT(); |
7219 | bool &IsTailCall = CLI.IsTailCall; |
7220 | |
7221 | MachineFunction &MF = DAG.getMachineFunction(); |
7222 | |
7223 | // Analyze the operands of the call, assigning locations to each operand. |
7224 | SmallVector<CCValAssign> ArgLocs; |
7225 | CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); |
7226 | |
7227 | if (CallConv == CallingConv::GHC) |
7228 | ArgCCInfo.AnalyzeCallOperands(Outs, Fn: CC_LoongArch_GHC); |
7229 | else |
7230 | analyzeOutputArgs(MF, CCInfo&: ArgCCInfo, Outs, /*IsRet=*/false, CLI: &CLI, Fn: CC_LoongArch); |
7231 | |
7232 | // Check if it's really possible to do a tail call. |
7233 | if (IsTailCall) |
7234 | IsTailCall = isEligibleForTailCallOptimization(CCInfo&: ArgCCInfo, CLI, MF, ArgLocs); |
7235 | |
7236 | if (IsTailCall) |
7237 | ++NumTailCalls; |
7238 | else if (CLI.CB && CLI.CB->isMustTailCall()) |
7239 | report_fatal_error(reason: "failed to perform tail call elimination on a call " |
7240 | "site marked musttail" ); |
7241 | |
7242 | // Get a count of how many bytes are to be pushed on the stack. |
7243 | unsigned NumBytes = ArgCCInfo.getStackSize(); |
7244 | |
7245 | // Create local copies for byval args. |
7246 | SmallVector<SDValue> ByValArgs; |
7247 | for (unsigned i = 0, e = Outs.size(); i != e; ++i) { |
7248 | ISD::ArgFlagsTy Flags = Outs[i].Flags; |
7249 | if (!Flags.isByVal()) |
7250 | continue; |
7251 | |
7252 | SDValue Arg = OutVals[i]; |
7253 | unsigned Size = Flags.getByValSize(); |
7254 | Align Alignment = Flags.getNonZeroByValAlign(); |
7255 | |
7256 | int FI = |
7257 | MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/isSpillSlot: false); |
7258 | SDValue FIPtr = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout())); |
7259 | SDValue SizeNode = DAG.getConstant(Val: Size, DL, VT: GRLenVT); |
7260 | |
7261 | Chain = DAG.getMemcpy(Chain, dl: DL, Dst: FIPtr, Src: Arg, Size: SizeNode, Alignment, |
7262 | /*IsVolatile=*/isVol: false, |
7263 | /*AlwaysInline=*/false, /*CI=*/nullptr, OverrideTailCall: std::nullopt, |
7264 | DstPtrInfo: MachinePointerInfo(), SrcPtrInfo: MachinePointerInfo()); |
7265 | ByValArgs.push_back(Elt: FIPtr); |
7266 | } |
7267 | |
7268 | if (!IsTailCall) |
7269 | Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: 0, DL: CLI.DL); |
7270 | |
7271 | // Copy argument values to their designated locations. |
7272 | SmallVector<std::pair<Register, SDValue>> RegsToPass; |
7273 | SmallVector<SDValue> MemOpChains; |
7274 | SDValue StackPtr; |
7275 | for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e; |
7276 | ++i, ++OutIdx) { |
7277 | CCValAssign &VA = ArgLocs[i]; |
7278 | SDValue ArgValue = OutVals[OutIdx]; |
7279 | ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags; |
7280 | |
7281 | // Handle passing f64 on LA32D with a soft float ABI as a special case. |
7282 | if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { |
7283 | assert(VA.isRegLoc() && "Expected register VA assignment" ); |
7284 | assert(VA.needsCustom()); |
7285 | SDValue SplitF64 = |
7286 | DAG.getNode(Opcode: LoongArchISD::SPLIT_PAIR_F64, DL, |
7287 | VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32), N: ArgValue); |
7288 | SDValue Lo = SplitF64.getValue(R: 0); |
7289 | SDValue Hi = SplitF64.getValue(R: 1); |
7290 | |
7291 | Register RegLo = VA.getLocReg(); |
7292 | RegsToPass.push_back(Elt: std::make_pair(x&: RegLo, y&: Lo)); |
7293 | |
7294 | // Get the CCValAssign for the Hi part. |
7295 | CCValAssign &HiVA = ArgLocs[++i]; |
7296 | |
7297 | if (HiVA.isMemLoc()) { |
7298 | // Second half of f64 is passed on the stack. |
7299 | if (!StackPtr.getNode()) |
7300 | StackPtr = DAG.getCopyFromReg(Chain, dl: DL, Reg: LoongArch::R3, VT: PtrVT); |
7301 | SDValue Address = |
7302 | DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr, |
7303 | N2: DAG.getIntPtrConstant(Val: HiVA.getLocMemOffset(), DL)); |
7304 | // Emit the store. |
7305 | MemOpChains.push_back(Elt: DAG.getStore( |
7306 | Chain, dl: DL, Val: Hi, Ptr: Address, |
7307 | PtrInfo: MachinePointerInfo::getStack(MF, Offset: HiVA.getLocMemOffset()))); |
7308 | } else { |
7309 | // Second half of f64 is passed in another GPR. |
7310 | Register RegHigh = HiVA.getLocReg(); |
7311 | RegsToPass.push_back(Elt: std::make_pair(x&: RegHigh, y&: Hi)); |
7312 | } |
7313 | continue; |
7314 | } |
7315 | |
7316 | // Promote the value if needed. |
7317 | // For now, only handle fully promoted and indirect arguments. |
7318 | if (VA.getLocInfo() == CCValAssign::Indirect) { |
7319 | // Store the argument in a stack slot and pass its address. |
7320 | Align StackAlign = |
7321 | std::max(a: getPrefTypeAlign(VT: Outs[OutIdx].ArgVT, DAG), |
7322 | b: getPrefTypeAlign(VT: ArgValue.getValueType(), DAG)); |
7323 | TypeSize StoredSize = ArgValue.getValueType().getStoreSize(); |
7324 | // If the original argument was split and passed by reference, we need to |
7325 | // store the required parts of it here (and pass just one address). |
7326 | unsigned ArgIndex = Outs[OutIdx].OrigArgIndex; |
7327 | unsigned ArgPartOffset = Outs[OutIdx].PartOffset; |
7328 | assert(ArgPartOffset == 0); |
// Calculate the total size to store. We don't know the full size up front,
// so walk the remaining parts, accumulating their sizes and alignments.
7332 | SmallVector<std::pair<SDValue, SDValue>> Parts; |
7333 | while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) { |
7334 | SDValue PartValue = OutVals[OutIdx + 1]; |
7335 | unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset; |
7336 | SDValue Offset = DAG.getIntPtrConstant(Val: PartOffset, DL); |
7337 | EVT PartVT = PartValue.getValueType(); |
7338 | |
7339 | StoredSize += PartVT.getStoreSize(); |
7340 | StackAlign = std::max(a: StackAlign, b: getPrefTypeAlign(VT: PartVT, DAG)); |
7341 | Parts.push_back(Elt: std::make_pair(x&: PartValue, y&: Offset)); |
7342 | ++i; |
7343 | ++OutIdx; |
7344 | } |
7345 | SDValue SpillSlot = DAG.CreateStackTemporary(Bytes: StoredSize, Alignment: StackAlign); |
7346 | int FI = cast<FrameIndexSDNode>(Val&: SpillSlot)->getIndex(); |
7347 | MemOpChains.push_back( |
7348 | Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: SpillSlot, |
7349 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI))); |
7350 | for (const auto &Part : Parts) { |
7351 | SDValue PartValue = Part.first; |
7352 | SDValue PartOffset = Part.second; |
7353 | SDValue Address = |
7354 | DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: SpillSlot, N2: PartOffset); |
7355 | MemOpChains.push_back( |
7356 | Elt: DAG.getStore(Chain, dl: DL, Val: PartValue, Ptr: Address, |
7357 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI))); |
7358 | } |
7359 | ArgValue = SpillSlot; |
7360 | } else { |
7361 | ArgValue = convertValVTToLocVT(DAG, Val: ArgValue, VA, DL); |
7362 | } |
7363 | |
7364 | // Use local copy if it is a byval arg. |
7365 | if (Flags.isByVal()) |
7366 | ArgValue = ByValArgs[j++]; |
7367 | |
7368 | if (VA.isRegLoc()) { |
7369 | // Queue up the argument copies and emit them at the end. |
7370 | RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: ArgValue)); |
7371 | } else { |
7372 | assert(VA.isMemLoc() && "Argument not register or memory" ); |
7373 | assert(!IsTailCall && "Tail call not allowed if stack is used " |
7374 | "for passing parameters" ); |
7375 | |
7376 | // Work out the address of the stack slot. |
7377 | if (!StackPtr.getNode()) |
7378 | StackPtr = DAG.getCopyFromReg(Chain, dl: DL, Reg: LoongArch::R3, VT: PtrVT); |
7379 | SDValue Address = |
7380 | DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr, |
7381 | N2: DAG.getIntPtrConstant(Val: VA.getLocMemOffset(), DL)); |
7382 | |
7383 | // Emit the store. |
7384 | MemOpChains.push_back( |
7385 | Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: Address, PtrInfo: MachinePointerInfo())); |
7386 | } |
7387 | } |
7388 | |
7389 | // Join the stores, which are independent of one another. |
7390 | if (!MemOpChains.empty()) |
7391 | Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: MemOpChains); |
7392 | |
7393 | SDValue Glue; |
7394 | |
7395 | // Build a sequence of copy-to-reg nodes, chained and glued together. |
7396 | for (auto &Reg : RegsToPass) { |
7397 | Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: Reg.first, N: Reg.second, Glue); |
7398 | Glue = Chain.getValue(R: 1); |
7399 | } |
7400 | |
7401 | // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a |
7402 | // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't |
7403 | // split it and then direct call can be matched by PseudoCALL. |
7404 | if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Val&: Callee)) { |
7405 | const GlobalValue *GV = S->getGlobal(); |
7406 | unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV) |
7407 | ? LoongArchII::MO_CALL |
7408 | : LoongArchII::MO_CALL_PLT; |
7409 | Callee = DAG.getTargetGlobalAddress(GV: S->getGlobal(), DL, VT: PtrVT, offset: 0, TargetFlags: OpFlags); |
7410 | } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Val&: Callee)) { |
7411 | unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV: nullptr) |
7412 | ? LoongArchII::MO_CALL |
7413 | : LoongArchII::MO_CALL_PLT; |
7414 | Callee = DAG.getTargetExternalSymbol(Sym: S->getSymbol(), VT: PtrVT, TargetFlags: OpFlags); |
7415 | } |
7416 | |
7417 | // The first call operand is the chain and the second is the target address. |
7418 | SmallVector<SDValue> Ops; |
7419 | Ops.push_back(Elt: Chain); |
7420 | Ops.push_back(Elt: Callee); |
7421 | |
7422 | // Add argument registers to the end of the list so that they are |
7423 | // known live into the call. |
7424 | for (auto &Reg : RegsToPass) |
7425 | Ops.push_back(Elt: DAG.getRegister(Reg: Reg.first, VT: Reg.second.getValueType())); |
7426 | |
7427 | if (!IsTailCall) { |
7428 | // Add a register mask operand representing the call-preserved registers. |
7429 | const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
7430 | const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); |
7431 | assert(Mask && "Missing call preserved mask for calling convention" ); |
7432 | Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask)); |
7433 | } |
7434 | |
7435 | // Glue the call to the argument copies, if any. |
7436 | if (Glue.getNode()) |
7437 | Ops.push_back(Elt: Glue); |
7438 | |
7439 | // Emit the call. |
7440 | SDVTList NodeTys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue); |
7441 | unsigned Op; |
7442 | switch (DAG.getTarget().getCodeModel()) { |
7443 | default: |
7444 | report_fatal_error(reason: "Unsupported code model" ); |
7445 | case CodeModel::Small: |
7446 | Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL; |
7447 | break; |
7448 | case CodeModel::Medium: |
7449 | assert(Subtarget.is64Bit() && "Medium code model requires LA64" ); |
7450 | Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM; |
7451 | break; |
7452 | case CodeModel::Large: |
7453 | assert(Subtarget.is64Bit() && "Large code model requires LA64" ); |
7454 | Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE; |
7455 | break; |
7456 | } |
7457 | |
7458 | if (IsTailCall) { |
7459 | MF.getFrameInfo().setHasTailCall(); |
7460 | SDValue Ret = DAG.getNode(Opcode: Op, DL, VTList: NodeTys, Ops); |
7461 | DAG.addNoMergeSiteInfo(Node: Ret.getNode(), NoMerge: CLI.NoMerge); |
7462 | return Ret; |
7463 | } |
7464 | |
7465 | Chain = DAG.getNode(Opcode: Op, DL, VTList: NodeTys, Ops); |
7466 | DAG.addNoMergeSiteInfo(Node: Chain.getNode(), NoMerge: CLI.NoMerge); |
7467 | Glue = Chain.getValue(R: 1); |
7468 | |
7469 | // Mark the end of the call, which is glued to the call itself. |
7470 | Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytes, Size2: 0, Glue, DL); |
7471 | Glue = Chain.getValue(R: 1); |
7472 | |
7473 | // Assign locations to each value returned by this call. |
7474 | SmallVector<CCValAssign> RVLocs; |
7475 | CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); |
7476 | analyzeInputArgs(MF, CCInfo&: RetCCInfo, Ins, /*IsRet=*/true, Fn: CC_LoongArch); |
7477 | |
7478 | // Copy all of the result registers out of their specified physreg. |
7479 | for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { |
7480 | auto &VA = RVLocs[i]; |
7481 | // Copy the value out. |
7482 | SDValue RetValue = |
7483 | DAG.getCopyFromReg(Chain, dl: DL, Reg: VA.getLocReg(), VT: VA.getLocVT(), Glue); |
7484 | // Glue the RetValue to the end of the call sequence. |
7485 | Chain = RetValue.getValue(R: 1); |
7486 | Glue = RetValue.getValue(R: 2); |
7487 | |
7488 | if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { |
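// The f64 result arrives split across a pair of i32 GPRs (returning f64
// on LA32D with a soft float ABI); reassemble it from the two halves.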
7489 | assert(VA.needsCustom()); |
7490 | SDValue RetValue2 = DAG.getCopyFromReg(Chain, dl: DL, Reg: RVLocs[++i].getLocReg(), |
7491 | VT: MVT::i32, Glue); |
7492 | Chain = RetValue2.getValue(R: 1); |
7493 | Glue = RetValue2.getValue(R: 2); |
7494 | RetValue = DAG.getNode(Opcode: LoongArchISD::BUILD_PAIR_F64, DL, VT: MVT::f64, |
7495 | N1: RetValue, N2: RetValue2); |
7496 | } else |
7497 | RetValue = convertLocVTToValVT(DAG, Val: RetValue, VA, DL); |
7498 | |
7499 | InVals.push_back(Elt: RetValue); |
7500 | } |
7501 | |
7502 | return Chain; |
7503 | } |
7504 | |
7505 | bool LoongArchTargetLowering::CanLowerReturn( |
7506 | CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, |
7507 | const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context, |
7508 | const Type *RetTy) const { |
7509 | SmallVector<CCValAssign> RVLocs; |
7510 | CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); |
7511 | |
7512 | for (unsigned i = 0, e = Outs.size(); i != e; ++i) { |
7513 | LoongArchABI::ABI ABI = |
7514 | MF.getSubtarget<LoongArchSubtarget>().getTargetABI(); |
7515 | if (CC_LoongArch(DL: MF.getDataLayout(), ABI, ValNo: i, ValVT: Outs[i].VT, LocInfo: CCValAssign::Full, |
7516 | ArgFlags: Outs[i].Flags, State&: CCInfo, /*IsFixed=*/true, /*IsRet=*/true, |
7517 | OrigTy: nullptr)) |
7518 | return false; |
7519 | } |
7520 | return true; |
7521 | } |
7522 | |
7523 | SDValue LoongArchTargetLowering::LowerReturn( |
7524 | SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, |
7525 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
7526 | const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, |
7527 | SelectionDAG &DAG) const { |
7528 | // Stores the assignment of the return value to a location. |
7529 | SmallVector<CCValAssign> RVLocs; |
7530 | |
7531 | // Info about the registers and stack slot. |
7532 | CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, |
7533 | *DAG.getContext()); |
7534 | |
7535 | analyzeOutputArgs(MF&: DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true, |
7536 | CLI: nullptr, Fn: CC_LoongArch); |
7537 | if (CallConv == CallingConv::GHC && !RVLocs.empty()) |
7538 | report_fatal_error(reason: "GHC functions return void only" ); |
7539 | SDValue Glue; |
7540 | SmallVector<SDValue, 4> RetOps(1, Chain); |
7541 | |
7542 | // Copy the result values into the output registers. |
7543 | for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) { |
7544 | SDValue Val = OutVals[OutIdx]; |
7545 | CCValAssign &VA = RVLocs[i]; |
7546 | assert(VA.isRegLoc() && "Can only return in registers!" ); |
7547 | |
7548 | if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) { |
7549 | // Handle returning f64 on LA32D with a soft float ABI. |
7550 | assert(VA.isRegLoc() && "Expected return via registers" ); |
7551 | assert(VA.needsCustom()); |
7552 | SDValue SplitF64 = DAG.getNode(Opcode: LoongArchISD::SPLIT_PAIR_F64, DL, |
7553 | VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32), N: Val); |
7554 | SDValue Lo = SplitF64.getValue(R: 0); |
7555 | SDValue Hi = SplitF64.getValue(R: 1); |
7556 | Register RegLo = VA.getLocReg(); |
7557 | Register RegHi = RVLocs[++i].getLocReg(); |
7558 | |
7559 | Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: RegLo, N: Lo, Glue); |
7560 | Glue = Chain.getValue(R: 1); |
7561 | RetOps.push_back(Elt: DAG.getRegister(Reg: RegLo, VT: MVT::i32)); |
7562 | Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: RegHi, N: Hi, Glue); |
7563 | Glue = Chain.getValue(R: 1); |
7564 | RetOps.push_back(Elt: DAG.getRegister(Reg: RegHi, VT: MVT::i32)); |
7565 | } else { |
7566 | // Handle a 'normal' return. |
7567 | Val = convertValVTToLocVT(DAG, Val, VA, DL); |
7568 | Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: VA.getLocReg(), N: Val, Glue); |
7569 | |
7570 | // Guarantee that all emitted copies are stuck together. |
7571 | Glue = Chain.getValue(R: 1); |
7572 | RetOps.push_back(Elt: DAG.getRegister(Reg: VA.getLocReg(), VT: VA.getLocVT())); |
7573 | } |
7574 | } |
7575 | |
7576 | RetOps[0] = Chain; // Update chain. |
7577 | |
7578 | // Add the glue node if we have it. |
7579 | if (Glue.getNode()) |
7580 | RetOps.push_back(Elt: Glue); |
7581 | |
7582 | return DAG.getNode(Opcode: LoongArchISD::RET, DL, VT: MVT::Other, Ops: RetOps); |
7583 | } |
7584 | |
7585 | bool LoongArchTargetLowering::isFPImmVLDILegal(const APFloat &Imm, |
7586 | EVT VT) const { |
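// A single vldi can materialize only a restricted set of FP immediates;
// informally, the masked comparisons below accept exactly the
// exponent/mantissa bit patterns that its immediate encoding can produce.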
7587 | if (!Subtarget.hasExtLSX()) |
7588 | return false; |
7589 | |
7590 | if (VT == MVT::f32) { |
7591 | uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff; |
7592 | return (masked == 0x3e000000 || masked == 0x40000000); |
7593 | } |
7594 | |
7595 | if (VT == MVT::f64) { |
7596 | uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff; |
7597 | return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000); |
7598 | } |
7599 | |
7600 | return false; |
7601 | } |
7602 | |
7603 | bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, |
7604 | bool ForCodeSize) const { |
// TODO: Maybe more checks are needed here after the vector extension is
// supported.
7606 | if (VT == MVT::f32 && !Subtarget.hasBasicF()) |
7607 | return false; |
7608 | if (VT == MVT::f64 && !Subtarget.hasBasicD()) |
7609 | return false; |
7610 | return (Imm.isZero() || Imm.isExactlyValue(V: 1.0) || isFPImmVLDILegal(Imm, VT)); |
7611 | } |
7612 | |
7613 | bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const { |
7614 | return true; |
7615 | } |
7616 | |
7617 | bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const { |
7618 | return true; |
7619 | } |
7620 | |
7621 | bool LoongArchTargetLowering::shouldInsertFencesForAtomic( |
7622 | const Instruction *I) const { |
7623 | if (!Subtarget.is64Bit()) |
7624 | return isa<LoadInst>(Val: I) || isa<StoreInst>(Val: I); |
7625 | |
7626 | if (isa<LoadInst>(Val: I)) |
7627 | return true; |
7628 | |
// On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
// require fences because we can use amswap_db.[w/d].
7631 | Type *Ty = I->getOperand(i: 0)->getType(); |
7632 | if (isa<StoreInst>(Val: I) && Ty->isIntegerTy()) { |
7633 | unsigned Size = Ty->getIntegerBitWidth(); |
7634 | return (Size == 8 || Size == 16); |
7635 | } |
7636 | |
7637 | return false; |
7638 | } |
7639 | |
7640 | EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL, |
7641 | LLVMContext &Context, |
7642 | EVT VT) const { |
7643 | if (!VT.isVector()) |
7644 | return getPointerTy(DL); |
7645 | return VT.changeVectorElementTypeToInteger(); |
7646 | } |
7647 | |
7648 | bool LoongArchTargetLowering::hasAndNot(SDValue Y) const { |
7649 | // TODO: Support vectors. |
7650 | return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Val: Y); |
7651 | } |
7652 | |
7653 | bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, |
7654 | const CallInst &I, |
7655 | MachineFunction &MF, |
7656 | unsigned Intrinsic) const { |
7657 | switch (Intrinsic) { |
7658 | default: |
7659 | return false; |
7660 | case Intrinsic::loongarch_masked_atomicrmw_xchg_i32: |
7661 | case Intrinsic::loongarch_masked_atomicrmw_add_i32: |
7662 | case Intrinsic::loongarch_masked_atomicrmw_sub_i32: |
7663 | case Intrinsic::loongarch_masked_atomicrmw_nand_i32: |
7664 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
7665 | Info.memVT = MVT::i32; |
7666 | Info.ptrVal = I.getArgOperand(i: 0); |
7667 | Info.offset = 0; |
7668 | Info.align = Align(4); |
7669 | Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore | |
7670 | MachineMemOperand::MOVolatile; |
7671 | return true; |
7672 | // TODO: Add more Intrinsics later. |
7673 | } |
7674 | } |
7675 | |
// When -mlamcas is enabled, MinCmpXchgSizeInBits is set to 8, so atomicrmw
// and/or/xor operations with operands narrower than 32 bits cannot be
// expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent a
// regression, we implement the expansion manually here.
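// For example, `atomicrmw or ptr %p, i8 %v` is roughly rewritten as follows
// (an illustrative IR sketch; actual value names differ):
//   %AlignedAddr = llvm.ptrmask(%p, ~3)
//   %ShiftAmt    = (ptrtoint %p & 3) << 3
//   %Mask        = 0xff << %ShiftAmt
//   %NewOperand  = (zext i8 %v to i32) << %ShiftAmt
//   %Old         = atomicrmw or ptr %AlignedAddr, i32 %NewOperand
//   %Res         = trunc i32 (%Old >> %ShiftAmt) to i8
// For `and`, %NewOperand is additionally or'ed with ~%Mask so the
// neighbouring bytes within the word are preserved.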
7680 | void LoongArchTargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const { |
7681 | AtomicRMWInst::BinOp Op = AI->getOperation(); |
7682 | |
7683 | assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor || |
7684 | Op == AtomicRMWInst::And) && |
7685 | "Unable to expand" ); |
7686 | unsigned MinWordSize = 4; |
7687 | |
7688 | IRBuilder<> Builder(AI); |
7689 | LLVMContext &Ctx = Builder.getContext(); |
7690 | const DataLayout &DL = AI->getDataLayout(); |
7691 | Type *ValueType = AI->getType(); |
7692 | Type *WordType = Type::getIntNTy(C&: Ctx, N: MinWordSize * 8); |
7693 | |
7694 | Value *Addr = AI->getPointerOperand(); |
7695 | PointerType *PtrTy = cast<PointerType>(Val: Addr->getType()); |
7696 | IntegerType *IntTy = DL.getIndexType(C&: Ctx, AddressSpace: PtrTy->getAddressSpace()); |
7697 | |
7698 | Value *AlignedAddr = Builder.CreateIntrinsic( |
7699 | ID: Intrinsic::ptrmask, Types: {PtrTy, IntTy}, |
7700 | Args: {Addr, ConstantInt::get(Ty: IntTy, V: ~(uint64_t)(MinWordSize - 1))}, FMFSource: nullptr, |
7701 | Name: "AlignedAddr" ); |
7702 | |
7703 | Value *AddrInt = Builder.CreatePtrToInt(V: Addr, DestTy: IntTy); |
7704 | Value *PtrLSB = Builder.CreateAnd(LHS: AddrInt, RHS: MinWordSize - 1, Name: "PtrLSB" ); |
7705 | Value *ShiftAmt = Builder.CreateShl(LHS: PtrLSB, RHS: 3); |
7706 | ShiftAmt = Builder.CreateTrunc(V: ShiftAmt, DestTy: WordType, Name: "ShiftAmt" ); |
7707 | Value *Mask = Builder.CreateShl( |
7708 | LHS: ConstantInt::get(Ty: WordType, |
7709 | V: (1 << (DL.getTypeStoreSize(Ty: ValueType) * 8)) - 1), |
7710 | RHS: ShiftAmt, Name: "Mask" ); |
7711 | Value *Inv_Mask = Builder.CreateNot(V: Mask, Name: "Inv_Mask" ); |
7712 | Value *ValOperand_Shifted = |
7713 | Builder.CreateShl(LHS: Builder.CreateZExt(V: AI->getValOperand(), DestTy: WordType), |
7714 | RHS: ShiftAmt, Name: "ValOperand_Shifted" ); |
7715 | Value *NewOperand; |
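// For AND, the bits outside the target field must be 1 so that the wide
// atomic AND leaves the neighbouring bytes unchanged; for OR/XOR, the zero
// bits outside the field are already neutral.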
7716 | if (Op == AtomicRMWInst::And) |
7717 | NewOperand = Builder.CreateOr(LHS: ValOperand_Shifted, RHS: Inv_Mask, Name: "AndOperand" ); |
7718 | else |
7719 | NewOperand = ValOperand_Shifted; |
7720 | |
7721 | AtomicRMWInst *NewAI = |
7722 | Builder.CreateAtomicRMW(Op, Ptr: AlignedAddr, Val: NewOperand, Align: Align(MinWordSize), |
7723 | Ordering: AI->getOrdering(), SSID: AI->getSyncScopeID()); |
7724 | |
7725 | Value *Shift = Builder.CreateLShr(LHS: NewAI, RHS: ShiftAmt, Name: "shifted" ); |
7726 | Value *Trunc = Builder.CreateTrunc(V: Shift, DestTy: ValueType, Name: "extracted" ); |
7727 | Value *FinalOldResult = Builder.CreateBitCast(V: Trunc, DestTy: ValueType); |
7728 | AI->replaceAllUsesWith(V: FinalOldResult); |
7729 | AI->eraseFromParent(); |
7730 | } |
7731 | |
7732 | TargetLowering::AtomicExpansionKind |
7733 | LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { |
7734 | // TODO: Add more AtomicRMWInst that needs to be extended. |
7735 | |
// Since floating-point operations require a non-trivial set of data
// operations, use CmpXChg to expand.
7738 | if (AI->isFloatingPointOperation() || |
7739 | AI->getOperation() == AtomicRMWInst::UIncWrap || |
7740 | AI->getOperation() == AtomicRMWInst::UDecWrap || |
7741 | AI->getOperation() == AtomicRMWInst::USubCond || |
7742 | AI->getOperation() == AtomicRMWInst::USubSat) |
7743 | return AtomicExpansionKind::CmpXChg; |
7744 | |
7745 | if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() && |
7746 | (AI->getOperation() == AtomicRMWInst::Xchg || |
7747 | AI->getOperation() == AtomicRMWInst::Add || |
7748 | AI->getOperation() == AtomicRMWInst::Sub)) { |
7749 | return AtomicExpansionKind::None; |
7750 | } |
7751 | |
7752 | unsigned Size = AI->getType()->getPrimitiveSizeInBits(); |
7753 | if (Subtarget.hasLAMCAS()) { |
7754 | if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And || |
7755 | AI->getOperation() == AtomicRMWInst::Or || |
7756 | AI->getOperation() == AtomicRMWInst::Xor)) |
7757 | return AtomicExpansionKind::Expand; |
7758 | if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32) |
7759 | return AtomicExpansionKind::CmpXChg; |
7760 | } |
7761 | |
7762 | if (Size == 8 || Size == 16) |
7763 | return AtomicExpansionKind::MaskedIntrinsic; |
7764 | return AtomicExpansionKind::None; |
7765 | } |
7766 | |
7767 | static Intrinsic::ID |
7768 | getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, |
7769 | AtomicRMWInst::BinOp BinOp) { |
7770 | if (GRLen == 64) { |
7771 | switch (BinOp) { |
7772 | default: |
7773 | llvm_unreachable("Unexpected AtomicRMW BinOp" ); |
7774 | case AtomicRMWInst::Xchg: |
7775 | return Intrinsic::loongarch_masked_atomicrmw_xchg_i64; |
7776 | case AtomicRMWInst::Add: |
7777 | return Intrinsic::loongarch_masked_atomicrmw_add_i64; |
7778 | case AtomicRMWInst::Sub: |
7779 | return Intrinsic::loongarch_masked_atomicrmw_sub_i64; |
7780 | case AtomicRMWInst::Nand: |
7781 | return Intrinsic::loongarch_masked_atomicrmw_nand_i64; |
7782 | case AtomicRMWInst::UMax: |
7783 | return Intrinsic::loongarch_masked_atomicrmw_umax_i64; |
7784 | case AtomicRMWInst::UMin: |
7785 | return Intrinsic::loongarch_masked_atomicrmw_umin_i64; |
7786 | case AtomicRMWInst::Max: |
7787 | return Intrinsic::loongarch_masked_atomicrmw_max_i64; |
7788 | case AtomicRMWInst::Min: |
7789 | return Intrinsic::loongarch_masked_atomicrmw_min_i64; |
7790 | // TODO: support other AtomicRMWInst. |
7791 | } |
7792 | } |
7793 | |
7794 | if (GRLen == 32) { |
7795 | switch (BinOp) { |
7796 | default: |
7797 | llvm_unreachable("Unexpected AtomicRMW BinOp" ); |
7798 | case AtomicRMWInst::Xchg: |
7799 | return Intrinsic::loongarch_masked_atomicrmw_xchg_i32; |
7800 | case AtomicRMWInst::Add: |
7801 | return Intrinsic::loongarch_masked_atomicrmw_add_i32; |
7802 | case AtomicRMWInst::Sub: |
7803 | return Intrinsic::loongarch_masked_atomicrmw_sub_i32; |
7804 | case AtomicRMWInst::Nand: |
7805 | return Intrinsic::loongarch_masked_atomicrmw_nand_i32; |
7806 | case AtomicRMWInst::UMax: |
7807 | return Intrinsic::loongarch_masked_atomicrmw_umax_i32; |
7808 | case AtomicRMWInst::UMin: |
7809 | return Intrinsic::loongarch_masked_atomicrmw_umin_i32; |
7810 | case AtomicRMWInst::Max: |
7811 | return Intrinsic::loongarch_masked_atomicrmw_max_i32; |
7812 | case AtomicRMWInst::Min: |
7813 | return Intrinsic::loongarch_masked_atomicrmw_min_i32; |
7814 | // TODO: support other AtomicRMWInst. |
7815 | } |
7816 | } |
7817 | |
7818 | llvm_unreachable("Unexpected GRLen\n" ); |
7819 | } |
7820 | |
7821 | TargetLowering::AtomicExpansionKind |
7822 | LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR( |
7823 | AtomicCmpXchgInst *CI) const { |
7824 | |
7825 | if (Subtarget.hasLAMCAS()) |
7826 | return AtomicExpansionKind::None; |
7827 | |
7828 | unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits(); |
7829 | if (Size == 8 || Size == 16) |
7830 | return AtomicExpansionKind::MaskedIntrinsic; |
7831 | return AtomicExpansionKind::None; |
7832 | } |
7833 | |
7834 | Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( |
7835 | IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, |
7836 | Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { |
7837 | unsigned GRLen = Subtarget.getGRLen(); |
7838 | AtomicOrdering FailOrd = CI->getFailureOrdering(); |
7839 | Value *FailureOrdering = |
7840 | Builder.getIntN(N: Subtarget.getGRLen(), C: static_cast<uint64_t>(FailOrd)); |
7841 | Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32; |
7842 | if (GRLen == 64) { |
7843 | CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64; |
7844 | CmpVal = Builder.CreateSExt(V: CmpVal, DestTy: Builder.getInt64Ty()); |
7845 | NewVal = Builder.CreateSExt(V: NewVal, DestTy: Builder.getInt64Ty()); |
7846 | Mask = Builder.CreateSExt(V: Mask, DestTy: Builder.getInt64Ty()); |
7847 | } |
7848 | Type *Tys[] = {AlignedAddr->getType()}; |
7849 | Value *Result = Builder.CreateIntrinsic( |
7850 | ID: CmpXchgIntrID, Types: Tys, Args: {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering}); |
7851 | if (GRLen == 64) |
7852 | Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty()); |
7853 | return Result; |
7854 | } |
7855 | |
7856 | Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic( |
7857 | IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, |
7858 | Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const { |
7859 | // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace |
7860 | // the atomic instruction with an AtomicRMWInst::And/Or with appropriate |
7861 | // mask, as this produces better code than the LL/SC loop emitted by |
7862 | // int_loongarch_masked_atomicrmw_xchg. |
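// For example (illustrative): `atomicrmw xchg ptr %p, i8 0` becomes an
// `atomicrmw and` on the aligned word with the inverted mask, clearing just
// the addressed byte; `i8 -1` becomes an `atomicrmw or` with the mask.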
7863 | if (AI->getOperation() == AtomicRMWInst::Xchg && |
7864 | isa<ConstantInt>(Val: AI->getValOperand())) { |
7865 | ConstantInt *CVal = cast<ConstantInt>(Val: AI->getValOperand()); |
7866 | if (CVal->isZero()) |
7867 | return Builder.CreateAtomicRMW(Op: AtomicRMWInst::And, Ptr: AlignedAddr, |
7868 | Val: Builder.CreateNot(V: Mask, Name: "Inv_Mask" ), |
7869 | Align: AI->getAlign(), Ordering: Ord); |
7870 | if (CVal->isMinusOne()) |
7871 | return Builder.CreateAtomicRMW(Op: AtomicRMWInst::Or, Ptr: AlignedAddr, Val: Mask, |
7872 | Align: AI->getAlign(), Ordering: Ord); |
7873 | } |
7874 | |
7875 | unsigned GRLen = Subtarget.getGRLen(); |
7876 | Value *Ordering = |
7877 | Builder.getIntN(N: GRLen, C: static_cast<uint64_t>(AI->getOrdering())); |
7878 | Type *Tys[] = {AlignedAddr->getType()}; |
7879 | Function *LlwOpScwLoop = Intrinsic::getOrInsertDeclaration( |
7880 | M: AI->getModule(), |
7881 | id: getIntrinsicForMaskedAtomicRMWBinOp(GRLen, BinOp: AI->getOperation()), Tys); |
7882 | |
7883 | if (GRLen == 64) { |
7884 | Incr = Builder.CreateSExt(V: Incr, DestTy: Builder.getInt64Ty()); |
7885 | Mask = Builder.CreateSExt(V: Mask, DestTy: Builder.getInt64Ty()); |
7886 | ShiftAmt = Builder.CreateSExt(V: ShiftAmt, DestTy: Builder.getInt64Ty()); |
7887 | } |
7888 | |
7889 | Value *Result; |
7890 | |
7891 | // Must pass the shift amount needed to sign extend the loaded value prior |
7892 | // to performing a signed comparison for min/max. ShiftAmt is the number of |
7893 | // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which |
7894 | // is the number of bits to left+right shift the value in order to |
7895 | // sign-extend. |
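// For example (illustrative): with GRLen == 64, an i8 value at byte offset
// 1 has ShiftAmt == 8 and ValWidth == 8, so SextShamt == 64 - 8 - 8 == 48;
// shifting left and then arithmetic-right by 48 sign-extends the loaded
// field in place before the signed comparison.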
7896 | if (AI->getOperation() == AtomicRMWInst::Min || |
7897 | AI->getOperation() == AtomicRMWInst::Max) { |
7898 | const DataLayout &DL = AI->getDataLayout(); |
7899 | unsigned ValWidth = |
7900 | DL.getTypeStoreSizeInBits(Ty: AI->getValOperand()->getType()); |
7901 | Value *SextShamt = |
7902 | Builder.CreateSub(LHS: Builder.getIntN(N: GRLen, C: GRLen - ValWidth), RHS: ShiftAmt); |
7903 | Result = Builder.CreateCall(Callee: LlwOpScwLoop, |
7904 | Args: {AlignedAddr, Incr, Mask, SextShamt, Ordering}); |
7905 | } else { |
7906 | Result = |
7907 | Builder.CreateCall(Callee: LlwOpScwLoop, Args: {AlignedAddr, Incr, Mask, Ordering}); |
7908 | } |
7909 | |
7910 | if (GRLen == 64) |
7911 | Result = Builder.CreateTrunc(V: Result, DestTy: Builder.getInt32Ty()); |
7912 | return Result; |
7913 | } |
7914 | |
7915 | bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd( |
7916 | const MachineFunction &MF, EVT VT) const { |
7917 | VT = VT.getScalarType(); |
7918 | |
7919 | if (!VT.isSimple()) |
7920 | return false; |
7921 | |
7922 | switch (VT.getSimpleVT().SimpleTy) { |
7923 | case MVT::f32: |
7924 | case MVT::f64: |
7925 | return true; |
7926 | default: |
7927 | break; |
7928 | } |
7929 | |
7930 | return false; |
7931 | } |
7932 | |
7933 | Register LoongArchTargetLowering::getExceptionPointerRegister( |
7934 | const Constant *PersonalityFn) const { |
7935 | return LoongArch::R4; |
7936 | } |
7937 | |
7938 | Register LoongArchTargetLowering::getExceptionSelectorRegister( |
7939 | const Constant *PersonalityFn) const { |
7940 | return LoongArch::R5; |
7941 | } |
7942 | |
7943 | //===----------------------------------------------------------------------===// |
7944 | // Target Optimization Hooks |
7945 | //===----------------------------------------------------------------------===// |
7946 | |
7947 | static int getEstimateRefinementSteps(EVT VT, |
7948 | const LoongArchSubtarget &Subtarget) { |
// The FRECIPE family of instructions has a relative accuracy of 2^-14.
// IEEE single precision has 23 significand bits and double has 52; each
// Newton-Raphson step roughly doubles the number of accurate bits, so one
// step suffices for f32 and two steps for f64.
7951 | int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1; |
7952 | return RefinementSteps; |
7953 | } |
7954 | |
7955 | SDValue LoongArchTargetLowering::getSqrtEstimate(SDValue Operand, |
7956 | SelectionDAG &DAG, int Enabled, |
7957 | int &RefinementSteps, |
7958 | bool &UseOneConstNR, |
7959 | bool Reciprocal) const { |
7960 | if (Subtarget.hasFrecipe()) { |
7961 | SDLoc DL(Operand); |
7962 | EVT VT = Operand.getValueType(); |
7963 | |
7964 | if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) || |
7965 | (VT == MVT::v4f32 && Subtarget.hasExtLSX()) || |
7966 | (VT == MVT::v2f64 && Subtarget.hasExtLSX()) || |
7967 | (VT == MVT::v8f32 && Subtarget.hasExtLASX()) || |
7968 | (VT == MVT::v4f64 && Subtarget.hasExtLASX())) { |
7969 | |
7970 | if (RefinementSteps == ReciprocalEstimate::Unspecified) |
7971 | RefinementSteps = getEstimateRefinementSteps(VT, Subtarget); |
7972 | |
7973 | SDValue Estimate = DAG.getNode(Opcode: LoongArchISD::FRSQRTE, DL, VT, Operand); |
7974 | if (Reciprocal) |
7975 | Estimate = DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: Operand, N2: Estimate); |
7976 | |
7977 | return Estimate; |
7978 | } |
7979 | } |
7980 | |
7981 | return SDValue(); |
7982 | } |
7983 | |
7984 | SDValue LoongArchTargetLowering::getRecipEstimate(SDValue Operand, |
7985 | SelectionDAG &DAG, |
7986 | int Enabled, |
7987 | int &RefinementSteps) const { |
7988 | if (Subtarget.hasFrecipe()) { |
7989 | SDLoc DL(Operand); |
7990 | EVT VT = Operand.getValueType(); |
7991 | |
7992 | if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) || |
7993 | (VT == MVT::v4f32 && Subtarget.hasExtLSX()) || |
7994 | (VT == MVT::v2f64 && Subtarget.hasExtLSX()) || |
7995 | (VT == MVT::v8f32 && Subtarget.hasExtLASX()) || |
7996 | (VT == MVT::v4f64 && Subtarget.hasExtLASX())) { |
7997 | |
7998 | if (RefinementSteps == ReciprocalEstimate::Unspecified) |
7999 | RefinementSteps = getEstimateRefinementSteps(VT, Subtarget); |
8000 | |
8001 | return DAG.getNode(Opcode: LoongArchISD::FRECIPE, DL, VT, Operand); |
8002 | } |
8003 | } |
8004 | |
8005 | return SDValue(); |
8006 | } |
8007 | |
8008 | //===----------------------------------------------------------------------===// |
8009 | // LoongArch Inline Assembly Support |
8010 | //===----------------------------------------------------------------------===// |
8011 | |
8012 | LoongArchTargetLowering::ConstraintType |
8013 | LoongArchTargetLowering::getConstraintType(StringRef Constraint) const { |
8014 | // LoongArch specific constraints in GCC: config/loongarch/constraints.md |
8015 | // |
8016 | // 'f': A floating-point register (if available). |
8017 | // 'k': A memory operand whose address is formed by a base register and |
8018 | // (optionally scaled) index register. |
8019 | // 'l': A signed 16-bit constant. |
8020 | // 'm': A memory operand whose address is formed by a base register and |
8021 | // offset that is suitable for use in instructions with the same |
8022 | // addressing mode as st.w and ld.w. |
8023 | // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg |
8024 | // instruction) |
8025 | // 'I': A signed 12-bit constant (for arithmetic instructions). |
8026 | // 'J': Integer zero. |
8027 | // 'K': An unsigned 12-bit constant (for logic instructions). |
8028 | // "ZB": An address that is held in a general-purpose register. The offset is |
8029 | // zero. |
8030 | // "ZC": A memory operand whose address is formed by a base register and |
8031 | // offset that is suitable for use in instructions with the same |
8032 | // addressing mode as ll.w and sc.w. |
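// Illustrative uses (assumed examples, not exhaustive):
//   asm ("csrxchg %0, %1, 0" : "+r"(val) : "q"(mask)); // 'q': not $r0/$r1
//   asm ("ll.w %0, %1" : "=r"(v) : "ZC"(*ptr));        // ll.w-style mem op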
8033 | if (Constraint.size() == 1) { |
8034 | switch (Constraint[0]) { |
8035 | default: |
8036 | break; |
8037 | case 'f': |
8038 | case 'q': |
8039 | return C_RegisterClass; |
8040 | case 'l': |
8041 | case 'I': |
8042 | case 'J': |
8043 | case 'K': |
8044 | return C_Immediate; |
8045 | case 'k': |
8046 | return C_Memory; |
8047 | } |
8048 | } |
8049 | |
8050 | if (Constraint == "ZC" || Constraint == "ZB" ) |
8051 | return C_Memory; |
8052 | |
8053 | // 'm' is handled here. |
8054 | return TargetLowering::getConstraintType(Constraint); |
8055 | } |
8056 | |
8057 | InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint( |
8058 | StringRef ConstraintCode) const { |
8059 | return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode) |
8060 | .Case(S: "k" , Value: InlineAsm::ConstraintCode::k) |
8061 | .Case(S: "ZB" , Value: InlineAsm::ConstraintCode::ZB) |
8062 | .Case(S: "ZC" , Value: InlineAsm::ConstraintCode::ZC) |
8063 | .Default(Value: TargetLowering::getInlineAsmMemConstraint(ConstraintCode)); |
8064 | } |
8065 | |
8066 | std::pair<unsigned, const TargetRegisterClass *> |
8067 | LoongArchTargetLowering::getRegForInlineAsmConstraint( |
8068 | const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { |
8069 | // First, see if this is a constraint that directly corresponds to a LoongArch |
8070 | // register class. |
8071 | if (Constraint.size() == 1) { |
8072 | switch (Constraint[0]) { |
8073 | case 'r': |
8074 | // TODO: Support fixed vectors up to GRLen? |
8075 | if (VT.isVector()) |
8076 | break; |
8077 | return std::make_pair(x: 0U, y: &LoongArch::GPRRegClass); |
8078 | case 'q': |
8079 | return std::make_pair(x: 0U, y: &LoongArch::GPRNoR0R1RegClass); |
8080 | case 'f': |
8081 | if (Subtarget.hasBasicF() && VT == MVT::f32) |
8082 | return std::make_pair(x: 0U, y: &LoongArch::FPR32RegClass); |
8083 | if (Subtarget.hasBasicD() && VT == MVT::f64) |
8084 | return std::make_pair(x: 0U, y: &LoongArch::FPR64RegClass); |
8085 | if (Subtarget.hasExtLSX() && |
8086 | TRI->isTypeLegalForClass(RC: LoongArch::LSX128RegClass, T: VT)) |
8087 | return std::make_pair(x: 0U, y: &LoongArch::LSX128RegClass); |
8088 | if (Subtarget.hasExtLASX() && |
8089 | TRI->isTypeLegalForClass(RC: LoongArch::LASX256RegClass, T: VT)) |
8090 | return std::make_pair(x: 0U, y: &LoongArch::LASX256RegClass); |
8091 | break; |
8092 | default: |
8093 | break; |
8094 | } |
8095 | } |
8096 | |
8097 | // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen |
8098 | // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm |
8099 | // constraints while the official register name is prefixed with a '$'. So we |
// clip the '$' from the original constraint string (e.g. {$r0} to {r0})
// before it is parsed. TargetLowering::getRegForInlineAsmConstraint is also
// case insensitive, so there is no need to convert the constraint to upper
// case here.
8103 | // |
// For now, there is no need to support ABI names (e.g. `$a0`), as clang
// correctly decodes register name aliases into their official names, and
// AFAIK the not-yet-upstreamed `rustc` for LoongArch will always use
// official register names.
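// For example (illustrative): the constraint "{$f0}" with an f64 operand
// resolves to F0_64 in FPR64RegClass rather than the f32 register F0.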
8108 | if (Constraint.starts_with(Prefix: "{$r" ) || Constraint.starts_with(Prefix: "{$f" ) || |
8109 | Constraint.starts_with(Prefix: "{$vr" ) || Constraint.starts_with(Prefix: "{$xr" )) { |
8110 | bool IsFP = Constraint[2] == 'f'; |
8111 | std::pair<StringRef, StringRef> Temp = Constraint.split(Separator: '$'); |
8112 | std::pair<unsigned, const TargetRegisterClass *> R; |
8113 | R = TargetLowering::getRegForInlineAsmConstraint( |
8114 | TRI, Constraint: join_items(Separator: "" , Items&: Temp.first, Items&: Temp.second), VT); |
8115 | // Match those names to the widest floating point register type available. |
8116 | if (IsFP) { |
8117 | unsigned RegNo = R.first; |
8118 | if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) { |
8119 | if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) { |
8120 | unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64; |
8121 | return std::make_pair(x&: DReg, y: &LoongArch::FPR64RegClass); |
8122 | } |
8123 | } |
8124 | } |
8125 | return R; |
8126 | } |
8127 | |
8128 | return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); |
8129 | } |
8130 | |
8131 | void LoongArchTargetLowering::LowerAsmOperandForConstraint( |
8132 | SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops, |
8133 | SelectionDAG &DAG) const { |
// Currently we only support length-1 constraints.
8135 | if (Constraint.size() == 1) { |
8136 | switch (Constraint[0]) { |
8137 | case 'l': |
8138 | // Validate & create a 16-bit signed immediate operand. |
8139 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) { |
8140 | uint64_t CVal = C->getSExtValue(); |
8141 | if (isInt<16>(x: CVal)) |
8142 | Ops.push_back(x: DAG.getSignedTargetConstant(Val: CVal, DL: SDLoc(Op), |
8143 | VT: Subtarget.getGRLenVT())); |
8144 | } |
8145 | return; |
8146 | case 'I': |
8147 | // Validate & create a 12-bit signed immediate operand. |
8148 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) { |
8149 | uint64_t CVal = C->getSExtValue(); |
8150 | if (isInt<12>(x: CVal)) |
8151 | Ops.push_back(x: DAG.getSignedTargetConstant(Val: CVal, DL: SDLoc(Op), |
8152 | VT: Subtarget.getGRLenVT())); |
8153 | } |
8154 | return; |
8155 | case 'J': |
8156 | // Validate & create an integer zero operand. |
8157 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) |
8158 | if (C->getZExtValue() == 0) |
8159 | Ops.push_back( |
8160 | x: DAG.getTargetConstant(Val: 0, DL: SDLoc(Op), VT: Subtarget.getGRLenVT())); |
8161 | return; |
8162 | case 'K': |
8163 | // Validate & create a 12-bit unsigned immediate operand. |
8164 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) { |
8165 | uint64_t CVal = C->getZExtValue(); |
8166 | if (isUInt<12>(x: CVal)) |
8167 | Ops.push_back( |
8168 | x: DAG.getTargetConstant(Val: CVal, DL: SDLoc(Op), VT: Subtarget.getGRLenVT())); |
8169 | } |
8170 | return; |
8171 | default: |
8172 | break; |
8173 | } |
8174 | } |
8175 | TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); |
8176 | } |
8177 | |
8178 | #define GET_REGISTER_MATCHER |
8179 | #include "LoongArchGenAsmMatcher.inc" |
8180 | |
8181 | Register |
8182 | LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT, |
8183 | const MachineFunction &MF) const { |
8184 | std::pair<StringRef, StringRef> Name = StringRef(RegName).split(Separator: '$'); |
8185 | std::string NewRegName = Name.second.str(); |
8186 | Register Reg = MatchRegisterAltName(Name: NewRegName); |
8187 | if (!Reg) |
8188 | Reg = MatchRegisterName(Name: NewRegName); |
8189 | if (!Reg) |
8190 | return Reg; |
8191 | BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF); |
8192 | if (!ReservedRegs.test(Idx: Reg)) |
8193 | report_fatal_error(reason: Twine("Trying to obtain non-reserved register \"" + |
8194 | StringRef(RegName) + "\"." )); |
8195 | return Reg; |
8196 | } |
8197 | |
8198 | bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context, |
8199 | EVT VT, SDValue C) const { |
8200 | // TODO: Support vectors. |
8201 | if (!VT.isScalarInteger()) |
8202 | return false; |
8203 | |
8204 | // Omit the optimization if the data size exceeds GRLen. |
8205 | if (VT.getSizeInBits() > Subtarget.getGRLen()) |
8206 | return false; |
8207 | |
8208 | if (auto *ConstNode = dyn_cast<ConstantSDNode>(Val: C.getNode())) { |
8209 | const APInt &Imm = ConstNode->getAPIntValue(); |
8210 | // Break MUL into (SLLI + ADD/SUB) or ALSL. |
8211 | if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() || |
8212 | (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2()) |
8213 | return true; |
8214 | // Break MUL into (ALSL x, (SLLI x, imm0), imm1). |
8215 | if (ConstNode->hasOneUse() && |
8216 | ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() || |
8217 | (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2())) |
8218 | return true; |
// Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)) when the
// immediate has exactly two set bits, or into
// (SUB (SLLI x, s0), (SLLI x, s1)) when the immediate equals
// (1 << s0) - (1 << s1).
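// For example (illustrative): Imm == 4112 == (1 << 12) + (1 << 4) becomes
// (ADD (SLLI x, 12), (SLLI x, 4)), and Imm == 8176 == (1 << 13) - (1 << 4)
// becomes (SUB (SLLI x, 13), (SLLI x, 4)).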
8223 | if (ConstNode->hasOneUse() && !(Imm.sge(RHS: -2048) && Imm.sle(RHS: 4095))) { |
8224 | unsigned Shifts = Imm.countr_zero(); |
8225 | // Reject immediates which can be composed via a single LUI. |
8226 | if (Shifts >= 12) |
8227 | return false; |
// Reject multiplications that can be optimized to
// (SLLI (ALSL x, x, 1/2/3/4), s).
8230 | APInt ImmPop = Imm.ashr(ShiftAmt: Shifts); |
8231 | if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17) |
8232 | return false; |
// We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`, since it
// needs one more instruction than the other three cases.
8235 | APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true); |
8236 | if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() || |
8237 | (ImmSmall - Imm).isPowerOf2()) |
8238 | return true; |
8239 | } |
8240 | } |
8241 | |
8242 | return false; |
8243 | } |
8244 | |
8245 | bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL, |
8246 | const AddrMode &AM, |
8247 | Type *Ty, unsigned AS, |
8248 | Instruction *I) const { |
8249 | // LoongArch has four basic addressing modes: |
8250 | // 1. reg |
8251 | // 2. reg + 12-bit signed offset |
8252 | // 3. reg + 14-bit signed offset left-shifted by 2 |
8253 | // 4. reg1 + reg2 |
// TODO: Add more checks after supporting the vector extension.
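// Illustrative instructions for modes 2-4 (assembly sketch):
//   ld.w    $a0, $a1, 12   # reg + 12-bit signed offset
//   ldptr.w $a0, $a1, 8    # reg + 14-bit signed offset << 2
//   ldx.w   $a0, $a1, $a2  # reg1 + reg2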
8255 | |
8256 | // No global is ever allowed as a base. |
8257 | if (AM.BaseGV) |
8258 | return false; |
8259 | |
8260 | // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2 |
8261 | // with `UAL` feature. |
8262 | if (!isInt<12>(x: AM.BaseOffs) && |
8263 | !(isShiftedInt<14, 2>(x: AM.BaseOffs) && Subtarget.hasUAL())) |
8264 | return false; |
8265 | |
8266 | switch (AM.Scale) { |
8267 | case 0: |
8268 | // "r+i" or just "i", depending on HasBaseReg. |
8269 | break; |
8270 | case 1: |
8271 | // "r+r+i" is not allowed. |
8272 | if (AM.HasBaseReg && AM.BaseOffs) |
8273 | return false; |
8274 | // Otherwise we have "r+r" or "r+i". |
8275 | break; |
8276 | case 2: |
8277 | // "2*r+r" or "2*r+i" is not allowed. |
8278 | if (AM.HasBaseReg || AM.BaseOffs) |
8279 | return false; |
8280 | // Allow "2*r" as "r+r". |
8281 | break; |
8282 | default: |
8283 | return false; |
8284 | } |
8285 | |
8286 | return true; |
8287 | } |
8288 | |
8289 | bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const { |
8290 | return isInt<12>(x: Imm); |
8291 | } |
8292 | |
8293 | bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const { |
8294 | return isInt<12>(x: Imm); |
8295 | } |
8296 | |
8297 | bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { |
8298 | // Zexts are free if they can be combined with a load. |
8299 | // Don't advertise i32->i64 zextload as being free for LA64. It interacts |
8300 | // poorly with type legalization of compares preferring sext. |
8301 | if (auto *LD = dyn_cast<LoadSDNode>(Val)) { |
8302 | EVT MemVT = LD->getMemoryVT(); |
8303 | if ((MemVT == MVT::i8 || MemVT == MVT::i16) && |
8304 | (LD->getExtensionType() == ISD::NON_EXTLOAD || |
8305 | LD->getExtensionType() == ISD::ZEXTLOAD)) |
8306 | return true; |
8307 | } |
8308 | |
8309 | return TargetLowering::isZExtFree(Val, VT2); |
8310 | } |
8311 | |
8312 | bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, |
8313 | EVT DstVT) const { |
8314 | return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64; |
8315 | } |
8316 | |
8317 | bool LoongArchTargetLowering::signExtendConstant(const ConstantInt *CI) const { |
8318 | return Subtarget.is64Bit() && CI->getType()->isIntegerTy(Bitwidth: 32); |
8319 | } |
8320 | |
8321 | bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const { |
8322 | // TODO: Support vectors. |
8323 | if (Y.getValueType().isVector()) |
8324 | return false; |
8325 | |
8326 | return !isa<ConstantSDNode>(Val: Y); |
8327 | } |
8328 | |
8329 | ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const { |
8330 | // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension. |
8331 | return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND; |
8332 | } |
8333 | |
8334 | bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall( |
8335 | Type *Ty, bool IsSigned) const { |
8336 | if (Subtarget.is64Bit() && Ty->isIntegerTy(Bitwidth: 32)) |
8337 | return true; |
8338 | |
8339 | return IsSigned; |
8340 | } |
8341 | |
8342 | bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const { |
// Return false to suppress unnecessary extensions if the libcall arguments
// or return value is a float narrower than GRLen on a soft FP ABI.
8345 | if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() && |
8346 | Type.getSizeInBits() < Subtarget.getGRLen())) |
8347 | return false; |
8348 | return true; |
8349 | } |
8350 | |
// memcpy and other memory intrinsics typically try to use wider load/store
// operations when the source/dest is aligned and the copy size is large
// enough. We therefore want to align objects passed to memory intrinsics.
8354 | bool LoongArchTargetLowering::shouldAlignPointerArgs(CallInst *CI, |
8355 | unsigned &MinSize, |
8356 | Align &PrefAlign) const { |
8357 | if (!isa<MemIntrinsic>(Val: CI)) |
8358 | return false; |
8359 | |
8360 | if (Subtarget.is64Bit()) { |
8361 | MinSize = 8; |
8362 | PrefAlign = Align(8); |
8363 | } else { |
8364 | MinSize = 4; |
8365 | PrefAlign = Align(4); |
8366 | } |
8367 | |
8368 | return true; |
8369 | } |
8370 | |
8371 | TargetLoweringBase::LegalizeTypeAction |
8372 | LoongArchTargetLowering::getPreferredVectorAction(MVT VT) const { |
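// Favor widening over splitting/scalarizing for short non-i1 vectors;
// e.g. (illustrative) v3i32 is widened to the legal v4i32 under LSX.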
8373 | if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 && |
8374 | VT.getVectorElementType() != MVT::i1) |
8375 | return TypeWidenVector; |
8376 | |
8377 | return TargetLoweringBase::getPreferredVectorAction(VT); |
8378 | } |
8379 | |
8380 | bool LoongArchTargetLowering::splitValueIntoRegisterParts( |
8381 | SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, |
8382 | unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const { |
8383 | bool IsABIRegCopy = CC.has_value(); |
8384 | EVT ValueVT = Val.getValueType(); |
8385 | |
8386 | if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) && |
8387 | PartVT == MVT::f32) { |
// Cast the [b]f16 to i16, extend to i32, pad the high half with ones to
// make an f32 NaN, and cast to f32.
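// For example (illustrative): the f16 bit pattern 0x3C00 (1.0) becomes the
// f32 bit pattern 0xFFFF3C00, a NaN whose low 16 bits carry the original
// half value.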
8390 | Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::i16, Operand: Val); |
8391 | Val = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i32, Operand: Val); |
8392 | Val = DAG.getNode(Opcode: ISD::OR, DL, VT: MVT::i32, N1: Val, |
8393 | N2: DAG.getConstant(Val: 0xFFFF0000, DL, VT: MVT::i32)); |
8394 | Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::f32, Operand: Val); |
8395 | Parts[0] = Val; |
8396 | return true; |
8397 | } |
8398 | |
8399 | return false; |
8400 | } |
8401 | |
8402 | SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue( |
8403 | SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, |
8404 | MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const { |
8405 | bool IsABIRegCopy = CC.has_value(); |
8406 | |
8407 | if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) && |
8408 | PartVT == MVT::f32) { |
8409 | SDValue Val = Parts[0]; |
8410 | |
8411 | // Cast the f32 to i32, truncate to i16, and cast back to [b]f16. |
8412 | Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::i32, Operand: Val); |
8413 | Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i16, Operand: Val); |
8414 | Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ValueVT, Operand: Val); |
8415 | return Val; |
8416 | } |
8417 | |
8418 | return SDValue(); |
8419 | } |
8420 | |
8421 | MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, |
8422 | CallingConv::ID CC, |
8423 | EVT VT) const { |
8424 | // Use f32 to pass f16. |
8425 | if (VT == MVT::f16 && Subtarget.hasBasicF()) |
8426 | return MVT::f32; |
8427 | |
8428 | return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); |
8429 | } |
8430 | |
8431 | unsigned LoongArchTargetLowering::getNumRegistersForCallingConv( |
8432 | LLVMContext &Context, CallingConv::ID CC, EVT VT) const { |
8433 | // Use f32 to pass f16. |
8434 | if (VT == MVT::f16 && Subtarget.hasBasicF()) |
8435 | return 1; |
8436 | |
8437 | return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); |
8438 | } |
8439 | |
8440 | bool LoongArchTargetLowering::SimplifyDemandedBitsForTargetNode( |
8441 | SDValue Op, const APInt &OriginalDemandedBits, |
8442 | const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, |
8443 | unsigned Depth) const { |
8444 | EVT VT = Op.getValueType(); |
8445 | unsigned BitWidth = OriginalDemandedBits.getBitWidth(); |
8446 | unsigned Opc = Op.getOpcode(); |
8447 | switch (Opc) { |
8448 | default: |
8449 | break; |
8450 | case LoongArchISD::VMSKLTZ: |
8451 | case LoongArchISD::XVMSKLTZ: { |
8452 | SDValue Src = Op.getOperand(i: 0); |
8453 | MVT SrcVT = Src.getSimpleValueType(); |
8454 | unsigned SrcBits = SrcVT.getScalarSizeInBits(); |
8455 | unsigned NumElts = SrcVT.getVectorNumElements(); |
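// e.g. for v16i8 (illustrative), [X]VMSKLTZ produces one mask bit per
// element, so only the low NumElts bits of the result can be nonzero.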
8456 | |
// If we don't need the sign bits at all, just return zero.
8458 | if (OriginalDemandedBits.countr_zero() >= NumElts) |
8459 | return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: 0, DL: SDLoc(Op), VT)); |
8460 | |
8461 | // Only demand the vector elements of the sign bits we need. |
8462 | APInt KnownUndef, KnownZero; |
8463 | APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(width: NumElts); |
8464 | if (SimplifyDemandedVectorElts(Op: Src, DemandedEltMask: DemandedElts, KnownUndef, KnownZero, |
8465 | TLO, Depth: Depth + 1)) |
8466 | return true; |
8467 | |
8468 | Known.Zero = KnownZero.zext(width: BitWidth); |
8469 | Known.Zero.setHighBits(BitWidth - NumElts); |
8470 | |
8471 | // [X]VMSKLTZ only uses the MSB from each vector element. |
8472 | KnownBits KnownSrc; |
8473 | APInt DemandedSrcBits = APInt::getSignMask(BitWidth: SrcBits); |
8474 | if (SimplifyDemandedBits(Op: Src, DemandedBits: DemandedSrcBits, DemandedElts, Known&: KnownSrc, TLO, |
8475 | Depth: Depth + 1)) |
8476 | return true; |
8477 | |
8478 | if (KnownSrc.One[SrcBits - 1]) |
8479 | Known.One.setLowBits(NumElts); |
8480 | else if (KnownSrc.Zero[SrcBits - 1]) |
8481 | Known.Zero.setLowBits(NumElts); |
8482 | |
8483 | // Attempt to avoid multi-use ops if we don't need anything from it. |
8484 | if (SDValue NewSrc = SimplifyMultipleUseDemandedBits( |
8485 | Op: Src, DemandedBits: DemandedSrcBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1)) |
8486 | return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: SDLoc(Op), VT, Operand: NewSrc)); |
8487 | return false; |
8488 | } |
8489 | } |
8490 | |
8491 | return TargetLowering::SimplifyDemandedBitsForTargetNode( |
8492 | Op, DemandedBits: OriginalDemandedBits, DemandedElts: OriginalDemandedElts, Known, TLO, Depth); |
8493 | } |
8494 | |