1 | //===- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface -------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Subclass of MipsTargetLowering specialized for mips32/64. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "MipsSEISelLowering.h" |
14 | #include "MipsMachineFunction.h" |
15 | #include "MipsRegisterInfo.h" |
16 | #include "MipsSubtarget.h" |
17 | #include "llvm/ADT/APInt.h" |
18 | #include "llvm/ADT/STLExtras.h" |
19 | #include "llvm/ADT/SmallVector.h" |
20 | #include "llvm/CodeGen/CallingConvLower.h" |
21 | #include "llvm/CodeGen/ISDOpcodes.h" |
22 | #include "llvm/CodeGen/MachineBasicBlock.h" |
23 | #include "llvm/CodeGen/MachineFunction.h" |
24 | #include "llvm/CodeGen/MachineInstr.h" |
25 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
26 | #include "llvm/CodeGen/MachineMemOperand.h" |
27 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
28 | #include "llvm/CodeGen/SelectionDAG.h" |
29 | #include "llvm/CodeGen/SelectionDAGNodes.h" |
30 | #include "llvm/CodeGen/TargetInstrInfo.h" |
31 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
32 | #include "llvm/CodeGen/ValueTypes.h" |
33 | #include "llvm/CodeGenTypes/MachineValueType.h" |
34 | #include "llvm/IR/DebugLoc.h" |
35 | #include "llvm/IR/Intrinsics.h" |
36 | #include "llvm/IR/IntrinsicsMips.h" |
37 | #include "llvm/Support/Casting.h" |
38 | #include "llvm/Support/CommandLine.h" |
39 | #include "llvm/Support/Debug.h" |
40 | #include "llvm/Support/ErrorHandling.h" |
41 | #include "llvm/Support/MathExtras.h" |
42 | #include "llvm/Support/raw_ostream.h" |
43 | #include "llvm/TargetParser/Triple.h" |
44 | #include <algorithm> |
45 | #include <cassert> |
46 | #include <cstdint> |
47 | #include <iterator> |
48 | #include <utility> |
49 | |
50 | using namespace llvm; |
51 | |
52 | #define DEBUG_TYPE "mips-isel" |
53 | |
54 | static cl::opt<bool> |
55 | UseMipsTailCalls("mips-tail-calls" , cl::Hidden, |
56 | cl::desc("MIPS: permit tail calls." ), cl::init(Val: false)); |
57 | |
58 | static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1" , cl::init(Val: false), |
59 | cl::desc("Expand double precision loads and " |
60 | "stores to their single precision " |
61 | "counterparts" )); |
62 | |
63 | MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM, |
64 | const MipsSubtarget &STI) |
65 | : MipsTargetLowering(TM, STI) { |
66 | // Set up the register classes |
67 | addRegisterClass(VT: MVT::i32, RC: &Mips::GPR32RegClass); |
68 | |
69 | if (Subtarget.isGP64bit()) |
70 | addRegisterClass(VT: MVT::i64, RC: &Mips::GPR64RegClass); |
71 | |
72 | if (Subtarget.hasDSP() || Subtarget.hasMSA()) { |
73 | // Expand all truncating stores and extending loads. |
74 | for (MVT VT0 : MVT::fixedlen_vector_valuetypes()) { |
75 | for (MVT VT1 : MVT::fixedlen_vector_valuetypes()) { |
76 | setTruncStoreAction(ValVT: VT0, MemVT: VT1, Action: Expand); |
77 | setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: VT0, MemVT: VT1, Action: Expand); |
78 | setLoadExtAction(ExtType: ISD::ZEXTLOAD, ValVT: VT0, MemVT: VT1, Action: Expand); |
79 | setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: VT0, MemVT: VT1, Action: Expand); |
80 | } |
81 | } |
82 | } |
83 | |
84 | if (Subtarget.hasDSP()) { |
85 | MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8}; |
86 | |
87 | for (const auto &VecTy : VecTys) { |
88 | addRegisterClass(VT: VecTy, RC: &Mips::DSPRRegClass); |
89 | |
90 | // Expand all builtin opcodes. |
91 | for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) |
92 | setOperationAction(Op: Opc, VT: VecTy, Action: Expand); |
93 | |
94 | setOperationAction(Op: ISD::ADD, VT: VecTy, Action: Legal); |
95 | setOperationAction(Op: ISD::SUB, VT: VecTy, Action: Legal); |
96 | setOperationAction(Op: ISD::LOAD, VT: VecTy, Action: Legal); |
97 | setOperationAction(Op: ISD::STORE, VT: VecTy, Action: Legal); |
98 | setOperationAction(Op: ISD::BITCAST, VT: VecTy, Action: Legal); |
99 | } |
100 | |
101 | setTargetDAGCombine( |
102 | {ISD::SHL, ISD::SRA, ISD::SRL, ISD::SETCC, ISD::VSELECT}); |
103 | |
104 | if (Subtarget.hasMips32r2()) { |
105 | setOperationAction(Op: ISD::ADDC, VT: MVT::i32, Action: Legal); |
106 | setOperationAction(Op: ISD::ADDE, VT: MVT::i32, Action: Legal); |
107 | } |
108 | } |
109 | |
110 | if (Subtarget.hasDSPR2()) |
111 | setOperationAction(Op: ISD::MUL, VT: MVT::v2i16, Action: Legal); |
112 | |
113 | if (Subtarget.hasMSA()) { |
114 | addMSAIntType(Ty: MVT::v16i8, RC: &Mips::MSA128BRegClass); |
115 | addMSAIntType(Ty: MVT::v8i16, RC: &Mips::MSA128HRegClass); |
116 | addMSAIntType(Ty: MVT::v4i32, RC: &Mips::MSA128WRegClass); |
117 | addMSAIntType(Ty: MVT::v2i64, RC: &Mips::MSA128DRegClass); |
118 | addMSAFloatType(Ty: MVT::v8f16, RC: &Mips::MSA128HRegClass); |
119 | addMSAFloatType(Ty: MVT::v4f32, RC: &Mips::MSA128WRegClass); |
120 | addMSAFloatType(Ty: MVT::v2f64, RC: &Mips::MSA128DRegClass); |
121 | |
122 | // f16 is a storage-only type, always promote it to f32. |
123 | addRegisterClass(VT: MVT::f16, RC: &Mips::MSA128HRegClass); |
124 | setOperationAction(Op: ISD::SETCC, VT: MVT::f16, Action: Promote); |
125 | setOperationAction(Op: ISD::BR_CC, VT: MVT::f16, Action: Promote); |
126 | setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f16, Action: Promote); |
127 | setOperationAction(Op: ISD::SELECT, VT: MVT::f16, Action: Promote); |
128 | setOperationAction(Op: ISD::FADD, VT: MVT::f16, Action: Promote); |
129 | setOperationAction(Op: ISD::FSUB, VT: MVT::f16, Action: Promote); |
130 | setOperationAction(Op: ISD::FMUL, VT: MVT::f16, Action: Promote); |
131 | setOperationAction(Op: ISD::FDIV, VT: MVT::f16, Action: Promote); |
132 | setOperationAction(Op: ISD::FREM, VT: MVT::f16, Action: Promote); |
133 | setOperationAction(Op: ISD::FMA, VT: MVT::f16, Action: Promote); |
134 | setOperationAction(Op: ISD::FNEG, VT: MVT::f16, Action: Promote); |
135 | setOperationAction(Op: ISD::FABS, VT: MVT::f16, Action: Promote); |
136 | setOperationAction(Op: ISD::FCEIL, VT: MVT::f16, Action: Promote); |
137 | setOperationAction(Op: ISD::FCOPYSIGN, VT: MVT::f16, Action: Promote); |
138 | setOperationAction(Op: ISD::FCOS, VT: MVT::f16, Action: Promote); |
139 | setOperationAction(Op: ISD::FP_EXTEND, VT: MVT::f16, Action: Promote); |
140 | setOperationAction(Op: ISD::FFLOOR, VT: MVT::f16, Action: Promote); |
141 | setOperationAction(Op: ISD::FNEARBYINT, VT: MVT::f16, Action: Promote); |
142 | setOperationAction(Op: ISD::FPOW, VT: MVT::f16, Action: Promote); |
143 | setOperationAction(Op: ISD::FPOWI, VT: MVT::f16, Action: Promote); |
144 | setOperationAction(Op: ISD::FRINT, VT: MVT::f16, Action: Promote); |
145 | setOperationAction(Op: ISD::FSIN, VT: MVT::f16, Action: Promote); |
146 | setOperationAction(Op: ISD::FSINCOS, VT: MVT::f16, Action: Promote); |
147 | setOperationAction(Op: ISD::FSQRT, VT: MVT::f16, Action: Promote); |
148 | setOperationAction(Op: ISD::FEXP, VT: MVT::f16, Action: Promote); |
149 | setOperationAction(Op: ISD::FEXP2, VT: MVT::f16, Action: Promote); |
150 | setOperationAction(Op: ISD::FLOG, VT: MVT::f16, Action: Promote); |
151 | setOperationAction(Op: ISD::FLOG2, VT: MVT::f16, Action: Promote); |
152 | setOperationAction(Op: ISD::FLOG10, VT: MVT::f16, Action: Promote); |
153 | setOperationAction(Op: ISD::FROUND, VT: MVT::f16, Action: Promote); |
154 | setOperationAction(Op: ISD::FTRUNC, VT: MVT::f16, Action: Promote); |
155 | setOperationAction(Op: ISD::FMINNUM, VT: MVT::f16, Action: Promote); |
156 | setOperationAction(Op: ISD::FMAXNUM, VT: MVT::f16, Action: Promote); |
157 | setOperationAction(Op: ISD::FMINIMUM, VT: MVT::f16, Action: Promote); |
158 | setOperationAction(Op: ISD::FMAXIMUM, VT: MVT::f16, Action: Promote); |
159 | |
160 | setTargetDAGCombine({ISD::AND, ISD::OR, ISD::SRA, ISD::VSELECT, ISD::XOR}); |
161 | } |
162 | |
163 | if (!Subtarget.useSoftFloat()) { |
164 | addRegisterClass(VT: MVT::f32, RC: &Mips::FGR32RegClass); |
165 | |
166 | // When dealing with single precision only, use libcalls |
167 | if (!Subtarget.isSingleFloat()) { |
168 | if (Subtarget.isFP64bit()) |
169 | addRegisterClass(VT: MVT::f64, RC: &Mips::FGR64RegClass); |
170 | else |
171 | addRegisterClass(VT: MVT::f64, RC: &Mips::AFGR64RegClass); |
172 | } |
173 | } |
174 | |
175 | setOperationAction(Op: ISD::SMUL_LOHI, VT: MVT::i32, Action: Custom); |
176 | setOperationAction(Op: ISD::UMUL_LOHI, VT: MVT::i32, Action: Custom); |
177 | setOperationAction(Op: ISD::MULHS, VT: MVT::i32, Action: Custom); |
178 | setOperationAction(Op: ISD::MULHU, VT: MVT::i32, Action: Custom); |
179 | |
180 | if (Subtarget.hasCnMips()) |
181 | setOperationAction(Op: ISD::MUL, VT: MVT::i64, Action: Legal); |
182 | else if (Subtarget.isGP64bit()) |
183 | setOperationAction(Op: ISD::MUL, VT: MVT::i64, Action: Custom); |
184 | |
185 | if (Subtarget.isGP64bit()) { |
186 | setOperationAction(Op: ISD::SMUL_LOHI, VT: MVT::i64, Action: Custom); |
187 | setOperationAction(Op: ISD::UMUL_LOHI, VT: MVT::i64, Action: Custom); |
188 | setOperationAction(Op: ISD::MULHS, VT: MVT::i64, Action: Custom); |
189 | setOperationAction(Op: ISD::MULHU, VT: MVT::i64, Action: Custom); |
190 | setOperationAction(Op: ISD::SDIVREM, VT: MVT::i64, Action: Custom); |
191 | setOperationAction(Op: ISD::UDIVREM, VT: MVT::i64, Action: Custom); |
192 | } |
193 | |
194 | setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::i64, Action: Custom); |
195 | setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::i64, Action: Custom); |
196 | |
197 | setOperationAction(Op: ISD::SDIVREM, VT: MVT::i32, Action: Custom); |
198 | setOperationAction(Op: ISD::UDIVREM, VT: MVT::i32, Action: Custom); |
199 | setOperationAction(Op: ISD::ATOMIC_FENCE, VT: MVT::Other, Action: Custom); |
200 | if (Subtarget.hasMips32r6()) { |
201 | setOperationAction(Op: ISD::LOAD, VT: MVT::i32, Action: Legal); |
202 | setOperationAction(Op: ISD::STORE, VT: MVT::i32, Action: Legal); |
203 | } else { |
204 | setOperationAction(Op: ISD::LOAD, VT: MVT::i32, Action: Custom); |
205 | setOperationAction(Op: ISD::STORE, VT: MVT::i32, Action: Custom); |
206 | } |
207 | |
208 | setTargetDAGCombine(ISD::MUL); |
209 | |
210 | setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::Other, Action: Custom); |
211 | setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::Other, Action: Custom); |
212 | setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::Other, Action: Custom); |
213 | |
214 | if (Subtarget.hasMips32r2() && !Subtarget.useSoftFloat() && |
215 | !Subtarget.hasMips64()) { |
216 | setOperationAction(Op: ISD::BITCAST, VT: MVT::i64, Action: Custom); |
217 | } |
218 | |
219 | if (NoDPLoadStore) { |
220 | setOperationAction(Op: ISD::LOAD, VT: MVT::f64, Action: Custom); |
221 | setOperationAction(Op: ISD::STORE, VT: MVT::f64, Action: Custom); |
222 | } |
223 | |
224 | if (Subtarget.hasMips32r6()) { |
225 | // MIPS32r6 replaces the accumulator-based multiplies with a three register |
226 | // instruction |
227 | setOperationAction(Op: ISD::SMUL_LOHI, VT: MVT::i32, Action: Expand); |
228 | setOperationAction(Op: ISD::UMUL_LOHI, VT: MVT::i32, Action: Expand); |
229 | setOperationAction(Op: ISD::MUL, VT: MVT::i32, Action: Legal); |
230 | setOperationAction(Op: ISD::MULHS, VT: MVT::i32, Action: Legal); |
231 | setOperationAction(Op: ISD::MULHU, VT: MVT::i32, Action: Legal); |
232 | |
233 | // MIPS32r6 replaces the accumulator-based division/remainder with separate |
234 | // three register division and remainder instructions. |
235 | setOperationAction(Op: ISD::SDIVREM, VT: MVT::i32, Action: Expand); |
236 | setOperationAction(Op: ISD::UDIVREM, VT: MVT::i32, Action: Expand); |
237 | setOperationAction(Op: ISD::SDIV, VT: MVT::i32, Action: Legal); |
238 | setOperationAction(Op: ISD::UDIV, VT: MVT::i32, Action: Legal); |
239 | setOperationAction(Op: ISD::SREM, VT: MVT::i32, Action: Legal); |
240 | setOperationAction(Op: ISD::UREM, VT: MVT::i32, Action: Legal); |
241 | |
242 | // MIPS32r6 replaces conditional moves with an equivalent that removes the |
243 | // need for three GPR read ports. |
244 | setOperationAction(Op: ISD::SETCC, VT: MVT::i32, Action: Legal); |
245 | setOperationAction(Op: ISD::SELECT, VT: MVT::i32, Action: Legal); |
246 | setOperationAction(Op: ISD::SELECT_CC, VT: MVT::i32, Action: Expand); |
247 | |
248 | setOperationAction(Op: ISD::SETCC, VT: MVT::f32, Action: Legal); |
249 | setOperationAction(Op: ISD::SELECT, VT: MVT::f32, Action: Legal); |
250 | setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f32, Action: Expand); |
251 | |
252 | assert(Subtarget.isFP64bit() && "FR=1 is required for MIPS32r6" ); |
253 | setOperationAction(Op: ISD::SETCC, VT: MVT::f64, Action: Legal); |
254 | setOperationAction(Op: ISD::SELECT, VT: MVT::f64, Action: Custom); |
255 | setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f64, Action: Expand); |
256 | |
257 | setOperationAction(Op: ISD::BRCOND, VT: MVT::Other, Action: Legal); |
258 | |
259 | // Floating point > and >= are supported via < and <= |
260 | setCondCodeAction(CCs: ISD::SETOGE, VT: MVT::f32, Action: Expand); |
261 | setCondCodeAction(CCs: ISD::SETOGT, VT: MVT::f32, Action: Expand); |
262 | setCondCodeAction(CCs: ISD::SETUGE, VT: MVT::f32, Action: Expand); |
263 | setCondCodeAction(CCs: ISD::SETUGT, VT: MVT::f32, Action: Expand); |
264 | |
265 | setCondCodeAction(CCs: ISD::SETOGE, VT: MVT::f64, Action: Expand); |
266 | setCondCodeAction(CCs: ISD::SETOGT, VT: MVT::f64, Action: Expand); |
267 | setCondCodeAction(CCs: ISD::SETUGE, VT: MVT::f64, Action: Expand); |
268 | setCondCodeAction(CCs: ISD::SETUGT, VT: MVT::f64, Action: Expand); |
269 | } |
270 | |
271 | if (Subtarget.hasMips64r6()) { |
272 | // MIPS64r6 replaces the accumulator-based multiplies with a three register |
273 | // instruction |
274 | setOperationAction(Op: ISD::SMUL_LOHI, VT: MVT::i64, Action: Expand); |
275 | setOperationAction(Op: ISD::UMUL_LOHI, VT: MVT::i64, Action: Expand); |
276 | setOperationAction(Op: ISD::MUL, VT: MVT::i64, Action: Legal); |
277 | setOperationAction(Op: ISD::MULHS, VT: MVT::i64, Action: Legal); |
278 | setOperationAction(Op: ISD::MULHU, VT: MVT::i64, Action: Legal); |
279 | |
280 | // MIPS32r6 replaces the accumulator-based division/remainder with separate |
281 | // three register division and remainder instructions. |
282 | setOperationAction(Op: ISD::SDIVREM, VT: MVT::i64, Action: Expand); |
283 | setOperationAction(Op: ISD::UDIVREM, VT: MVT::i64, Action: Expand); |
284 | setOperationAction(Op: ISD::SDIV, VT: MVT::i64, Action: Legal); |
285 | setOperationAction(Op: ISD::UDIV, VT: MVT::i64, Action: Legal); |
286 | setOperationAction(Op: ISD::SREM, VT: MVT::i64, Action: Legal); |
287 | setOperationAction(Op: ISD::UREM, VT: MVT::i64, Action: Legal); |
288 | |
289 | // MIPS64r6 replaces conditional moves with an equivalent that removes the |
290 | // need for three GPR read ports. |
291 | setOperationAction(Op: ISD::SETCC, VT: MVT::i64, Action: Legal); |
292 | setOperationAction(Op: ISD::SELECT, VT: MVT::i64, Action: Legal); |
293 | setOperationAction(Op: ISD::SELECT_CC, VT: MVT::i64, Action: Expand); |
294 | } |
295 | |
296 | computeRegisterProperties(TRI: Subtarget.getRegisterInfo()); |
297 | } |
298 | |
299 | const MipsTargetLowering * |
300 | llvm::createMipsSETargetLowering(const MipsTargetMachine &TM, |
301 | const MipsSubtarget &STI) { |
302 | return new MipsSETargetLowering(TM, STI); |
303 | } |
304 | |
305 | const TargetRegisterClass * |
306 | MipsSETargetLowering::getRepRegClassFor(MVT VT) const { |
307 | if (VT == MVT::Untyped) |
308 | return Subtarget.hasDSP() ? &Mips::ACC64DSPRegClass : &Mips::ACC64RegClass; |
309 | |
310 | return TargetLowering::getRepRegClassFor(VT); |
311 | } |
312 | |
313 | // Enable MSA support for the given integer type and Register class. |
314 | void MipsSETargetLowering:: |
315 | addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { |
316 | addRegisterClass(VT: Ty, RC); |
317 | |
318 | // Expand all builtin opcodes. |
319 | for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) |
320 | setOperationAction(Op: Opc, VT: Ty, Action: Expand); |
321 | |
322 | setOperationAction(Op: ISD::BITCAST, VT: Ty, Action: Legal); |
323 | setOperationAction(Op: ISD::LOAD, VT: Ty, Action: Legal); |
324 | setOperationAction(Op: ISD::STORE, VT: Ty, Action: Legal); |
325 | setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: Ty, Action: Custom); |
326 | setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: Ty, Action: Legal); |
327 | setOperationAction(Op: ISD::BUILD_VECTOR, VT: Ty, Action: Custom); |
328 | setOperationAction(Op: ISD::UNDEF, VT: Ty, Action: Legal); |
329 | |
330 | setOperationAction(Op: ISD::ADD, VT: Ty, Action: Legal); |
331 | setOperationAction(Op: ISD::AND, VT: Ty, Action: Legal); |
332 | setOperationAction(Op: ISD::CTLZ, VT: Ty, Action: Legal); |
333 | setOperationAction(Op: ISD::CTPOP, VT: Ty, Action: Legal); |
334 | setOperationAction(Op: ISD::MUL, VT: Ty, Action: Legal); |
335 | setOperationAction(Op: ISD::OR, VT: Ty, Action: Legal); |
336 | setOperationAction(Op: ISD::SDIV, VT: Ty, Action: Legal); |
337 | setOperationAction(Op: ISD::SREM, VT: Ty, Action: Legal); |
338 | setOperationAction(Op: ISD::SHL, VT: Ty, Action: Legal); |
339 | setOperationAction(Op: ISD::SRA, VT: Ty, Action: Legal); |
340 | setOperationAction(Op: ISD::SRL, VT: Ty, Action: Legal); |
341 | setOperationAction(Op: ISD::SUB, VT: Ty, Action: Legal); |
342 | setOperationAction(Op: ISD::SMAX, VT: Ty, Action: Legal); |
343 | setOperationAction(Op: ISD::SMIN, VT: Ty, Action: Legal); |
344 | setOperationAction(Op: ISD::UDIV, VT: Ty, Action: Legal); |
345 | setOperationAction(Op: ISD::UREM, VT: Ty, Action: Legal); |
346 | setOperationAction(Op: ISD::UMAX, VT: Ty, Action: Legal); |
347 | setOperationAction(Op: ISD::UMIN, VT: Ty, Action: Legal); |
348 | setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: Ty, Action: Custom); |
349 | setOperationAction(Op: ISD::VSELECT, VT: Ty, Action: Legal); |
350 | setOperationAction(Op: ISD::XOR, VT: Ty, Action: Legal); |
351 | |
352 | if (Ty == MVT::v4i32 || Ty == MVT::v2i64) { |
353 | setOperationAction(Op: ISD::FP_TO_SINT, VT: Ty, Action: Legal); |
354 | setOperationAction(Op: ISD::FP_TO_UINT, VT: Ty, Action: Legal); |
355 | setOperationAction(Op: ISD::SINT_TO_FP, VT: Ty, Action: Legal); |
356 | setOperationAction(Op: ISD::UINT_TO_FP, VT: Ty, Action: Legal); |
357 | } |
358 | |
359 | setOperationAction(Op: ISD::SETCC, VT: Ty, Action: Legal); |
360 | setCondCodeAction(CCs: ISD::SETNE, VT: Ty, Action: Expand); |
361 | setCondCodeAction(CCs: ISD::SETGE, VT: Ty, Action: Expand); |
362 | setCondCodeAction(CCs: ISD::SETGT, VT: Ty, Action: Expand); |
363 | setCondCodeAction(CCs: ISD::SETUGE, VT: Ty, Action: Expand); |
364 | setCondCodeAction(CCs: ISD::SETUGT, VT: Ty, Action: Expand); |
365 | } |
366 | |
367 | // Enable MSA support for the given floating-point type and Register class. |
368 | void MipsSETargetLowering:: |
369 | addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { |
370 | addRegisterClass(VT: Ty, RC); |
371 | |
372 | // Expand all builtin opcodes. |
373 | for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) |
374 | setOperationAction(Op: Opc, VT: Ty, Action: Expand); |
375 | |
376 | setOperationAction(Op: ISD::LOAD, VT: Ty, Action: Legal); |
377 | setOperationAction(Op: ISD::STORE, VT: Ty, Action: Legal); |
378 | setOperationAction(Op: ISD::BITCAST, VT: Ty, Action: Legal); |
379 | setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: Ty, Action: Legal); |
380 | setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: Ty, Action: Legal); |
381 | setOperationAction(Op: ISD::BUILD_VECTOR, VT: Ty, Action: Custom); |
382 | |
383 | if (Ty != MVT::v8f16) { |
384 | setOperationAction(Op: ISD::FABS, VT: Ty, Action: Legal); |
385 | setOperationAction(Op: ISD::FADD, VT: Ty, Action: Legal); |
386 | setOperationAction(Op: ISD::FDIV, VT: Ty, Action: Legal); |
387 | setOperationAction(Op: ISD::FEXP2, VT: Ty, Action: Legal); |
388 | setOperationAction(Op: ISD::FLOG2, VT: Ty, Action: Legal); |
389 | setOperationAction(Op: ISD::FMA, VT: Ty, Action: Legal); |
390 | setOperationAction(Op: ISD::FMUL, VT: Ty, Action: Legal); |
391 | setOperationAction(Op: ISD::FRINT, VT: Ty, Action: Legal); |
392 | setOperationAction(Op: ISD::FSQRT, VT: Ty, Action: Legal); |
393 | setOperationAction(Op: ISD::FSUB, VT: Ty, Action: Legal); |
394 | setOperationAction(Op: ISD::VSELECT, VT: Ty, Action: Legal); |
395 | |
396 | setOperationAction(Op: ISD::SETCC, VT: Ty, Action: Legal); |
397 | setCondCodeAction(CCs: ISD::SETOGE, VT: Ty, Action: Expand); |
398 | setCondCodeAction(CCs: ISD::SETOGT, VT: Ty, Action: Expand); |
399 | setCondCodeAction(CCs: ISD::SETUGE, VT: Ty, Action: Expand); |
400 | setCondCodeAction(CCs: ISD::SETUGT, VT: Ty, Action: Expand); |
401 | setCondCodeAction(CCs: ISD::SETGE, VT: Ty, Action: Expand); |
402 | setCondCodeAction(CCs: ISD::SETGT, VT: Ty, Action: Expand); |
403 | } |
404 | } |
405 | |
406 | SDValue MipsSETargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { |
407 | if(!Subtarget.hasMips32r6()) |
408 | return MipsTargetLowering::LowerOperation(Op, DAG); |
409 | |
410 | EVT ResTy = Op->getValueType(ResNo: 0); |
411 | SDLoc DL(Op); |
412 | |
413 | // Although MTC1_D64 takes an i32 and writes an f64, the upper 32 bits of the |
414 | // floating point register are undefined. Not really an issue as sel.d, which |
415 | // is produced from an FSELECT node, only looks at bit 0. |
416 | SDValue Tmp = DAG.getNode(Opcode: MipsISD::MTC1_D64, DL, VT: MVT::f64, Operand: Op->getOperand(Num: 0)); |
417 | return DAG.getNode(Opcode: MipsISD::FSELECT, DL, VT: ResTy, N1: Tmp, N2: Op->getOperand(Num: 1), |
418 | N3: Op->getOperand(Num: 2)); |
419 | } |
420 | |
421 | bool MipsSETargetLowering::allowsMisalignedMemoryAccesses( |
422 | EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const { |
423 | MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy; |
424 | |
425 | if (Subtarget.systemSupportsUnalignedAccess()) { |
426 | // MIPS32r6/MIPS64r6 is required to support unaligned access. It's |
427 | // implementation defined whether this is handled by hardware, software, or |
428 | // a hybrid of the two but it's expected that most implementations will |
429 | // handle the majority of cases in hardware. |
430 | if (Fast) |
431 | *Fast = 1; |
432 | return true; |
433 | } else if (Subtarget.hasMips32r6()) { |
434 | return false; |
435 | } |
436 | |
437 | switch (SVT) { |
438 | case MVT::i64: |
439 | case MVT::i32: |
440 | if (Fast) |
441 | *Fast = 1; |
442 | return true; |
443 | default: |
444 | return false; |
445 | } |
446 | } |
447 | |
448 | SDValue MipsSETargetLowering::LowerOperation(SDValue Op, |
449 | SelectionDAG &DAG) const { |
450 | switch(Op.getOpcode()) { |
451 | case ISD::LOAD: return lowerLOAD(Op, DAG); |
452 | case ISD::STORE: return lowerSTORE(Op, DAG); |
453 | case ISD::SMUL_LOHI: return lowerMulDiv(Op, NewOpc: MipsISD::Mult, HasLo: true, HasHi: true, DAG); |
454 | case ISD::UMUL_LOHI: return lowerMulDiv(Op, NewOpc: MipsISD::Multu, HasLo: true, HasHi: true, DAG); |
455 | case ISD::MULHS: return lowerMulDiv(Op, NewOpc: MipsISD::Mult, HasLo: false, HasHi: true, DAG); |
456 | case ISD::MULHU: return lowerMulDiv(Op, NewOpc: MipsISD::Multu, HasLo: false, HasHi: true, DAG); |
457 | case ISD::MUL: return lowerMulDiv(Op, NewOpc: MipsISD::Mult, HasLo: true, HasHi: false, DAG); |
458 | case ISD::SDIVREM: return lowerMulDiv(Op, NewOpc: MipsISD::DivRem, HasLo: true, HasHi: true, DAG); |
459 | case ISD::UDIVREM: return lowerMulDiv(Op, NewOpc: MipsISD::DivRemU, HasLo: true, HasHi: true, |
460 | DAG); |
461 | case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG); |
462 | case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG); |
463 | case ISD::INTRINSIC_VOID: return lowerINTRINSIC_VOID(Op, DAG); |
464 | case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG); |
465 | case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG); |
466 | case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, DAG); |
467 | case ISD::SELECT: return lowerSELECT(Op, DAG); |
468 | case ISD::BITCAST: return lowerBITCAST(Op, DAG); |
469 | } |
470 | |
471 | return MipsTargetLowering::LowerOperation(Op, DAG); |
472 | } |
473 | |
474 | // Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT |
475 | // |
476 | // Performs the following transformations: |
477 | // - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its |
478 | // sign/zero-extension is completely overwritten by the new one performed by |
479 | // the ISD::AND. |
480 | // - Removes redundant zero extensions performed by an ISD::AND. |
481 | static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, |
482 | TargetLowering::DAGCombinerInfo &DCI, |
483 | const MipsSubtarget &Subtarget) { |
484 | if (!Subtarget.hasMSA()) |
485 | return SDValue(); |
486 | |
487 | SDValue Op0 = N->getOperand(Num: 0); |
488 | SDValue Op1 = N->getOperand(Num: 1); |
489 | unsigned Op0Opcode = Op0->getOpcode(); |
490 | |
491 | // (and (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d) |
492 | // where $d + 1 == 2^n and n == 32 |
493 | // or $d + 1 == 2^n and n <= 32 and ZExt |
494 | // -> (MipsVExtractZExt $a, $b, $c) |
495 | if (Op0Opcode == MipsISD::VEXTRACT_SEXT_ELT || |
496 | Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT) { |
497 | ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Val&: Op1); |
498 | |
499 | if (!Mask) |
500 | return SDValue(); |
501 | |
502 | int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2(); |
503 | |
504 | if (Log2IfPositive <= 0) |
505 | return SDValue(); // Mask+1 is not a power of 2 |
506 | |
507 | SDValue Op0Op2 = Op0->getOperand(Num: 2); |
508 | EVT ExtendTy = cast<VTSDNode>(Val&: Op0Op2)->getVT(); |
509 | unsigned ExtendTySize = ExtendTy.getSizeInBits(); |
510 | unsigned Log2 = Log2IfPositive; |
511 | |
512 | if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && Log2 >= ExtendTySize) || |
513 | Log2 == ExtendTySize) { |
514 | SDValue Ops[] = { Op0->getOperand(Num: 0), Op0->getOperand(Num: 1), Op0Op2 }; |
515 | return DAG.getNode(Opcode: MipsISD::VEXTRACT_ZEXT_ELT, DL: SDLoc(Op0), |
516 | VTList: Op0->getVTList(), |
517 | Ops: ArrayRef(Ops, Op0->getNumOperands())); |
518 | } |
519 | } |
520 | |
521 | return SDValue(); |
522 | } |
523 | |
524 | // Determine if the specified node is a constant vector splat. |
525 | // |
526 | // Returns true and sets Imm if: |
527 | // * N is a ISD::BUILD_VECTOR representing a constant splat |
528 | // |
529 | // This function is quite similar to MipsSEDAGToDAGISel::selectVSplat. The |
530 | // differences are that it assumes the MSA has already been checked and the |
531 | // arbitrary requirement for a maximum of 32-bit integers isn't applied (and |
532 | // must not be in order for binsri.d to be selectable). |
533 | static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) { |
534 | BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(Val: N.getNode()); |
535 | |
536 | if (!Node) |
537 | return false; |
538 | |
539 | APInt SplatValue, SplatUndef; |
540 | unsigned SplatBitSize; |
541 | bool HasAnyUndefs; |
542 | |
543 | if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, |
544 | MinSplatBits: 8, isBigEndian: !IsLittleEndian)) |
545 | return false; |
546 | |
547 | Imm = SplatValue; |
548 | |
549 | return true; |
550 | } |
551 | |
552 | // Test whether the given node is an all-ones build_vector. |
553 | static bool isVectorAllOnes(SDValue N) { |
554 | // Look through bitcasts. Endianness doesn't matter because we are looking |
555 | // for an all-ones value. |
556 | if (N->getOpcode() == ISD::BITCAST) |
557 | N = N->getOperand(Num: 0); |
558 | |
559 | BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Val&: N); |
560 | |
561 | if (!BVN) |
562 | return false; |
563 | |
564 | APInt SplatValue, SplatUndef; |
565 | unsigned SplatBitSize; |
566 | bool HasAnyUndefs; |
567 | |
568 | // Endianness doesn't matter in this context because we are looking for |
569 | // an all-ones value. |
570 | if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs)) |
571 | return SplatValue.isAllOnes(); |
572 | |
573 | return false; |
574 | } |
575 | |
576 | // Test whether N is the bitwise inverse of OfNode. |
577 | static bool isBitwiseInverse(SDValue N, SDValue OfNode) { |
578 | if (N->getOpcode() != ISD::XOR) |
579 | return false; |
580 | |
581 | if (isVectorAllOnes(N: N->getOperand(Num: 0))) |
582 | return N->getOperand(Num: 1) == OfNode; |
583 | |
584 | if (isVectorAllOnes(N: N->getOperand(Num: 1))) |
585 | return N->getOperand(Num: 0) == OfNode; |
586 | |
587 | return false; |
588 | } |
589 | |
590 | // Perform combines where ISD::OR is the root node. |
591 | // |
592 | // Performs the following transformations: |
593 | // - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b) |
594 | // where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit |
595 | // vector type. |
596 | static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, |
597 | TargetLowering::DAGCombinerInfo &DCI, |
598 | const MipsSubtarget &Subtarget) { |
599 | if (!Subtarget.hasMSA()) |
600 | return SDValue(); |
601 | |
602 | EVT Ty = N->getValueType(ResNo: 0); |
603 | |
604 | if (!Ty.is128BitVector()) |
605 | return SDValue(); |
606 | |
607 | SDValue Op0 = N->getOperand(Num: 0); |
608 | SDValue Op1 = N->getOperand(Num: 1); |
609 | |
610 | if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) { |
611 | SDValue Op0Op0 = Op0->getOperand(Num: 0); |
612 | SDValue Op0Op1 = Op0->getOperand(Num: 1); |
613 | SDValue Op1Op0 = Op1->getOperand(Num: 0); |
614 | SDValue Op1Op1 = Op1->getOperand(Num: 1); |
615 | bool IsLittleEndian = !Subtarget.isLittle(); |
616 | |
617 | SDValue IfSet, IfClr, Cond; |
618 | bool IsConstantMask = false; |
619 | APInt Mask, InvMask; |
620 | |
621 | // If Op0Op0 is an appropriate mask, try to find it's inverse in either |
622 | // Op1Op0, or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes, while |
623 | // looking. |
624 | // IfClr will be set if we find a valid match. |
625 | if (isVSplat(N: Op0Op0, Imm&: Mask, IsLittleEndian)) { |
626 | Cond = Op0Op0; |
627 | IfSet = Op0Op1; |
628 | |
629 | if (isVSplat(N: Op1Op0, Imm&: InvMask, IsLittleEndian) && |
630 | Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) |
631 | IfClr = Op1Op1; |
632 | else if (isVSplat(N: Op1Op1, Imm&: InvMask, IsLittleEndian) && |
633 | Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) |
634 | IfClr = Op1Op0; |
635 | |
636 | IsConstantMask = true; |
637 | } |
638 | |
639 | // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the same |
640 | // thing again using this mask. |
641 | // IfClr will be set if we find a valid match. |
642 | if (!IfClr.getNode() && isVSplat(N: Op0Op1, Imm&: Mask, IsLittleEndian)) { |
643 | Cond = Op0Op1; |
644 | IfSet = Op0Op0; |
645 | |
646 | if (isVSplat(N: Op1Op0, Imm&: InvMask, IsLittleEndian) && |
647 | Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) |
648 | IfClr = Op1Op1; |
649 | else if (isVSplat(N: Op1Op1, Imm&: InvMask, IsLittleEndian) && |
650 | Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) |
651 | IfClr = Op1Op0; |
652 | |
653 | IsConstantMask = true; |
654 | } |
655 | |
656 | // If IfClr is not yet set, try looking for a non-constant match. |
657 | // IfClr will be set if we find a valid match amongst the eight |
658 | // possibilities. |
659 | if (!IfClr.getNode()) { |
660 | if (isBitwiseInverse(N: Op0Op0, OfNode: Op1Op0)) { |
661 | Cond = Op1Op0; |
662 | IfSet = Op1Op1; |
663 | IfClr = Op0Op1; |
664 | } else if (isBitwiseInverse(N: Op0Op1, OfNode: Op1Op0)) { |
665 | Cond = Op1Op0; |
666 | IfSet = Op1Op1; |
667 | IfClr = Op0Op0; |
668 | } else if (isBitwiseInverse(N: Op0Op0, OfNode: Op1Op1)) { |
669 | Cond = Op1Op1; |
670 | IfSet = Op1Op0; |
671 | IfClr = Op0Op1; |
672 | } else if (isBitwiseInverse(N: Op0Op1, OfNode: Op1Op1)) { |
673 | Cond = Op1Op1; |
674 | IfSet = Op1Op0; |
675 | IfClr = Op0Op0; |
676 | } else if (isBitwiseInverse(N: Op1Op0, OfNode: Op0Op0)) { |
677 | Cond = Op0Op0; |
678 | IfSet = Op0Op1; |
679 | IfClr = Op1Op1; |
680 | } else if (isBitwiseInverse(N: Op1Op1, OfNode: Op0Op0)) { |
681 | Cond = Op0Op0; |
682 | IfSet = Op0Op1; |
683 | IfClr = Op1Op0; |
684 | } else if (isBitwiseInverse(N: Op1Op0, OfNode: Op0Op1)) { |
685 | Cond = Op0Op1; |
686 | IfSet = Op0Op0; |
687 | IfClr = Op1Op1; |
688 | } else if (isBitwiseInverse(N: Op1Op1, OfNode: Op0Op1)) { |
689 | Cond = Op0Op1; |
690 | IfSet = Op0Op0; |
691 | IfClr = Op1Op0; |
692 | } |
693 | } |
694 | |
695 | // At this point, IfClr will be set if we have a valid match. |
696 | if (!IfClr.getNode()) |
697 | return SDValue(); |
698 | |
699 | assert(Cond.getNode() && IfSet.getNode()); |
700 | |
701 | // Fold degenerate cases. |
702 | if (IsConstantMask) { |
703 | if (Mask.isAllOnes()) |
704 | return IfSet; |
705 | else if (Mask == 0) |
706 | return IfClr; |
707 | } |
708 | |
709 | // Transform the DAG into an equivalent VSELECT. |
710 | return DAG.getNode(Opcode: ISD::VSELECT, DL: SDLoc(N), VT: Ty, N1: Cond, N2: IfSet, N3: IfClr); |
711 | } |
712 | |
713 | return SDValue(); |
714 | } |
715 | |
716 | static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT, |
717 | SelectionDAG &DAG, |
718 | const MipsSubtarget &Subtarget) { |
719 | // Estimate the number of operations the below transform will turn a |
720 | // constant multiply into. The number is approximately equal to the minimal |
721 | // number of powers of two that constant can be broken down to by adding |
722 | // or subtracting them. |
723 | // |
724 | // If we have taken more than 12[1] / 8[2] steps to attempt the |
725 | // optimization for a native sized value, it is more than likely that this |
726 | // optimization will make things worse. |
727 | // |
728 | // [1] MIPS64 requires 6 instructions at most to materialize any constant, |
729 | // multiplication requires at least 4 cycles, but another cycle (or two) |
730 | // to retrieve the result from the HI/LO registers. |
731 | // |
732 | // [2] For MIPS32, more than 8 steps is expensive as the constant could be |
733 | // materialized in 2 instructions, multiplication requires at least 4 |
734 | // cycles, but another cycle (or two) to retrieve the result from the |
735 | // HI/LO registers. |
736 | // |
737 | // TODO: |
738 | // - MaxSteps needs to consider the `VT` of the constant for the current |
739 | // target. |
740 | // - Consider to perform this optimization after type legalization. |
741 | // That allows to remove a workaround for types not supported natively. |
742 | // - Take in account `-Os, -Oz` flags because this optimization |
743 | // increases code size. |
744 | unsigned MaxSteps = Subtarget.isABI_O32() ? 8 : 12; |
745 | |
746 | SmallVector<APInt, 16> WorkStack(1, C); |
747 | unsigned Steps = 0; |
748 | unsigned BitWidth = C.getBitWidth(); |
749 | |
750 | while (!WorkStack.empty()) { |
751 | APInt Val = WorkStack.pop_back_val(); |
752 | |
753 | if (Val == 0 || Val == 1) |
754 | continue; |
755 | |
756 | if (Steps >= MaxSteps) |
757 | return false; |
758 | |
759 | if (Val.isPowerOf2()) { |
760 | ++Steps; |
761 | continue; |
762 | } |
763 | |
764 | APInt Floor = APInt(BitWidth, 1) << Val.logBase2(); |
765 | APInt Ceil = Val.isNegative() ? APInt(BitWidth, 0) |
766 | : APInt(BitWidth, 1) << C.ceilLogBase2(); |
767 | if ((Val - Floor).ule(RHS: Ceil - Val)) { |
768 | WorkStack.push_back(Elt: Floor); |
769 | WorkStack.push_back(Elt: Val - Floor); |
770 | } else { |
771 | WorkStack.push_back(Elt: Ceil); |
772 | WorkStack.push_back(Elt: Ceil - Val); |
773 | } |
774 | |
775 | ++Steps; |
776 | } |
777 | |
778 | // If the value being multiplied is not supported natively, we have to pay |
779 | // an additional legalization cost, conservatively assume an increase in the |
780 | // cost of 3 instructions per step. This values for this heuristic were |
781 | // determined experimentally. |
782 | unsigned RegisterSize = DAG.getTargetLoweringInfo() |
783 | .getRegisterType(Context&: *DAG.getContext(), VT) |
784 | .getSizeInBits(); |
785 | Steps *= (VT.getSizeInBits() != RegisterSize) * 3; |
786 | if (Steps > 27) |
787 | return false; |
788 | |
789 | return true; |
790 | } |
791 | |
792 | static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT, |
793 | EVT ShiftTy, SelectionDAG &DAG) { |
794 | // Return 0. |
795 | if (C == 0) |
796 | return DAG.getConstant(Val: 0, DL, VT); |
797 | |
798 | // Return x. |
799 | if (C == 1) |
800 | return X; |
801 | |
802 | // If c is power of 2, return (shl x, log2(c)). |
803 | if (C.isPowerOf2()) |
804 | return DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, |
805 | N2: DAG.getConstant(Val: C.logBase2(), DL, VT: ShiftTy)); |
806 | |
807 | unsigned BitWidth = C.getBitWidth(); |
808 | APInt Floor = APInt(BitWidth, 1) << C.logBase2(); |
809 | APInt Ceil = C.isNegative() ? APInt(BitWidth, 0) : |
810 | APInt(BitWidth, 1) << C.ceilLogBase2(); |
811 | |
812 | // If |c - floor_c| <= |c - ceil_c|, |
813 | // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))), |
814 | // return (add constMult(x, floor_c), constMult(x, c - floor_c)). |
815 | if ((C - Floor).ule(RHS: Ceil - C)) { |
816 | SDValue Op0 = genConstMult(X, C: Floor, DL, VT, ShiftTy, DAG); |
817 | SDValue Op1 = genConstMult(X, C: C - Floor, DL, VT, ShiftTy, DAG); |
818 | return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op0, N2: Op1); |
819 | } |
820 | |
821 | // If |c - floor_c| > |c - ceil_c|, |
822 | // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)). |
823 | SDValue Op0 = genConstMult(X, C: Ceil, DL, VT, ShiftTy, DAG); |
824 | SDValue Op1 = genConstMult(X, C: Ceil - C, DL, VT, ShiftTy, DAG); |
825 | return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op0, N2: Op1); |
826 | } |
827 | |
828 | static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, |
829 | const TargetLowering::DAGCombinerInfo &DCI, |
830 | const MipsSETargetLowering *TL, |
831 | const MipsSubtarget &Subtarget) { |
832 | EVT VT = N->getValueType(ResNo: 0); |
833 | |
834 | if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1))) |
835 | if (!VT.isVector() && shouldTransformMulToShiftsAddsSubs( |
836 | C: C->getAPIntValue(), VT, DAG, Subtarget)) |
837 | return genConstMult(X: N->getOperand(Num: 0), C: C->getAPIntValue(), DL: SDLoc(N), VT, |
838 | ShiftTy: TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT), |
839 | DAG); |
840 | |
841 | return SDValue(N, 0); |
842 | } |
843 | |
844 | static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty, |
845 | SelectionDAG &DAG, |
846 | const MipsSubtarget &Subtarget) { |
847 | // See if this is a vector splat immediate node. |
848 | APInt SplatValue, SplatUndef; |
849 | unsigned SplatBitSize; |
850 | bool HasAnyUndefs; |
851 | unsigned EltSize = Ty.getScalarSizeInBits(); |
852 | BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Val: N->getOperand(Num: 1)); |
853 | |
854 | if (!Subtarget.hasDSP()) |
855 | return SDValue(); |
856 | |
857 | if (!BV || |
858 | !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, |
859 | MinSplatBits: EltSize, isBigEndian: !Subtarget.isLittle()) || |
860 | (SplatBitSize != EltSize) || |
861 | (SplatValue.getZExtValue() >= EltSize)) |
862 | return SDValue(); |
863 | |
864 | SDLoc DL(N); |
865 | return DAG.getNode(Opcode: Opc, DL, VT: Ty, N1: N->getOperand(Num: 0), |
866 | N2: DAG.getConstant(Val: SplatValue.getZExtValue(), DL, VT: MVT::i32)); |
867 | } |
868 | |
869 | static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG, |
870 | TargetLowering::DAGCombinerInfo &DCI, |
871 | const MipsSubtarget &Subtarget) { |
872 | EVT Ty = N->getValueType(ResNo: 0); |
873 | |
874 | if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8)) |
875 | return SDValue(); |
876 | |
877 | return performDSPShiftCombine(Opc: MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget); |
878 | } |
879 | |
880 | // Fold sign-extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold |
881 | // constant splats into MipsISD::SHRA_DSP for DSPr2. |
882 | // |
883 | // Performs the following transformations: |
884 | // - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its |
885 | // sign/zero-extension is completely overwritten by the new one performed by |
886 | // the ISD::SRA and ISD::SHL nodes. |
887 | // - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL |
888 | // sequence. |
889 | // |
890 | // See performDSPShiftCombine for more information about the transformation |
891 | // used for DSPr2. |
892 | static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, |
893 | TargetLowering::DAGCombinerInfo &DCI, |
894 | const MipsSubtarget &Subtarget) { |
895 | EVT Ty = N->getValueType(ResNo: 0); |
896 | |
897 | if (Subtarget.hasMSA()) { |
898 | SDValue Op0 = N->getOperand(Num: 0); |
899 | SDValue Op1 = N->getOperand(Num: 1); |
900 | |
901 | // (sra (shl (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d) |
902 | // where $d + sizeof($c) == 32 |
903 | // or $d + sizeof($c) <= 32 and SExt |
904 | // -> (MipsVExtractSExt $a, $b, $c) |
905 | if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(Num: 1)) { |
906 | SDValue Op0Op0 = Op0->getOperand(Num: 0); |
907 | ConstantSDNode *ShAmount = dyn_cast<ConstantSDNode>(Val&: Op1); |
908 | |
909 | if (!ShAmount) |
910 | return SDValue(); |
911 | |
912 | if (Op0Op0->getOpcode() != MipsISD::VEXTRACT_SEXT_ELT && |
913 | Op0Op0->getOpcode() != MipsISD::VEXTRACT_ZEXT_ELT) |
914 | return SDValue(); |
915 | |
916 | EVT ExtendTy = cast<VTSDNode>(Val: Op0Op0->getOperand(Num: 2))->getVT(); |
917 | unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits(); |
918 | |
919 | if (TotalBits == 32 || |
920 | (Op0Op0->getOpcode() == MipsISD::VEXTRACT_SEXT_ELT && |
921 | TotalBits <= 32)) { |
922 | SDValue Ops[] = { Op0Op0->getOperand(Num: 0), Op0Op0->getOperand(Num: 1), |
923 | Op0Op0->getOperand(Num: 2) }; |
924 | return DAG.getNode(Opcode: MipsISD::VEXTRACT_SEXT_ELT, DL: SDLoc(Op0Op0), |
925 | VTList: Op0Op0->getVTList(), |
926 | Ops: ArrayRef(Ops, Op0Op0->getNumOperands())); |
927 | } |
928 | } |
929 | } |
930 | |
931 | if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget.hasDSPR2())) |
932 | return SDValue(); |
933 | |
934 | return performDSPShiftCombine(Opc: MipsISD::SHRA_DSP, N, Ty, DAG, Subtarget); |
935 | } |
936 | |
937 | |
938 | static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, |
939 | TargetLowering::DAGCombinerInfo &DCI, |
940 | const MipsSubtarget &Subtarget) { |
941 | EVT Ty = N->getValueType(ResNo: 0); |
942 | |
943 | if (((Ty != MVT::v2i16) || !Subtarget.hasDSPR2()) && (Ty != MVT::v4i8)) |
944 | return SDValue(); |
945 | |
946 | return performDSPShiftCombine(Opc: MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget); |
947 | } |
948 | |
949 | static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC) { |
950 | bool IsV216 = (Ty == MVT::v2i16); |
951 | |
952 | switch (CC) { |
953 | case ISD::SETEQ: |
954 | case ISD::SETNE: return true; |
955 | case ISD::SETLT: |
956 | case ISD::SETLE: |
957 | case ISD::SETGT: |
958 | case ISD::SETGE: return IsV216; |
959 | case ISD::SETULT: |
960 | case ISD::SETULE: |
961 | case ISD::SETUGT: |
962 | case ISD::SETUGE: return !IsV216; |
963 | default: return false; |
964 | } |
965 | } |
966 | |
967 | static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) { |
968 | EVT Ty = N->getValueType(ResNo: 0); |
969 | |
970 | if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8)) |
971 | return SDValue(); |
972 | |
973 | if (!isLegalDSPCondCode(Ty, CC: cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get())) |
974 | return SDValue(); |
975 | |
976 | return DAG.getNode(Opcode: MipsISD::SETCC_DSP, DL: SDLoc(N), VT: Ty, N1: N->getOperand(Num: 0), |
977 | N2: N->getOperand(Num: 1), N3: N->getOperand(Num: 2)); |
978 | } |
979 | |
980 | static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) { |
981 | EVT Ty = N->getValueType(ResNo: 0); |
982 | |
983 | if (Ty == MVT::v2i16 || Ty == MVT::v4i8) { |
984 | SDValue SetCC = N->getOperand(Num: 0); |
985 | |
986 | if (SetCC.getOpcode() != MipsISD::SETCC_DSP) |
987 | return SDValue(); |
988 | |
989 | return DAG.getNode(Opcode: MipsISD::SELECT_CC_DSP, DL: SDLoc(N), VT: Ty, |
990 | N1: SetCC.getOperand(i: 0), N2: SetCC.getOperand(i: 1), |
991 | N3: N->getOperand(Num: 1), N4: N->getOperand(Num: 2), N5: SetCC.getOperand(i: 2)); |
992 | } |
993 | |
994 | return SDValue(); |
995 | } |
996 | |
997 | static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, |
998 | const MipsSubtarget &Subtarget) { |
999 | EVT Ty = N->getValueType(ResNo: 0); |
1000 | |
1001 | if (Subtarget.hasMSA() && Ty.is128BitVector() && Ty.isInteger()) { |
1002 | // Try the following combines: |
1003 | // (xor (or $a, $b), (build_vector allones)) |
1004 | // (xor (or $a, $b), (bitcast (build_vector allones))) |
1005 | SDValue Op0 = N->getOperand(Num: 0); |
1006 | SDValue Op1 = N->getOperand(Num: 1); |
1007 | SDValue NotOp; |
1008 | |
1009 | if (ISD::isBuildVectorAllOnes(N: Op0.getNode())) |
1010 | NotOp = Op1; |
1011 | else if (ISD::isBuildVectorAllOnes(N: Op1.getNode())) |
1012 | NotOp = Op0; |
1013 | else |
1014 | return SDValue(); |
1015 | |
1016 | if (NotOp->getOpcode() == ISD::OR) |
1017 | return DAG.getNode(Opcode: MipsISD::VNOR, DL: SDLoc(N), VT: Ty, N1: NotOp->getOperand(Num: 0), |
1018 | N2: NotOp->getOperand(Num: 1)); |
1019 | } |
1020 | |
1021 | return SDValue(); |
1022 | } |
1023 | |
1024 | SDValue |
1025 | MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { |
1026 | SelectionDAG &DAG = DCI.DAG; |
1027 | SDValue Val; |
1028 | |
1029 | switch (N->getOpcode()) { |
1030 | case ISD::AND: |
1031 | Val = performANDCombine(N, DAG, DCI, Subtarget); |
1032 | break; |
1033 | case ISD::OR: |
1034 | Val = performORCombine(N, DAG, DCI, Subtarget); |
1035 | break; |
1036 | case ISD::MUL: |
1037 | return performMULCombine(N, DAG, DCI, TL: this, Subtarget); |
1038 | case ISD::SHL: |
1039 | Val = performSHLCombine(N, DAG, DCI, Subtarget); |
1040 | break; |
1041 | case ISD::SRA: |
1042 | return performSRACombine(N, DAG, DCI, Subtarget); |
1043 | case ISD::SRL: |
1044 | return performSRLCombine(N, DAG, DCI, Subtarget); |
1045 | case ISD::VSELECT: |
1046 | return performVSELECTCombine(N, DAG); |
1047 | case ISD::XOR: |
1048 | Val = performXORCombine(N, DAG, Subtarget); |
1049 | break; |
1050 | case ISD::SETCC: |
1051 | Val = performSETCCCombine(N, DAG); |
1052 | break; |
1053 | } |
1054 | |
1055 | if (Val.getNode()) { |
1056 | LLVM_DEBUG(dbgs() << "\nMipsSE DAG Combine:\n" ; |
1057 | N->printrWithDepth(dbgs(), &DAG); dbgs() << "\n=> \n" ; |
1058 | Val.getNode()->printrWithDepth(dbgs(), &DAG); dbgs() << "\n" ); |
1059 | return Val; |
1060 | } |
1061 | |
1062 | return MipsTargetLowering::PerformDAGCombine(N, DCI); |
1063 | } |
1064 | |
1065 | MachineBasicBlock * |
1066 | MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, |
1067 | MachineBasicBlock *BB) const { |
1068 | switch (MI.getOpcode()) { |
1069 | default: |
1070 | return MipsTargetLowering::EmitInstrWithCustomInserter(MI, MBB: BB); |
1071 | case Mips::BPOSGE32_PSEUDO: |
1072 | return emitBPOSGE32(MI, BB); |
1073 | case Mips::SNZ_B_PSEUDO: |
1074 | return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BNZ_B); |
1075 | case Mips::SNZ_H_PSEUDO: |
1076 | return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BNZ_H); |
1077 | case Mips::SNZ_W_PSEUDO: |
1078 | return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BNZ_W); |
1079 | case Mips::SNZ_D_PSEUDO: |
1080 | return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BNZ_D); |
1081 | case Mips::SNZ_V_PSEUDO: |
1082 | return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BNZ_V); |
1083 | case Mips::SZ_B_PSEUDO: |
1084 | return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BZ_B); |
1085 | case Mips::SZ_H_PSEUDO: |
1086 | return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BZ_H); |
1087 | case Mips::SZ_W_PSEUDO: |
1088 | return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BZ_W); |
1089 | case Mips::SZ_D_PSEUDO: |
1090 | return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BZ_D); |
1091 | case Mips::SZ_V_PSEUDO: |
1092 | return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BZ_V); |
1093 | case Mips::COPY_FW_PSEUDO: |
1094 | return emitCOPY_FW(MI, BB); |
1095 | case Mips::COPY_FD_PSEUDO: |
1096 | return emitCOPY_FD(MI, BB); |
1097 | case Mips::INSERT_FW_PSEUDO: |
1098 | return emitINSERT_FW(MI, BB); |
1099 | case Mips::INSERT_FD_PSEUDO: |
1100 | return emitINSERT_FD(MI, BB); |
1101 | case Mips::INSERT_B_VIDX_PSEUDO: |
1102 | case Mips::INSERT_B_VIDX64_PSEUDO: |
1103 | return emitINSERT_DF_VIDX(MI, BB, EltSizeInBytes: 1, IsFP: false); |
1104 | case Mips::INSERT_H_VIDX_PSEUDO: |
1105 | case Mips::INSERT_H_VIDX64_PSEUDO: |
1106 | return emitINSERT_DF_VIDX(MI, BB, EltSizeInBytes: 2, IsFP: false); |
1107 | case Mips::INSERT_W_VIDX_PSEUDO: |
1108 | case Mips::INSERT_W_VIDX64_PSEUDO: |
1109 | return emitINSERT_DF_VIDX(MI, BB, EltSizeInBytes: 4, IsFP: false); |
1110 | case Mips::INSERT_D_VIDX_PSEUDO: |
1111 | case Mips::INSERT_D_VIDX64_PSEUDO: |
1112 | return emitINSERT_DF_VIDX(MI, BB, EltSizeInBytes: 8, IsFP: false); |
1113 | case Mips::INSERT_FW_VIDX_PSEUDO: |
1114 | case Mips::INSERT_FW_VIDX64_PSEUDO: |
1115 | return emitINSERT_DF_VIDX(MI, BB, EltSizeInBytes: 4, IsFP: true); |
1116 | case Mips::INSERT_FD_VIDX_PSEUDO: |
1117 | case Mips::INSERT_FD_VIDX64_PSEUDO: |
1118 | return emitINSERT_DF_VIDX(MI, BB, EltSizeInBytes: 8, IsFP: true); |
1119 | case Mips::FILL_FW_PSEUDO: |
1120 | return emitFILL_FW(MI, BB); |
1121 | case Mips::FILL_FD_PSEUDO: |
1122 | return emitFILL_FD(MI, BB); |
1123 | case Mips::FEXP2_W_1_PSEUDO: |
1124 | return emitFEXP2_W_1(MI, BB); |
1125 | case Mips::FEXP2_D_1_PSEUDO: |
1126 | return emitFEXP2_D_1(MI, BB); |
1127 | case Mips::ST_F16: |
1128 | return emitST_F16_PSEUDO(MI, BB); |
1129 | case Mips::LD_F16: |
1130 | return emitLD_F16_PSEUDO(MI, BB); |
1131 | case Mips::MSA_FP_EXTEND_W_PSEUDO: |
1132 | return emitFPEXTEND_PSEUDO(MI, BB, IsFGR64: false); |
1133 | case Mips::MSA_FP_ROUND_W_PSEUDO: |
1134 | return emitFPROUND_PSEUDO(MI, BBi: BB, IsFGR64: false); |
1135 | case Mips::MSA_FP_EXTEND_D_PSEUDO: |
1136 | return emitFPEXTEND_PSEUDO(MI, BB, IsFGR64: true); |
1137 | case Mips::MSA_FP_ROUND_D_PSEUDO: |
1138 | return emitFPROUND_PSEUDO(MI, BBi: BB, IsFGR64: true); |
1139 | } |
1140 | } |
1141 | |
1142 | bool MipsSETargetLowering::isEligibleForTailCallOptimization( |
1143 | const CCState &CCInfo, unsigned NextStackOffset, |
1144 | const MipsFunctionInfo &FI) const { |
1145 | if (!UseMipsTailCalls) |
1146 | return false; |
1147 | |
1148 | // Exception has to be cleared with eret. |
1149 | if (FI.isISR()) |
1150 | return false; |
1151 | |
1152 | // Return false if either the callee or caller has a byval argument. |
1153 | if (CCInfo.getInRegsParamsCount() > 0 || FI.hasByvalArg()) |
1154 | return false; |
1155 | |
1156 | // Return true if the callee's argument area is no larger than the |
1157 | // caller's. |
1158 | return NextStackOffset <= FI.getIncomingArgSize(); |
1159 | } |
1160 | |
1161 | void MipsSETargetLowering:: |
1162 | getOpndList(SmallVectorImpl<SDValue> &Ops, |
1163 | std::deque<std::pair<unsigned, SDValue>> &RegsToPass, |
1164 | bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, |
1165 | bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee, |
1166 | SDValue Chain) const { |
1167 | Ops.push_back(Elt: Callee); |
1168 | MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, |
1169 | InternalLinkage, IsCallReloc, CLI, Callee, |
1170 | Chain); |
1171 | } |
1172 | |
1173 | SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const { |
1174 | LoadSDNode &Nd = *cast<LoadSDNode>(Val&: Op); |
1175 | |
1176 | if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore) |
1177 | return MipsTargetLowering::lowerLOAD(Op, DAG); |
1178 | |
1179 | // Replace a double precision load with two i32 loads and a buildpair64. |
1180 | SDLoc DL(Op); |
1181 | SDValue Ptr = Nd.getBasePtr(), Chain = Nd.getChain(); |
1182 | EVT PtrVT = Ptr.getValueType(); |
1183 | |
1184 | // i32 load from lower address. |
1185 | SDValue Lo = DAG.getLoad(VT: MVT::i32, dl: DL, Chain, Ptr, PtrInfo: MachinePointerInfo(), |
1186 | Alignment: Nd.getAlign(), MMOFlags: Nd.getMemOperand()->getFlags()); |
1187 | |
1188 | // i32 load from higher address. |
1189 | Ptr = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: Ptr, N2: DAG.getConstant(Val: 4, DL, VT: PtrVT)); |
1190 | SDValue Hi = DAG.getLoad( |
1191 | VT: MVT::i32, dl: DL, Chain: Lo.getValue(R: 1), Ptr, PtrInfo: MachinePointerInfo(), |
1192 | Alignment: commonAlignment(A: Nd.getAlign(), Offset: 4), MMOFlags: Nd.getMemOperand()->getFlags()); |
1193 | |
1194 | if (!Subtarget.isLittle()) |
1195 | std::swap(a&: Lo, b&: Hi); |
1196 | |
1197 | SDValue BP = DAG.getNode(Opcode: MipsISD::BuildPairF64, DL, VT: MVT::f64, N1: Lo, N2: Hi); |
1198 | SDValue Ops[2] = {BP, Hi.getValue(R: 1)}; |
1199 | return DAG.getMergeValues(Ops, dl: DL); |
1200 | } |
1201 | |
1202 | SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { |
1203 | StoreSDNode &Nd = *cast<StoreSDNode>(Val&: Op); |
1204 | |
1205 | if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore) |
1206 | return MipsTargetLowering::lowerSTORE(Op, DAG); |
1207 | |
1208 | // Replace a double precision store with two extractelement64s and i32 stores. |
1209 | SDLoc DL(Op); |
1210 | SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain(); |
1211 | EVT PtrVT = Ptr.getValueType(); |
1212 | SDValue Lo = DAG.getNode(Opcode: MipsISD::ExtractElementF64, DL, VT: MVT::i32, |
1213 | N1: Val, N2: DAG.getConstant(Val: 0, DL, VT: MVT::i32)); |
1214 | SDValue Hi = DAG.getNode(Opcode: MipsISD::ExtractElementF64, DL, VT: MVT::i32, |
1215 | N1: Val, N2: DAG.getConstant(Val: 1, DL, VT: MVT::i32)); |
1216 | |
1217 | if (!Subtarget.isLittle()) |
1218 | std::swap(a&: Lo, b&: Hi); |
1219 | |
1220 | // i32 store to lower address. |
1221 | Chain = DAG.getStore(Chain, dl: DL, Val: Lo, Ptr, PtrInfo: MachinePointerInfo(), Alignment: Nd.getAlign(), |
1222 | MMOFlags: Nd.getMemOperand()->getFlags(), AAInfo: Nd.getAAInfo()); |
1223 | |
1224 | // i32 store to higher address. |
1225 | Ptr = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: Ptr, N2: DAG.getConstant(Val: 4, DL, VT: PtrVT)); |
1226 | return DAG.getStore(Chain, dl: DL, Val: Hi, Ptr, PtrInfo: MachinePointerInfo(), |
1227 | Alignment: commonAlignment(A: Nd.getAlign(), Offset: 4), |
1228 | MMOFlags: Nd.getMemOperand()->getFlags(), AAInfo: Nd.getAAInfo()); |
1229 | } |
1230 | |
1231 | SDValue MipsSETargetLowering::lowerBITCAST(SDValue Op, |
1232 | SelectionDAG &DAG) const { |
1233 | SDLoc DL(Op); |
1234 | MVT Src = Op.getOperand(i: 0).getValueType().getSimpleVT(); |
1235 | MVT Dest = Op.getValueType().getSimpleVT(); |
1236 | |
1237 | // Bitcast i64 to double. |
1238 | if (Src == MVT::i64 && Dest == MVT::f64) { |
1239 | SDValue Lo, Hi; |
1240 | std::tie(args&: Lo, args&: Hi) = |
1241 | DAG.SplitScalar(N: Op.getOperand(i: 0), DL, LoVT: MVT::i32, HiVT: MVT::i32); |
1242 | return DAG.getNode(Opcode: MipsISD::BuildPairF64, DL, VT: MVT::f64, N1: Lo, N2: Hi); |
1243 | } |
1244 | |
1245 | // Bitcast double to i64. |
1246 | if (Src == MVT::f64 && Dest == MVT::i64) { |
1247 | SDValue Lo = |
1248 | DAG.getNode(Opcode: MipsISD::ExtractElementF64, DL, VT: MVT::i32, N1: Op.getOperand(i: 0), |
1249 | N2: DAG.getConstant(Val: 0, DL, VT: MVT::i32)); |
1250 | SDValue Hi = |
1251 | DAG.getNode(Opcode: MipsISD::ExtractElementF64, DL, VT: MVT::i32, N1: Op.getOperand(i: 0), |
1252 | N2: DAG.getConstant(Val: 1, DL, VT: MVT::i32)); |
1253 | return DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: MVT::i64, N1: Lo, N2: Hi); |
1254 | } |
1255 | |
1256 | // Skip other cases of bitcast and use default lowering. |
1257 | return SDValue(); |
1258 | } |
1259 | |
1260 | SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc, |
1261 | bool HasLo, bool HasHi, |
1262 | SelectionDAG &DAG) const { |
1263 | // MIPS32r6/MIPS64r6 removed accumulator based multiplies. |
1264 | assert(!Subtarget.hasMips32r6()); |
1265 | |
1266 | EVT Ty = Op.getOperand(i: 0).getValueType(); |
1267 | SDLoc DL(Op); |
1268 | SDValue Mult = DAG.getNode(Opcode: NewOpc, DL, VT: MVT::Untyped, |
1269 | N1: Op.getOperand(i: 0), N2: Op.getOperand(i: 1)); |
1270 | SDValue Lo, Hi; |
1271 | |
1272 | if (HasLo) |
1273 | Lo = DAG.getNode(Opcode: MipsISD::MFLO, DL, VT: Ty, Operand: Mult); |
1274 | if (HasHi) |
1275 | Hi = DAG.getNode(Opcode: MipsISD::MFHI, DL, VT: Ty, Operand: Mult); |
1276 | |
1277 | if (!HasLo || !HasHi) |
1278 | return HasLo ? Lo : Hi; |
1279 | |
1280 | SDValue Vals[] = { Lo, Hi }; |
1281 | return DAG.getMergeValues(Ops: Vals, dl: DL); |
1282 | } |
1283 | |
1284 | static SDValue initAccumulator(SDValue In, const SDLoc &DL, SelectionDAG &DAG) { |
1285 | SDValue InLo, InHi; |
1286 | std::tie(args&: InLo, args&: InHi) = DAG.SplitScalar(N: In, DL, LoVT: MVT::i32, HiVT: MVT::i32); |
1287 | return DAG.getNode(Opcode: MipsISD::MTLOHI, DL, VT: MVT::Untyped, N1: InLo, N2: InHi); |
1288 | } |
1289 | |
1290 | static SDValue (SDValue Op, const SDLoc &DL, SelectionDAG &DAG) { |
1291 | SDValue Lo = DAG.getNode(Opcode: MipsISD::MFLO, DL, VT: MVT::i32, Operand: Op); |
1292 | SDValue Hi = DAG.getNode(Opcode: MipsISD::MFHI, DL, VT: MVT::i32, Operand: Op); |
1293 | return DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: MVT::i64, N1: Lo, N2: Hi); |
1294 | } |
1295 | |
1296 | // This function expands mips intrinsic nodes which have 64-bit input operands |
1297 | // or output values. |
1298 | // |
1299 | // out64 = intrinsic-node in64 |
1300 | // => |
1301 | // lo = copy (extract-element (in64, 0)) |
1302 | // hi = copy (extract-element (in64, 1)) |
1303 | // mips-specific-node |
1304 | // v0 = copy lo |
1305 | // v1 = copy hi |
1306 | // out64 = merge-values (v0, v1) |
1307 | // |
1308 | static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { |
1309 | SDLoc DL(Op); |
1310 | bool HasChainIn = Op->getOperand(Num: 0).getValueType() == MVT::Other; |
1311 | SmallVector<SDValue, 3> Ops; |
1312 | unsigned OpNo = 0; |
1313 | |
1314 | // See if Op has a chain input. |
1315 | if (HasChainIn) |
1316 | Ops.push_back(Elt: Op->getOperand(Num: OpNo++)); |
1317 | |
1318 | // The next operand is the intrinsic opcode. |
1319 | assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant); |
1320 | |
1321 | // See if the next operand has type i64. |
1322 | SDValue Opnd = Op->getOperand(Num: ++OpNo), In64; |
1323 | |
1324 | if (Opnd.getValueType() == MVT::i64) |
1325 | In64 = initAccumulator(In: Opnd, DL, DAG); |
1326 | else |
1327 | Ops.push_back(Elt: Opnd); |
1328 | |
1329 | // Push the remaining operands. |
1330 | for (++OpNo ; OpNo < Op->getNumOperands(); ++OpNo) |
1331 | Ops.push_back(Elt: Op->getOperand(Num: OpNo)); |
1332 | |
1333 | // Add In64 to the end of the list. |
1334 | if (In64.getNode()) |
1335 | Ops.push_back(Elt: In64); |
1336 | |
1337 | // Scan output. |
1338 | SmallVector<EVT, 2> ResTys; |
1339 | |
1340 | for (EVT Ty : Op->values()) |
1341 | ResTys.push_back(Elt: (Ty == MVT::i64) ? MVT::Untyped : Ty); |
1342 | |
1343 | // Create node. |
1344 | SDValue Val = DAG.getNode(Opcode: Opc, DL, ResultTys: ResTys, Ops); |
1345 | SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Op: Val, DL, DAG) : Val; |
1346 | |
1347 | if (!HasChainIn) |
1348 | return Out; |
1349 | |
1350 | assert(Val->getValueType(1) == MVT::Other); |
1351 | SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) }; |
1352 | return DAG.getMergeValues(Ops: Vals, dl: DL); |
1353 | } |
1354 | |
1355 | // Lower an MSA copy intrinsic into the specified SelectionDAG node |
1356 | static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { |
1357 | SDLoc DL(Op); |
1358 | SDValue Vec = Op->getOperand(Num: 1); |
1359 | SDValue Idx = Op->getOperand(Num: 2); |
1360 | EVT ResTy = Op->getValueType(ResNo: 0); |
1361 | EVT EltTy = Vec->getValueType(ResNo: 0).getVectorElementType(); |
1362 | |
1363 | SDValue Result = DAG.getNode(Opcode: Opc, DL, VT: ResTy, N1: Vec, N2: Idx, |
1364 | N3: DAG.getValueType(EltTy)); |
1365 | |
1366 | return Result; |
1367 | } |
1368 | |
1369 | static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) { |
1370 | EVT ResVecTy = Op->getValueType(ResNo: 0); |
1371 | EVT ViaVecTy = ResVecTy; |
1372 | bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian(); |
1373 | SDLoc DL(Op); |
1374 | |
1375 | // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and |
1376 | // LaneB is the lower 32-bits. Otherwise LaneA and LaneB are alternating |
1377 | // lanes. |
1378 | SDValue LaneA = Op->getOperand(Num: OpNr); |
1379 | SDValue LaneB; |
1380 | |
1381 | if (ResVecTy == MVT::v2i64) { |
1382 | // In case of the index being passed as an immediate value, set the upper |
1383 | // lane to 0 so that the splati.d instruction can be matched. |
1384 | if (isa<ConstantSDNode>(Val: LaneA)) |
1385 | LaneB = DAG.getConstant(Val: 0, DL, VT: MVT::i32); |
1386 | // Having the index passed in a register, set the upper lane to the same |
1387 | // value as the lower - this results in the BUILD_VECTOR node not being |
1388 | // expanded through stack. This way we are able to pattern match the set of |
1389 | // nodes created here to splat.d. |
1390 | else |
1391 | LaneB = LaneA; |
1392 | ViaVecTy = MVT::v4i32; |
1393 | if(BigEndian) |
1394 | std::swap(a&: LaneA, b&: LaneB); |
1395 | } else |
1396 | LaneB = LaneA; |
1397 | |
1398 | SDValue Ops[16] = { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, |
1399 | LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB }; |
1400 | |
1401 | SDValue Result = DAG.getBuildVector( |
1402 | VT: ViaVecTy, DL, Ops: ArrayRef(Ops, ViaVecTy.getVectorNumElements())); |
1403 | |
1404 | if (ViaVecTy != ResVecTy) { |
1405 | SDValue One = DAG.getConstant(Val: 1, DL, VT: ViaVecTy); |
1406 | Result = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ResVecTy, |
1407 | Operand: DAG.getNode(Opcode: ISD::AND, DL, VT: ViaVecTy, N1: Result, N2: One)); |
1408 | } |
1409 | |
1410 | return Result; |
1411 | } |
1412 | |
1413 | static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, |
1414 | bool IsSigned = false) { |
1415 | auto *CImm = cast<ConstantSDNode>(Val: Op->getOperand(Num: ImmOp)); |
1416 | return DAG.getConstant( |
1417 | Val: APInt(Op->getValueType(ResNo: 0).getScalarType().getSizeInBits(), |
1418 | IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned), |
1419 | DL: SDLoc(Op), VT: Op->getValueType(ResNo: 0)); |
1420 | } |
1421 | |
1422 | static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue, |
1423 | bool BigEndian, SelectionDAG &DAG) { |
1424 | EVT ViaVecTy = VecTy; |
1425 | SDValue SplatValueA = SplatValue; |
1426 | SDValue SplatValueB = SplatValue; |
1427 | SDLoc DL(SplatValue); |
1428 | |
1429 | if (VecTy == MVT::v2i64) { |
1430 | // v2i64 BUILD_VECTOR must be performed via v4i32 so split into i32's. |
1431 | ViaVecTy = MVT::v4i32; |
1432 | |
1433 | SplatValueA = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: SplatValue); |
1434 | SplatValueB = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, N1: SplatValue, |
1435 | N2: DAG.getConstant(Val: 32, DL, VT: MVT::i32)); |
1436 | SplatValueB = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: SplatValueB); |
1437 | } |
1438 | |
1439 | // We currently hold the parts in little endian order. Swap them if |
1440 | // necessary. |
1441 | if (BigEndian) |
1442 | std::swap(a&: SplatValueA, b&: SplatValueB); |
1443 | |
1444 | SDValue Ops[16] = { SplatValueA, SplatValueB, SplatValueA, SplatValueB, |
1445 | SplatValueA, SplatValueB, SplatValueA, SplatValueB, |
1446 | SplatValueA, SplatValueB, SplatValueA, SplatValueB, |
1447 | SplatValueA, SplatValueB, SplatValueA, SplatValueB }; |
1448 | |
1449 | SDValue Result = DAG.getBuildVector( |
1450 | VT: ViaVecTy, DL, Ops: ArrayRef(Ops, ViaVecTy.getVectorNumElements())); |
1451 | |
1452 | if (VecTy != ViaVecTy) |
1453 | Result = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: VecTy, Operand: Result); |
1454 | |
1455 | return Result; |
1456 | } |
1457 | |
1458 | static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG, |
1459 | unsigned Opc, SDValue Imm, |
1460 | bool BigEndian) { |
1461 | EVT VecTy = Op->getValueType(ResNo: 0); |
1462 | SDValue Exp2Imm; |
1463 | SDLoc DL(Op); |
1464 | |
1465 | // The DAG Combiner can't constant fold bitcasted vectors yet so we must do it |
1466 | // here for now. |
1467 | if (VecTy == MVT::v2i64) { |
1468 | if (ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(Val&: Imm)) { |
1469 | APInt BitImm = APInt(64, 1) << CImm->getAPIntValue(); |
1470 | |
1471 | SDValue BitImmHiOp = DAG.getConstant(Val: BitImm.lshr(shiftAmt: 32).trunc(width: 32), DL, |
1472 | VT: MVT::i32); |
1473 | SDValue BitImmLoOp = DAG.getConstant(Val: BitImm.trunc(width: 32), DL, VT: MVT::i32); |
1474 | |
1475 | if (BigEndian) |
1476 | std::swap(a&: BitImmLoOp, b&: BitImmHiOp); |
1477 | |
1478 | Exp2Imm = DAG.getNode( |
1479 | Opcode: ISD::BITCAST, DL, VT: MVT::v2i64, |
1480 | Operand: DAG.getBuildVector(VT: MVT::v4i32, DL, |
1481 | Ops: {BitImmLoOp, BitImmHiOp, BitImmLoOp, BitImmHiOp})); |
1482 | } |
1483 | } |
1484 | |
1485 | if (!Exp2Imm.getNode()) { |
1486 | // We couldnt constant fold, do a vector shift instead |
1487 | |
1488 | // Extend i32 to i64 if necessary. Sign or zero extend doesn't matter since |
1489 | // only values 0-63 are valid. |
1490 | if (VecTy == MVT::v2i64) |
1491 | Imm = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, Operand: Imm); |
1492 | |
1493 | Exp2Imm = getBuildVectorSplat(VecTy, SplatValue: Imm, BigEndian, DAG); |
1494 | |
1495 | Exp2Imm = DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: DAG.getConstant(Val: 1, DL, VT: VecTy), |
1496 | N2: Exp2Imm); |
1497 | } |
1498 | |
1499 | return DAG.getNode(Opcode: Opc, DL, VT: VecTy, N1: Op->getOperand(Num: 1), N2: Exp2Imm); |
1500 | } |
1501 | |
1502 | static SDValue truncateVecElts(SDValue Op, SelectionDAG &DAG) { |
1503 | SDLoc DL(Op); |
1504 | EVT ResTy = Op->getValueType(ResNo: 0); |
1505 | SDValue Vec = Op->getOperand(Num: 2); |
1506 | bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian(); |
1507 | MVT ResEltTy = ResTy == MVT::v2i64 ? MVT::i64 : MVT::i32; |
1508 | SDValue ConstValue = DAG.getConstant(Val: Vec.getScalarValueSizeInBits() - 1, |
1509 | DL, VT: ResEltTy); |
1510 | SDValue SplatVec = getBuildVectorSplat(VecTy: ResTy, SplatValue: ConstValue, BigEndian, DAG); |
1511 | |
1512 | return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Vec, N2: SplatVec); |
1513 | } |
1514 | |
1515 | static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) { |
1516 | EVT ResTy = Op->getValueType(ResNo: 0); |
1517 | SDLoc DL(Op); |
1518 | SDValue One = DAG.getConstant(Val: 1, DL, VT: ResTy); |
1519 | SDValue Bit = DAG.getNode(Opcode: ISD::SHL, DL, VT: ResTy, N1: One, N2: truncateVecElts(Op, DAG)); |
1520 | |
1521 | return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Op->getOperand(Num: 1), |
1522 | N2: DAG.getNOT(DL, Val: Bit, VT: ResTy)); |
1523 | } |
1524 | |
1525 | static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) { |
1526 | SDLoc DL(Op); |
1527 | EVT ResTy = Op->getValueType(ResNo: 0); |
1528 | APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) |
1529 | << Op->getConstantOperandAPInt(Num: 2); |
1530 | SDValue BitMask = DAG.getConstant(Val: ~BitImm, DL, VT: ResTy); |
1531 | |
1532 | return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Op->getOperand(Num: 1), N2: BitMask); |
1533 | } |
1534 | |
1535 | SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, |
1536 | SelectionDAG &DAG) const { |
1537 | SDLoc DL(Op); |
1538 | unsigned Intrinsic = Op->getConstantOperandVal(Num: 0); |
1539 | switch (Intrinsic) { |
1540 | default: |
1541 | return SDValue(); |
1542 | case Intrinsic::mips_shilo: |
1543 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::SHILO); |
1544 | case Intrinsic::mips_dpau_h_qbl: |
1545 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPAU_H_QBL); |
1546 | case Intrinsic::mips_dpau_h_qbr: |
1547 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPAU_H_QBR); |
1548 | case Intrinsic::mips_dpsu_h_qbl: |
1549 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPSU_H_QBL); |
1550 | case Intrinsic::mips_dpsu_h_qbr: |
1551 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPSU_H_QBR); |
1552 | case Intrinsic::mips_dpa_w_ph: |
1553 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPA_W_PH); |
1554 | case Intrinsic::mips_dps_w_ph: |
1555 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPS_W_PH); |
1556 | case Intrinsic::mips_dpax_w_ph: |
1557 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPAX_W_PH); |
1558 | case Intrinsic::mips_dpsx_w_ph: |
1559 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPSX_W_PH); |
1560 | case Intrinsic::mips_mulsa_w_ph: |
1561 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::MULSA_W_PH); |
1562 | case Intrinsic::mips_mult: |
1563 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::Mult); |
1564 | case Intrinsic::mips_multu: |
1565 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::Multu); |
1566 | case Intrinsic::mips_madd: |
1567 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::MAdd); |
1568 | case Intrinsic::mips_maddu: |
1569 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::MAddu); |
1570 | case Intrinsic::mips_msub: |
1571 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::MSub); |
1572 | case Intrinsic::mips_msubu: |
1573 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::MSubu); |
1574 | case Intrinsic::mips_addv_b: |
1575 | case Intrinsic::mips_addv_h: |
1576 | case Intrinsic::mips_addv_w: |
1577 | case Intrinsic::mips_addv_d: |
1578 | return DAG.getNode(Opcode: ISD::ADD, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
1579 | N2: Op->getOperand(Num: 2)); |
1580 | case Intrinsic::mips_addvi_b: |
1581 | case Intrinsic::mips_addvi_h: |
1582 | case Intrinsic::mips_addvi_w: |
1583 | case Intrinsic::mips_addvi_d: |
1584 | return DAG.getNode(Opcode: ISD::ADD, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
1585 | N2: lowerMSASplatImm(Op, ImmOp: 2, DAG)); |
1586 | case Intrinsic::mips_and_v: |
1587 | return DAG.getNode(Opcode: ISD::AND, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
1588 | N2: Op->getOperand(Num: 2)); |
1589 | case Intrinsic::mips_andi_b: |
1590 | return DAG.getNode(Opcode: ISD::AND, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
1591 | N2: lowerMSASplatImm(Op, ImmOp: 2, DAG)); |
1592 | case Intrinsic::mips_bclr_b: |
1593 | case Intrinsic::mips_bclr_h: |
1594 | case Intrinsic::mips_bclr_w: |
1595 | case Intrinsic::mips_bclr_d: |
1596 | return lowerMSABitClear(Op, DAG); |
1597 | case Intrinsic::mips_bclri_b: |
1598 | case Intrinsic::mips_bclri_h: |
1599 | case Intrinsic::mips_bclri_w: |
1600 | case Intrinsic::mips_bclri_d: |
1601 | return lowerMSABitClearImm(Op, DAG); |
1602 | case Intrinsic::mips_binsli_b: |
1603 | case Intrinsic::mips_binsli_h: |
1604 | case Intrinsic::mips_binsli_w: |
1605 | case Intrinsic::mips_binsli_d: { |
1606 | // binsli_x(IfClear, IfSet, nbits) -> (vselect LBitsMask, IfSet, IfClear) |
1607 | EVT VecTy = Op->getValueType(ResNo: 0); |
1608 | EVT EltTy = VecTy.getVectorElementType(); |
1609 | if (Op->getConstantOperandVal(Num: 3) >= EltTy.getSizeInBits()) |
1610 | report_fatal_error(reason: "Immediate out of range" ); |
1611 | APInt Mask = APInt::getHighBitsSet(numBits: EltTy.getSizeInBits(), |
1612 | hiBitsSet: Op->getConstantOperandVal(Num: 3) + 1); |
1613 | return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: VecTy, |
1614 | N1: DAG.getConstant(Val: Mask, DL, VT: VecTy, isTarget: true), |
1615 | N2: Op->getOperand(Num: 2), N3: Op->getOperand(Num: 1)); |
1616 | } |
1617 | case Intrinsic::mips_binsri_b: |
1618 | case Intrinsic::mips_binsri_h: |
1619 | case Intrinsic::mips_binsri_w: |
1620 | case Intrinsic::mips_binsri_d: { |
1621 | // binsri_x(IfClear, IfSet, nbits) -> (vselect RBitsMask, IfSet, IfClear) |
1622 | EVT VecTy = Op->getValueType(ResNo: 0); |
1623 | EVT EltTy = VecTy.getVectorElementType(); |
1624 | if (Op->getConstantOperandVal(Num: 3) >= EltTy.getSizeInBits()) |
1625 | report_fatal_error(reason: "Immediate out of range" ); |
1626 | APInt Mask = APInt::getLowBitsSet(numBits: EltTy.getSizeInBits(), |
1627 | loBitsSet: Op->getConstantOperandVal(Num: 3) + 1); |
1628 | return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: VecTy, |
1629 | N1: DAG.getConstant(Val: Mask, DL, VT: VecTy, isTarget: true), |
1630 | N2: Op->getOperand(Num: 2), N3: Op->getOperand(Num: 1)); |
1631 | } |
1632 | case Intrinsic::mips_bmnz_v: |
1633 | return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 3), |
1634 | N2: Op->getOperand(Num: 2), N3: Op->getOperand(Num: 1)); |
1635 | case Intrinsic::mips_bmnzi_b: |
1636 | return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: Op->getValueType(ResNo: 0), |
1637 | N1: lowerMSASplatImm(Op, ImmOp: 3, DAG), N2: Op->getOperand(Num: 2), |
1638 | N3: Op->getOperand(Num: 1)); |
1639 | case Intrinsic::mips_bmz_v: |
1640 | return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 3), |
1641 | N2: Op->getOperand(Num: 1), N3: Op->getOperand(Num: 2)); |
1642 | case Intrinsic::mips_bmzi_b: |
1643 | return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: Op->getValueType(ResNo: 0), |
1644 | N1: lowerMSASplatImm(Op, ImmOp: 3, DAG), N2: Op->getOperand(Num: 1), |
1645 | N3: Op->getOperand(Num: 2)); |
1646 | case Intrinsic::mips_bneg_b: |
1647 | case Intrinsic::mips_bneg_h: |
1648 | case Intrinsic::mips_bneg_w: |
1649 | case Intrinsic::mips_bneg_d: { |
1650 | EVT VecTy = Op->getValueType(ResNo: 0); |
1651 | SDValue One = DAG.getConstant(Val: 1, DL, VT: VecTy); |
1652 | |
1653 | return DAG.getNode(Opcode: ISD::XOR, DL, VT: VecTy, N1: Op->getOperand(Num: 1), |
1654 | N2: DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: One, |
1655 | N2: truncateVecElts(Op, DAG))); |
1656 | } |
1657 | case Intrinsic::mips_bnegi_b: |
1658 | case Intrinsic::mips_bnegi_h: |
1659 | case Intrinsic::mips_bnegi_w: |
1660 | case Intrinsic::mips_bnegi_d: |
1661 | return lowerMSABinaryBitImmIntr(Op, DAG, Opc: ISD::XOR, Imm: Op->getOperand(Num: 2), |
1662 | BigEndian: !Subtarget.isLittle()); |
1663 | case Intrinsic::mips_bnz_b: |
1664 | case Intrinsic::mips_bnz_h: |
1665 | case Intrinsic::mips_bnz_w: |
1666 | case Intrinsic::mips_bnz_d: |
1667 | return DAG.getNode(Opcode: MipsISD::VALL_NONZERO, DL, VT: Op->getValueType(ResNo: 0), |
1668 | Operand: Op->getOperand(Num: 1)); |
1669 | case Intrinsic::mips_bnz_v: |
1670 | return DAG.getNode(Opcode: MipsISD::VANY_NONZERO, DL, VT: Op->getValueType(ResNo: 0), |
1671 | Operand: Op->getOperand(Num: 1)); |
1672 | case Intrinsic::mips_bsel_v: |
1673 | // bsel_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear) |
1674 | return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: Op->getValueType(ResNo: 0), |
1675 | N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 3), |
1676 | N3: Op->getOperand(Num: 2)); |
1677 | case Intrinsic::mips_bseli_b: |
1678 | // bseli_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear) |
1679 | return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: Op->getValueType(ResNo: 0), |
1680 | N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 3, DAG), |
1681 | N3: Op->getOperand(Num: 2)); |
1682 | case Intrinsic::mips_bset_b: |
1683 | case Intrinsic::mips_bset_h: |
1684 | case Intrinsic::mips_bset_w: |
1685 | case Intrinsic::mips_bset_d: { |
1686 | EVT VecTy = Op->getValueType(ResNo: 0); |
1687 | SDValue One = DAG.getConstant(Val: 1, DL, VT: VecTy); |
1688 | |
1689 | return DAG.getNode(Opcode: ISD::OR, DL, VT: VecTy, N1: Op->getOperand(Num: 1), |
1690 | N2: DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: One, |
1691 | N2: truncateVecElts(Op, DAG))); |
1692 | } |
1693 | case Intrinsic::mips_bseti_b: |
1694 | case Intrinsic::mips_bseti_h: |
1695 | case Intrinsic::mips_bseti_w: |
1696 | case Intrinsic::mips_bseti_d: |
1697 | return lowerMSABinaryBitImmIntr(Op, DAG, Opc: ISD::OR, Imm: Op->getOperand(Num: 2), |
1698 | BigEndian: !Subtarget.isLittle()); |
1699 | case Intrinsic::mips_bz_b: |
1700 | case Intrinsic::mips_bz_h: |
1701 | case Intrinsic::mips_bz_w: |
1702 | case Intrinsic::mips_bz_d: |
1703 | return DAG.getNode(Opcode: MipsISD::VALL_ZERO, DL, VT: Op->getValueType(ResNo: 0), |
1704 | Operand: Op->getOperand(Num: 1)); |
1705 | case Intrinsic::mips_bz_v: |
1706 | return DAG.getNode(Opcode: MipsISD::VANY_ZERO, DL, VT: Op->getValueType(ResNo: 0), |
1707 | Operand: Op->getOperand(Num: 1)); |
1708 | case Intrinsic::mips_ceq_b: |
1709 | case Intrinsic::mips_ceq_h: |
1710 | case Intrinsic::mips_ceq_w: |
1711 | case Intrinsic::mips_ceq_d: |
1712 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1713 | RHS: Op->getOperand(Num: 2), Cond: ISD::SETEQ); |
1714 | case Intrinsic::mips_ceqi_b: |
1715 | case Intrinsic::mips_ceqi_h: |
1716 | case Intrinsic::mips_ceqi_w: |
1717 | case Intrinsic::mips_ceqi_d: |
1718 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1719 | RHS: lowerMSASplatImm(Op, ImmOp: 2, DAG, IsSigned: true), Cond: ISD::SETEQ); |
1720 | case Intrinsic::mips_cle_s_b: |
1721 | case Intrinsic::mips_cle_s_h: |
1722 | case Intrinsic::mips_cle_s_w: |
1723 | case Intrinsic::mips_cle_s_d: |
1724 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1725 | RHS: Op->getOperand(Num: 2), Cond: ISD::SETLE); |
1726 | case Intrinsic::mips_clei_s_b: |
1727 | case Intrinsic::mips_clei_s_h: |
1728 | case Intrinsic::mips_clei_s_w: |
1729 | case Intrinsic::mips_clei_s_d: |
1730 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1731 | RHS: lowerMSASplatImm(Op, ImmOp: 2, DAG, IsSigned: true), Cond: ISD::SETLE); |
1732 | case Intrinsic::mips_cle_u_b: |
1733 | case Intrinsic::mips_cle_u_h: |
1734 | case Intrinsic::mips_cle_u_w: |
1735 | case Intrinsic::mips_cle_u_d: |
1736 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1737 | RHS: Op->getOperand(Num: 2), Cond: ISD::SETULE); |
1738 | case Intrinsic::mips_clei_u_b: |
1739 | case Intrinsic::mips_clei_u_h: |
1740 | case Intrinsic::mips_clei_u_w: |
1741 | case Intrinsic::mips_clei_u_d: |
1742 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1743 | RHS: lowerMSASplatImm(Op, ImmOp: 2, DAG), Cond: ISD::SETULE); |
1744 | case Intrinsic::mips_clt_s_b: |
1745 | case Intrinsic::mips_clt_s_h: |
1746 | case Intrinsic::mips_clt_s_w: |
1747 | case Intrinsic::mips_clt_s_d: |
1748 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1749 | RHS: Op->getOperand(Num: 2), Cond: ISD::SETLT); |
1750 | case Intrinsic::mips_clti_s_b: |
1751 | case Intrinsic::mips_clti_s_h: |
1752 | case Intrinsic::mips_clti_s_w: |
1753 | case Intrinsic::mips_clti_s_d: |
1754 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1755 | RHS: lowerMSASplatImm(Op, ImmOp: 2, DAG, IsSigned: true), Cond: ISD::SETLT); |
1756 | case Intrinsic::mips_clt_u_b: |
1757 | case Intrinsic::mips_clt_u_h: |
1758 | case Intrinsic::mips_clt_u_w: |
1759 | case Intrinsic::mips_clt_u_d: |
1760 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1761 | RHS: Op->getOperand(Num: 2), Cond: ISD::SETULT); |
1762 | case Intrinsic::mips_clti_u_b: |
1763 | case Intrinsic::mips_clti_u_h: |
1764 | case Intrinsic::mips_clti_u_w: |
1765 | case Intrinsic::mips_clti_u_d: |
1766 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1767 | RHS: lowerMSASplatImm(Op, ImmOp: 2, DAG), Cond: ISD::SETULT); |
1768 | case Intrinsic::mips_copy_s_b: |
1769 | case Intrinsic::mips_copy_s_h: |
1770 | case Intrinsic::mips_copy_s_w: |
1771 | return lowerMSACopyIntr(Op, DAG, Opc: MipsISD::VEXTRACT_SEXT_ELT); |
1772 | case Intrinsic::mips_copy_s_d: |
1773 | if (Subtarget.hasMips64()) |
1774 | // Lower directly into VEXTRACT_SEXT_ELT since i64 is legal on Mips64. |
1775 | return lowerMSACopyIntr(Op, DAG, Opc: MipsISD::VEXTRACT_SEXT_ELT); |
1776 | else { |
1777 | // Lower into the generic EXTRACT_VECTOR_ELT node and let the type |
1778 | // legalizer and EXTRACT_VECTOR_ELT lowering sort it out. |
1779 | return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: SDLoc(Op), |
1780 | VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
1781 | N2: Op->getOperand(Num: 2)); |
1782 | } |
1783 | case Intrinsic::mips_copy_u_b: |
1784 | case Intrinsic::mips_copy_u_h: |
1785 | case Intrinsic::mips_copy_u_w: |
1786 | return lowerMSACopyIntr(Op, DAG, Opc: MipsISD::VEXTRACT_ZEXT_ELT); |
1787 | case Intrinsic::mips_copy_u_d: |
1788 | if (Subtarget.hasMips64()) |
1789 | // Lower directly into VEXTRACT_ZEXT_ELT since i64 is legal on Mips64. |
1790 | return lowerMSACopyIntr(Op, DAG, Opc: MipsISD::VEXTRACT_ZEXT_ELT); |
1791 | else { |
1792 | // Lower into the generic EXTRACT_VECTOR_ELT node and let the type |
1793 | // legalizer and EXTRACT_VECTOR_ELT lowering sort it out. |
1794 | // Note: When i64 is illegal, this results in copy_s.w instructions |
1795 | // instead of copy_u.w instructions. This makes no difference to the |
1796 | // behaviour since i64 is only illegal when the register file is 32-bit. |
1797 | return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: SDLoc(Op), |
1798 | VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
1799 | N2: Op->getOperand(Num: 2)); |
1800 | } |
1801 | case Intrinsic::mips_div_s_b: |
1802 | case Intrinsic::mips_div_s_h: |
1803 | case Intrinsic::mips_div_s_w: |
1804 | case Intrinsic::mips_div_s_d: |
1805 | return DAG.getNode(Opcode: ISD::SDIV, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
1806 | N2: Op->getOperand(Num: 2)); |
1807 | case Intrinsic::mips_div_u_b: |
1808 | case Intrinsic::mips_div_u_h: |
1809 | case Intrinsic::mips_div_u_w: |
1810 | case Intrinsic::mips_div_u_d: |
1811 | return DAG.getNode(Opcode: ISD::UDIV, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
1812 | N2: Op->getOperand(Num: 2)); |
1813 | case Intrinsic::mips_fadd_w: |
1814 | case Intrinsic::mips_fadd_d: |
1815 | // TODO: If intrinsics have fast-math-flags, propagate them. |
1816 | return DAG.getNode(Opcode: ISD::FADD, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
1817 | N2: Op->getOperand(Num: 2)); |
1818 | // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away |
1819 | case Intrinsic::mips_fceq_w: |
1820 | case Intrinsic::mips_fceq_d: |
1821 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1822 | RHS: Op->getOperand(Num: 2), Cond: ISD::SETOEQ); |
1823 | case Intrinsic::mips_fcle_w: |
1824 | case Intrinsic::mips_fcle_d: |
1825 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1826 | RHS: Op->getOperand(Num: 2), Cond: ISD::SETOLE); |
1827 | case Intrinsic::mips_fclt_w: |
1828 | case Intrinsic::mips_fclt_d: |
1829 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1830 | RHS: Op->getOperand(Num: 2), Cond: ISD::SETOLT); |
1831 | case Intrinsic::mips_fcne_w: |
1832 | case Intrinsic::mips_fcne_d: |
1833 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1834 | RHS: Op->getOperand(Num: 2), Cond: ISD::SETONE); |
1835 | case Intrinsic::mips_fcor_w: |
1836 | case Intrinsic::mips_fcor_d: |
1837 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1838 | RHS: Op->getOperand(Num: 2), Cond: ISD::SETO); |
1839 | case Intrinsic::mips_fcueq_w: |
1840 | case Intrinsic::mips_fcueq_d: |
1841 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1842 | RHS: Op->getOperand(Num: 2), Cond: ISD::SETUEQ); |
1843 | case Intrinsic::mips_fcule_w: |
1844 | case Intrinsic::mips_fcule_d: |
1845 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1846 | RHS: Op->getOperand(Num: 2), Cond: ISD::SETULE); |
1847 | case Intrinsic::mips_fcult_w: |
1848 | case Intrinsic::mips_fcult_d: |
1849 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1850 | RHS: Op->getOperand(Num: 2), Cond: ISD::SETULT); |
1851 | case Intrinsic::mips_fcun_w: |
1852 | case Intrinsic::mips_fcun_d: |
1853 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1854 | RHS: Op->getOperand(Num: 2), Cond: ISD::SETUO); |
1855 | case Intrinsic::mips_fcune_w: |
1856 | case Intrinsic::mips_fcune_d: |
1857 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1858 | RHS: Op->getOperand(Num: 2), Cond: ISD::SETUNE); |
1859 | case Intrinsic::mips_fdiv_w: |
1860 | case Intrinsic::mips_fdiv_d: |
1861 | // TODO: If intrinsics have fast-math-flags, propagate them. |
1862 | return DAG.getNode(Opcode: ISD::FDIV, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
1863 | N2: Op->getOperand(Num: 2)); |
1864 | case Intrinsic::mips_ffint_u_w: |
1865 | case Intrinsic::mips_ffint_u_d: |
1866 | return DAG.getNode(Opcode: ISD::UINT_TO_FP, DL, VT: Op->getValueType(ResNo: 0), |
1867 | Operand: Op->getOperand(Num: 1)); |
1868 | case Intrinsic::mips_ffint_s_w: |
1869 | case Intrinsic::mips_ffint_s_d: |
1870 | return DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT: Op->getValueType(ResNo: 0), |
1871 | Operand: Op->getOperand(Num: 1)); |
1872 | case Intrinsic::mips_fill_b: |
1873 | case Intrinsic::mips_fill_h: |
1874 | case Intrinsic::mips_fill_w: |
1875 | case Intrinsic::mips_fill_d: { |
1876 | EVT ResTy = Op->getValueType(ResNo: 0); |
1877 | SmallVector<SDValue, 16> Ops(ResTy.getVectorNumElements(), |
1878 | Op->getOperand(Num: 1)); |
1879 | |
1880 | // If ResTy is v2i64 then the type legalizer will break this node down into |
1881 | // an equivalent v4i32. |
1882 | return DAG.getBuildVector(VT: ResTy, DL, Ops); |
1883 | } |
1884 | case Intrinsic::mips_fexp2_w: |
1885 | case Intrinsic::mips_fexp2_d: { |
1886 | // TODO: If intrinsics have fast-math-flags, propagate them. |
1887 | EVT ResTy = Op->getValueType(ResNo: 0); |
1888 | return DAG.getNode( |
1889 | Opcode: ISD::FMUL, DL: SDLoc(Op), VT: ResTy, N1: Op->getOperand(Num: 1), |
1890 | N2: DAG.getNode(Opcode: ISD::FEXP2, DL: SDLoc(Op), VT: ResTy, Operand: Op->getOperand(Num: 2))); |
1891 | } |
1892 | case Intrinsic::mips_flog2_w: |
1893 | case Intrinsic::mips_flog2_d: |
1894 | return DAG.getNode(Opcode: ISD::FLOG2, DL, VT: Op->getValueType(ResNo: 0), Operand: Op->getOperand(Num: 1)); |
1895 | case Intrinsic::mips_fmadd_w: |
1896 | case Intrinsic::mips_fmadd_d: |
1897 | return DAG.getNode(Opcode: ISD::FMA, DL: SDLoc(Op), VT: Op->getValueType(ResNo: 0), |
1898 | N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2), N3: Op->getOperand(Num: 3)); |
1899 | case Intrinsic::mips_fmul_w: |
1900 | case Intrinsic::mips_fmul_d: |
1901 | // TODO: If intrinsics have fast-math-flags, propagate them. |
1902 | return DAG.getNode(Opcode: ISD::FMUL, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
1903 | N2: Op->getOperand(Num: 2)); |
1904 | case Intrinsic::mips_fmsub_w: |
1905 | case Intrinsic::mips_fmsub_d: { |
1906 | // TODO: If intrinsics have fast-math-flags, propagate them. |
1907 | return DAG.getNode(Opcode: MipsISD::FMS, DL: SDLoc(Op), VT: Op->getValueType(ResNo: 0), |
1908 | N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2), N3: Op->getOperand(Num: 3)); |
1909 | } |
1910 | case Intrinsic::mips_frint_w: |
1911 | case Intrinsic::mips_frint_d: |
1912 | return DAG.getNode(Opcode: ISD::FRINT, DL, VT: Op->getValueType(ResNo: 0), Operand: Op->getOperand(Num: 1)); |
1913 | case Intrinsic::mips_fsqrt_w: |
1914 | case Intrinsic::mips_fsqrt_d: |
1915 | return DAG.getNode(Opcode: ISD::FSQRT, DL, VT: Op->getValueType(ResNo: 0), Operand: Op->getOperand(Num: 1)); |
1916 | case Intrinsic::mips_fsub_w: |
1917 | case Intrinsic::mips_fsub_d: |
1918 | // TODO: If intrinsics have fast-math-flags, propagate them. |
1919 | return DAG.getNode(Opcode: ISD::FSUB, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
1920 | N2: Op->getOperand(Num: 2)); |
1921 | case Intrinsic::mips_ftrunc_u_w: |
1922 | case Intrinsic::mips_ftrunc_u_d: |
1923 | return DAG.getNode(Opcode: ISD::FP_TO_UINT, DL, VT: Op->getValueType(ResNo: 0), |
1924 | Operand: Op->getOperand(Num: 1)); |
1925 | case Intrinsic::mips_ftrunc_s_w: |
1926 | case Intrinsic::mips_ftrunc_s_d: |
1927 | return DAG.getNode(Opcode: ISD::FP_TO_SINT, DL, VT: Op->getValueType(ResNo: 0), |
1928 | Operand: Op->getOperand(Num: 1)); |
1929 | case Intrinsic::mips_ilvev_b: |
1930 | case Intrinsic::mips_ilvev_h: |
1931 | case Intrinsic::mips_ilvev_w: |
1932 | case Intrinsic::mips_ilvev_d: |
1933 | return DAG.getNode(Opcode: MipsISD::ILVEV, DL, VT: Op->getValueType(ResNo: 0), |
1934 | N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2)); |
1935 | case Intrinsic::mips_ilvl_b: |
1936 | case Intrinsic::mips_ilvl_h: |
1937 | case Intrinsic::mips_ilvl_w: |
1938 | case Intrinsic::mips_ilvl_d: |
1939 | return DAG.getNode(Opcode: MipsISD::ILVL, DL, VT: Op->getValueType(ResNo: 0), |
1940 | N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2)); |
1941 | case Intrinsic::mips_ilvod_b: |
1942 | case Intrinsic::mips_ilvod_h: |
1943 | case Intrinsic::mips_ilvod_w: |
1944 | case Intrinsic::mips_ilvod_d: |
1945 | return DAG.getNode(Opcode: MipsISD::ILVOD, DL, VT: Op->getValueType(ResNo: 0), |
1946 | N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2)); |
1947 | case Intrinsic::mips_ilvr_b: |
1948 | case Intrinsic::mips_ilvr_h: |
1949 | case Intrinsic::mips_ilvr_w: |
1950 | case Intrinsic::mips_ilvr_d: |
1951 | return DAG.getNode(Opcode: MipsISD::ILVR, DL, VT: Op->getValueType(ResNo: 0), |
1952 | N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2)); |
1953 | case Intrinsic::mips_insert_b: |
1954 | case Intrinsic::mips_insert_h: |
1955 | case Intrinsic::mips_insert_w: |
1956 | case Intrinsic::mips_insert_d: |
1957 | return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(Op), VT: Op->getValueType(ResNo: 0), |
1958 | N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 3), N3: Op->getOperand(Num: 2)); |
1959 | case Intrinsic::mips_insve_b: |
1960 | case Intrinsic::mips_insve_h: |
1961 | case Intrinsic::mips_insve_w: |
1962 | case Intrinsic::mips_insve_d: { |
1963 | // Report an error for out of range values. |
1964 | int64_t Max; |
1965 | switch (Intrinsic) { |
1966 | case Intrinsic::mips_insve_b: Max = 15; break; |
1967 | case Intrinsic::mips_insve_h: Max = 7; break; |
1968 | case Intrinsic::mips_insve_w: Max = 3; break; |
1969 | case Intrinsic::mips_insve_d: Max = 1; break; |
1970 | default: llvm_unreachable("Unmatched intrinsic" ); |
1971 | } |
1972 | int64_t Value = cast<ConstantSDNode>(Val: Op->getOperand(Num: 2))->getSExtValue(); |
1973 | if (Value < 0 || Value > Max) |
1974 | report_fatal_error(reason: "Immediate out of range" ); |
1975 | return DAG.getNode(Opcode: MipsISD::INSVE, DL, VT: Op->getValueType(ResNo: 0), |
1976 | N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2), N3: Op->getOperand(Num: 3), |
1977 | N4: DAG.getConstant(Val: 0, DL, VT: MVT::i32)); |
1978 | } |
1979 | case Intrinsic::mips_ldi_b: |
1980 | case Intrinsic::mips_ldi_h: |
1981 | case Intrinsic::mips_ldi_w: |
1982 | case Intrinsic::mips_ldi_d: |
1983 | return lowerMSASplatImm(Op, ImmOp: 1, DAG, IsSigned: true); |
1984 | case Intrinsic::mips_lsa: |
1985 | case Intrinsic::mips_dlsa: { |
1986 | EVT ResTy = Op->getValueType(ResNo: 0); |
1987 | return DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(Op), VT: ResTy, N1: Op->getOperand(Num: 1), |
1988 | N2: DAG.getNode(Opcode: ISD::SHL, DL: SDLoc(Op), VT: ResTy, |
1989 | N1: Op->getOperand(Num: 2), N2: Op->getOperand(Num: 3))); |
1990 | } |
1991 | case Intrinsic::mips_maddv_b: |
1992 | case Intrinsic::mips_maddv_h: |
1993 | case Intrinsic::mips_maddv_w: |
1994 | case Intrinsic::mips_maddv_d: { |
1995 | EVT ResTy = Op->getValueType(ResNo: 0); |
1996 | return DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(Op), VT: ResTy, N1: Op->getOperand(Num: 1), |
1997 | N2: DAG.getNode(Opcode: ISD::MUL, DL: SDLoc(Op), VT: ResTy, |
1998 | N1: Op->getOperand(Num: 2), N2: Op->getOperand(Num: 3))); |
1999 | } |
2000 | case Intrinsic::mips_max_s_b: |
2001 | case Intrinsic::mips_max_s_h: |
2002 | case Intrinsic::mips_max_s_w: |
2003 | case Intrinsic::mips_max_s_d: |
2004 | return DAG.getNode(Opcode: ISD::SMAX, DL, VT: Op->getValueType(ResNo: 0), |
2005 | N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2)); |
2006 | case Intrinsic::mips_max_u_b: |
2007 | case Intrinsic::mips_max_u_h: |
2008 | case Intrinsic::mips_max_u_w: |
2009 | case Intrinsic::mips_max_u_d: |
2010 | return DAG.getNode(Opcode: ISD::UMAX, DL, VT: Op->getValueType(ResNo: 0), |
2011 | N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2)); |
2012 | case Intrinsic::mips_maxi_s_b: |
2013 | case Intrinsic::mips_maxi_s_h: |
2014 | case Intrinsic::mips_maxi_s_w: |
2015 | case Intrinsic::mips_maxi_s_d: |
2016 | return DAG.getNode(Opcode: ISD::SMAX, DL, VT: Op->getValueType(ResNo: 0), |
2017 | N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG, IsSigned: true)); |
2018 | case Intrinsic::mips_maxi_u_b: |
2019 | case Intrinsic::mips_maxi_u_h: |
2020 | case Intrinsic::mips_maxi_u_w: |
2021 | case Intrinsic::mips_maxi_u_d: |
2022 | return DAG.getNode(Opcode: ISD::UMAX, DL, VT: Op->getValueType(ResNo: 0), |
2023 | N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG)); |
2024 | case Intrinsic::mips_min_s_b: |
2025 | case Intrinsic::mips_min_s_h: |
2026 | case Intrinsic::mips_min_s_w: |
2027 | case Intrinsic::mips_min_s_d: |
2028 | return DAG.getNode(Opcode: ISD::SMIN, DL, VT: Op->getValueType(ResNo: 0), |
2029 | N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2)); |
2030 | case Intrinsic::mips_min_u_b: |
2031 | case Intrinsic::mips_min_u_h: |
2032 | case Intrinsic::mips_min_u_w: |
2033 | case Intrinsic::mips_min_u_d: |
2034 | return DAG.getNode(Opcode: ISD::UMIN, DL, VT: Op->getValueType(ResNo: 0), |
2035 | N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2)); |
2036 | case Intrinsic::mips_mini_s_b: |
2037 | case Intrinsic::mips_mini_s_h: |
2038 | case Intrinsic::mips_mini_s_w: |
2039 | case Intrinsic::mips_mini_s_d: |
2040 | return DAG.getNode(Opcode: ISD::SMIN, DL, VT: Op->getValueType(ResNo: 0), |
2041 | N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG, IsSigned: true)); |
2042 | case Intrinsic::mips_mini_u_b: |
2043 | case Intrinsic::mips_mini_u_h: |
2044 | case Intrinsic::mips_mini_u_w: |
2045 | case Intrinsic::mips_mini_u_d: |
2046 | return DAG.getNode(Opcode: ISD::UMIN, DL, VT: Op->getValueType(ResNo: 0), |
2047 | N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG)); |
2048 | case Intrinsic::mips_mod_s_b: |
2049 | case Intrinsic::mips_mod_s_h: |
2050 | case Intrinsic::mips_mod_s_w: |
2051 | case Intrinsic::mips_mod_s_d: |
2052 | return DAG.getNode(Opcode: ISD::SREM, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
2053 | N2: Op->getOperand(Num: 2)); |
2054 | case Intrinsic::mips_mod_u_b: |
2055 | case Intrinsic::mips_mod_u_h: |
2056 | case Intrinsic::mips_mod_u_w: |
2057 | case Intrinsic::mips_mod_u_d: |
2058 | return DAG.getNode(Opcode: ISD::UREM, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
2059 | N2: Op->getOperand(Num: 2)); |
2060 | case Intrinsic::mips_mulv_b: |
2061 | case Intrinsic::mips_mulv_h: |
2062 | case Intrinsic::mips_mulv_w: |
2063 | case Intrinsic::mips_mulv_d: |
2064 | return DAG.getNode(Opcode: ISD::MUL, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
2065 | N2: Op->getOperand(Num: 2)); |
2066 | case Intrinsic::mips_msubv_b: |
2067 | case Intrinsic::mips_msubv_h: |
2068 | case Intrinsic::mips_msubv_w: |
2069 | case Intrinsic::mips_msubv_d: { |
2070 | EVT ResTy = Op->getValueType(ResNo: 0); |
2071 | return DAG.getNode(Opcode: ISD::SUB, DL: SDLoc(Op), VT: ResTy, N1: Op->getOperand(Num: 1), |
2072 | N2: DAG.getNode(Opcode: ISD::MUL, DL: SDLoc(Op), VT: ResTy, |
2073 | N1: Op->getOperand(Num: 2), N2: Op->getOperand(Num: 3))); |
2074 | } |
2075 | case Intrinsic::mips_nlzc_b: |
2076 | case Intrinsic::mips_nlzc_h: |
2077 | case Intrinsic::mips_nlzc_w: |
2078 | case Intrinsic::mips_nlzc_d: |
2079 | return DAG.getNode(Opcode: ISD::CTLZ, DL, VT: Op->getValueType(ResNo: 0), Operand: Op->getOperand(Num: 1)); |
2080 | case Intrinsic::mips_nor_v: { |
2081 | SDValue Res = DAG.getNode(Opcode: ISD::OR, DL, VT: Op->getValueType(ResNo: 0), |
2082 | N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2)); |
2083 | return DAG.getNOT(DL, Val: Res, VT: Res->getValueType(ResNo: 0)); |
2084 | } |
2085 | case Intrinsic::mips_nori_b: { |
2086 | SDValue Res = DAG.getNode(Opcode: ISD::OR, DL, VT: Op->getValueType(ResNo: 0), |
2087 | N1: Op->getOperand(Num: 1), |
2088 | N2: lowerMSASplatImm(Op, ImmOp: 2, DAG)); |
2089 | return DAG.getNOT(DL, Val: Res, VT: Res->getValueType(ResNo: 0)); |
2090 | } |
2091 | case Intrinsic::mips_or_v: |
2092 | return DAG.getNode(Opcode: ISD::OR, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
2093 | N2: Op->getOperand(Num: 2)); |
2094 | case Intrinsic::mips_ori_b: |
2095 | return DAG.getNode(Opcode: ISD::OR, DL, VT: Op->getValueType(ResNo: 0), |
2096 | N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG)); |
2097 | case Intrinsic::mips_pckev_b: |
2098 | case Intrinsic::mips_pckev_h: |
2099 | case Intrinsic::mips_pckev_w: |
2100 | case Intrinsic::mips_pckev_d: |
2101 | return DAG.getNode(Opcode: MipsISD::PCKEV, DL, VT: Op->getValueType(ResNo: 0), |
2102 | N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2)); |
2103 | case Intrinsic::mips_pckod_b: |
2104 | case Intrinsic::mips_pckod_h: |
2105 | case Intrinsic::mips_pckod_w: |
2106 | case Intrinsic::mips_pckod_d: |
2107 | return DAG.getNode(Opcode: MipsISD::PCKOD, DL, VT: Op->getValueType(ResNo: 0), |
2108 | N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2)); |
2109 | case Intrinsic::mips_pcnt_b: |
2110 | case Intrinsic::mips_pcnt_h: |
2111 | case Intrinsic::mips_pcnt_w: |
2112 | case Intrinsic::mips_pcnt_d: |
2113 | return DAG.getNode(Opcode: ISD::CTPOP, DL, VT: Op->getValueType(ResNo: 0), Operand: Op->getOperand(Num: 1)); |
2114 | case Intrinsic::mips_sat_s_b: |
2115 | case Intrinsic::mips_sat_s_h: |
2116 | case Intrinsic::mips_sat_s_w: |
2117 | case Intrinsic::mips_sat_s_d: |
2118 | case Intrinsic::mips_sat_u_b: |
2119 | case Intrinsic::mips_sat_u_h: |
2120 | case Intrinsic::mips_sat_u_w: |
2121 | case Intrinsic::mips_sat_u_d: { |
2122 | // Report an error for out of range values. |
2123 | int64_t Max; |
2124 | switch (Intrinsic) { |
2125 | case Intrinsic::mips_sat_s_b: |
2126 | case Intrinsic::mips_sat_u_b: Max = 7; break; |
2127 | case Intrinsic::mips_sat_s_h: |
2128 | case Intrinsic::mips_sat_u_h: Max = 15; break; |
2129 | case Intrinsic::mips_sat_s_w: |
2130 | case Intrinsic::mips_sat_u_w: Max = 31; break; |
2131 | case Intrinsic::mips_sat_s_d: |
2132 | case Intrinsic::mips_sat_u_d: Max = 63; break; |
2133 | default: llvm_unreachable("Unmatched intrinsic" ); |
2134 | } |
2135 | int64_t Value = cast<ConstantSDNode>(Val: Op->getOperand(Num: 2))->getSExtValue(); |
2136 | if (Value < 0 || Value > Max) |
2137 | report_fatal_error(reason: "Immediate out of range" ); |
2138 | return SDValue(); |
2139 | } |
2140 | case Intrinsic::mips_shf_b: |
2141 | case Intrinsic::mips_shf_h: |
2142 | case Intrinsic::mips_shf_w: { |
2143 | int64_t Value = cast<ConstantSDNode>(Val: Op->getOperand(Num: 2))->getSExtValue(); |
2144 | if (Value < 0 || Value > 255) |
2145 | report_fatal_error(reason: "Immediate out of range" ); |
2146 | return DAG.getNode(Opcode: MipsISD::SHF, DL, VT: Op->getValueType(ResNo: 0), |
2147 | N1: Op->getOperand(Num: 2), N2: Op->getOperand(Num: 1)); |
2148 | } |
2149 | case Intrinsic::mips_sldi_b: |
2150 | case Intrinsic::mips_sldi_h: |
2151 | case Intrinsic::mips_sldi_w: |
2152 | case Intrinsic::mips_sldi_d: { |
2153 | // Report an error for out of range values. |
2154 | int64_t Max; |
2155 | switch (Intrinsic) { |
2156 | case Intrinsic::mips_sldi_b: Max = 15; break; |
2157 | case Intrinsic::mips_sldi_h: Max = 7; break; |
2158 | case Intrinsic::mips_sldi_w: Max = 3; break; |
2159 | case Intrinsic::mips_sldi_d: Max = 1; break; |
2160 | default: llvm_unreachable("Unmatched intrinsic" ); |
2161 | } |
2162 | int64_t Value = cast<ConstantSDNode>(Val: Op->getOperand(Num: 3))->getSExtValue(); |
2163 | if (Value < 0 || Value > Max) |
2164 | report_fatal_error(reason: "Immediate out of range" ); |
2165 | return SDValue(); |
2166 | } |
2167 | case Intrinsic::mips_sll_b: |
2168 | case Intrinsic::mips_sll_h: |
2169 | case Intrinsic::mips_sll_w: |
2170 | case Intrinsic::mips_sll_d: |
2171 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
2172 | N2: truncateVecElts(Op, DAG)); |
2173 | case Intrinsic::mips_slli_b: |
2174 | case Intrinsic::mips_slli_h: |
2175 | case Intrinsic::mips_slli_w: |
2176 | case Intrinsic::mips_slli_d: |
2177 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: Op->getValueType(ResNo: 0), |
2178 | N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG)); |
2179 | case Intrinsic::mips_splat_b: |
2180 | case Intrinsic::mips_splat_h: |
2181 | case Intrinsic::mips_splat_w: |
2182 | case Intrinsic::mips_splat_d: |
2183 | // We can't lower via VECTOR_SHUFFLE because it requires constant shuffle |
2184 | // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because |
2185 | // EXTRACT_VECTOR_ELT can't extract i64's on MIPS32. |
2186 | // Instead we lower to MipsISD::VSHF and match from there. |
2187 | return DAG.getNode(Opcode: MipsISD::VSHF, DL, VT: Op->getValueType(ResNo: 0), |
2188 | N1: lowerMSASplatZExt(Op, OpNr: 2, DAG), N2: Op->getOperand(Num: 1), |
2189 | N3: Op->getOperand(Num: 1)); |
2190 | case Intrinsic::mips_splati_b: |
2191 | case Intrinsic::mips_splati_h: |
2192 | case Intrinsic::mips_splati_w: |
2193 | case Intrinsic::mips_splati_d: |
2194 | return DAG.getNode(Opcode: MipsISD::VSHF, DL, VT: Op->getValueType(ResNo: 0), |
2195 | N1: lowerMSASplatImm(Op, ImmOp: 2, DAG), N2: Op->getOperand(Num: 1), |
2196 | N3: Op->getOperand(Num: 1)); |
2197 | case Intrinsic::mips_sra_b: |
2198 | case Intrinsic::mips_sra_h: |
2199 | case Intrinsic::mips_sra_w: |
2200 | case Intrinsic::mips_sra_d: |
2201 | return DAG.getNode(Opcode: ISD::SRA, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
2202 | N2: truncateVecElts(Op, DAG)); |
2203 | case Intrinsic::mips_srai_b: |
2204 | case Intrinsic::mips_srai_h: |
2205 | case Intrinsic::mips_srai_w: |
2206 | case Intrinsic::mips_srai_d: |
2207 | return DAG.getNode(Opcode: ISD::SRA, DL, VT: Op->getValueType(ResNo: 0), |
2208 | N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG)); |
2209 | case Intrinsic::mips_srari_b: |
2210 | case Intrinsic::mips_srari_h: |
2211 | case Intrinsic::mips_srari_w: |
2212 | case Intrinsic::mips_srari_d: { |
2213 | // Report an error for out of range values. |
2214 | int64_t Max; |
2215 | switch (Intrinsic) { |
2216 | case Intrinsic::mips_srari_b: Max = 7; break; |
2217 | case Intrinsic::mips_srari_h: Max = 15; break; |
2218 | case Intrinsic::mips_srari_w: Max = 31; break; |
2219 | case Intrinsic::mips_srari_d: Max = 63; break; |
2220 | default: llvm_unreachable("Unmatched intrinsic" ); |
2221 | } |
2222 | int64_t Value = cast<ConstantSDNode>(Val: Op->getOperand(Num: 2))->getSExtValue(); |
2223 | if (Value < 0 || Value > Max) |
2224 | report_fatal_error(reason: "Immediate out of range" ); |
2225 | return SDValue(); |
2226 | } |
2227 | case Intrinsic::mips_srl_b: |
2228 | case Intrinsic::mips_srl_h: |
2229 | case Intrinsic::mips_srl_w: |
2230 | case Intrinsic::mips_srl_d: |
2231 | return DAG.getNode(Opcode: ISD::SRL, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
2232 | N2: truncateVecElts(Op, DAG)); |
2233 | case Intrinsic::mips_srli_b: |
2234 | case Intrinsic::mips_srli_h: |
2235 | case Intrinsic::mips_srli_w: |
2236 | case Intrinsic::mips_srli_d: |
2237 | return DAG.getNode(Opcode: ISD::SRL, DL, VT: Op->getValueType(ResNo: 0), |
2238 | N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG)); |
2239 | case Intrinsic::mips_srlri_b: |
2240 | case Intrinsic::mips_srlri_h: |
2241 | case Intrinsic::mips_srlri_w: |
2242 | case Intrinsic::mips_srlri_d: { |
2243 | // Report an error for out of range values. |
2244 | int64_t Max; |
2245 | switch (Intrinsic) { |
2246 | case Intrinsic::mips_srlri_b: Max = 7; break; |
2247 | case Intrinsic::mips_srlri_h: Max = 15; break; |
2248 | case Intrinsic::mips_srlri_w: Max = 31; break; |
2249 | case Intrinsic::mips_srlri_d: Max = 63; break; |
2250 | default: llvm_unreachable("Unmatched intrinsic" ); |
2251 | } |
2252 | int64_t Value = cast<ConstantSDNode>(Val: Op->getOperand(Num: 2))->getSExtValue(); |
2253 | if (Value < 0 || Value > Max) |
2254 | report_fatal_error(reason: "Immediate out of range" ); |
2255 | return SDValue(); |
2256 | } |
2257 | case Intrinsic::mips_subv_b: |
2258 | case Intrinsic::mips_subv_h: |
2259 | case Intrinsic::mips_subv_w: |
2260 | case Intrinsic::mips_subv_d: |
2261 | return DAG.getNode(Opcode: ISD::SUB, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
2262 | N2: Op->getOperand(Num: 2)); |
2263 | case Intrinsic::mips_subvi_b: |
2264 | case Intrinsic::mips_subvi_h: |
2265 | case Intrinsic::mips_subvi_w: |
2266 | case Intrinsic::mips_subvi_d: |
2267 | return DAG.getNode(Opcode: ISD::SUB, DL, VT: Op->getValueType(ResNo: 0), |
2268 | N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG)); |
2269 | case Intrinsic::mips_vshf_b: |
2270 | case Intrinsic::mips_vshf_h: |
2271 | case Intrinsic::mips_vshf_w: |
2272 | case Intrinsic::mips_vshf_d: |
2273 | return DAG.getNode(Opcode: MipsISD::VSHF, DL, VT: Op->getValueType(ResNo: 0), |
2274 | N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2), N3: Op->getOperand(Num: 3)); |
2275 | case Intrinsic::mips_xor_v: |
2276 | return DAG.getNode(Opcode: ISD::XOR, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
2277 | N2: Op->getOperand(Num: 2)); |
2278 | case Intrinsic::mips_xori_b: |
2279 | return DAG.getNode(Opcode: ISD::XOR, DL, VT: Op->getValueType(ResNo: 0), |
2280 | N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG)); |
2281 | case Intrinsic::thread_pointer: { |
2282 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
2283 | return DAG.getNode(Opcode: MipsISD::ThreadPointer, DL, VT: PtrVT); |
2284 | } |
2285 | } |
2286 | } |
2287 | |
2288 | static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, |
2289 | const MipsSubtarget &Subtarget) { |
2290 | SDLoc DL(Op); |
2291 | SDValue ChainIn = Op->getOperand(Num: 0); |
2292 | SDValue Address = Op->getOperand(Num: 2); |
2293 | SDValue Offset = Op->getOperand(Num: 3); |
2294 | EVT ResTy = Op->getValueType(ResNo: 0); |
2295 | EVT PtrTy = Address->getValueType(ResNo: 0); |
2296 | |
2297 | // For N64 addresses have the underlying type MVT::i64. This intrinsic |
2298 | // however takes an i32 signed constant offset. The actual type of the |
2299 | // intrinsic is a scaled signed i10. |
2300 | if (Subtarget.isABI_N64()) |
2301 | Offset = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: PtrTy, Operand: Offset); |
2302 | |
2303 | Address = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrTy, N1: Address, N2: Offset); |
2304 | return DAG.getLoad(VT: ResTy, dl: DL, Chain: ChainIn, Ptr: Address, PtrInfo: MachinePointerInfo(), |
2305 | Alignment: Align(16)); |
2306 | } |
2307 | |
2308 | SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, |
2309 | SelectionDAG &DAG) const { |
2310 | unsigned Intr = Op->getConstantOperandVal(Num: 1); |
2311 | switch (Intr) { |
2312 | default: |
2313 | return SDValue(); |
2314 | case Intrinsic::mips_extp: |
2315 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::EXTP); |
2316 | case Intrinsic::mips_extpdp: |
2317 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::EXTPDP); |
2318 | case Intrinsic::mips_extr_w: |
2319 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::EXTR_W); |
2320 | case Intrinsic::mips_extr_r_w: |
2321 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::EXTR_R_W); |
2322 | case Intrinsic::mips_extr_rs_w: |
2323 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::EXTR_RS_W); |
2324 | case Intrinsic::mips_extr_s_h: |
2325 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::EXTR_S_H); |
2326 | case Intrinsic::mips_mthlip: |
2327 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::MTHLIP); |
2328 | case Intrinsic::mips_mulsaq_s_w_ph: |
2329 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::MULSAQ_S_W_PH); |
2330 | case Intrinsic::mips_maq_s_w_phl: |
2331 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::MAQ_S_W_PHL); |
2332 | case Intrinsic::mips_maq_s_w_phr: |
2333 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::MAQ_S_W_PHR); |
2334 | case Intrinsic::mips_maq_sa_w_phl: |
2335 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::MAQ_SA_W_PHL); |
2336 | case Intrinsic::mips_maq_sa_w_phr: |
2337 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::MAQ_SA_W_PHR); |
2338 | case Intrinsic::mips_dpaq_s_w_ph: |
2339 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPAQ_S_W_PH); |
2340 | case Intrinsic::mips_dpsq_s_w_ph: |
2341 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPSQ_S_W_PH); |
2342 | case Intrinsic::mips_dpaq_sa_l_w: |
2343 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPAQ_SA_L_W); |
2344 | case Intrinsic::mips_dpsq_sa_l_w: |
2345 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPSQ_SA_L_W); |
2346 | case Intrinsic::mips_dpaqx_s_w_ph: |
2347 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPAQX_S_W_PH); |
2348 | case Intrinsic::mips_dpaqx_sa_w_ph: |
2349 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPAQX_SA_W_PH); |
2350 | case Intrinsic::mips_dpsqx_s_w_ph: |
2351 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPSQX_S_W_PH); |
2352 | case Intrinsic::mips_dpsqx_sa_w_ph: |
2353 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPSQX_SA_W_PH); |
2354 | case Intrinsic::mips_ld_b: |
2355 | case Intrinsic::mips_ld_h: |
2356 | case Intrinsic::mips_ld_w: |
2357 | case Intrinsic::mips_ld_d: |
2358 | return lowerMSALoadIntr(Op, DAG, Intr, Subtarget); |
2359 | } |
2360 | } |
2361 | |
2362 | static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, |
2363 | const MipsSubtarget &Subtarget) { |
2364 | SDLoc DL(Op); |
2365 | SDValue ChainIn = Op->getOperand(Num: 0); |
2366 | SDValue Value = Op->getOperand(Num: 2); |
2367 | SDValue Address = Op->getOperand(Num: 3); |
2368 | SDValue Offset = Op->getOperand(Num: 4); |
2369 | EVT PtrTy = Address->getValueType(ResNo: 0); |
2370 | |
2371 | // For N64 addresses have the underlying type MVT::i64. This intrinsic |
2372 | // however takes an i32 signed constant offset. The actual type of the |
2373 | // intrinsic is a scaled signed i10. |
2374 | if (Subtarget.isABI_N64()) |
2375 | Offset = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: PtrTy, Operand: Offset); |
2376 | |
2377 | Address = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrTy, N1: Address, N2: Offset); |
2378 | |
2379 | return DAG.getStore(Chain: ChainIn, dl: DL, Val: Value, Ptr: Address, PtrInfo: MachinePointerInfo(), |
2380 | Alignment: Align(16)); |
2381 | } |
2382 | |
2383 | SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op, |
2384 | SelectionDAG &DAG) const { |
2385 | unsigned Intr = Op->getConstantOperandVal(Num: 1); |
2386 | switch (Intr) { |
2387 | default: |
2388 | return SDValue(); |
2389 | case Intrinsic::mips_st_b: |
2390 | case Intrinsic::mips_st_h: |
2391 | case Intrinsic::mips_st_w: |
2392 | case Intrinsic::mips_st_d: |
2393 | return lowerMSAStoreIntr(Op, DAG, Intr, Subtarget); |
2394 | } |
2395 | } |
2396 | |
2397 | // Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT. |
2398 | // |
2399 | // The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We |
2400 | // choose to sign-extend but we could have equally chosen zero-extend. The |
2401 | // DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT |
2402 | // result into this node later (possibly changing it to a zero-extend in the |
2403 | // process). |
2404 | SDValue MipsSETargetLowering:: |
2405 | (SDValue Op, SelectionDAG &DAG) const { |
2406 | SDLoc DL(Op); |
2407 | EVT ResTy = Op->getValueType(ResNo: 0); |
2408 | SDValue Op0 = Op->getOperand(Num: 0); |
2409 | EVT VecTy = Op0->getValueType(ResNo: 0); |
2410 | |
2411 | if (!VecTy.is128BitVector()) |
2412 | return SDValue(); |
2413 | |
2414 | if (ResTy.isInteger()) { |
2415 | SDValue Op1 = Op->getOperand(Num: 1); |
2416 | EVT EltTy = VecTy.getVectorElementType(); |
2417 | return DAG.getNode(Opcode: MipsISD::VEXTRACT_SEXT_ELT, DL, VT: ResTy, N1: Op0, N2: Op1, |
2418 | N3: DAG.getValueType(EltTy)); |
2419 | } |
2420 | |
2421 | return Op; |
2422 | } |
2423 | |
2424 | static bool isConstantOrUndef(const SDValue Op) { |
2425 | if (Op->isUndef()) |
2426 | return true; |
2427 | if (isa<ConstantSDNode>(Val: Op)) |
2428 | return true; |
2429 | if (isa<ConstantFPSDNode>(Val: Op)) |
2430 | return true; |
2431 | return false; |
2432 | } |
2433 | |
2434 | static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) { |
2435 | for (unsigned i = 0; i < Op->getNumOperands(); ++i) |
2436 | if (isConstantOrUndef(Op: Op->getOperand(Num: i))) |
2437 | return true; |
2438 | return false; |
2439 | } |
2440 | |
2441 | // Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the |
2442 | // backend. |
2443 | // |
2444 | // Lowers according to the following rules: |
2445 | // - Constant splats are legal as-is as long as the SplatBitSize is a power of |
2446 | // 2 less than or equal to 64 and the value fits into a signed 10-bit |
2447 | // immediate |
2448 | // - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize |
2449 | // is a power of 2 less than or equal to 64 and the value does not fit into a |
2450 | // signed 10-bit immediate |
2451 | // - Non-constant splats are legal as-is. |
2452 | // - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT. |
2453 | // - All others are illegal and must be expanded. |
2454 | SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op, |
2455 | SelectionDAG &DAG) const { |
2456 | BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Val&: Op); |
2457 | EVT ResTy = Op->getValueType(ResNo: 0); |
2458 | SDLoc DL(Op); |
2459 | APInt SplatValue, SplatUndef; |
2460 | unsigned SplatBitSize; |
2461 | bool HasAnyUndefs; |
2462 | |
2463 | if (!Subtarget.hasMSA() || !ResTy.is128BitVector()) |
2464 | return SDValue(); |
2465 | |
2466 | if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, |
2467 | HasAnyUndefs, MinSplatBits: 8, |
2468 | isBigEndian: !Subtarget.isLittle()) && SplatBitSize <= 64) { |
2469 | // We can only cope with 8, 16, 32, or 64-bit elements |
2470 | if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 && |
2471 | SplatBitSize != 64) |
2472 | return SDValue(); |
2473 | |
2474 | // If the value isn't an integer type we will have to bitcast |
2475 | // from an integer type first. Also, if there are any undefs, we must |
2476 | // lower them to defined values first. |
2477 | if (ResTy.isInteger() && !HasAnyUndefs) |
2478 | return Op; |
2479 | |
2480 | EVT ViaVecTy; |
2481 | |
2482 | switch (SplatBitSize) { |
2483 | default: |
2484 | return SDValue(); |
2485 | case 8: |
2486 | ViaVecTy = MVT::v16i8; |
2487 | break; |
2488 | case 16: |
2489 | ViaVecTy = MVT::v8i16; |
2490 | break; |
2491 | case 32: |
2492 | ViaVecTy = MVT::v4i32; |
2493 | break; |
2494 | case 64: |
2495 | // There's no fill.d to fall back on for 64-bit values |
2496 | return SDValue(); |
2497 | } |
2498 | |
2499 | // SelectionDAG::getConstant will promote SplatValue appropriately. |
2500 | SDValue Result = DAG.getConstant(Val: SplatValue, DL, VT: ViaVecTy); |
2501 | |
2502 | // Bitcast to the type we originally wanted |
2503 | if (ViaVecTy != ResTy) |
2504 | Result = DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(Node), VT: ResTy, Operand: Result); |
2505 | |
2506 | return Result; |
2507 | } else if (DAG.isSplatValue(V: Op, /* AllowUndefs */ false)) |
2508 | return Op; |
2509 | else if (!isConstantOrUndefBUILD_VECTOR(Op: Node)) { |
2510 | // Use INSERT_VECTOR_ELT operations rather than expand to stores. |
2511 | // The resulting code is the same length as the expansion, but it doesn't |
2512 | // use memory operations |
2513 | EVT ResTy = Node->getValueType(ResNo: 0); |
2514 | |
2515 | assert(ResTy.isVector()); |
2516 | |
2517 | unsigned NumElts = ResTy.getVectorNumElements(); |
2518 | SDValue Vector = DAG.getUNDEF(VT: ResTy); |
2519 | for (unsigned i = 0; i < NumElts; ++i) { |
2520 | Vector = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: ResTy, N1: Vector, |
2521 | N2: Node->getOperand(Num: i), |
2522 | N3: DAG.getConstant(Val: i, DL, VT: MVT::i32)); |
2523 | } |
2524 | return Vector; |
2525 | } |
2526 | |
2527 | return SDValue(); |
2528 | } |
2529 | |
2530 | // Lower VECTOR_SHUFFLE into SHF (if possible). |
2531 | // |
2532 | // SHF splits the vector into blocks of four elements, then shuffles these |
2533 | // elements according to a <4 x i2> constant (encoded as an integer immediate). |
2534 | // |
2535 | // It is therefore possible to lower into SHF when the mask takes the form: |
2536 | // <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...> |
2537 | // When undef's appear they are treated as if they were whatever value is |
2538 | // necessary in order to fit the above forms. |
2539 | // |
2540 | // For example: |
2541 | // %2 = shufflevector <8 x i16> %0, <8 x i16> undef, |
2542 | // <8 x i32> <i32 3, i32 2, i32 1, i32 0, |
2543 | // i32 7, i32 6, i32 5, i32 4> |
2544 | // is lowered to: |
2545 | // (SHF_H $w0, $w1, 27) |
2546 | // where the 27 comes from: |
2547 | // 3 + (2 << 2) + (1 << 4) + (0 << 6) |
2548 | static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy, |
2549 | SmallVector<int, 16> Indices, |
2550 | SelectionDAG &DAG) { |
2551 | int SHFIndices[4] = { -1, -1, -1, -1 }; |
2552 | |
2553 | if (Indices.size() < 4) |
2554 | return SDValue(); |
2555 | |
2556 | for (unsigned i = 0; i < 4; ++i) { |
2557 | for (unsigned j = i; j < Indices.size(); j += 4) { |
2558 | int Idx = Indices[j]; |
2559 | |
2560 | // Convert from vector index to 4-element subvector index |
2561 | // If an index refers to an element outside of the subvector then give up |
2562 | if (Idx != -1) { |
2563 | Idx -= 4 * (j / 4); |
2564 | if (Idx < 0 || Idx >= 4) |
2565 | return SDValue(); |
2566 | } |
2567 | |
2568 | // If the mask has an undef, replace it with the current index. |
2569 | // Note that it might still be undef if the current index is also undef |
2570 | if (SHFIndices[i] == -1) |
2571 | SHFIndices[i] = Idx; |
2572 | |
2573 | // Check that non-undef values are the same as in the mask. If they |
2574 | // aren't then give up |
2575 | if (!(Idx == -1 || Idx == SHFIndices[i])) |
2576 | return SDValue(); |
2577 | } |
2578 | } |
2579 | |
2580 | // Calculate the immediate. Replace any remaining undefs with zero |
2581 | APInt Imm(32, 0); |
2582 | for (int i = 3; i >= 0; --i) { |
2583 | int Idx = SHFIndices[i]; |
2584 | |
2585 | if (Idx == -1) |
2586 | Idx = 0; |
2587 | |
2588 | Imm <<= 2; |
2589 | Imm |= Idx & 0x3; |
2590 | } |
2591 | |
2592 | SDLoc DL(Op); |
2593 | return DAG.getNode(Opcode: MipsISD::SHF, DL, VT: ResTy, |
2594 | N1: DAG.getTargetConstant(Val: Imm, DL, VT: MVT::i32), |
2595 | N2: Op->getOperand(Num: 0)); |
2596 | } |
2597 | |
2598 | /// Determine whether a range fits a regular pattern of values. |
2599 | /// This function accounts for the possibility of jumping over the End iterator. |
2600 | template <typename ValType> |
2601 | static bool |
2602 | fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin, |
2603 | unsigned CheckStride, |
2604 | typename SmallVectorImpl<ValType>::const_iterator End, |
2605 | ValType ExpectedIndex, unsigned ExpectedIndexStride) { |
2606 | auto &I = Begin; |
2607 | |
2608 | while (I != End) { |
2609 | if (*I != -1 && *I != ExpectedIndex) |
2610 | return false; |
2611 | ExpectedIndex += ExpectedIndexStride; |
2612 | |
2613 | // Incrementing past End is undefined behaviour so we must increment one |
2614 | // step at a time and check for End at each step. |
2615 | for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I) |
2616 | ; // Empty loop body. |
2617 | } |
2618 | return true; |
2619 | } |
2620 | |
2621 | // Determine whether VECTOR_SHUFFLE is a SPLATI. |
2622 | // |
2623 | // It is a SPLATI when the mask is: |
2624 | // <x, x, x, ...> |
2625 | // where x is any valid index. |
2626 | // |
2627 | // When undef's appear in the mask they are treated as if they were whatever |
2628 | // value is necessary in order to fit the above form. |
2629 | static bool isVECTOR_SHUFFLE_SPLATI(SDValue Op, EVT ResTy, |
2630 | SmallVector<int, 16> Indices, |
2631 | SelectionDAG &DAG) { |
2632 | assert((Indices.size() % 2) == 0); |
2633 | |
2634 | int SplatIndex = -1; |
2635 | for (const auto &V : Indices) { |
2636 | if (V != -1) { |
2637 | SplatIndex = V; |
2638 | break; |
2639 | } |
2640 | } |
2641 | |
2642 | return fitsRegularPattern<int>(Begin: Indices.begin(), CheckStride: 1, End: Indices.end(), ExpectedIndex: SplatIndex, |
2643 | ExpectedIndexStride: 0); |
2644 | } |
2645 | |
2646 | // Lower VECTOR_SHUFFLE into ILVEV (if possible). |
2647 | // |
2648 | // ILVEV interleaves the even elements from each vector. |
2649 | // |
2650 | // It is possible to lower into ILVEV when the mask consists of two of the |
2651 | // following forms interleaved: |
2652 | // <0, 2, 4, ...> |
2653 | // <n, n+2, n+4, ...> |
2654 | // where n is the number of elements in the vector. |
2655 | // For example: |
2656 | // <0, 0, 2, 2, 4, 4, ...> |
2657 | // <0, n, 2, n+2, 4, n+4, ...> |
2658 | // |
2659 | // When undef's appear in the mask they are treated as if they were whatever |
2660 | // value is necessary in order to fit the above forms. |
2661 | static SDValue lowerVECTOR_SHUFFLE_ILVEV(SDValue Op, EVT ResTy, |
2662 | SmallVector<int, 16> Indices, |
2663 | SelectionDAG &DAG) { |
2664 | assert((Indices.size() % 2) == 0); |
2665 | |
2666 | SDValue Wt; |
2667 | SDValue Ws; |
2668 | const auto &Begin = Indices.begin(); |
2669 | const auto &End = Indices.end(); |
2670 | |
2671 | // Check even elements are taken from the even elements of one half or the |
2672 | // other and pick an operand accordingly. |
2673 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 2)) |
2674 | Wt = Op->getOperand(Num: 0); |
2675 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Indices.size(), ExpectedIndexStride: 2)) |
2676 | Wt = Op->getOperand(Num: 1); |
2677 | else |
2678 | return SDValue(); |
2679 | |
2680 | // Check odd elements are taken from the even elements of one half or the |
2681 | // other and pick an operand accordingly. |
2682 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 2)) |
2683 | Ws = Op->getOperand(Num: 0); |
2684 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Indices.size(), ExpectedIndexStride: 2)) |
2685 | Ws = Op->getOperand(Num: 1); |
2686 | else |
2687 | return SDValue(); |
2688 | |
2689 | return DAG.getNode(Opcode: MipsISD::ILVEV, DL: SDLoc(Op), VT: ResTy, N1: Ws, N2: Wt); |
2690 | } |
2691 | |
2692 | // Lower VECTOR_SHUFFLE into ILVOD (if possible). |
2693 | // |
2694 | // ILVOD interleaves the odd elements from each vector. |
2695 | // |
2696 | // It is possible to lower into ILVOD when the mask consists of two of the |
2697 | // following forms interleaved: |
2698 | // <1, 3, 5, ...> |
2699 | // <n+1, n+3, n+5, ...> |
2700 | // where n is the number of elements in the vector. |
2701 | // For example: |
2702 | // <1, 1, 3, 3, 5, 5, ...> |
2703 | // <1, n+1, 3, n+3, 5, n+5, ...> |
2704 | // |
2705 | // When undef's appear in the mask they are treated as if they were whatever |
2706 | // value is necessary in order to fit the above forms. |
2707 | static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy, |
2708 | SmallVector<int, 16> Indices, |
2709 | SelectionDAG &DAG) { |
2710 | assert((Indices.size() % 2) == 0); |
2711 | |
2712 | SDValue Wt; |
2713 | SDValue Ws; |
2714 | const auto &Begin = Indices.begin(); |
2715 | const auto &End = Indices.end(); |
2716 | |
2717 | // Check even elements are taken from the odd elements of one half or the |
2718 | // other and pick an operand accordingly. |
2719 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 1, ExpectedIndexStride: 2)) |
2720 | Wt = Op->getOperand(Num: 0); |
2721 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Indices.size() + 1, ExpectedIndexStride: 2)) |
2722 | Wt = Op->getOperand(Num: 1); |
2723 | else |
2724 | return SDValue(); |
2725 | |
2726 | // Check odd elements are taken from the odd elements of one half or the |
2727 | // other and pick an operand accordingly. |
2728 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 1, ExpectedIndexStride: 2)) |
2729 | Ws = Op->getOperand(Num: 0); |
2730 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Indices.size() + 1, ExpectedIndexStride: 2)) |
2731 | Ws = Op->getOperand(Num: 1); |
2732 | else |
2733 | return SDValue(); |
2734 | |
2735 | return DAG.getNode(Opcode: MipsISD::ILVOD, DL: SDLoc(Op), VT: ResTy, N1: Wt, N2: Ws); |
2736 | } |
2737 | |
2738 | // Lower VECTOR_SHUFFLE into ILVR (if possible). |
2739 | // |
2740 | // ILVR interleaves consecutive elements from the right (lowest-indexed) half of |
2741 | // each vector. |
2742 | // |
2743 | // It is possible to lower into ILVR when the mask consists of two of the |
2744 | // following forms interleaved: |
2745 | // <0, 1, 2, ...> |
2746 | // <n, n+1, n+2, ...> |
2747 | // where n is the number of elements in the vector. |
2748 | // For example: |
2749 | // <0, 0, 1, 1, 2, 2, ...> |
2750 | // <0, n, 1, n+1, 2, n+2, ...> |
2751 | // |
2752 | // When undef's appear in the mask they are treated as if they were whatever |
2753 | // value is necessary in order to fit the above forms. |
2754 | static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy, |
2755 | SmallVector<int, 16> Indices, |
2756 | SelectionDAG &DAG) { |
2757 | assert((Indices.size() % 2) == 0); |
2758 | |
2759 | SDValue Wt; |
2760 | SDValue Ws; |
2761 | const auto &Begin = Indices.begin(); |
2762 | const auto &End = Indices.end(); |
2763 | |
2764 | // Check even elements are taken from the right (lowest-indexed) elements of |
2765 | // one half or the other and pick an operand accordingly. |
2766 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 1)) |
2767 | Wt = Op->getOperand(Num: 0); |
2768 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Indices.size(), ExpectedIndexStride: 1)) |
2769 | Wt = Op->getOperand(Num: 1); |
2770 | else |
2771 | return SDValue(); |
2772 | |
2773 | // Check odd elements are taken from the right (lowest-indexed) elements of |
2774 | // one half or the other and pick an operand accordingly. |
2775 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 1)) |
2776 | Ws = Op->getOperand(Num: 0); |
2777 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Indices.size(), ExpectedIndexStride: 1)) |
2778 | Ws = Op->getOperand(Num: 1); |
2779 | else |
2780 | return SDValue(); |
2781 | |
2782 | return DAG.getNode(Opcode: MipsISD::ILVR, DL: SDLoc(Op), VT: ResTy, N1: Ws, N2: Wt); |
2783 | } |
2784 | |
2785 | // Lower VECTOR_SHUFFLE into ILVL (if possible). |
2786 | // |
2787 | // ILVL interleaves consecutive elements from the left (highest-indexed) half |
2788 | // of each vector. |
2789 | // |
2790 | // It is possible to lower into ILVL when the mask consists of two of the |
2791 | // following forms interleaved: |
2792 | // <x, x+1, x+2, ...> |
2793 | // <n+x, n+x+1, n+x+2, ...> |
2794 | // where n is the number of elements in the vector and x is half n. |
2795 | // For example: |
2796 | // <x, x, x+1, x+1, x+2, x+2, ...> |
2797 | // <x, n+x, x+1, n+x+1, x+2, n+x+2, ...> |
2798 | // |
2799 | // When undef's appear in the mask they are treated as if they were whatever |
2800 | // value is necessary in order to fit the above forms. |
2801 | static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy, |
2802 | SmallVector<int, 16> Indices, |
2803 | SelectionDAG &DAG) { |
2804 | assert((Indices.size() % 2) == 0); |
2805 | |
2806 | unsigned HalfSize = Indices.size() / 2; |
2807 | SDValue Wt; |
2808 | SDValue Ws; |
2809 | const auto &Begin = Indices.begin(); |
2810 | const auto &End = Indices.end(); |
2811 | |
2812 | // Check even elements are taken from the left (highest-indexed) elements of |
2813 | // one half or the other and pick an operand accordingly. |
2814 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1)) |
2815 | Wt = Op->getOperand(Num: 0); |
2816 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Indices.size() + HalfSize, ExpectedIndexStride: 1)) |
2817 | Wt = Op->getOperand(Num: 1); |
2818 | else |
2819 | return SDValue(); |
2820 | |
2821 | // Check odd elements are taken from the left (highest-indexed) elements of |
2822 | // one half or the other and pick an operand accordingly. |
2823 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1)) |
2824 | Ws = Op->getOperand(Num: 0); |
2825 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Indices.size() + HalfSize, |
2826 | ExpectedIndexStride: 1)) |
2827 | Ws = Op->getOperand(Num: 1); |
2828 | else |
2829 | return SDValue(); |
2830 | |
2831 | return DAG.getNode(Opcode: MipsISD::ILVL, DL: SDLoc(Op), VT: ResTy, N1: Ws, N2: Wt); |
2832 | } |
2833 | |
2834 | // Lower VECTOR_SHUFFLE into PCKEV (if possible). |
2835 | // |
2836 | // PCKEV copies the even elements of each vector into the result vector. |
2837 | // |
2838 | // It is possible to lower into PCKEV when the mask consists of two of the |
2839 | // following forms concatenated: |
2840 | // <0, 2, 4, ...> |
2841 | // <n, n+2, n+4, ...> |
2842 | // where n is the number of elements in the vector. |
2843 | // For example: |
2844 | // <0, 2, 4, ..., 0, 2, 4, ...> |
2845 | // <0, 2, 4, ..., n, n+2, n+4, ...> |
2846 | // |
2847 | // When undef's appear in the mask they are treated as if they were whatever |
2848 | // value is necessary in order to fit the above forms. |
2849 | static SDValue lowerVECTOR_SHUFFLE_PCKEV(SDValue Op, EVT ResTy, |
2850 | SmallVector<int, 16> Indices, |
2851 | SelectionDAG &DAG) { |
2852 | assert((Indices.size() % 2) == 0); |
2853 | |
2854 | SDValue Wt; |
2855 | SDValue Ws; |
2856 | const auto &Begin = Indices.begin(); |
2857 | const auto &Mid = Indices.begin() + Indices.size() / 2; |
2858 | const auto &End = Indices.end(); |
2859 | |
2860 | if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: 0, ExpectedIndexStride: 2)) |
2861 | Wt = Op->getOperand(Num: 0); |
2862 | else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: Indices.size(), ExpectedIndexStride: 2)) |
2863 | Wt = Op->getOperand(Num: 1); |
2864 | else |
2865 | return SDValue(); |
2866 | |
2867 | if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: 0, ExpectedIndexStride: 2)) |
2868 | Ws = Op->getOperand(Num: 0); |
2869 | else if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: Indices.size(), ExpectedIndexStride: 2)) |
2870 | Ws = Op->getOperand(Num: 1); |
2871 | else |
2872 | return SDValue(); |
2873 | |
2874 | return DAG.getNode(Opcode: MipsISD::PCKEV, DL: SDLoc(Op), VT: ResTy, N1: Ws, N2: Wt); |
2875 | } |
2876 | |
2877 | // Lower VECTOR_SHUFFLE into PCKOD (if possible). |
2878 | // |
2879 | // PCKOD copies the odd elements of each vector into the result vector. |
2880 | // |
2881 | // It is possible to lower into PCKOD when the mask consists of two of the |
2882 | // following forms concatenated: |
2883 | // <1, 3, 5, ...> |
2884 | // <n+1, n+3, n+5, ...> |
2885 | // where n is the number of elements in the vector. |
2886 | // For example: |
2887 | // <1, 3, 5, ..., 1, 3, 5, ...> |
2888 | // <1, 3, 5, ..., n+1, n+3, n+5, ...> |
2889 | // |
2890 | // When undef's appear in the mask they are treated as if they were whatever |
2891 | // value is necessary in order to fit the above forms. |
2892 | static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy, |
2893 | SmallVector<int, 16> Indices, |
2894 | SelectionDAG &DAG) { |
2895 | assert((Indices.size() % 2) == 0); |
2896 | |
2897 | SDValue Wt; |
2898 | SDValue Ws; |
2899 | const auto &Begin = Indices.begin(); |
2900 | const auto &Mid = Indices.begin() + Indices.size() / 2; |
2901 | const auto &End = Indices.end(); |
2902 | |
2903 | if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: 1, ExpectedIndexStride: 2)) |
2904 | Wt = Op->getOperand(Num: 0); |
2905 | else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: Indices.size() + 1, ExpectedIndexStride: 2)) |
2906 | Wt = Op->getOperand(Num: 1); |
2907 | else |
2908 | return SDValue(); |
2909 | |
2910 | if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: 1, ExpectedIndexStride: 2)) |
2911 | Ws = Op->getOperand(Num: 0); |
2912 | else if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: Indices.size() + 1, ExpectedIndexStride: 2)) |
2913 | Ws = Op->getOperand(Num: 1); |
2914 | else |
2915 | return SDValue(); |
2916 | |
2917 | return DAG.getNode(Opcode: MipsISD::PCKOD, DL: SDLoc(Op), VT: ResTy, N1: Ws, N2: Wt); |
2918 | } |
2919 | |
2920 | // Lower VECTOR_SHUFFLE into VSHF. |
2921 | // |
2922 | // This mostly consists of converting the shuffle indices in Indices into a |
2923 | // BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is |
2924 | // also code to eliminate unused operands of the VECTOR_SHUFFLE. For example, |
2925 | // if the type is v8i16 and all the indices are less than 8 then the second |
2926 | // operand is unused and can be replaced with anything. We choose to replace it |
2927 | // with the used operand since this reduces the number of instructions overall. |
2928 | static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy, |
2929 | const SmallVector<int, 16> &Indices, |
2930 | SelectionDAG &DAG) { |
2931 | SmallVector<SDValue, 16> Ops; |
2932 | SDValue Op0; |
2933 | SDValue Op1; |
2934 | EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger(); |
2935 | EVT MaskEltTy = MaskVecTy.getVectorElementType(); |
2936 | bool Using1stVec = false; |
2937 | bool Using2ndVec = false; |
2938 | SDLoc DL(Op); |
2939 | int ResTyNumElts = ResTy.getVectorNumElements(); |
2940 | |
2941 | for (int i = 0; i < ResTyNumElts; ++i) { |
2942 | // Idx == -1 means UNDEF |
2943 | int Idx = Indices[i]; |
2944 | |
2945 | if (0 <= Idx && Idx < ResTyNumElts) |
2946 | Using1stVec = true; |
2947 | if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2) |
2948 | Using2ndVec = true; |
2949 | } |
2950 | |
2951 | for (int Idx : Indices) |
2952 | Ops.push_back(Elt: DAG.getTargetConstant(Val: Idx, DL, VT: MaskEltTy)); |
2953 | |
2954 | SDValue MaskVec = DAG.getBuildVector(VT: MaskVecTy, DL, Ops); |
2955 | |
2956 | if (Using1stVec && Using2ndVec) { |
2957 | Op0 = Op->getOperand(Num: 0); |
2958 | Op1 = Op->getOperand(Num: 1); |
2959 | } else if (Using1stVec) |
2960 | Op0 = Op1 = Op->getOperand(Num: 0); |
2961 | else if (Using2ndVec) |
2962 | Op0 = Op1 = Op->getOperand(Num: 1); |
2963 | else |
2964 | llvm_unreachable("shuffle vector mask references neither vector operand?" ); |
2965 | |
2966 | // VECTOR_SHUFFLE concatenates the vectors in an vectorwise fashion. |
2967 | // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11> |
2968 | // VSHF concatenates the vectors in a bitwise fashion: |
2969 | // <0b00, 0b01> + <0b10, 0b11> -> |
2970 | // 0b0100 + 0b1110 -> 0b01001110 |
2971 | // <0b10, 0b11, 0b00, 0b01> |
2972 | // We must therefore swap the operands to get the correct result. |
2973 | return DAG.getNode(Opcode: MipsISD::VSHF, DL, VT: ResTy, N1: MaskVec, N2: Op1, N3: Op0); |
2974 | } |
2975 | |
2976 | // Lower VECTOR_SHUFFLE into one of a number of instructions depending on the |
2977 | // indices in the shuffle. |
2978 | SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, |
2979 | SelectionDAG &DAG) const { |
2980 | ShuffleVectorSDNode *Node = cast<ShuffleVectorSDNode>(Val&: Op); |
2981 | EVT ResTy = Op->getValueType(ResNo: 0); |
2982 | |
2983 | if (!ResTy.is128BitVector()) |
2984 | return SDValue(); |
2985 | |
2986 | int ResTyNumElts = ResTy.getVectorNumElements(); |
2987 | SmallVector<int, 16> Indices; |
2988 | |
2989 | for (int i = 0; i < ResTyNumElts; ++i) |
2990 | Indices.push_back(Elt: Node->getMaskElt(Idx: i)); |
2991 | |
2992 | // splati.[bhwd] is preferable to the others but is matched from |
2993 | // MipsISD::VSHF. |
2994 | if (isVECTOR_SHUFFLE_SPLATI(Op, ResTy, Indices, DAG)) |
2995 | return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG); |
2996 | SDValue Result; |
2997 | if ((Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG))) |
2998 | return Result; |
2999 | if ((Result = lowerVECTOR_SHUFFLE_ILVOD(Op, ResTy, Indices, DAG))) |
3000 | return Result; |
3001 | if ((Result = lowerVECTOR_SHUFFLE_ILVL(Op, ResTy, Indices, DAG))) |
3002 | return Result; |
3003 | if ((Result = lowerVECTOR_SHUFFLE_ILVR(Op, ResTy, Indices, DAG))) |
3004 | return Result; |
3005 | if ((Result = lowerVECTOR_SHUFFLE_PCKEV(Op, ResTy, Indices, DAG))) |
3006 | return Result; |
3007 | if ((Result = lowerVECTOR_SHUFFLE_PCKOD(Op, ResTy, Indices, DAG))) |
3008 | return Result; |
3009 | if ((Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG))) |
3010 | return Result; |
3011 | return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG); |
3012 | } |
3013 | |
3014 | MachineBasicBlock * |
3015 | MipsSETargetLowering::emitBPOSGE32(MachineInstr &MI, |
3016 | MachineBasicBlock *BB) const { |
3017 | // $bb: |
3018 | // bposge32_pseudo $vr0 |
3019 | // => |
3020 | // $bb: |
3021 | // bposge32 $tbb |
3022 | // $fbb: |
3023 | // li $vr2, 0 |
3024 | // b $sink |
3025 | // $tbb: |
3026 | // li $vr1, 1 |
3027 | // $sink: |
3028 | // $vr0 = phi($vr2, $fbb, $vr1, $tbb) |
3029 | |
3030 | MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); |
3031 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
3032 | const TargetRegisterClass *RC = &Mips::GPR32RegClass; |
3033 | DebugLoc DL = MI.getDebugLoc(); |
3034 | const BasicBlock *LLVM_BB = BB->getBasicBlock(); |
3035 | MachineFunction::iterator It = std::next(x: MachineFunction::iterator(BB)); |
3036 | MachineFunction *F = BB->getParent(); |
3037 | MachineBasicBlock *FBB = F->CreateMachineBasicBlock(BB: LLVM_BB); |
3038 | MachineBasicBlock *TBB = F->CreateMachineBasicBlock(BB: LLVM_BB); |
3039 | MachineBasicBlock *Sink = F->CreateMachineBasicBlock(BB: LLVM_BB); |
3040 | F->insert(MBBI: It, MBB: FBB); |
3041 | F->insert(MBBI: It, MBB: TBB); |
3042 | F->insert(MBBI: It, MBB: Sink); |
3043 | |
3044 | // Transfer the remainder of BB and its successor edges to Sink. |
3045 | Sink->splice(Where: Sink->begin(), Other: BB, From: std::next(x: MachineBasicBlock::iterator(MI)), |
3046 | To: BB->end()); |
3047 | Sink->transferSuccessorsAndUpdatePHIs(FromMBB: BB); |
3048 | |
3049 | // Add successors. |
3050 | BB->addSuccessor(Succ: FBB); |
3051 | BB->addSuccessor(Succ: TBB); |
3052 | FBB->addSuccessor(Succ: Sink); |
3053 | TBB->addSuccessor(Succ: Sink); |
3054 | |
3055 | // Insert the real bposge32 instruction to $BB. |
3056 | BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: Mips::BPOSGE32)).addMBB(MBB: TBB); |
3057 | // Insert the real bposge32c instruction to $BB. |
3058 | BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: Mips::BPOSGE32C_MMR3)).addMBB(MBB: TBB); |
3059 | |
3060 | // Fill $FBB. |
3061 | Register VR2 = RegInfo.createVirtualRegister(RegClass: RC); |
3062 | BuildMI(BB&: *FBB, I: FBB->end(), MIMD: DL, MCID: TII->get(Opcode: Mips::ADDiu), DestReg: VR2) |
3063 | .addReg(RegNo: Mips::ZERO).addImm(Val: 0); |
3064 | BuildMI(BB&: *FBB, I: FBB->end(), MIMD: DL, MCID: TII->get(Opcode: Mips::B)).addMBB(MBB: Sink); |
3065 | |
3066 | // Fill $TBB. |
3067 | Register VR1 = RegInfo.createVirtualRegister(RegClass: RC); |
3068 | BuildMI(BB&: *TBB, I: TBB->end(), MIMD: DL, MCID: TII->get(Opcode: Mips::ADDiu), DestReg: VR1) |
3069 | .addReg(RegNo: Mips::ZERO).addImm(Val: 1); |
3070 | |
3071 | // Insert phi function to $Sink. |
3072 | BuildMI(BB&: *Sink, I: Sink->begin(), MIMD: DL, MCID: TII->get(Opcode: Mips::PHI), |
3073 | DestReg: MI.getOperand(i: 0).getReg()) |
3074 | .addReg(RegNo: VR2) |
3075 | .addMBB(MBB: FBB) |
3076 | .addReg(RegNo: VR1) |
3077 | .addMBB(MBB: TBB); |
3078 | |
3079 | MI.eraseFromParent(); // The pseudo instruction is gone now. |
3080 | return Sink; |
3081 | } |
3082 | |
3083 | MachineBasicBlock *MipsSETargetLowering::emitMSACBranchPseudo( |
3084 | MachineInstr &MI, MachineBasicBlock *BB, unsigned BranchOp) const { |
3085 | // $bb: |
3086 | // vany_nonzero $rd, $ws |
3087 | // => |
3088 | // $bb: |
3089 | // bnz.b $ws, $tbb |
3090 | // b $fbb |
3091 | // $fbb: |
3092 | // li $rd1, 0 |
3093 | // b $sink |
3094 | // $tbb: |
3095 | // li $rd2, 1 |
3096 | // $sink: |
3097 | // $rd = phi($rd1, $fbb, $rd2, $tbb) |
3098 | |
3099 | MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); |
3100 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
3101 | const TargetRegisterClass *RC = &Mips::GPR32RegClass; |
3102 | DebugLoc DL = MI.getDebugLoc(); |
3103 | const BasicBlock *LLVM_BB = BB->getBasicBlock(); |
3104 | MachineFunction::iterator It = std::next(x: MachineFunction::iterator(BB)); |
3105 | MachineFunction *F = BB->getParent(); |
3106 | MachineBasicBlock *FBB = F->CreateMachineBasicBlock(BB: LLVM_BB); |
3107 | MachineBasicBlock *TBB = F->CreateMachineBasicBlock(BB: LLVM_BB); |
3108 | MachineBasicBlock *Sink = F->CreateMachineBasicBlock(BB: LLVM_BB); |
3109 | F->insert(MBBI: It, MBB: FBB); |
3110 | F->insert(MBBI: It, MBB: TBB); |
3111 | F->insert(MBBI: It, MBB: Sink); |
3112 | |
3113 | // Transfer the remainder of BB and its successor edges to Sink. |
3114 | Sink->splice(Where: Sink->begin(), Other: BB, From: std::next(x: MachineBasicBlock::iterator(MI)), |
3115 | To: BB->end()); |
3116 | Sink->transferSuccessorsAndUpdatePHIs(FromMBB: BB); |
3117 | |
3118 | // Add successors. |
3119 | BB->addSuccessor(Succ: FBB); |
3120 | BB->addSuccessor(Succ: TBB); |
3121 | FBB->addSuccessor(Succ: Sink); |
3122 | TBB->addSuccessor(Succ: Sink); |
3123 | |
3124 | // Insert the real bnz.b instruction to $BB. |
3125 | BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: BranchOp)) |
3126 | .addReg(RegNo: MI.getOperand(i: 1).getReg()) |
3127 | .addMBB(MBB: TBB); |
3128 | |
3129 | // Fill $FBB. |
3130 | Register RD1 = RegInfo.createVirtualRegister(RegClass: RC); |
3131 | BuildMI(BB&: *FBB, I: FBB->end(), MIMD: DL, MCID: TII->get(Opcode: Mips::ADDiu), DestReg: RD1) |
3132 | .addReg(RegNo: Mips::ZERO).addImm(Val: 0); |
3133 | BuildMI(BB&: *FBB, I: FBB->end(), MIMD: DL, MCID: TII->get(Opcode: Mips::B)).addMBB(MBB: Sink); |
3134 | |
3135 | // Fill $TBB. |
3136 | Register RD2 = RegInfo.createVirtualRegister(RegClass: RC); |
3137 | BuildMI(BB&: *TBB, I: TBB->end(), MIMD: DL, MCID: TII->get(Opcode: Mips::ADDiu), DestReg: RD2) |
3138 | .addReg(RegNo: Mips::ZERO).addImm(Val: 1); |
3139 | |
3140 | // Insert phi function to $Sink. |
3141 | BuildMI(BB&: *Sink, I: Sink->begin(), MIMD: DL, MCID: TII->get(Opcode: Mips::PHI), |
3142 | DestReg: MI.getOperand(i: 0).getReg()) |
3143 | .addReg(RegNo: RD1) |
3144 | .addMBB(MBB: FBB) |
3145 | .addReg(RegNo: RD2) |
3146 | .addMBB(MBB: TBB); |
3147 | |
3148 | MI.eraseFromParent(); // The pseudo instruction is gone now. |
3149 | return Sink; |
3150 | } |
3151 | |
3152 | // Emit the COPY_FW pseudo instruction. |
3153 | // |
3154 | // copy_fw_pseudo $fd, $ws, n |
3155 | // => |
//  splati.w $wt, $ws, $n
//  copy     $fd, $wt:sub_lo
3158 | // |
3159 | // When n is zero, the equivalent operation can be performed with (potentially) |
3160 | // zero instructions due to register overlaps. This optimization is never valid |
3161 | // for lane 1 because it would require FR=0 mode which isn't supported by MSA. |
3162 | MachineBasicBlock * |
3163 | MipsSETargetLowering::emitCOPY_FW(MachineInstr &MI, |
3164 | MachineBasicBlock *BB) const { |
3165 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
3166 | MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); |
3167 | DebugLoc DL = MI.getDebugLoc(); |
3168 | Register Fd = MI.getOperand(i: 0).getReg(); |
3169 | Register Ws = MI.getOperand(i: 1).getReg(); |
3170 | unsigned Lane = MI.getOperand(i: 2).getImm(); |
3171 | |
3172 | if (Lane == 0) { |
3173 | unsigned Wt = Ws; |
3174 | if (!Subtarget.useOddSPReg()) { |
3175 | // We must copy to an even-numbered MSA register so that the |
3176 | // single-precision sub-register is also guaranteed to be even-numbered. |
3177 | Wt = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128WEvensRegClass); |
3178 | |
3179 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::COPY), DestReg: Wt).addReg(RegNo: Ws); |
3180 | } |
3181 | |
3182 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::COPY), DestReg: Fd).addReg(RegNo: Wt, flags: 0, SubReg: Mips::sub_lo); |
3183 | } else { |
3184 | Register Wt = RegInfo.createVirtualRegister( |
3185 | RegClass: Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass |
3186 | : &Mips::MSA128WEvensRegClass); |
3187 | |
3188 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SPLATI_W), DestReg: Wt).addReg(RegNo: Ws).addImm(Val: Lane); |
3189 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::COPY), DestReg: Fd).addReg(RegNo: Wt, flags: 0, SubReg: Mips::sub_lo); |
3190 | } |
3191 | |
3192 | MI.eraseFromParent(); // The pseudo instruction is gone now. |
3193 | return BB; |
3194 | } |
3195 | |
3196 | // Emit the COPY_FD pseudo instruction. |
3197 | // |
3198 | // copy_fd_pseudo $fd, $ws, n |
3199 | // => |
3200 | // splati.d $wt, $ws, $n |
3201 | // copy $fd, $wt:sub_64 |
3202 | // |
3203 | // When n is zero, the equivalent operation can be performed with (potentially) |
3204 | // zero instructions due to register overlaps. This optimization is always |
3205 | // valid because FR=1 mode which is the only supported mode in MSA. |
3206 | MachineBasicBlock * |
3207 | MipsSETargetLowering::emitCOPY_FD(MachineInstr &MI, |
3208 | MachineBasicBlock *BB) const { |
3209 | assert(Subtarget.isFP64bit()); |
3210 | |
3211 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
3212 | MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); |
3213 | Register Fd = MI.getOperand(i: 0).getReg(); |
3214 | Register Ws = MI.getOperand(i: 1).getReg(); |
3215 | unsigned Lane = MI.getOperand(i: 2).getImm() * 2; |
3216 | DebugLoc DL = MI.getDebugLoc(); |
3217 | |
3218 | if (Lane == 0) |
3219 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::COPY), DestReg: Fd).addReg(RegNo: Ws, flags: 0, SubReg: Mips::sub_64); |
3220 | else { |
3221 | Register Wt = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128DRegClass); |
3222 | |
3223 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SPLATI_D), DestReg: Wt).addReg(RegNo: Ws).addImm(Val: 1); |
3224 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::COPY), DestReg: Fd).addReg(RegNo: Wt, flags: 0, SubReg: Mips::sub_64); |
3225 | } |
3226 | |
3227 | MI.eraseFromParent(); // The pseudo instruction is gone now. |
3228 | return BB; |
3229 | } |
3230 | |
3231 | // Emit the INSERT_FW pseudo instruction. |
3232 | // |
3233 | // insert_fw_pseudo $wd, $wd_in, $n, $fs |
3234 | // => |
3235 | // subreg_to_reg $wt:sub_lo, $fs |
3236 | // insve_w $wd[$n], $wd_in, $wt[0] |
3237 | MachineBasicBlock * |
3238 | MipsSETargetLowering::emitINSERT_FW(MachineInstr &MI, |
3239 | MachineBasicBlock *BB) const { |
3240 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
3241 | MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); |
3242 | DebugLoc DL = MI.getDebugLoc(); |
3243 | Register Wd = MI.getOperand(i: 0).getReg(); |
3244 | Register Wd_in = MI.getOperand(i: 1).getReg(); |
3245 | unsigned Lane = MI.getOperand(i: 2).getImm(); |
3246 | Register Fs = MI.getOperand(i: 3).getReg(); |
3247 | Register Wt = RegInfo.createVirtualRegister( |
3248 | RegClass: Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass |
3249 | : &Mips::MSA128WEvensRegClass); |
3250 | |
3251 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SUBREG_TO_REG), DestReg: Wt) |
3252 | .addImm(Val: 0) |
3253 | .addReg(RegNo: Fs) |
3254 | .addImm(Val: Mips::sub_lo); |
3255 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::INSVE_W), DestReg: Wd) |
3256 | .addReg(RegNo: Wd_in) |
3257 | .addImm(Val: Lane) |
3258 | .addReg(RegNo: Wt) |
3259 | .addImm(Val: 0); |
3260 | |
3261 | MI.eraseFromParent(); // The pseudo instruction is gone now. |
3262 | return BB; |
3263 | } |
3264 | |
3265 | // Emit the INSERT_FD pseudo instruction. |
3266 | // |
// insert_fd_pseudo $wd, $wd_in, $n, $fs
3268 | // => |
3269 | // subreg_to_reg $wt:sub_64, $fs |
3270 | // insve_d $wd[$n], $wd_in, $wt[0] |
3271 | MachineBasicBlock * |
3272 | MipsSETargetLowering::emitINSERT_FD(MachineInstr &MI, |
3273 | MachineBasicBlock *BB) const { |
3274 | assert(Subtarget.isFP64bit()); |
3275 | |
3276 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
3277 | MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); |
3278 | DebugLoc DL = MI.getDebugLoc(); |
3279 | Register Wd = MI.getOperand(i: 0).getReg(); |
3280 | Register Wd_in = MI.getOperand(i: 1).getReg(); |
3281 | unsigned Lane = MI.getOperand(i: 2).getImm(); |
3282 | Register Fs = MI.getOperand(i: 3).getReg(); |
3283 | Register Wt = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128DRegClass); |
3284 | |
3285 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SUBREG_TO_REG), DestReg: Wt) |
3286 | .addImm(Val: 0) |
3287 | .addReg(RegNo: Fs) |
3288 | .addImm(Val: Mips::sub_64); |
3289 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::INSVE_D), DestReg: Wd) |
3290 | .addReg(RegNo: Wd_in) |
3291 | .addImm(Val: Lane) |
3292 | .addReg(RegNo: Wt) |
3293 | .addImm(Val: 0); |
3294 | |
3295 | MI.eraseFromParent(); // The pseudo instruction is gone now. |
3296 | return BB; |
3297 | } |
3298 | |
3299 | // Emit the INSERT_([BHWD]|F[WD])_VIDX pseudo instruction. |
3300 | // |
3301 | // For integer: |
3302 | // (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $rs) |
3303 | // => |
//   (SLL $lanetmp1, $lane, <log2size>)
3305 | // (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1) |
3306 | // (INSERT_[BHWD], $wdtmp2, $wdtmp1, 0, $rs) |
3307 | // (NEG $lanetmp2, $lanetmp1) |
3308 | // (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2) |
3309 | // |
3310 | // For floating point: |
3311 | // (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $fs) |
3312 | // => |
3313 | // (SUBREG_TO_REG $wt, $fs, <subreg>) |
//   (SLL $lanetmp1, $lane, <log2size>)
3315 | // (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1) |
3316 | // (INSVE_[WD], $wdtmp2, 0, $wdtmp1, 0) |
3317 | // (NEG $lanetmp2, $lanetmp1) |
3318 | // (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2) |
3319 | MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX( |
3320 | MachineInstr &MI, MachineBasicBlock *BB, unsigned EltSizeInBytes, |
3321 | bool IsFP) const { |
3322 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
3323 | MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); |
3324 | DebugLoc DL = MI.getDebugLoc(); |
3325 | Register Wd = MI.getOperand(i: 0).getReg(); |
3326 | Register SrcVecReg = MI.getOperand(i: 1).getReg(); |
3327 | Register LaneReg = MI.getOperand(i: 2).getReg(); |
3328 | Register SrcValReg = MI.getOperand(i: 3).getReg(); |
3329 | |
3330 | const TargetRegisterClass *VecRC = nullptr; |
3331 | // FIXME: This should be true for N32 too. |
3332 | const TargetRegisterClass *GPRRC = |
3333 | Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; |
3334 | unsigned SubRegIdx = Subtarget.isABI_N64() ? Mips::sub_32 : 0; |
3335 | unsigned ShiftOp = Subtarget.isABI_N64() ? Mips::DSLL : Mips::SLL; |
3336 | unsigned EltLog2Size; |
3337 | unsigned InsertOp = 0; |
3338 | unsigned InsveOp = 0; |
3339 | switch (EltSizeInBytes) { |
3340 | default: |
3341 | llvm_unreachable("Unexpected size" ); |
3342 | case 1: |
3343 | EltLog2Size = 0; |
3344 | InsertOp = Mips::INSERT_B; |
3345 | InsveOp = Mips::INSVE_B; |
3346 | VecRC = &Mips::MSA128BRegClass; |
3347 | break; |
3348 | case 2: |
3349 | EltLog2Size = 1; |
3350 | InsertOp = Mips::INSERT_H; |
3351 | InsveOp = Mips::INSVE_H; |
3352 | VecRC = &Mips::MSA128HRegClass; |
3353 | break; |
3354 | case 4: |
3355 | EltLog2Size = 2; |
3356 | InsertOp = Mips::INSERT_W; |
3357 | InsveOp = Mips::INSVE_W; |
3358 | VecRC = &Mips::MSA128WRegClass; |
3359 | break; |
3360 | case 8: |
3361 | EltLog2Size = 3; |
3362 | InsertOp = Mips::INSERT_D; |
3363 | InsveOp = Mips::INSVE_D; |
3364 | VecRC = &Mips::MSA128DRegClass; |
3365 | break; |
3366 | } |
3367 | |
3368 | if (IsFP) { |
3369 | Register Wt = RegInfo.createVirtualRegister(RegClass: VecRC); |
3370 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SUBREG_TO_REG), DestReg: Wt) |
3371 | .addImm(Val: 0) |
3372 | .addReg(RegNo: SrcValReg) |
3373 | .addImm(Val: EltSizeInBytes == 8 ? Mips::sub_64 : Mips::sub_lo); |
3374 | SrcValReg = Wt; |
3375 | } |
3376 | |
3377 | // Convert the lane index into a byte index |
3378 | if (EltSizeInBytes != 1) { |
3379 | Register LaneTmp1 = RegInfo.createVirtualRegister(RegClass: GPRRC); |
3380 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: ShiftOp), DestReg: LaneTmp1) |
3381 | .addReg(RegNo: LaneReg) |
3382 | .addImm(Val: EltLog2Size); |
3383 | LaneReg = LaneTmp1; |
3384 | } |
3385 | |
3386 | // Rotate bytes around so that the desired lane is element zero |
3387 | Register WdTmp1 = RegInfo.createVirtualRegister(RegClass: VecRC); |
3388 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SLD_B), DestReg: WdTmp1) |
3389 | .addReg(RegNo: SrcVecReg) |
3390 | .addReg(RegNo: SrcVecReg) |
3391 | .addReg(RegNo: LaneReg, flags: 0, SubReg: SubRegIdx); |
3392 | |
3393 | Register WdTmp2 = RegInfo.createVirtualRegister(RegClass: VecRC); |
3394 | if (IsFP) { |
3395 | // Use insve.df to insert to element zero |
3396 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: InsveOp), DestReg: WdTmp2) |
3397 | .addReg(RegNo: WdTmp1) |
3398 | .addImm(Val: 0) |
3399 | .addReg(RegNo: SrcValReg) |
3400 | .addImm(Val: 0); |
3401 | } else { |
3402 | // Use insert.df to insert to element zero |
3403 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: InsertOp), DestReg: WdTmp2) |
3404 | .addReg(RegNo: WdTmp1) |
3405 | .addReg(RegNo: SrcValReg) |
3406 | .addImm(Val: 0); |
3407 | } |
3408 | |
3409 | // Rotate elements the rest of the way for a full rotation. |
3410 | // sld.df inteprets $rt modulo the number of columns so we only need to negate |
3411 | // the lane index to do this. |
3412 | Register LaneTmp2 = RegInfo.createVirtualRegister(RegClass: GPRRC); |
3413 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Subtarget.isABI_N64() ? Mips::DSUB : Mips::SUB), |
3414 | DestReg: LaneTmp2) |
3415 | .addReg(RegNo: Subtarget.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO) |
3416 | .addReg(RegNo: LaneReg); |
3417 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SLD_B), DestReg: Wd) |
3418 | .addReg(RegNo: WdTmp2) |
3419 | .addReg(RegNo: WdTmp2) |
3420 | .addReg(RegNo: LaneTmp2, flags: 0, SubReg: SubRegIdx); |
3421 | |
3422 | MI.eraseFromParent(); // The pseudo instruction is gone now. |
3423 | return BB; |
3424 | } |
3425 | |
3426 | // Emit the FILL_FW pseudo instruction. |
3427 | // |
3428 | // fill_fw_pseudo $wd, $fs |
3429 | // => |
3430 | // implicit_def $wt1 |
3431 | // insert_subreg $wt2:subreg_lo, $wt1, $fs |
3432 | // splati.w $wd, $wt2[0] |
3433 | MachineBasicBlock * |
3434 | MipsSETargetLowering::emitFILL_FW(MachineInstr &MI, |
3435 | MachineBasicBlock *BB) const { |
3436 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
3437 | MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); |
3438 | DebugLoc DL = MI.getDebugLoc(); |
3439 | Register Wd = MI.getOperand(i: 0).getReg(); |
3440 | Register Fs = MI.getOperand(i: 1).getReg(); |
3441 | Register Wt1 = RegInfo.createVirtualRegister( |
3442 | RegClass: Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass |
3443 | : &Mips::MSA128WEvensRegClass); |
3444 | Register Wt2 = RegInfo.createVirtualRegister( |
3445 | RegClass: Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass |
3446 | : &Mips::MSA128WEvensRegClass); |
3447 | |
3448 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::IMPLICIT_DEF), DestReg: Wt1); |
3449 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::INSERT_SUBREG), DestReg: Wt2) |
3450 | .addReg(RegNo: Wt1) |
3451 | .addReg(RegNo: Fs) |
3452 | .addImm(Val: Mips::sub_lo); |
3453 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SPLATI_W), DestReg: Wd).addReg(RegNo: Wt2).addImm(Val: 0); |
3454 | |
3455 | MI.eraseFromParent(); // The pseudo instruction is gone now. |
3456 | return BB; |
3457 | } |
3458 | |
3459 | // Emit the FILL_FD pseudo instruction. |
3460 | // |
3461 | // fill_fd_pseudo $wd, $fs |
3462 | // => |
3463 | // implicit_def $wt1 |
3464 | // insert_subreg $wt2:subreg_64, $wt1, $fs |
3465 | // splati.d $wd, $wt2[0] |
3466 | MachineBasicBlock * |
3467 | MipsSETargetLowering::emitFILL_FD(MachineInstr &MI, |
3468 | MachineBasicBlock *BB) const { |
3469 | assert(Subtarget.isFP64bit()); |
3470 | |
3471 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
3472 | MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); |
3473 | DebugLoc DL = MI.getDebugLoc(); |
3474 | Register Wd = MI.getOperand(i: 0).getReg(); |
3475 | Register Fs = MI.getOperand(i: 1).getReg(); |
3476 | Register Wt1 = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128DRegClass); |
3477 | Register Wt2 = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128DRegClass); |
3478 | |
3479 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::IMPLICIT_DEF), DestReg: Wt1); |
3480 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::INSERT_SUBREG), DestReg: Wt2) |
3481 | .addReg(RegNo: Wt1) |
3482 | .addReg(RegNo: Fs) |
3483 | .addImm(Val: Mips::sub_64); |
3484 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SPLATI_D), DestReg: Wd).addReg(RegNo: Wt2).addImm(Val: 0); |
3485 | |
3486 | MI.eraseFromParent(); // The pseudo instruction is gone now. |
3487 | return BB; |
3488 | } |
3489 | |
// Emit the ST_F16_PSEUDO instruction to store a f16 value from an MSA
3491 | // register. |
3492 | // |
3493 | // STF16 MSA128F16:$wd, mem_simm10:$addr |
3494 | // => |
3495 | // copy_u.h $rtemp,$wd[0] |
3496 | // sh $rtemp, $addr |
3497 | // |
// Safety: We can't use st.h & co as they would overwrite the memory after
3499 | // the destination. It would require half floats be allocated 16 bytes(!) of |
3500 | // space. |
3501 | MachineBasicBlock * |
3502 | MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI, |
3503 | MachineBasicBlock *BB) const { |
3504 | |
3505 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
3506 | MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); |
3507 | DebugLoc DL = MI.getDebugLoc(); |
3508 | Register Ws = MI.getOperand(i: 0).getReg(); |
3509 | Register Rt = MI.getOperand(i: 1).getReg(); |
3510 | const MachineMemOperand &MMO = **MI.memoperands_begin(); |
3511 | unsigned Imm = MMO.getOffset(); |
3512 | |
3513 | // Caution: A load via the GOT can expand to a GPR32 operand, a load via |
3514 | // spill and reload can expand as a GPR64 operand. Examine the |
3515 | // operand in detail and default to ABI. |
3516 | const TargetRegisterClass *RC = |
3517 | MI.getOperand(i: 1).isReg() ? RegInfo.getRegClass(Reg: MI.getOperand(i: 1).getReg()) |
3518 | : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass |
3519 | : &Mips::GPR64RegClass); |
3520 | const bool UsingMips32 = RC == &Mips::GPR32RegClass; |
3521 | Register Rs = RegInfo.createVirtualRegister(RegClass: &Mips::GPR32RegClass); |
3522 | |
3523 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::COPY_U_H), DestReg: Rs).addReg(RegNo: Ws).addImm(Val: 0); |
3524 | if(!UsingMips32) { |
3525 | Register Tmp = RegInfo.createVirtualRegister(RegClass: &Mips::GPR64RegClass); |
3526 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SUBREG_TO_REG), DestReg: Tmp) |
3527 | .addImm(Val: 0) |
3528 | .addReg(RegNo: Rs) |
3529 | .addImm(Val: Mips::sub_32); |
3530 | Rs = Tmp; |
3531 | } |
3532 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: UsingMips32 ? Mips::SH : Mips::SH64)) |
3533 | .addReg(RegNo: Rs) |
3534 | .addReg(RegNo: Rt) |
3535 | .addImm(Val: Imm) |
3536 | .addMemOperand(MMO: BB->getParent()->getMachineMemOperand( |
3537 | MMO: &MMO, Offset: MMO.getOffset(), Size: MMO.getSize())); |
3538 | |
3539 | MI.eraseFromParent(); |
3540 | return BB; |
3541 | } |
3542 | |
3543 | // Emit the LD_F16_PSEDUO instruction to load a f16 value into an MSA register. |
3544 | // |
3545 | // LD_F16 MSA128F16:$wd, mem_simm10:$addr |
3546 | // => |
3547 | // lh $rtemp, $addr |
3548 | // fill.h $wd, $rtemp |
3549 | // |
3550 | // Safety: We can't use ld.h & co as they over-read from the source. |
3551 | // Additionally, if the address is not modulo 16, 2 cases can occur: |
3552 | // a) Segmentation fault as the load instruction reads from a memory page |
3553 | // memory it's not supposed to. |
3554 | // b) The load crosses an implementation specific boundary, requiring OS |
3555 | // intervention. |
3556 | MachineBasicBlock * |
3557 | MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI, |
3558 | MachineBasicBlock *BB) const { |
3559 | |
3560 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
3561 | MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); |
3562 | DebugLoc DL = MI.getDebugLoc(); |
3563 | Register Wd = MI.getOperand(i: 0).getReg(); |
3564 | |
3565 | // Caution: A load via the GOT can expand to a GPR32 operand, a load via |
3566 | // spill and reload can expand as a GPR64 operand. Examine the |
3567 | // operand in detail and default to ABI. |
3568 | const TargetRegisterClass *RC = |
3569 | MI.getOperand(i: 1).isReg() ? RegInfo.getRegClass(Reg: MI.getOperand(i: 1).getReg()) |
3570 | : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass |
3571 | : &Mips::GPR64RegClass); |
3572 | |
3573 | const bool UsingMips32 = RC == &Mips::GPR32RegClass; |
3574 | Register Rt = RegInfo.createVirtualRegister(RegClass: RC); |
3575 | |
3576 | MachineInstrBuilder MIB = |
3577 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: UsingMips32 ? Mips::LH : Mips::LH64), DestReg: Rt); |
3578 | for (const MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI.operands())) |
3579 | MIB.add(MO); |
3580 | |
3581 | if(!UsingMips32) { |
3582 | Register Tmp = RegInfo.createVirtualRegister(RegClass: &Mips::GPR32RegClass); |
3583 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::COPY), DestReg: Tmp).addReg(RegNo: Rt, flags: 0, SubReg: Mips::sub_32); |
3584 | Rt = Tmp; |
3585 | } |
3586 | |
3587 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FILL_H), DestReg: Wd).addReg(RegNo: Rt); |
3588 | |
3589 | MI.eraseFromParent(); |
3590 | return BB; |
3591 | } |
3592 | |
3593 | // Emit the FPROUND_PSEUDO instruction. |
3594 | // |
3595 | // Round an FGR64Opnd, FGR32Opnd to an f16. |
3596 | // |
3597 | // Safety: Cycle the operand through the GPRs so the result always ends up |
3598 | // the correct MSA register. |
3599 | // |
3600 | // FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fs |
3601 | // / FGR64Opnd:$Fs and MSA128F16:$Wd to the same physical register |
3602 | // (which they can be, as the MSA registers are defined to alias the |
3603 | // FPU's 64 bit and 32 bit registers) the result can be accessed using |
3604 | // the correct register class. That requires operands be tie-able across |
3605 | // register classes which have a sub/super register class relationship. |
3606 | // |
3607 | // For FPG32Opnd: |
3608 | // |
3609 | // FPROUND MSA128F16:$wd, FGR32Opnd:$fs |
3610 | // => |
3611 | // mfc1 $rtemp, $fs |
3612 | // fill.w $rtemp, $wtemp |
3613 | // fexdo.w $wd, $wtemp, $wtemp |
3614 | // |
3615 | // For FPG64Opnd on mips32r2+: |
3616 | // |
3617 | // FPROUND MSA128F16:$wd, FGR64Opnd:$fs |
3618 | // => |
3619 | // mfc1 $rtemp, $fs |
3620 | // fill.w $rtemp, $wtemp |
3621 | // mfhc1 $rtemp2, $fs |
3622 | // insert.w $wtemp[1], $rtemp2 |
3623 | // insert.w $wtemp[3], $rtemp2 |
3624 | // fexdo.w $wtemp2, $wtemp, $wtemp |
3625 | // fexdo.h $wd, $temp2, $temp2 |
3626 | // |
3627 | // For FGR64Opnd on mips64r2+: |
3628 | // |
3629 | // FPROUND MSA128F16:$wd, FGR64Opnd:$fs |
3630 | // => |
3631 | // dmfc1 $rtemp, $fs |
3632 | // fill.d $rtemp, $wtemp |
3633 | // fexdo.w $wtemp2, $wtemp, $wtemp |
3634 | // fexdo.h $wd, $wtemp2, $wtemp2 |
3635 | // |
3636 | // Safety note: As $wtemp is UNDEF, we may provoke a spurious exception if the |
3637 | // undef bits are "just right" and the exception enable bits are |
3638 | // set. By using fill.w to replicate $fs into all elements over |
3639 | // insert.w for one element, we avoid that potiential case. If |
3640 | // fexdo.[hw] causes an exception in, the exception is valid and it |
3641 | // occurs for all elements. |
3642 | MachineBasicBlock * |
3643 | MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI, |
3644 | MachineBasicBlock *BB, |
3645 | bool IsFGR64) const { |
3646 | |
3647 | // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous |
3648 | // here. It's technically doable to support MIPS32 here, but the ISA forbids |
3649 | // it. |
3650 | assert(Subtarget.hasMSA() && Subtarget.hasMips32r2()); |
3651 | |
3652 | bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64; |
3653 | bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64; |
3654 | |
3655 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
3656 | DebugLoc DL = MI.getDebugLoc(); |
3657 | Register Wd = MI.getOperand(i: 0).getReg(); |
3658 | Register Fs = MI.getOperand(i: 1).getReg(); |
3659 | |
3660 | MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); |
3661 | Register Wtemp = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128WRegClass); |
3662 | const TargetRegisterClass *GPRRC = |
3663 | IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; |
3664 | unsigned MFC1Opc = IsFGR64onMips64 |
3665 | ? Mips::DMFC1 |
3666 | : (IsFGR64onMips32 ? Mips::MFC1_D64 : Mips::MFC1); |
3667 | unsigned FILLOpc = IsFGR64onMips64 ? Mips::FILL_D : Mips::FILL_W; |
3668 | |
3669 | // Perform the register class copy as mentioned above. |
3670 | Register Rtemp = RegInfo.createVirtualRegister(RegClass: GPRRC); |
3671 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: MFC1Opc), DestReg: Rtemp).addReg(RegNo: Fs); |
3672 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: FILLOpc), DestReg: Wtemp).addReg(RegNo: Rtemp); |
3673 | unsigned WPHI = Wtemp; |
3674 | |
3675 | if (IsFGR64onMips32) { |
3676 | Register Rtemp2 = RegInfo.createVirtualRegister(RegClass: GPRRC); |
3677 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::MFHC1_D64), DestReg: Rtemp2).addReg(RegNo: Fs); |
3678 | Register Wtemp2 = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128WRegClass); |
3679 | Register Wtemp3 = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128WRegClass); |
3680 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::INSERT_W), DestReg: Wtemp2) |
3681 | .addReg(RegNo: Wtemp) |
3682 | .addReg(RegNo: Rtemp2) |
3683 | .addImm(Val: 1); |
3684 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::INSERT_W), DestReg: Wtemp3) |
3685 | .addReg(RegNo: Wtemp2) |
3686 | .addReg(RegNo: Rtemp2) |
3687 | .addImm(Val: 3); |
3688 | WPHI = Wtemp3; |
3689 | } |
3690 | |
3691 | if (IsFGR64) { |
3692 | Register Wtemp2 = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128WRegClass); |
3693 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FEXDO_W), DestReg: Wtemp2) |
3694 | .addReg(RegNo: WPHI) |
3695 | .addReg(RegNo: WPHI); |
3696 | WPHI = Wtemp2; |
3697 | } |
3698 | |
3699 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FEXDO_H), DestReg: Wd).addReg(RegNo: WPHI).addReg(RegNo: WPHI); |
3700 | |
3701 | MI.eraseFromParent(); |
3702 | return BB; |
3703 | } |
3704 | |
3705 | // Emit the FPEXTEND_PSEUDO instruction. |
3706 | // |
3707 | // Expand an f16 to either a FGR32Opnd or FGR64Opnd. |
3708 | // |
3709 | // Safety: Cycle the result through the GPRs so the result always ends up |
3710 | // the correct floating point register. |
3711 | // |
3712 | // FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fd |
3713 | // / FGR64Opnd:$Fd and MSA128F16:$Ws to the same physical register |
3714 | // (which they can be, as the MSA registers are defined to alias the |
3715 | // FPU's 64 bit and 32 bit registers) the result can be accessed using |
3716 | // the correct register class. That requires operands be tie-able across |
3717 | // register classes which have a sub/super register class relationship. I |
3718 | // haven't checked. |
3719 | // |
3720 | // For FGR32Opnd: |
3721 | // |
3722 | // FPEXTEND FGR32Opnd:$fd, MSA128F16:$ws |
3723 | // => |
3724 | // fexupr.w $wtemp, $ws |
3725 | // copy_s.w $rtemp, $ws[0] |
3726 | // mtc1 $rtemp, $fd |
3727 | // |
3728 | // For FGR64Opnd on Mips64: |
3729 | // |
3730 | // FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws |
3731 | // => |
3732 | // fexupr.w $wtemp, $ws |
3733 | // fexupr.d $wtemp2, $wtemp |
3734 | // copy_s.d $rtemp, $wtemp2s[0] |
3735 | // dmtc1 $rtemp, $fd |
3736 | // |
3737 | // For FGR64Opnd on Mips32: |
3738 | // |
3739 | // FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws |
3740 | // => |
3741 | // fexupr.w $wtemp, $ws |
3742 | // fexupr.d $wtemp2, $wtemp |
3743 | // copy_s.w $rtemp, $wtemp2[0] |
3744 | // mtc1 $rtemp, $ftemp |
3745 | // copy_s.w $rtemp2, $wtemp2[1] |
3746 | // $fd = mthc1 $rtemp2, $ftemp |
3747 | MachineBasicBlock * |
3748 | MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI, |
3749 | MachineBasicBlock *BB, |
3750 | bool IsFGR64) const { |
3751 | |
3752 | // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous |
3753 | // here. It's technically doable to support MIPS32 here, but the ISA forbids |
3754 | // it. |
3755 | assert(Subtarget.hasMSA() && Subtarget.hasMips32r2()); |
3756 | |
3757 | bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64; |
3758 | bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64; |
3759 | |
3760 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
3761 | DebugLoc DL = MI.getDebugLoc(); |
3762 | Register Fd = MI.getOperand(i: 0).getReg(); |
3763 | Register Ws = MI.getOperand(i: 1).getReg(); |
3764 | |
3765 | MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); |
3766 | const TargetRegisterClass *GPRRC = |
3767 | IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; |
3768 | unsigned MTC1Opc = IsFGR64onMips64 |
3769 | ? Mips::DMTC1 |
3770 | : (IsFGR64onMips32 ? Mips::MTC1_D64 : Mips::MTC1); |
3771 | Register COPYOpc = IsFGR64onMips64 ? Mips::COPY_S_D : Mips::COPY_S_W; |
3772 | |
3773 | Register Wtemp = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128WRegClass); |
3774 | Register WPHI = Wtemp; |
3775 | |
3776 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FEXUPR_W), DestReg: Wtemp).addReg(RegNo: Ws); |
3777 | if (IsFGR64) { |
3778 | WPHI = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128DRegClass); |
3779 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FEXUPR_D), DestReg: WPHI).addReg(RegNo: Wtemp); |
3780 | } |
3781 | |
3782 | // Perform the safety regclass copy mentioned above. |
3783 | Register Rtemp = RegInfo.createVirtualRegister(RegClass: GPRRC); |
3784 | Register FPRPHI = IsFGR64onMips32 |
3785 | ? RegInfo.createVirtualRegister(RegClass: &Mips::FGR64RegClass) |
3786 | : Fd; |
3787 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: COPYOpc), DestReg: Rtemp).addReg(RegNo: WPHI).addImm(Val: 0); |
3788 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: MTC1Opc), DestReg: FPRPHI).addReg(RegNo: Rtemp); |
3789 | |
3790 | if (IsFGR64onMips32) { |
3791 | Register Rtemp2 = RegInfo.createVirtualRegister(RegClass: GPRRC); |
3792 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::COPY_S_W), DestReg: Rtemp2) |
3793 | .addReg(RegNo: WPHI) |
3794 | .addImm(Val: 1); |
3795 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::MTHC1_D64), DestReg: Fd) |
3796 | .addReg(RegNo: FPRPHI) |
3797 | .addReg(RegNo: Rtemp2); |
3798 | } |
3799 | |
3800 | MI.eraseFromParent(); |
3801 | return BB; |
3802 | } |
3803 | |
3804 | // Emit the FEXP2_W_1 pseudo instructions. |
3805 | // |
3806 | // fexp2_w_1_pseudo $wd, $wt |
3807 | // => |
3808 | // ldi.w $ws, 1 |
3809 | // fexp2.w $wd, $ws, $wt |
3810 | MachineBasicBlock * |
3811 | MipsSETargetLowering::emitFEXP2_W_1(MachineInstr &MI, |
3812 | MachineBasicBlock *BB) const { |
3813 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
3814 | MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); |
3815 | const TargetRegisterClass *RC = &Mips::MSA128WRegClass; |
3816 | Register Ws1 = RegInfo.createVirtualRegister(RegClass: RC); |
3817 | Register Ws2 = RegInfo.createVirtualRegister(RegClass: RC); |
3818 | DebugLoc DL = MI.getDebugLoc(); |
3819 | |
3820 | // Splat 1.0 into a vector |
3821 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::LDI_W), DestReg: Ws1).addImm(Val: 1); |
3822 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FFINT_U_W), DestReg: Ws2).addReg(RegNo: Ws1); |
3823 | |
3824 | // Emit 1.0 * fexp2(Wt) |
3825 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FEXP2_W), DestReg: MI.getOperand(i: 0).getReg()) |
3826 | .addReg(RegNo: Ws2) |
3827 | .addReg(RegNo: MI.getOperand(i: 1).getReg()); |
3828 | |
3829 | MI.eraseFromParent(); // The pseudo instruction is gone now. |
3830 | return BB; |
3831 | } |
3832 | |
3833 | // Emit the FEXP2_D_1 pseudo instructions. |
3834 | // |
3835 | // fexp2_d_1_pseudo $wd, $wt |
3836 | // => |
3837 | // ldi.d $ws, 1 |
3838 | // fexp2.d $wd, $ws, $wt |
3839 | MachineBasicBlock * |
3840 | MipsSETargetLowering::emitFEXP2_D_1(MachineInstr &MI, |
3841 | MachineBasicBlock *BB) const { |
3842 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
3843 | MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); |
3844 | const TargetRegisterClass *RC = &Mips::MSA128DRegClass; |
3845 | Register Ws1 = RegInfo.createVirtualRegister(RegClass: RC); |
3846 | Register Ws2 = RegInfo.createVirtualRegister(RegClass: RC); |
3847 | DebugLoc DL = MI.getDebugLoc(); |
3848 | |
3849 | // Splat 1.0 into a vector |
3850 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::LDI_D), DestReg: Ws1).addImm(Val: 1); |
3851 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FFINT_U_D), DestReg: Ws2).addReg(RegNo: Ws1); |
3852 | |
3853 | // Emit 1.0 * fexp2(Wt) |
3854 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FEXP2_D), DestReg: MI.getOperand(i: 0).getReg()) |
3855 | .addReg(RegNo: Ws2) |
3856 | .addReg(RegNo: MI.getOperand(i: 1).getReg()); |
3857 | |
3858 | MI.eraseFromParent(); // The pseudo instruction is gone now. |
3859 | return BB; |
3860 | } |
3861 | |