1 | //===- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface -------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Subclass of MipsTargetLowering specialized for mips32/64. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "MipsSEISelLowering.h" |
14 | #include "MipsMachineFunction.h" |
15 | #include "MipsRegisterInfo.h" |
16 | #include "MipsSubtarget.h" |
17 | #include "llvm/ADT/APInt.h" |
18 | #include "llvm/ADT/STLExtras.h" |
19 | #include "llvm/ADT/SmallVector.h" |
20 | #include "llvm/CodeGen/CallingConvLower.h" |
21 | #include "llvm/CodeGen/ISDOpcodes.h" |
22 | #include "llvm/CodeGen/MachineBasicBlock.h" |
23 | #include "llvm/CodeGen/MachineFunction.h" |
24 | #include "llvm/CodeGen/MachineInstr.h" |
25 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
26 | #include "llvm/CodeGen/MachineMemOperand.h" |
27 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
28 | #include "llvm/CodeGen/SelectionDAG.h" |
29 | #include "llvm/CodeGen/SelectionDAGNodes.h" |
30 | #include "llvm/CodeGen/TargetInstrInfo.h" |
31 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
32 | #include "llvm/CodeGen/ValueTypes.h" |
33 | #include "llvm/CodeGenTypes/MachineValueType.h" |
34 | #include "llvm/IR/DebugLoc.h" |
35 | #include "llvm/IR/Intrinsics.h" |
36 | #include "llvm/IR/IntrinsicsMips.h" |
37 | #include "llvm/Support/Casting.h" |
38 | #include "llvm/Support/CommandLine.h" |
39 | #include "llvm/Support/Debug.h" |
40 | #include "llvm/Support/ErrorHandling.h" |
41 | #include "llvm/Support/raw_ostream.h" |
42 | #include "llvm/TargetParser/Triple.h" |
43 | #include <algorithm> |
44 | #include <cassert> |
45 | #include <cstddef> |
46 | #include <cstdint> |
47 | #include <iterator> |
48 | #include <utility> |
49 | |
50 | using namespace llvm; |
51 | |
52 | #define DEBUG_TYPE "mips-isel" |
53 | |
54 | static cl::opt<bool> |
55 | UseMipsTailCalls("mips-tail-calls" , cl::Hidden, |
56 | cl::desc("MIPS: permit tail calls." ), cl::init(Val: false)); |
57 | |
58 | static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1" , cl::init(Val: false), |
59 | cl::desc("Expand double precision loads and " |
60 | "stores to their single precision " |
61 | "counterparts" )); |
62 | |
63 | // Widen the v2 vectors to the register width, i.e. v2i16 -> v8i16, |
64 | // v2i32 -> v4i32, etc, to ensure the correct rail size is used, i.e. |
65 | // INST.h for v16, INST.w for v32, INST.d for v64. |
66 | TargetLoweringBase::LegalizeTypeAction |
67 | MipsSETargetLowering::getPreferredVectorAction(MVT VT) const { |
68 | if (this->Subtarget.hasMSA()) { |
69 | switch (VT.SimpleTy) { |
70 | // Leave v2i1 vectors to be promoted to larger ones. |
71 | // Other i1 types will be promoted by default. |
72 | case MVT::v2i1: |
73 | return TypePromoteInteger; |
74 | break; |
75 | // 16-bit vector types (v2 and longer) |
76 | case MVT::v2i8: |
77 | // 32-bit vector types (v2 and longer) |
78 | case MVT::v2i16: |
79 | case MVT::v4i8: |
80 | // 64-bit vector types (v2 and longer) |
81 | case MVT::v2i32: |
82 | case MVT::v4i16: |
83 | case MVT::v8i8: |
84 | return TypeWidenVector; |
85 | break; |
86 | // Only word (.w) and doubleword (.d) are available for floating point |
87 | // vectors. That means floating point vectors should be either v2f64 |
88 | // or v4f32. |
89 | // Here we only explicitly widen the f32 types - f16 will be promoted |
90 | // by default. |
91 | case MVT::v2f32: |
92 | case MVT::v3f32: |
93 | return TypeWidenVector; |
94 | // v2i64 is already 128-bit wide. |
95 | default: |
96 | break; |
97 | } |
98 | } |
99 | return TargetLoweringBase::getPreferredVectorAction(VT); |
100 | } |
101 | |
102 | MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM, |
103 | const MipsSubtarget &STI) |
104 | : MipsTargetLowering(TM, STI) { |
105 | // Set up the register classes |
106 | addRegisterClass(VT: MVT::i32, RC: &Mips::GPR32RegClass); |
107 | |
108 | if (Subtarget.isGP64bit()) |
109 | addRegisterClass(VT: MVT::i64, RC: &Mips::GPR64RegClass); |
110 | |
111 | if (Subtarget.hasDSP() || Subtarget.hasMSA()) { |
112 | // Expand all truncating stores and extending loads. |
113 | for (MVT VT0 : MVT::fixedlen_vector_valuetypes()) { |
114 | for (MVT VT1 : MVT::fixedlen_vector_valuetypes()) { |
115 | setTruncStoreAction(ValVT: VT0, MemVT: VT1, Action: Expand); |
116 | setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: VT0, MemVT: VT1, Action: Expand); |
117 | setLoadExtAction(ExtType: ISD::ZEXTLOAD, ValVT: VT0, MemVT: VT1, Action: Expand); |
118 | setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: VT0, MemVT: VT1, Action: Expand); |
119 | } |
120 | } |
121 | } |
122 | |
123 | if (Subtarget.hasDSP()) { |
124 | MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8}; |
125 | |
126 | for (const auto &VecTy : VecTys) { |
127 | addRegisterClass(VT: VecTy, RC: &Mips::DSPRRegClass); |
128 | |
129 | // Expand all builtin opcodes. |
130 | for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) |
131 | setOperationAction(Op: Opc, VT: VecTy, Action: Expand); |
132 | |
133 | setOperationAction(Op: ISD::ADD, VT: VecTy, Action: Legal); |
134 | setOperationAction(Op: ISD::SUB, VT: VecTy, Action: Legal); |
135 | setOperationAction(Op: ISD::LOAD, VT: VecTy, Action: Legal); |
136 | setOperationAction(Op: ISD::STORE, VT: VecTy, Action: Legal); |
137 | setOperationAction(Op: ISD::BITCAST, VT: VecTy, Action: Legal); |
138 | } |
139 | |
140 | setTargetDAGCombine( |
141 | {ISD::SHL, ISD::SRA, ISD::SRL, ISD::SETCC, ISD::VSELECT}); |
142 | |
143 | if (Subtarget.hasMips32r2()) { |
144 | setOperationAction(Op: ISD::ADDC, VT: MVT::i32, Action: Legal); |
145 | setOperationAction(Op: ISD::ADDE, VT: MVT::i32, Action: Legal); |
146 | } |
147 | } |
148 | |
149 | if (Subtarget.hasDSPR2()) |
150 | setOperationAction(Op: ISD::MUL, VT: MVT::v2i16, Action: Legal); |
151 | |
152 | if (Subtarget.hasMSA()) { |
153 | addMSAIntType(Ty: MVT::v16i8, RC: &Mips::MSA128BRegClass); |
154 | addMSAIntType(Ty: MVT::v8i16, RC: &Mips::MSA128HRegClass); |
155 | addMSAIntType(Ty: MVT::v4i32, RC: &Mips::MSA128WRegClass); |
156 | addMSAIntType(Ty: MVT::v2i64, RC: &Mips::MSA128DRegClass); |
157 | addMSAFloatType(Ty: MVT::v8f16, RC: &Mips::MSA128HRegClass); |
158 | addMSAFloatType(Ty: MVT::v4f32, RC: &Mips::MSA128WRegClass); |
159 | addMSAFloatType(Ty: MVT::v2f64, RC: &Mips::MSA128DRegClass); |
160 | |
161 | // f16 is a storage-only type, always promote it to f32. |
162 | addRegisterClass(VT: MVT::f16, RC: &Mips::MSA128HRegClass); |
163 | setOperationAction(Op: ISD::SETCC, VT: MVT::f16, Action: Promote); |
164 | setOperationAction(Op: ISD::BR_CC, VT: MVT::f16, Action: Promote); |
165 | setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f16, Action: Promote); |
166 | setOperationAction(Op: ISD::SELECT, VT: MVT::f16, Action: Promote); |
167 | setOperationAction(Op: ISD::FADD, VT: MVT::f16, Action: Promote); |
168 | setOperationAction(Op: ISD::FSUB, VT: MVT::f16, Action: Promote); |
169 | setOperationAction(Op: ISD::FMUL, VT: MVT::f16, Action: Promote); |
170 | setOperationAction(Op: ISD::FDIV, VT: MVT::f16, Action: Promote); |
171 | setOperationAction(Op: ISD::FREM, VT: MVT::f16, Action: Promote); |
172 | setOperationAction(Op: ISD::FMA, VT: MVT::f16, Action: Promote); |
173 | setOperationAction(Op: ISD::FNEG, VT: MVT::f16, Action: Promote); |
174 | setOperationAction(Op: ISD::FABS, VT: MVT::f16, Action: Promote); |
175 | setOperationAction(Op: ISD::FCEIL, VT: MVT::f16, Action: Promote); |
176 | setOperationAction(Op: ISD::FCOPYSIGN, VT: MVT::f16, Action: Promote); |
177 | setOperationAction(Op: ISD::FCOS, VT: MVT::f16, Action: Promote); |
178 | setOperationAction(Op: ISD::FP_EXTEND, VT: MVT::f16, Action: Promote); |
179 | setOperationAction(Op: ISD::FFLOOR, VT: MVT::f16, Action: Promote); |
180 | setOperationAction(Op: ISD::FNEARBYINT, VT: MVT::f16, Action: Promote); |
181 | setOperationAction(Op: ISD::FPOW, VT: MVT::f16, Action: Promote); |
182 | setOperationAction(Op: ISD::FPOWI, VT: MVT::f16, Action: Promote); |
183 | setOperationAction(Op: ISD::FRINT, VT: MVT::f16, Action: Promote); |
184 | setOperationAction(Op: ISD::FSIN, VT: MVT::f16, Action: Promote); |
185 | setOperationAction(Op: ISD::FSINCOS, VT: MVT::f16, Action: Promote); |
186 | setOperationAction(Op: ISD::FSQRT, VT: MVT::f16, Action: Promote); |
187 | setOperationAction(Op: ISD::FEXP, VT: MVT::f16, Action: Promote); |
188 | setOperationAction(Op: ISD::FEXP2, VT: MVT::f16, Action: Promote); |
189 | setOperationAction(Op: ISD::FLOG, VT: MVT::f16, Action: Promote); |
190 | setOperationAction(Op: ISD::FLOG2, VT: MVT::f16, Action: Promote); |
191 | setOperationAction(Op: ISD::FLOG10, VT: MVT::f16, Action: Promote); |
192 | setOperationAction(Op: ISD::FROUND, VT: MVT::f16, Action: Promote); |
193 | setOperationAction(Op: ISD::FTRUNC, VT: MVT::f16, Action: Promote); |
194 | setOperationAction(Op: ISD::FMINNUM, VT: MVT::f16, Action: Promote); |
195 | setOperationAction(Op: ISD::FMAXNUM, VT: MVT::f16, Action: Promote); |
196 | setOperationAction(Op: ISD::FMINIMUM, VT: MVT::f16, Action: Promote); |
197 | setOperationAction(Op: ISD::FMAXIMUM, VT: MVT::f16, Action: Promote); |
198 | |
199 | setTargetDAGCombine({ISD::AND, ISD::OR, ISD::SRA, ISD::VSELECT, ISD::XOR}); |
200 | } |
201 | |
202 | if (!Subtarget.useSoftFloat()) { |
203 | addRegisterClass(VT: MVT::f32, RC: &Mips::FGR32RegClass); |
204 | |
205 | // When dealing with single precision only, use libcalls |
206 | if (!Subtarget.isSingleFloat()) { |
207 | if (Subtarget.isFP64bit()) |
208 | addRegisterClass(VT: MVT::f64, RC: &Mips::FGR64RegClass); |
209 | else |
210 | addRegisterClass(VT: MVT::f64, RC: &Mips::AFGR64RegClass); |
211 | } |
212 | } |
213 | |
214 | setOperationAction(Op: ISD::SMUL_LOHI, VT: MVT::i32, Action: Custom); |
215 | setOperationAction(Op: ISD::UMUL_LOHI, VT: MVT::i32, Action: Custom); |
216 | setOperationAction(Op: ISD::MULHS, VT: MVT::i32, Action: Custom); |
217 | setOperationAction(Op: ISD::MULHU, VT: MVT::i32, Action: Custom); |
218 | |
219 | if (Subtarget.hasCnMips()) |
220 | setOperationAction(Op: ISD::MUL, VT: MVT::i64, Action: Legal); |
221 | else if (Subtarget.isGP64bit()) |
222 | setOperationAction(Op: ISD::MUL, VT: MVT::i64, Action: Custom); |
223 | |
224 | if (Subtarget.isGP64bit()) { |
225 | setOperationAction(Op: ISD::SMUL_LOHI, VT: MVT::i64, Action: Custom); |
226 | setOperationAction(Op: ISD::UMUL_LOHI, VT: MVT::i64, Action: Custom); |
227 | setOperationAction(Op: ISD::MULHS, VT: MVT::i64, Action: Custom); |
228 | setOperationAction(Op: ISD::MULHU, VT: MVT::i64, Action: Custom); |
229 | setOperationAction(Op: ISD::SDIVREM, VT: MVT::i64, Action: Custom); |
230 | setOperationAction(Op: ISD::UDIVREM, VT: MVT::i64, Action: Custom); |
231 | } |
232 | |
233 | setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::i64, Action: Custom); |
234 | setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::i64, Action: Custom); |
235 | |
236 | setOperationAction(Op: ISD::SDIVREM, VT: MVT::i32, Action: Custom); |
237 | setOperationAction(Op: ISD::UDIVREM, VT: MVT::i32, Action: Custom); |
238 | setOperationAction(Op: ISD::ATOMIC_FENCE, VT: MVT::Other, Action: Custom); |
239 | if (Subtarget.hasMips32r6()) { |
240 | setOperationAction(Op: ISD::LOAD, VT: MVT::i32, Action: Legal); |
241 | setOperationAction(Op: ISD::STORE, VT: MVT::i32, Action: Legal); |
242 | } else { |
243 | setOperationAction(Op: ISD::LOAD, VT: MVT::i32, Action: Custom); |
244 | setOperationAction(Op: ISD::STORE, VT: MVT::i32, Action: Custom); |
245 | } |
246 | |
247 | setTargetDAGCombine(ISD::MUL); |
248 | |
249 | setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::Other, Action: Custom); |
250 | setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::Other, Action: Custom); |
251 | setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::Other, Action: Custom); |
252 | |
253 | if (Subtarget.hasMips32r2() && !Subtarget.useSoftFloat() && |
254 | !Subtarget.hasMips64()) { |
255 | setOperationAction(Op: ISD::BITCAST, VT: MVT::i64, Action: Custom); |
256 | } |
257 | |
258 | if (NoDPLoadStore) { |
259 | setOperationAction(Op: ISD::LOAD, VT: MVT::f64, Action: Custom); |
260 | setOperationAction(Op: ISD::STORE, VT: MVT::f64, Action: Custom); |
261 | } |
262 | |
263 | if (Subtarget.hasMips32r6()) { |
264 | // MIPS32r6 replaces the accumulator-based multiplies with a three register |
265 | // instruction |
266 | setOperationAction(Op: ISD::SMUL_LOHI, VT: MVT::i32, Action: Expand); |
267 | setOperationAction(Op: ISD::UMUL_LOHI, VT: MVT::i32, Action: Expand); |
268 | setOperationAction(Op: ISD::MUL, VT: MVT::i32, Action: Legal); |
269 | setOperationAction(Op: ISD::MULHS, VT: MVT::i32, Action: Legal); |
270 | setOperationAction(Op: ISD::MULHU, VT: MVT::i32, Action: Legal); |
271 | |
272 | // MIPS32r6 replaces the accumulator-based division/remainder with separate |
273 | // three register division and remainder instructions. |
274 | setOperationAction(Op: ISD::SDIVREM, VT: MVT::i32, Action: Expand); |
275 | setOperationAction(Op: ISD::UDIVREM, VT: MVT::i32, Action: Expand); |
276 | setOperationAction(Op: ISD::SDIV, VT: MVT::i32, Action: Legal); |
277 | setOperationAction(Op: ISD::UDIV, VT: MVT::i32, Action: Legal); |
278 | setOperationAction(Op: ISD::SREM, VT: MVT::i32, Action: Legal); |
279 | setOperationAction(Op: ISD::UREM, VT: MVT::i32, Action: Legal); |
280 | |
281 | // MIPS32r6 replaces conditional moves with an equivalent that removes the |
282 | // need for three GPR read ports. |
283 | setOperationAction(Op: ISD::SETCC, VT: MVT::i32, Action: Legal); |
284 | setOperationAction(Op: ISD::SELECT, VT: MVT::i32, Action: Legal); |
285 | setOperationAction(Op: ISD::SELECT_CC, VT: MVT::i32, Action: Expand); |
286 | |
287 | setOperationAction(Op: ISD::SETCC, VT: MVT::f32, Action: Legal); |
288 | setOperationAction(Op: ISD::SELECT, VT: MVT::f32, Action: Legal); |
289 | setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f32, Action: Expand); |
290 | |
291 | assert(Subtarget.isFP64bit() && "FR=1 is required for MIPS32r6" ); |
292 | setOperationAction(Op: ISD::SETCC, VT: MVT::f64, Action: Legal); |
293 | setOperationAction(Op: ISD::SELECT, VT: MVT::f64, Action: Custom); |
294 | setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f64, Action: Expand); |
295 | |
296 | setOperationAction(Op: ISD::BRCOND, VT: MVT::Other, Action: Legal); |
297 | |
298 | // Floating point > and >= are supported via < and <= |
299 | setCondCodeAction(CCs: ISD::SETOGE, VT: MVT::f32, Action: Expand); |
300 | setCondCodeAction(CCs: ISD::SETOGT, VT: MVT::f32, Action: Expand); |
301 | setCondCodeAction(CCs: ISD::SETUGE, VT: MVT::f32, Action: Expand); |
302 | setCondCodeAction(CCs: ISD::SETUGT, VT: MVT::f32, Action: Expand); |
303 | |
304 | setCondCodeAction(CCs: ISD::SETOGE, VT: MVT::f64, Action: Expand); |
305 | setCondCodeAction(CCs: ISD::SETOGT, VT: MVT::f64, Action: Expand); |
306 | setCondCodeAction(CCs: ISD::SETUGE, VT: MVT::f64, Action: Expand); |
307 | setCondCodeAction(CCs: ISD::SETUGT, VT: MVT::f64, Action: Expand); |
308 | } |
309 | |
310 | if (Subtarget.hasMips64r6()) { |
311 | // MIPS64r6 replaces the accumulator-based multiplies with a three register |
312 | // instruction |
313 | setOperationAction(Op: ISD::SMUL_LOHI, VT: MVT::i64, Action: Expand); |
314 | setOperationAction(Op: ISD::UMUL_LOHI, VT: MVT::i64, Action: Expand); |
315 | setOperationAction(Op: ISD::MUL, VT: MVT::i64, Action: Legal); |
316 | setOperationAction(Op: ISD::MULHS, VT: MVT::i64, Action: Legal); |
317 | setOperationAction(Op: ISD::MULHU, VT: MVT::i64, Action: Legal); |
318 | |
319 | // MIPS32r6 replaces the accumulator-based division/remainder with separate |
320 | // three register division and remainder instructions. |
321 | setOperationAction(Op: ISD::SDIVREM, VT: MVT::i64, Action: Expand); |
322 | setOperationAction(Op: ISD::UDIVREM, VT: MVT::i64, Action: Expand); |
323 | setOperationAction(Op: ISD::SDIV, VT: MVT::i64, Action: Legal); |
324 | setOperationAction(Op: ISD::UDIV, VT: MVT::i64, Action: Legal); |
325 | setOperationAction(Op: ISD::SREM, VT: MVT::i64, Action: Legal); |
326 | setOperationAction(Op: ISD::UREM, VT: MVT::i64, Action: Legal); |
327 | |
328 | // MIPS64r6 replaces conditional moves with an equivalent that removes the |
329 | // need for three GPR read ports. |
330 | setOperationAction(Op: ISD::SETCC, VT: MVT::i64, Action: Legal); |
331 | setOperationAction(Op: ISD::SELECT, VT: MVT::i64, Action: Legal); |
332 | setOperationAction(Op: ISD::SELECT_CC, VT: MVT::i64, Action: Expand); |
333 | } |
334 | |
335 | computeRegisterProperties(TRI: Subtarget.getRegisterInfo()); |
336 | } |
337 | |
338 | const MipsTargetLowering * |
339 | llvm::createMipsSETargetLowering(const MipsTargetMachine &TM, |
340 | const MipsSubtarget &STI) { |
341 | return new MipsSETargetLowering(TM, STI); |
342 | } |
343 | |
344 | const TargetRegisterClass * |
345 | MipsSETargetLowering::getRepRegClassFor(MVT VT) const { |
346 | if (VT == MVT::Untyped) |
347 | return Subtarget.hasDSP() ? &Mips::ACC64DSPRegClass : &Mips::ACC64RegClass; |
348 | |
349 | return TargetLowering::getRepRegClassFor(VT); |
350 | } |
351 | |
352 | // Enable MSA support for the given integer type and Register class. |
353 | void MipsSETargetLowering:: |
354 | addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { |
355 | addRegisterClass(VT: Ty, RC); |
356 | |
357 | // Expand all builtin opcodes. |
358 | for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) |
359 | setOperationAction(Op: Opc, VT: Ty, Action: Expand); |
360 | |
361 | setOperationAction(Op: ISD::BITCAST, VT: Ty, Action: Legal); |
362 | setOperationAction(Op: ISD::LOAD, VT: Ty, Action: Legal); |
363 | setOperationAction(Op: ISD::STORE, VT: Ty, Action: Legal); |
364 | setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: Ty, Action: Custom); |
365 | setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: Ty, Action: Legal); |
366 | setOperationAction(Op: ISD::BUILD_VECTOR, VT: Ty, Action: Custom); |
367 | setOperationAction(Op: ISD::UNDEF, VT: Ty, Action: Legal); |
368 | |
369 | setOperationAction(Op: ISD::ADD, VT: Ty, Action: Legal); |
370 | setOperationAction(Op: ISD::AND, VT: Ty, Action: Legal); |
371 | setOperationAction(Op: ISD::CTLZ, VT: Ty, Action: Legal); |
372 | setOperationAction(Op: ISD::CTPOP, VT: Ty, Action: Legal); |
373 | setOperationAction(Op: ISD::MUL, VT: Ty, Action: Legal); |
374 | setOperationAction(Op: ISD::OR, VT: Ty, Action: Legal); |
375 | setOperationAction(Op: ISD::SDIV, VT: Ty, Action: Legal); |
376 | setOperationAction(Op: ISD::SREM, VT: Ty, Action: Legal); |
377 | setOperationAction(Op: ISD::SHL, VT: Ty, Action: Legal); |
378 | setOperationAction(Op: ISD::SRA, VT: Ty, Action: Legal); |
379 | setOperationAction(Op: ISD::SRL, VT: Ty, Action: Legal); |
380 | setOperationAction(Op: ISD::SUB, VT: Ty, Action: Legal); |
381 | setOperationAction(Op: ISD::SMAX, VT: Ty, Action: Legal); |
382 | setOperationAction(Op: ISD::SMIN, VT: Ty, Action: Legal); |
383 | setOperationAction(Op: ISD::UDIV, VT: Ty, Action: Legal); |
384 | setOperationAction(Op: ISD::UREM, VT: Ty, Action: Legal); |
385 | setOperationAction(Op: ISD::UMAX, VT: Ty, Action: Legal); |
386 | setOperationAction(Op: ISD::UMIN, VT: Ty, Action: Legal); |
387 | setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: Ty, Action: Custom); |
388 | setOperationAction(Op: ISD::VSELECT, VT: Ty, Action: Legal); |
389 | setOperationAction(Op: ISD::XOR, VT: Ty, Action: Legal); |
390 | |
391 | if (Ty == MVT::v4i32 || Ty == MVT::v2i64) { |
392 | setOperationAction(Op: ISD::FP_TO_SINT, VT: Ty, Action: Legal); |
393 | setOperationAction(Op: ISD::FP_TO_UINT, VT: Ty, Action: Legal); |
394 | setOperationAction(Op: ISD::SINT_TO_FP, VT: Ty, Action: Legal); |
395 | setOperationAction(Op: ISD::UINT_TO_FP, VT: Ty, Action: Legal); |
396 | } |
397 | |
398 | setOperationAction(Op: ISD::SETCC, VT: Ty, Action: Legal); |
399 | setCondCodeAction(CCs: ISD::SETNE, VT: Ty, Action: Expand); |
400 | setCondCodeAction(CCs: ISD::SETGE, VT: Ty, Action: Expand); |
401 | setCondCodeAction(CCs: ISD::SETGT, VT: Ty, Action: Expand); |
402 | setCondCodeAction(CCs: ISD::SETUGE, VT: Ty, Action: Expand); |
403 | setCondCodeAction(CCs: ISD::SETUGT, VT: Ty, Action: Expand); |
404 | } |
405 | |
406 | // Enable MSA support for the given floating-point type and Register class. |
407 | void MipsSETargetLowering:: |
408 | addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { |
409 | addRegisterClass(VT: Ty, RC); |
410 | |
411 | // Expand all builtin opcodes. |
412 | for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) |
413 | setOperationAction(Op: Opc, VT: Ty, Action: Expand); |
414 | |
415 | setOperationAction(Op: ISD::LOAD, VT: Ty, Action: Legal); |
416 | setOperationAction(Op: ISD::STORE, VT: Ty, Action: Legal); |
417 | setOperationAction(Op: ISD::BITCAST, VT: Ty, Action: Legal); |
418 | setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: Ty, Action: Legal); |
419 | setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: Ty, Action: Legal); |
420 | setOperationAction(Op: ISD::BUILD_VECTOR, VT: Ty, Action: Custom); |
421 | |
422 | if (Ty != MVT::v8f16) { |
423 | setOperationAction(Op: ISD::FABS, VT: Ty, Action: Legal); |
424 | setOperationAction(Op: ISD::FADD, VT: Ty, Action: Legal); |
425 | setOperationAction(Op: ISD::FDIV, VT: Ty, Action: Legal); |
426 | setOperationAction(Op: ISD::FEXP2, VT: Ty, Action: Legal); |
427 | setOperationAction(Op: ISD::FLOG2, VT: Ty, Action: Legal); |
428 | setOperationAction(Op: ISD::FMA, VT: Ty, Action: Legal); |
429 | setOperationAction(Op: ISD::FMUL, VT: Ty, Action: Legal); |
430 | setOperationAction(Op: ISD::FRINT, VT: Ty, Action: Legal); |
431 | setOperationAction(Op: ISD::FSQRT, VT: Ty, Action: Legal); |
432 | setOperationAction(Op: ISD::FSUB, VT: Ty, Action: Legal); |
433 | setOperationAction(Op: ISD::VSELECT, VT: Ty, Action: Legal); |
434 | |
435 | setOperationAction(Op: ISD::SETCC, VT: Ty, Action: Legal); |
436 | setCondCodeAction(CCs: ISD::SETOGE, VT: Ty, Action: Expand); |
437 | setCondCodeAction(CCs: ISD::SETOGT, VT: Ty, Action: Expand); |
438 | setCondCodeAction(CCs: ISD::SETUGE, VT: Ty, Action: Expand); |
439 | setCondCodeAction(CCs: ISD::SETUGT, VT: Ty, Action: Expand); |
440 | setCondCodeAction(CCs: ISD::SETGE, VT: Ty, Action: Expand); |
441 | setCondCodeAction(CCs: ISD::SETGT, VT: Ty, Action: Expand); |
442 | } |
443 | } |
444 | |
445 | SDValue MipsSETargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { |
446 | if(!Subtarget.hasMips32r6()) |
447 | return MipsTargetLowering::LowerOperation(Op, DAG); |
448 | |
449 | EVT ResTy = Op->getValueType(ResNo: 0); |
450 | SDLoc DL(Op); |
451 | |
452 | // Although MTC1_D64 takes an i32 and writes an f64, the upper 32 bits of the |
453 | // floating point register are undefined. Not really an issue as sel.d, which |
454 | // is produced from an FSELECT node, only looks at bit 0. |
455 | SDValue Tmp = DAG.getNode(Opcode: MipsISD::MTC1_D64, DL, VT: MVT::f64, Operand: Op->getOperand(Num: 0)); |
456 | return DAG.getNode(Opcode: MipsISD::FSELECT, DL, VT: ResTy, N1: Tmp, N2: Op->getOperand(Num: 1), |
457 | N3: Op->getOperand(Num: 2)); |
458 | } |
459 | |
460 | bool MipsSETargetLowering::allowsMisalignedMemoryAccesses( |
461 | EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const { |
462 | MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy; |
463 | |
464 | if (Subtarget.systemSupportsUnalignedAccess()) { |
465 | // MIPS32r6/MIPS64r6 is required to support unaligned access. It's |
466 | // implementation defined whether this is handled by hardware, software, or |
467 | // a hybrid of the two but it's expected that most implementations will |
468 | // handle the majority of cases in hardware. |
469 | if (Fast) |
470 | *Fast = 1; |
471 | return true; |
472 | } else if (Subtarget.hasMips32r6()) { |
473 | return false; |
474 | } |
475 | |
476 | switch (SVT) { |
477 | case MVT::i64: |
478 | case MVT::i32: |
479 | if (Fast) |
480 | *Fast = 1; |
481 | return true; |
482 | default: |
483 | return false; |
484 | } |
485 | } |
486 | |
487 | SDValue MipsSETargetLowering::LowerOperation(SDValue Op, |
488 | SelectionDAG &DAG) const { |
489 | switch(Op.getOpcode()) { |
490 | case ISD::LOAD: return lowerLOAD(Op, DAG); |
491 | case ISD::STORE: return lowerSTORE(Op, DAG); |
492 | case ISD::SMUL_LOHI: return lowerMulDiv(Op, NewOpc: MipsISD::Mult, HasLo: true, HasHi: true, DAG); |
493 | case ISD::UMUL_LOHI: return lowerMulDiv(Op, NewOpc: MipsISD::Multu, HasLo: true, HasHi: true, DAG); |
494 | case ISD::MULHS: return lowerMulDiv(Op, NewOpc: MipsISD::Mult, HasLo: false, HasHi: true, DAG); |
495 | case ISD::MULHU: return lowerMulDiv(Op, NewOpc: MipsISD::Multu, HasLo: false, HasHi: true, DAG); |
496 | case ISD::MUL: return lowerMulDiv(Op, NewOpc: MipsISD::Mult, HasLo: true, HasHi: false, DAG); |
497 | case ISD::SDIVREM: return lowerMulDiv(Op, NewOpc: MipsISD::DivRem, HasLo: true, HasHi: true, DAG); |
498 | case ISD::UDIVREM: return lowerMulDiv(Op, NewOpc: MipsISD::DivRemU, HasLo: true, HasHi: true, |
499 | DAG); |
500 | case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG); |
501 | case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG); |
502 | case ISD::INTRINSIC_VOID: return lowerINTRINSIC_VOID(Op, DAG); |
503 | case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG); |
504 | case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG); |
505 | case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, DAG); |
506 | case ISD::SELECT: return lowerSELECT(Op, DAG); |
507 | case ISD::BITCAST: return lowerBITCAST(Op, DAG); |
508 | } |
509 | |
510 | return MipsTargetLowering::LowerOperation(Op, DAG); |
511 | } |
512 | |
513 | // Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT |
514 | // |
515 | // Performs the following transformations: |
516 | // - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its |
517 | // sign/zero-extension is completely overwritten by the new one performed by |
518 | // the ISD::AND. |
519 | // - Removes redundant zero extensions performed by an ISD::AND. |
520 | static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, |
521 | TargetLowering::DAGCombinerInfo &DCI, |
522 | const MipsSubtarget &Subtarget) { |
523 | if (!Subtarget.hasMSA()) |
524 | return SDValue(); |
525 | |
526 | SDValue Op0 = N->getOperand(Num: 0); |
527 | SDValue Op1 = N->getOperand(Num: 1); |
528 | unsigned Op0Opcode = Op0->getOpcode(); |
529 | |
530 | // (and (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d) |
531 | // where $d + 1 == 2^n and n == 32 |
532 | // or $d + 1 == 2^n and n <= 32 and ZExt |
533 | // -> (MipsVExtractZExt $a, $b, $c) |
534 | if (Op0Opcode == MipsISD::VEXTRACT_SEXT_ELT || |
535 | Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT) { |
536 | ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Val&: Op1); |
537 | |
538 | if (!Mask) |
539 | return SDValue(); |
540 | |
541 | int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2(); |
542 | |
543 | if (Log2IfPositive <= 0) |
544 | return SDValue(); // Mask+1 is not a power of 2 |
545 | |
546 | SDValue Op0Op2 = Op0->getOperand(Num: 2); |
547 | EVT ExtendTy = cast<VTSDNode>(Val&: Op0Op2)->getVT(); |
548 | unsigned ExtendTySize = ExtendTy.getSizeInBits(); |
549 | unsigned Log2 = Log2IfPositive; |
550 | |
551 | if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && Log2 >= ExtendTySize) || |
552 | Log2 == ExtendTySize) { |
553 | SDValue Ops[] = { Op0->getOperand(Num: 0), Op0->getOperand(Num: 1), Op0Op2 }; |
554 | return DAG.getNode(Opcode: MipsISD::VEXTRACT_ZEXT_ELT, DL: SDLoc(Op0), |
555 | VTList: Op0->getVTList(), |
556 | Ops: ArrayRef(Ops, Op0->getNumOperands())); |
557 | } |
558 | } |
559 | |
560 | return SDValue(); |
561 | } |
562 | |
563 | // Determine if the specified node is a constant vector splat. |
564 | // |
565 | // Returns true and sets Imm if: |
566 | // * N is a ISD::BUILD_VECTOR representing a constant splat |
567 | // |
568 | // This function is quite similar to MipsSEDAGToDAGISel::selectVSplat. The |
569 | // differences are that it assumes the MSA has already been checked and the |
570 | // arbitrary requirement for a maximum of 32-bit integers isn't applied (and |
571 | // must not be in order for binsri.d to be selectable). |
572 | static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) { |
573 | BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(Val: N.getNode()); |
574 | |
575 | if (!Node) |
576 | return false; |
577 | |
578 | APInt SplatValue, SplatUndef; |
579 | unsigned SplatBitSize; |
580 | bool HasAnyUndefs; |
581 | |
582 | if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, |
583 | MinSplatBits: 8, isBigEndian: !IsLittleEndian)) |
584 | return false; |
585 | |
586 | Imm = SplatValue; |
587 | |
588 | return true; |
589 | } |
590 | |
591 | // Test whether the given node is an all-ones build_vector. |
592 | static bool isVectorAllOnes(SDValue N) { |
593 | // Look through bitcasts. Endianness doesn't matter because we are looking |
594 | // for an all-ones value. |
595 | if (N->getOpcode() == ISD::BITCAST) |
596 | N = N->getOperand(Num: 0); |
597 | |
598 | BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Val&: N); |
599 | |
600 | if (!BVN) |
601 | return false; |
602 | |
603 | APInt SplatValue, SplatUndef; |
604 | unsigned SplatBitSize; |
605 | bool HasAnyUndefs; |
606 | |
607 | // Endianness doesn't matter in this context because we are looking for |
608 | // an all-ones value. |
609 | if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs)) |
610 | return SplatValue.isAllOnes(); |
611 | |
612 | return false; |
613 | } |
614 | |
615 | // Test whether N is the bitwise inverse of OfNode. |
616 | static bool isBitwiseInverse(SDValue N, SDValue OfNode) { |
617 | if (N->getOpcode() != ISD::XOR) |
618 | return false; |
619 | |
620 | if (isVectorAllOnes(N: N->getOperand(Num: 0))) |
621 | return N->getOperand(Num: 1) == OfNode; |
622 | |
623 | if (isVectorAllOnes(N: N->getOperand(Num: 1))) |
624 | return N->getOperand(Num: 0) == OfNode; |
625 | |
626 | return false; |
627 | } |
628 | |
629 | // Perform combines where ISD::OR is the root node. |
630 | // |
631 | // Performs the following transformations: |
632 | // - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b) |
633 | // where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit |
634 | // vector type. |
635 | static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, |
636 | TargetLowering::DAGCombinerInfo &DCI, |
637 | const MipsSubtarget &Subtarget) { |
638 | if (!Subtarget.hasMSA()) |
639 | return SDValue(); |
640 | |
641 | EVT Ty = N->getValueType(ResNo: 0); |
642 | |
643 | if (!Ty.is128BitVector()) |
644 | return SDValue(); |
645 | |
646 | SDValue Op0 = N->getOperand(Num: 0); |
647 | SDValue Op1 = N->getOperand(Num: 1); |
648 | |
649 | if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) { |
650 | SDValue Op0Op0 = Op0->getOperand(Num: 0); |
651 | SDValue Op0Op1 = Op0->getOperand(Num: 1); |
652 | SDValue Op1Op0 = Op1->getOperand(Num: 0); |
653 | SDValue Op1Op1 = Op1->getOperand(Num: 1); |
654 | bool IsLittleEndian = !Subtarget.isLittle(); |
655 | |
656 | SDValue IfSet, IfClr, Cond; |
657 | bool IsConstantMask = false; |
658 | APInt Mask, InvMask; |
659 | |
660 | // If Op0Op0 is an appropriate mask, try to find it's inverse in either |
661 | // Op1Op0, or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes, while |
662 | // looking. |
663 | // IfClr will be set if we find a valid match. |
664 | if (isVSplat(N: Op0Op0, Imm&: Mask, IsLittleEndian)) { |
665 | Cond = Op0Op0; |
666 | IfSet = Op0Op1; |
667 | |
668 | if (isVSplat(N: Op1Op0, Imm&: InvMask, IsLittleEndian) && |
669 | Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) |
670 | IfClr = Op1Op1; |
671 | else if (isVSplat(N: Op1Op1, Imm&: InvMask, IsLittleEndian) && |
672 | Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) |
673 | IfClr = Op1Op0; |
674 | |
675 | IsConstantMask = true; |
676 | } |
677 | |
678 | // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the same |
679 | // thing again using this mask. |
680 | // IfClr will be set if we find a valid match. |
681 | if (!IfClr.getNode() && isVSplat(N: Op0Op1, Imm&: Mask, IsLittleEndian)) { |
682 | Cond = Op0Op1; |
683 | IfSet = Op0Op0; |
684 | |
685 | if (isVSplat(N: Op1Op0, Imm&: InvMask, IsLittleEndian) && |
686 | Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) |
687 | IfClr = Op1Op1; |
688 | else if (isVSplat(N: Op1Op1, Imm&: InvMask, IsLittleEndian) && |
689 | Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) |
690 | IfClr = Op1Op0; |
691 | |
692 | IsConstantMask = true; |
693 | } |
694 | |
695 | // If IfClr is not yet set, try looking for a non-constant match. |
696 | // IfClr will be set if we find a valid match amongst the eight |
697 | // possibilities. |
698 | if (!IfClr.getNode()) { |
699 | if (isBitwiseInverse(N: Op0Op0, OfNode: Op1Op0)) { |
700 | Cond = Op1Op0; |
701 | IfSet = Op1Op1; |
702 | IfClr = Op0Op1; |
703 | } else if (isBitwiseInverse(N: Op0Op1, OfNode: Op1Op0)) { |
704 | Cond = Op1Op0; |
705 | IfSet = Op1Op1; |
706 | IfClr = Op0Op0; |
707 | } else if (isBitwiseInverse(N: Op0Op0, OfNode: Op1Op1)) { |
708 | Cond = Op1Op1; |
709 | IfSet = Op1Op0; |
710 | IfClr = Op0Op1; |
711 | } else if (isBitwiseInverse(N: Op0Op1, OfNode: Op1Op1)) { |
712 | Cond = Op1Op1; |
713 | IfSet = Op1Op0; |
714 | IfClr = Op0Op0; |
715 | } else if (isBitwiseInverse(N: Op1Op0, OfNode: Op0Op0)) { |
716 | Cond = Op0Op0; |
717 | IfSet = Op0Op1; |
718 | IfClr = Op1Op1; |
719 | } else if (isBitwiseInverse(N: Op1Op1, OfNode: Op0Op0)) { |
720 | Cond = Op0Op0; |
721 | IfSet = Op0Op1; |
722 | IfClr = Op1Op0; |
723 | } else if (isBitwiseInverse(N: Op1Op0, OfNode: Op0Op1)) { |
724 | Cond = Op0Op1; |
725 | IfSet = Op0Op0; |
726 | IfClr = Op1Op1; |
727 | } else if (isBitwiseInverse(N: Op1Op1, OfNode: Op0Op1)) { |
728 | Cond = Op0Op1; |
729 | IfSet = Op0Op0; |
730 | IfClr = Op1Op0; |
731 | } |
732 | } |
733 | |
734 | // At this point, IfClr will be set if we have a valid match. |
735 | if (!IfClr.getNode()) |
736 | return SDValue(); |
737 | |
738 | assert(Cond.getNode() && IfSet.getNode()); |
739 | |
740 | // Fold degenerate cases. |
741 | if (IsConstantMask) { |
742 | if (Mask.isAllOnes()) |
743 | return IfSet; |
744 | else if (Mask == 0) |
745 | return IfClr; |
746 | } |
747 | |
748 | // Transform the DAG into an equivalent VSELECT. |
749 | return DAG.getNode(Opcode: ISD::VSELECT, DL: SDLoc(N), VT: Ty, N1: Cond, N2: IfSet, N3: IfClr); |
750 | } |
751 | |
752 | return SDValue(); |
753 | } |
754 | |
755 | static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT, |
756 | SelectionDAG &DAG, |
757 | const MipsSubtarget &Subtarget) { |
758 | // Estimate the number of operations the below transform will turn a |
759 | // constant multiply into. The number is approximately equal to the minimal |
760 | // number of powers of two that constant can be broken down to by adding |
761 | // or subtracting them. |
762 | // |
763 | // If we have taken more than 12[1] / 8[2] steps to attempt the |
764 | // optimization for a native sized value, it is more than likely that this |
765 | // optimization will make things worse. |
766 | // |
767 | // [1] MIPS64 requires 6 instructions at most to materialize any constant, |
768 | // multiplication requires at least 4 cycles, but another cycle (or two) |
769 | // to retrieve the result from the HI/LO registers. |
770 | // |
771 | // [2] For MIPS32, more than 8 steps is expensive as the constant could be |
772 | // materialized in 2 instructions, multiplication requires at least 4 |
773 | // cycles, but another cycle (or two) to retrieve the result from the |
774 | // HI/LO registers. |
775 | // |
776 | // TODO: |
777 | // - MaxSteps needs to consider the `VT` of the constant for the current |
778 | // target. |
779 | // - Consider to perform this optimization after type legalization. |
780 | // That allows to remove a workaround for types not supported natively. |
781 | // - Take in account `-Os, -Oz` flags because this optimization |
782 | // increases code size. |
783 | unsigned MaxSteps = Subtarget.isABI_O32() ? 8 : 12; |
784 | |
785 | SmallVector<APInt, 16> WorkStack(1, C); |
786 | unsigned Steps = 0; |
787 | unsigned BitWidth = C.getBitWidth(); |
788 | |
789 | while (!WorkStack.empty()) { |
790 | APInt Val = WorkStack.pop_back_val(); |
791 | |
792 | if (Val == 0 || Val == 1) |
793 | continue; |
794 | |
795 | if (Steps >= MaxSteps) |
796 | return false; |
797 | |
798 | if (Val.isPowerOf2()) { |
799 | ++Steps; |
800 | continue; |
801 | } |
802 | |
803 | APInt Floor = APInt(BitWidth, 1) << Val.logBase2(); |
804 | APInt Ceil = Val.isNegative() ? APInt(BitWidth, 0) |
805 | : APInt(BitWidth, 1) << C.ceilLogBase2(); |
806 | if ((Val - Floor).ule(RHS: Ceil - Val)) { |
807 | WorkStack.push_back(Elt: Floor); |
808 | WorkStack.push_back(Elt: Val - Floor); |
809 | } else { |
810 | WorkStack.push_back(Elt: Ceil); |
811 | WorkStack.push_back(Elt: Ceil - Val); |
812 | } |
813 | |
814 | ++Steps; |
815 | } |
816 | |
817 | // If the value being multiplied is not supported natively, we have to pay |
818 | // an additional legalization cost, conservatively assume an increase in the |
819 | // cost of 3 instructions per step. This values for this heuristic were |
820 | // determined experimentally. |
821 | unsigned RegisterSize = DAG.getTargetLoweringInfo() |
822 | .getRegisterType(Context&: *DAG.getContext(), VT) |
823 | .getSizeInBits(); |
824 | Steps *= (VT.getSizeInBits() != RegisterSize) * 3; |
825 | if (Steps > 27) |
826 | return false; |
827 | |
828 | return true; |
829 | } |
830 | |
831 | static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT, |
832 | EVT ShiftTy, SelectionDAG &DAG) { |
833 | // Return 0. |
834 | if (C == 0) |
835 | return DAG.getConstant(Val: 0, DL, VT); |
836 | |
837 | // Return x. |
838 | if (C == 1) |
839 | return X; |
840 | |
841 | // If c is power of 2, return (shl x, log2(c)). |
842 | if (C.isPowerOf2()) |
843 | return DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, |
844 | N2: DAG.getConstant(Val: C.logBase2(), DL, VT: ShiftTy)); |
845 | |
846 | unsigned BitWidth = C.getBitWidth(); |
847 | APInt Floor = APInt(BitWidth, 1) << C.logBase2(); |
848 | APInt Ceil = C.isNegative() ? APInt(BitWidth, 0) : |
849 | APInt(BitWidth, 1) << C.ceilLogBase2(); |
850 | |
851 | // If |c - floor_c| <= |c - ceil_c|, |
852 | // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))), |
853 | // return (add constMult(x, floor_c), constMult(x, c - floor_c)). |
854 | if ((C - Floor).ule(RHS: Ceil - C)) { |
855 | SDValue Op0 = genConstMult(X, C: Floor, DL, VT, ShiftTy, DAG); |
856 | SDValue Op1 = genConstMult(X, C: C - Floor, DL, VT, ShiftTy, DAG); |
857 | return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op0, N2: Op1); |
858 | } |
859 | |
860 | // If |c - floor_c| > |c - ceil_c|, |
861 | // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)). |
862 | SDValue Op0 = genConstMult(X, C: Ceil, DL, VT, ShiftTy, DAG); |
863 | SDValue Op1 = genConstMult(X, C: Ceil - C, DL, VT, ShiftTy, DAG); |
864 | return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op0, N2: Op1); |
865 | } |
866 | |
867 | static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, |
868 | const TargetLowering::DAGCombinerInfo &DCI, |
869 | const MipsSETargetLowering *TL, |
870 | const MipsSubtarget &Subtarget) { |
871 | EVT VT = N->getValueType(ResNo: 0); |
872 | |
873 | if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1))) |
874 | if (!VT.isVector() && shouldTransformMulToShiftsAddsSubs( |
875 | C: C->getAPIntValue(), VT, DAG, Subtarget)) |
876 | return genConstMult(X: N->getOperand(Num: 0), C: C->getAPIntValue(), DL: SDLoc(N), VT, |
877 | ShiftTy: TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT), |
878 | DAG); |
879 | |
880 | return SDValue(N, 0); |
881 | } |
882 | |
883 | static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty, |
884 | SelectionDAG &DAG, |
885 | const MipsSubtarget &Subtarget) { |
886 | // See if this is a vector splat immediate node. |
887 | APInt SplatValue, SplatUndef; |
888 | unsigned SplatBitSize; |
889 | bool HasAnyUndefs; |
890 | unsigned EltSize = Ty.getScalarSizeInBits(); |
891 | BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Val: N->getOperand(Num: 1)); |
892 | |
893 | if (!Subtarget.hasDSP()) |
894 | return SDValue(); |
895 | |
896 | if (!BV || |
897 | !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, |
898 | MinSplatBits: EltSize, isBigEndian: !Subtarget.isLittle()) || |
899 | (SplatBitSize != EltSize) || |
900 | (SplatValue.getZExtValue() >= EltSize)) |
901 | return SDValue(); |
902 | |
903 | SDLoc DL(N); |
904 | return DAG.getNode(Opcode: Opc, DL, VT: Ty, N1: N->getOperand(Num: 0), |
905 | N2: DAG.getConstant(Val: SplatValue.getZExtValue(), DL, VT: MVT::i32)); |
906 | } |
907 | |
908 | static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG, |
909 | TargetLowering::DAGCombinerInfo &DCI, |
910 | const MipsSubtarget &Subtarget) { |
911 | EVT Ty = N->getValueType(ResNo: 0); |
912 | |
913 | if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8)) |
914 | return SDValue(); |
915 | |
916 | return performDSPShiftCombine(Opc: MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget); |
917 | } |
918 | |
919 | // Fold sign-extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold |
920 | // constant splats into MipsISD::SHRA_DSP for DSPr2. |
921 | // |
922 | // Performs the following transformations: |
923 | // - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its |
924 | // sign/zero-extension is completely overwritten by the new one performed by |
925 | // the ISD::SRA and ISD::SHL nodes. |
926 | // - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL |
927 | // sequence. |
928 | // |
929 | // See performDSPShiftCombine for more information about the transformation |
930 | // used for DSPr2. |
931 | static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, |
932 | TargetLowering::DAGCombinerInfo &DCI, |
933 | const MipsSubtarget &Subtarget) { |
934 | EVT Ty = N->getValueType(ResNo: 0); |
935 | |
936 | if (Subtarget.hasMSA()) { |
937 | SDValue Op0 = N->getOperand(Num: 0); |
938 | SDValue Op1 = N->getOperand(Num: 1); |
939 | |
940 | // (sra (shl (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d) |
941 | // where $d + sizeof($c) == 32 |
942 | // or $d + sizeof($c) <= 32 and SExt |
943 | // -> (MipsVExtractSExt $a, $b, $c) |
944 | if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(Num: 1)) { |
945 | SDValue Op0Op0 = Op0->getOperand(Num: 0); |
946 | ConstantSDNode *ShAmount = dyn_cast<ConstantSDNode>(Val&: Op1); |
947 | |
948 | if (!ShAmount) |
949 | return SDValue(); |
950 | |
951 | if (Op0Op0->getOpcode() != MipsISD::VEXTRACT_SEXT_ELT && |
952 | Op0Op0->getOpcode() != MipsISD::VEXTRACT_ZEXT_ELT) |
953 | return SDValue(); |
954 | |
955 | EVT ExtendTy = cast<VTSDNode>(Val: Op0Op0->getOperand(Num: 2))->getVT(); |
956 | unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits(); |
957 | |
958 | if (TotalBits == 32 || |
959 | (Op0Op0->getOpcode() == MipsISD::VEXTRACT_SEXT_ELT && |
960 | TotalBits <= 32)) { |
961 | SDValue Ops[] = { Op0Op0->getOperand(Num: 0), Op0Op0->getOperand(Num: 1), |
962 | Op0Op0->getOperand(Num: 2) }; |
963 | return DAG.getNode(Opcode: MipsISD::VEXTRACT_SEXT_ELT, DL: SDLoc(Op0Op0), |
964 | VTList: Op0Op0->getVTList(), |
965 | Ops: ArrayRef(Ops, Op0Op0->getNumOperands())); |
966 | } |
967 | } |
968 | } |
969 | |
970 | if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget.hasDSPR2())) |
971 | return SDValue(); |
972 | |
973 | return performDSPShiftCombine(Opc: MipsISD::SHRA_DSP, N, Ty, DAG, Subtarget); |
974 | } |
975 | |
976 | |
977 | static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, |
978 | TargetLowering::DAGCombinerInfo &DCI, |
979 | const MipsSubtarget &Subtarget) { |
980 | EVT Ty = N->getValueType(ResNo: 0); |
981 | |
982 | if (((Ty != MVT::v2i16) || !Subtarget.hasDSPR2()) && (Ty != MVT::v4i8)) |
983 | return SDValue(); |
984 | |
985 | return performDSPShiftCombine(Opc: MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget); |
986 | } |
987 | |
988 | static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC) { |
989 | bool IsV216 = (Ty == MVT::v2i16); |
990 | |
991 | switch (CC) { |
992 | case ISD::SETEQ: |
993 | case ISD::SETNE: return true; |
994 | case ISD::SETLT: |
995 | case ISD::SETLE: |
996 | case ISD::SETGT: |
997 | case ISD::SETGE: return IsV216; |
998 | case ISD::SETULT: |
999 | case ISD::SETULE: |
1000 | case ISD::SETUGT: |
1001 | case ISD::SETUGE: return !IsV216; |
1002 | default: return false; |
1003 | } |
1004 | } |
1005 | |
1006 | static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) { |
1007 | EVT Ty = N->getValueType(ResNo: 0); |
1008 | |
1009 | if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8)) |
1010 | return SDValue(); |
1011 | |
1012 | if (!isLegalDSPCondCode(Ty, CC: cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get())) |
1013 | return SDValue(); |
1014 | |
1015 | return DAG.getNode(Opcode: MipsISD::SETCC_DSP, DL: SDLoc(N), VT: Ty, N1: N->getOperand(Num: 0), |
1016 | N2: N->getOperand(Num: 1), N3: N->getOperand(Num: 2)); |
1017 | } |
1018 | |
1019 | static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) { |
1020 | EVT Ty = N->getValueType(ResNo: 0); |
1021 | |
1022 | if (Ty == MVT::v2i16 || Ty == MVT::v4i8) { |
1023 | SDValue SetCC = N->getOperand(Num: 0); |
1024 | |
1025 | if (SetCC.getOpcode() != MipsISD::SETCC_DSP) |
1026 | return SDValue(); |
1027 | |
1028 | return DAG.getNode(Opcode: MipsISD::SELECT_CC_DSP, DL: SDLoc(N), VT: Ty, |
1029 | N1: SetCC.getOperand(i: 0), N2: SetCC.getOperand(i: 1), |
1030 | N3: N->getOperand(Num: 1), N4: N->getOperand(Num: 2), N5: SetCC.getOperand(i: 2)); |
1031 | } |
1032 | |
1033 | return SDValue(); |
1034 | } |
1035 | |
1036 | static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, |
1037 | const MipsSubtarget &Subtarget) { |
1038 | EVT Ty = N->getValueType(ResNo: 0); |
1039 | |
1040 | if (Subtarget.hasMSA() && Ty.is128BitVector() && Ty.isInteger()) { |
1041 | // Try the following combines: |
1042 | // (xor (or $a, $b), (build_vector allones)) |
1043 | // (xor (or $a, $b), (bitcast (build_vector allones))) |
1044 | SDValue Op0 = N->getOperand(Num: 0); |
1045 | SDValue Op1 = N->getOperand(Num: 1); |
1046 | SDValue NotOp; |
1047 | |
1048 | if (ISD::isBuildVectorAllOnes(N: Op0.getNode())) |
1049 | NotOp = Op1; |
1050 | else if (ISD::isBuildVectorAllOnes(N: Op1.getNode())) |
1051 | NotOp = Op0; |
1052 | else |
1053 | return SDValue(); |
1054 | |
1055 | if (NotOp->getOpcode() == ISD::OR) |
1056 | return DAG.getNode(Opcode: MipsISD::VNOR, DL: SDLoc(N), VT: Ty, N1: NotOp->getOperand(Num: 0), |
1057 | N2: NotOp->getOperand(Num: 1)); |
1058 | } |
1059 | |
1060 | return SDValue(); |
1061 | } |
1062 | |
1063 | SDValue |
1064 | MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { |
1065 | SelectionDAG &DAG = DCI.DAG; |
1066 | SDValue Val; |
1067 | |
1068 | switch (N->getOpcode()) { |
1069 | case ISD::AND: |
1070 | Val = performANDCombine(N, DAG, DCI, Subtarget); |
1071 | break; |
1072 | case ISD::OR: |
1073 | Val = performORCombine(N, DAG, DCI, Subtarget); |
1074 | break; |
1075 | case ISD::MUL: |
1076 | return performMULCombine(N, DAG, DCI, TL: this, Subtarget); |
1077 | case ISD::SHL: |
1078 | Val = performSHLCombine(N, DAG, DCI, Subtarget); |
1079 | break; |
1080 | case ISD::SRA: |
1081 | return performSRACombine(N, DAG, DCI, Subtarget); |
1082 | case ISD::SRL: |
1083 | return performSRLCombine(N, DAG, DCI, Subtarget); |
1084 | case ISD::VSELECT: |
1085 | return performVSELECTCombine(N, DAG); |
1086 | case ISD::XOR: |
1087 | Val = performXORCombine(N, DAG, Subtarget); |
1088 | break; |
1089 | case ISD::SETCC: |
1090 | Val = performSETCCCombine(N, DAG); |
1091 | break; |
1092 | } |
1093 | |
1094 | if (Val.getNode()) { |
1095 | LLVM_DEBUG(dbgs() << "\nMipsSE DAG Combine:\n" ; |
1096 | N->printrWithDepth(dbgs(), &DAG); dbgs() << "\n=> \n" ; |
1097 | Val.getNode()->printrWithDepth(dbgs(), &DAG); dbgs() << "\n" ); |
1098 | return Val; |
1099 | } |
1100 | |
1101 | return MipsTargetLowering::PerformDAGCombine(N, DCI); |
1102 | } |
1103 | |
1104 | MachineBasicBlock * |
1105 | MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, |
1106 | MachineBasicBlock *BB) const { |
1107 | switch (MI.getOpcode()) { |
1108 | default: |
1109 | return MipsTargetLowering::EmitInstrWithCustomInserter(MI, MBB: BB); |
1110 | case Mips::BPOSGE32_PSEUDO: |
1111 | return emitBPOSGE32(MI, BB); |
1112 | case Mips::SNZ_B_PSEUDO: |
1113 | return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BNZ_B); |
1114 | case Mips::SNZ_H_PSEUDO: |
1115 | return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BNZ_H); |
1116 | case Mips::SNZ_W_PSEUDO: |
1117 | return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BNZ_W); |
1118 | case Mips::SNZ_D_PSEUDO: |
1119 | return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BNZ_D); |
1120 | case Mips::SNZ_V_PSEUDO: |
1121 | return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BNZ_V); |
1122 | case Mips::SZ_B_PSEUDO: |
1123 | return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BZ_B); |
1124 | case Mips::SZ_H_PSEUDO: |
1125 | return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BZ_H); |
1126 | case Mips::SZ_W_PSEUDO: |
1127 | return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BZ_W); |
1128 | case Mips::SZ_D_PSEUDO: |
1129 | return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BZ_D); |
1130 | case Mips::SZ_V_PSEUDO: |
1131 | return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BZ_V); |
1132 | case Mips::COPY_FW_PSEUDO: |
1133 | return emitCOPY_FW(MI, BB); |
1134 | case Mips::COPY_FD_PSEUDO: |
1135 | return emitCOPY_FD(MI, BB); |
1136 | case Mips::INSERT_FW_PSEUDO: |
1137 | return emitINSERT_FW(MI, BB); |
1138 | case Mips::INSERT_FD_PSEUDO: |
1139 | return emitINSERT_FD(MI, BB); |
1140 | case Mips::INSERT_B_VIDX_PSEUDO: |
1141 | case Mips::INSERT_B_VIDX64_PSEUDO: |
1142 | return emitINSERT_DF_VIDX(MI, BB, EltSizeInBytes: 1, IsFP: false); |
1143 | case Mips::INSERT_H_VIDX_PSEUDO: |
1144 | case Mips::INSERT_H_VIDX64_PSEUDO: |
1145 | return emitINSERT_DF_VIDX(MI, BB, EltSizeInBytes: 2, IsFP: false); |
1146 | case Mips::INSERT_W_VIDX_PSEUDO: |
1147 | case Mips::INSERT_W_VIDX64_PSEUDO: |
1148 | return emitINSERT_DF_VIDX(MI, BB, EltSizeInBytes: 4, IsFP: false); |
1149 | case Mips::INSERT_D_VIDX_PSEUDO: |
1150 | case Mips::INSERT_D_VIDX64_PSEUDO: |
1151 | return emitINSERT_DF_VIDX(MI, BB, EltSizeInBytes: 8, IsFP: false); |
1152 | case Mips::INSERT_FW_VIDX_PSEUDO: |
1153 | case Mips::INSERT_FW_VIDX64_PSEUDO: |
1154 | return emitINSERT_DF_VIDX(MI, BB, EltSizeInBytes: 4, IsFP: true); |
1155 | case Mips::INSERT_FD_VIDX_PSEUDO: |
1156 | case Mips::INSERT_FD_VIDX64_PSEUDO: |
1157 | return emitINSERT_DF_VIDX(MI, BB, EltSizeInBytes: 8, IsFP: true); |
1158 | case Mips::FILL_FW_PSEUDO: |
1159 | return emitFILL_FW(MI, BB); |
1160 | case Mips::FILL_FD_PSEUDO: |
1161 | return emitFILL_FD(MI, BB); |
1162 | case Mips::FEXP2_W_1_PSEUDO: |
1163 | return emitFEXP2_W_1(MI, BB); |
1164 | case Mips::FEXP2_D_1_PSEUDO: |
1165 | return emitFEXP2_D_1(MI, BB); |
1166 | case Mips::ST_F16: |
1167 | return emitST_F16_PSEUDO(MI, BB); |
1168 | case Mips::LD_F16: |
1169 | return emitLD_F16_PSEUDO(MI, BB); |
1170 | case Mips::MSA_FP_EXTEND_W_PSEUDO: |
1171 | return emitFPEXTEND_PSEUDO(MI, BB, IsFGR64: false); |
1172 | case Mips::MSA_FP_ROUND_W_PSEUDO: |
1173 | return emitFPROUND_PSEUDO(MI, BBi: BB, IsFGR64: false); |
1174 | case Mips::MSA_FP_EXTEND_D_PSEUDO: |
1175 | return emitFPEXTEND_PSEUDO(MI, BB, IsFGR64: true); |
1176 | case Mips::MSA_FP_ROUND_D_PSEUDO: |
1177 | return emitFPROUND_PSEUDO(MI, BBi: BB, IsFGR64: true); |
1178 | } |
1179 | } |
1180 | |
1181 | bool MipsSETargetLowering::isEligibleForTailCallOptimization( |
1182 | const CCState &CCInfo, unsigned NextStackOffset, |
1183 | const MipsFunctionInfo &FI) const { |
1184 | if (!UseMipsTailCalls) |
1185 | return false; |
1186 | |
1187 | // Exception has to be cleared with eret. |
1188 | if (FI.isISR()) |
1189 | return false; |
1190 | |
1191 | // Return false if either the callee or caller has a byval argument. |
1192 | if (CCInfo.getInRegsParamsCount() > 0 || FI.hasByvalArg()) |
1193 | return false; |
1194 | |
1195 | // Return true if the callee's argument area is no larger than the |
1196 | // caller's. |
1197 | return NextStackOffset <= FI.getIncomingArgSize(); |
1198 | } |
1199 | |
1200 | void MipsSETargetLowering:: |
1201 | getOpndList(SmallVectorImpl<SDValue> &Ops, |
1202 | std::deque<std::pair<unsigned, SDValue>> &RegsToPass, |
1203 | bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, |
1204 | bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee, |
1205 | SDValue Chain) const { |
1206 | Ops.push_back(Elt: Callee); |
1207 | MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, |
1208 | InternalLinkage, IsCallReloc, CLI, Callee, |
1209 | Chain); |
1210 | } |
1211 | |
1212 | SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const { |
1213 | LoadSDNode &Nd = *cast<LoadSDNode>(Val&: Op); |
1214 | |
1215 | if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore) |
1216 | return MipsTargetLowering::lowerLOAD(Op, DAG); |
1217 | |
1218 | // Replace a double precision load with two i32 loads and a buildpair64. |
1219 | SDLoc DL(Op); |
1220 | SDValue Ptr = Nd.getBasePtr(), Chain = Nd.getChain(); |
1221 | EVT PtrVT = Ptr.getValueType(); |
1222 | |
1223 | // i32 load from lower address. |
1224 | SDValue Lo = DAG.getLoad(VT: MVT::i32, dl: DL, Chain, Ptr, PtrInfo: MachinePointerInfo(), |
1225 | Alignment: Nd.getAlign(), MMOFlags: Nd.getMemOperand()->getFlags()); |
1226 | |
1227 | // i32 load from higher address. |
1228 | Ptr = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: Ptr, N2: DAG.getConstant(Val: 4, DL, VT: PtrVT)); |
1229 | SDValue Hi = DAG.getLoad( |
1230 | VT: MVT::i32, dl: DL, Chain: Lo.getValue(R: 1), Ptr, PtrInfo: MachinePointerInfo(), |
1231 | Alignment: commonAlignment(A: Nd.getAlign(), Offset: 4), MMOFlags: Nd.getMemOperand()->getFlags()); |
1232 | |
1233 | if (!Subtarget.isLittle()) |
1234 | std::swap(a&: Lo, b&: Hi); |
1235 | |
1236 | SDValue BP = DAG.getNode(Opcode: MipsISD::BuildPairF64, DL, VT: MVT::f64, N1: Lo, N2: Hi); |
1237 | SDValue Ops[2] = {BP, Hi.getValue(R: 1)}; |
1238 | return DAG.getMergeValues(Ops, dl: DL); |
1239 | } |
1240 | |
1241 | SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { |
1242 | StoreSDNode &Nd = *cast<StoreSDNode>(Val&: Op); |
1243 | |
1244 | if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore) |
1245 | return MipsTargetLowering::lowerSTORE(Op, DAG); |
1246 | |
1247 | // Replace a double precision store with two extractelement64s and i32 stores. |
1248 | SDLoc DL(Op); |
1249 | SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain(); |
1250 | EVT PtrVT = Ptr.getValueType(); |
1251 | SDValue Lo = DAG.getNode(Opcode: MipsISD::ExtractElementF64, DL, VT: MVT::i32, |
1252 | N1: Val, N2: DAG.getConstant(Val: 0, DL, VT: MVT::i32)); |
1253 | SDValue Hi = DAG.getNode(Opcode: MipsISD::ExtractElementF64, DL, VT: MVT::i32, |
1254 | N1: Val, N2: DAG.getConstant(Val: 1, DL, VT: MVT::i32)); |
1255 | |
1256 | if (!Subtarget.isLittle()) |
1257 | std::swap(a&: Lo, b&: Hi); |
1258 | |
1259 | // i32 store to lower address. |
1260 | Chain = DAG.getStore(Chain, dl: DL, Val: Lo, Ptr, PtrInfo: MachinePointerInfo(), Alignment: Nd.getAlign(), |
1261 | MMOFlags: Nd.getMemOperand()->getFlags(), AAInfo: Nd.getAAInfo()); |
1262 | |
1263 | // i32 store to higher address. |
1264 | Ptr = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: Ptr, N2: DAG.getConstant(Val: 4, DL, VT: PtrVT)); |
1265 | return DAG.getStore(Chain, dl: DL, Val: Hi, Ptr, PtrInfo: MachinePointerInfo(), |
1266 | Alignment: commonAlignment(A: Nd.getAlign(), Offset: 4), |
1267 | MMOFlags: Nd.getMemOperand()->getFlags(), AAInfo: Nd.getAAInfo()); |
1268 | } |
1269 | |
1270 | SDValue MipsSETargetLowering::lowerBITCAST(SDValue Op, |
1271 | SelectionDAG &DAG) const { |
1272 | SDLoc DL(Op); |
1273 | MVT Src = Op.getOperand(i: 0).getValueType().getSimpleVT(); |
1274 | MVT Dest = Op.getValueType().getSimpleVT(); |
1275 | |
1276 | // Bitcast i64 to double. |
1277 | if (Src == MVT::i64 && Dest == MVT::f64) { |
1278 | SDValue Lo, Hi; |
1279 | std::tie(args&: Lo, args&: Hi) = |
1280 | DAG.SplitScalar(N: Op.getOperand(i: 0), DL, LoVT: MVT::i32, HiVT: MVT::i32); |
1281 | return DAG.getNode(Opcode: MipsISD::BuildPairF64, DL, VT: MVT::f64, N1: Lo, N2: Hi); |
1282 | } |
1283 | |
1284 | // Bitcast double to i64. |
1285 | if (Src == MVT::f64 && Dest == MVT::i64) { |
1286 | // Skip lower bitcast when operand0 has converted float results to integer |
1287 | // which was done by function SoftenFloatResult. |
1288 | if (getTypeAction(Context&: *DAG.getContext(), VT: Op.getOperand(i: 0).getValueType()) == |
1289 | TargetLowering::TypeSoftenFloat) |
1290 | return SDValue(); |
1291 | SDValue Lo = |
1292 | DAG.getNode(Opcode: MipsISD::ExtractElementF64, DL, VT: MVT::i32, N1: Op.getOperand(i: 0), |
1293 | N2: DAG.getConstant(Val: 0, DL, VT: MVT::i32)); |
1294 | SDValue Hi = |
1295 | DAG.getNode(Opcode: MipsISD::ExtractElementF64, DL, VT: MVT::i32, N1: Op.getOperand(i: 0), |
1296 | N2: DAG.getConstant(Val: 1, DL, VT: MVT::i32)); |
1297 | return DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: MVT::i64, N1: Lo, N2: Hi); |
1298 | } |
1299 | |
1300 | // Skip other cases of bitcast and use default lowering. |
1301 | return SDValue(); |
1302 | } |
1303 | |
1304 | SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc, |
1305 | bool HasLo, bool HasHi, |
1306 | SelectionDAG &DAG) const { |
1307 | // MIPS32r6/MIPS64r6 removed accumulator based multiplies. |
1308 | assert(!Subtarget.hasMips32r6()); |
1309 | |
1310 | EVT Ty = Op.getOperand(i: 0).getValueType(); |
1311 | SDLoc DL(Op); |
1312 | SDValue Mult = DAG.getNode(Opcode: NewOpc, DL, VT: MVT::Untyped, |
1313 | N1: Op.getOperand(i: 0), N2: Op.getOperand(i: 1)); |
1314 | SDValue Lo, Hi; |
1315 | |
1316 | if (HasLo) |
1317 | Lo = DAG.getNode(Opcode: MipsISD::MFLO, DL, VT: Ty, Operand: Mult); |
1318 | if (HasHi) |
1319 | Hi = DAG.getNode(Opcode: MipsISD::MFHI, DL, VT: Ty, Operand: Mult); |
1320 | |
1321 | if (!HasLo || !HasHi) |
1322 | return HasLo ? Lo : Hi; |
1323 | |
1324 | SDValue Vals[] = { Lo, Hi }; |
1325 | return DAG.getMergeValues(Ops: Vals, dl: DL); |
1326 | } |
1327 | |
1328 | static SDValue initAccumulator(SDValue In, const SDLoc &DL, SelectionDAG &DAG) { |
1329 | SDValue InLo, InHi; |
1330 | std::tie(args&: InLo, args&: InHi) = DAG.SplitScalar(N: In, DL, LoVT: MVT::i32, HiVT: MVT::i32); |
1331 | return DAG.getNode(Opcode: MipsISD::MTLOHI, DL, VT: MVT::Untyped, N1: InLo, N2: InHi); |
1332 | } |
1333 | |
1334 | static SDValue (SDValue Op, const SDLoc &DL, SelectionDAG &DAG) { |
1335 | SDValue Lo = DAG.getNode(Opcode: MipsISD::MFLO, DL, VT: MVT::i32, Operand: Op); |
1336 | SDValue Hi = DAG.getNode(Opcode: MipsISD::MFHI, DL, VT: MVT::i32, Operand: Op); |
1337 | return DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: MVT::i64, N1: Lo, N2: Hi); |
1338 | } |
1339 | |
1340 | // This function expands mips intrinsic nodes which have 64-bit input operands |
1341 | // or output values. |
1342 | // |
1343 | // out64 = intrinsic-node in64 |
1344 | // => |
1345 | // lo = copy (extract-element (in64, 0)) |
1346 | // hi = copy (extract-element (in64, 1)) |
1347 | // mips-specific-node |
1348 | // v0 = copy lo |
1349 | // v1 = copy hi |
1350 | // out64 = merge-values (v0, v1) |
1351 | // |
1352 | static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { |
1353 | SDLoc DL(Op); |
1354 | bool HasChainIn = Op->getOperand(Num: 0).getValueType() == MVT::Other; |
1355 | SmallVector<SDValue, 3> Ops; |
1356 | unsigned OpNo = 0; |
1357 | |
1358 | // See if Op has a chain input. |
1359 | if (HasChainIn) |
1360 | Ops.push_back(Elt: Op->getOperand(Num: OpNo++)); |
1361 | |
1362 | // The next operand is the intrinsic opcode. |
1363 | assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant); |
1364 | |
1365 | // See if the next operand has type i64. |
1366 | SDValue Opnd = Op->getOperand(Num: ++OpNo), In64; |
1367 | |
1368 | if (Opnd.getValueType() == MVT::i64) |
1369 | In64 = initAccumulator(In: Opnd, DL, DAG); |
1370 | else |
1371 | Ops.push_back(Elt: Opnd); |
1372 | |
1373 | // Push the remaining operands. |
1374 | for (++OpNo ; OpNo < Op->getNumOperands(); ++OpNo) |
1375 | Ops.push_back(Elt: Op->getOperand(Num: OpNo)); |
1376 | |
1377 | // Add In64 to the end of the list. |
1378 | if (In64.getNode()) |
1379 | Ops.push_back(Elt: In64); |
1380 | |
1381 | // Scan output. |
1382 | SmallVector<EVT, 2> ResTys; |
1383 | |
1384 | for (EVT Ty : Op->values()) |
1385 | ResTys.push_back(Elt: (Ty == MVT::i64) ? MVT::Untyped : Ty); |
1386 | |
1387 | // Create node. |
1388 | SDValue Val = DAG.getNode(Opcode: Opc, DL, ResultTys: ResTys, Ops); |
1389 | SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Op: Val, DL, DAG) : Val; |
1390 | |
1391 | if (!HasChainIn) |
1392 | return Out; |
1393 | |
1394 | assert(Val->getValueType(1) == MVT::Other); |
1395 | SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) }; |
1396 | return DAG.getMergeValues(Ops: Vals, dl: DL); |
1397 | } |
1398 | |
1399 | // Lower an MSA copy intrinsic into the specified SelectionDAG node |
1400 | static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { |
1401 | SDLoc DL(Op); |
1402 | SDValue Vec = Op->getOperand(Num: 1); |
1403 | SDValue Idx = Op->getOperand(Num: 2); |
1404 | EVT ResTy = Op->getValueType(ResNo: 0); |
1405 | EVT EltTy = Vec->getValueType(ResNo: 0).getVectorElementType(); |
1406 | |
1407 | SDValue Result = DAG.getNode(Opcode: Opc, DL, VT: ResTy, N1: Vec, N2: Idx, |
1408 | N3: DAG.getValueType(EltTy)); |
1409 | |
1410 | return Result; |
1411 | } |
1412 | |
1413 | static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) { |
1414 | EVT ResVecTy = Op->getValueType(ResNo: 0); |
1415 | EVT ViaVecTy = ResVecTy; |
1416 | bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian(); |
1417 | SDLoc DL(Op); |
1418 | |
1419 | // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and |
1420 | // LaneB is the lower 32-bits. Otherwise LaneA and LaneB are alternating |
1421 | // lanes. |
1422 | SDValue LaneA = Op->getOperand(Num: OpNr); |
1423 | SDValue LaneB; |
1424 | |
1425 | if (ResVecTy == MVT::v2i64) { |
1426 | // In case of the index being passed as an immediate value, set the upper |
1427 | // lane to 0 so that the splati.d instruction can be matched. |
1428 | if (isa<ConstantSDNode>(Val: LaneA)) |
1429 | LaneB = DAG.getConstant(Val: 0, DL, VT: MVT::i32); |
1430 | // Having the index passed in a register, set the upper lane to the same |
1431 | // value as the lower - this results in the BUILD_VECTOR node not being |
1432 | // expanded through stack. This way we are able to pattern match the set of |
1433 | // nodes created here to splat.d. |
1434 | else |
1435 | LaneB = LaneA; |
1436 | ViaVecTy = MVT::v4i32; |
1437 | if(BigEndian) |
1438 | std::swap(a&: LaneA, b&: LaneB); |
1439 | } else |
1440 | LaneB = LaneA; |
1441 | |
1442 | SDValue Ops[16] = { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, |
1443 | LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB }; |
1444 | |
1445 | SDValue Result = DAG.getBuildVector( |
1446 | VT: ViaVecTy, DL, Ops: ArrayRef(Ops, ViaVecTy.getVectorNumElements())); |
1447 | |
1448 | if (ViaVecTy != ResVecTy) { |
1449 | SDValue One = DAG.getConstant(Val: 1, DL, VT: ViaVecTy); |
1450 | Result = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ResVecTy, |
1451 | Operand: DAG.getNode(Opcode: ISD::AND, DL, VT: ViaVecTy, N1: Result, N2: One)); |
1452 | } |
1453 | |
1454 | return Result; |
1455 | } |
1456 | |
1457 | static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, |
1458 | bool IsSigned = false) { |
1459 | auto *CImm = cast<ConstantSDNode>(Val: Op->getOperand(Num: ImmOp)); |
1460 | return DAG.getConstant( |
1461 | Val: APInt(Op->getValueType(ResNo: 0).getScalarType().getSizeInBits(), |
1462 | IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned), |
1463 | DL: SDLoc(Op), VT: Op->getValueType(ResNo: 0)); |
1464 | } |
1465 | |
1466 | static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue, |
1467 | bool BigEndian, SelectionDAG &DAG) { |
1468 | EVT ViaVecTy = VecTy; |
1469 | SDValue SplatValueA = SplatValue; |
1470 | SDValue SplatValueB = SplatValue; |
1471 | SDLoc DL(SplatValue); |
1472 | |
1473 | if (VecTy == MVT::v2i64) { |
1474 | // v2i64 BUILD_VECTOR must be performed via v4i32 so split into i32's. |
1475 | ViaVecTy = MVT::v4i32; |
1476 | |
1477 | SplatValueA = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: SplatValue); |
1478 | SplatValueB = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, N1: SplatValue, |
1479 | N2: DAG.getConstant(Val: 32, DL, VT: MVT::i32)); |
1480 | SplatValueB = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: SplatValueB); |
1481 | } |
1482 | |
1483 | // We currently hold the parts in little endian order. Swap them if |
1484 | // necessary. |
1485 | if (BigEndian) |
1486 | std::swap(a&: SplatValueA, b&: SplatValueB); |
1487 | |
1488 | SDValue Ops[16] = { SplatValueA, SplatValueB, SplatValueA, SplatValueB, |
1489 | SplatValueA, SplatValueB, SplatValueA, SplatValueB, |
1490 | SplatValueA, SplatValueB, SplatValueA, SplatValueB, |
1491 | SplatValueA, SplatValueB, SplatValueA, SplatValueB }; |
1492 | |
1493 | SDValue Result = DAG.getBuildVector( |
1494 | VT: ViaVecTy, DL, Ops: ArrayRef(Ops, ViaVecTy.getVectorNumElements())); |
1495 | |
1496 | if (VecTy != ViaVecTy) |
1497 | Result = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: VecTy, Operand: Result); |
1498 | |
1499 | return Result; |
1500 | } |
1501 | |
1502 | static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG, |
1503 | unsigned Opc, SDValue Imm, |
1504 | bool BigEndian) { |
1505 | EVT VecTy = Op->getValueType(ResNo: 0); |
1506 | SDValue Exp2Imm; |
1507 | SDLoc DL(Op); |
1508 | |
1509 | // The DAG Combiner can't constant fold bitcasted vectors yet so we must do it |
1510 | // here for now. |
1511 | if (VecTy == MVT::v2i64) { |
1512 | if (ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(Val&: Imm)) { |
1513 | APInt BitImm = APInt(64, 1) << CImm->getAPIntValue(); |
1514 | |
1515 | SDValue BitImmHiOp = DAG.getConstant(Val: BitImm.lshr(shiftAmt: 32).trunc(width: 32), DL, |
1516 | VT: MVT::i32); |
1517 | SDValue BitImmLoOp = DAG.getConstant(Val: BitImm.trunc(width: 32), DL, VT: MVT::i32); |
1518 | |
1519 | if (BigEndian) |
1520 | std::swap(a&: BitImmLoOp, b&: BitImmHiOp); |
1521 | |
1522 | Exp2Imm = DAG.getNode( |
1523 | Opcode: ISD::BITCAST, DL, VT: MVT::v2i64, |
1524 | Operand: DAG.getBuildVector(VT: MVT::v4i32, DL, |
1525 | Ops: {BitImmLoOp, BitImmHiOp, BitImmLoOp, BitImmHiOp})); |
1526 | } |
1527 | } |
1528 | |
1529 | if (!Exp2Imm.getNode()) { |
1530 | // We couldnt constant fold, do a vector shift instead |
1531 | |
1532 | // Extend i32 to i64 if necessary. Sign or zero extend doesn't matter since |
1533 | // only values 0-63 are valid. |
1534 | if (VecTy == MVT::v2i64) |
1535 | Imm = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, Operand: Imm); |
1536 | |
1537 | Exp2Imm = getBuildVectorSplat(VecTy, SplatValue: Imm, BigEndian, DAG); |
1538 | |
1539 | Exp2Imm = DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: DAG.getConstant(Val: 1, DL, VT: VecTy), |
1540 | N2: Exp2Imm); |
1541 | } |
1542 | |
1543 | return DAG.getNode(Opcode: Opc, DL, VT: VecTy, N1: Op->getOperand(Num: 1), N2: Exp2Imm); |
1544 | } |
1545 | |
1546 | static SDValue truncateVecElts(SDValue Op, SelectionDAG &DAG) { |
1547 | SDLoc DL(Op); |
1548 | EVT ResTy = Op->getValueType(ResNo: 0); |
1549 | SDValue Vec = Op->getOperand(Num: 2); |
1550 | bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian(); |
1551 | MVT ResEltTy = ResTy == MVT::v2i64 ? MVT::i64 : MVT::i32; |
1552 | SDValue ConstValue = DAG.getConstant(Val: Vec.getScalarValueSizeInBits() - 1, |
1553 | DL, VT: ResEltTy); |
1554 | SDValue SplatVec = getBuildVectorSplat(VecTy: ResTy, SplatValue: ConstValue, BigEndian, DAG); |
1555 | |
1556 | return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Vec, N2: SplatVec); |
1557 | } |
1558 | |
1559 | static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) { |
1560 | EVT ResTy = Op->getValueType(ResNo: 0); |
1561 | SDLoc DL(Op); |
1562 | SDValue One = DAG.getConstant(Val: 1, DL, VT: ResTy); |
1563 | SDValue Bit = DAG.getNode(Opcode: ISD::SHL, DL, VT: ResTy, N1: One, N2: truncateVecElts(Op, DAG)); |
1564 | |
1565 | return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Op->getOperand(Num: 1), |
1566 | N2: DAG.getNOT(DL, Val: Bit, VT: ResTy)); |
1567 | } |
1568 | |
1569 | static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) { |
1570 | SDLoc DL(Op); |
1571 | EVT ResTy = Op->getValueType(ResNo: 0); |
1572 | APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) |
1573 | << Op->getConstantOperandAPInt(Num: 2); |
1574 | SDValue BitMask = DAG.getConstant(Val: ~BitImm, DL, VT: ResTy); |
1575 | |
1576 | return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Op->getOperand(Num: 1), N2: BitMask); |
1577 | } |
1578 | |
1579 | SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, |
1580 | SelectionDAG &DAG) const { |
1581 | SDLoc DL(Op); |
1582 | unsigned Intrinsic = Op->getConstantOperandVal(Num: 0); |
1583 | switch (Intrinsic) { |
1584 | default: |
1585 | return SDValue(); |
1586 | case Intrinsic::mips_shilo: |
1587 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::SHILO); |
1588 | case Intrinsic::mips_dpau_h_qbl: |
1589 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPAU_H_QBL); |
1590 | case Intrinsic::mips_dpau_h_qbr: |
1591 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPAU_H_QBR); |
1592 | case Intrinsic::mips_dpsu_h_qbl: |
1593 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPSU_H_QBL); |
1594 | case Intrinsic::mips_dpsu_h_qbr: |
1595 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPSU_H_QBR); |
1596 | case Intrinsic::mips_dpa_w_ph: |
1597 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPA_W_PH); |
1598 | case Intrinsic::mips_dps_w_ph: |
1599 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPS_W_PH); |
1600 | case Intrinsic::mips_dpax_w_ph: |
1601 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPAX_W_PH); |
1602 | case Intrinsic::mips_dpsx_w_ph: |
1603 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPSX_W_PH); |
1604 | case Intrinsic::mips_mulsa_w_ph: |
1605 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::MULSA_W_PH); |
1606 | case Intrinsic::mips_mult: |
1607 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::Mult); |
1608 | case Intrinsic::mips_multu: |
1609 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::Multu); |
1610 | case Intrinsic::mips_madd: |
1611 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::MAdd); |
1612 | case Intrinsic::mips_maddu: |
1613 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::MAddu); |
1614 | case Intrinsic::mips_msub: |
1615 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::MSub); |
1616 | case Intrinsic::mips_msubu: |
1617 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::MSubu); |
1618 | case Intrinsic::mips_addv_b: |
1619 | case Intrinsic::mips_addv_h: |
1620 | case Intrinsic::mips_addv_w: |
1621 | case Intrinsic::mips_addv_d: |
1622 | return DAG.getNode(Opcode: ISD::ADD, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
1623 | N2: Op->getOperand(Num: 2)); |
1624 | case Intrinsic::mips_addvi_b: |
1625 | case Intrinsic::mips_addvi_h: |
1626 | case Intrinsic::mips_addvi_w: |
1627 | case Intrinsic::mips_addvi_d: |
1628 | return DAG.getNode(Opcode: ISD::ADD, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
1629 | N2: lowerMSASplatImm(Op, ImmOp: 2, DAG)); |
1630 | case Intrinsic::mips_and_v: |
1631 | return DAG.getNode(Opcode: ISD::AND, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
1632 | N2: Op->getOperand(Num: 2)); |
1633 | case Intrinsic::mips_andi_b: |
1634 | return DAG.getNode(Opcode: ISD::AND, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
1635 | N2: lowerMSASplatImm(Op, ImmOp: 2, DAG)); |
1636 | case Intrinsic::mips_bclr_b: |
1637 | case Intrinsic::mips_bclr_h: |
1638 | case Intrinsic::mips_bclr_w: |
1639 | case Intrinsic::mips_bclr_d: |
1640 | return lowerMSABitClear(Op, DAG); |
1641 | case Intrinsic::mips_bclri_b: |
1642 | case Intrinsic::mips_bclri_h: |
1643 | case Intrinsic::mips_bclri_w: |
1644 | case Intrinsic::mips_bclri_d: |
1645 | return lowerMSABitClearImm(Op, DAG); |
1646 | case Intrinsic::mips_binsli_b: |
1647 | case Intrinsic::mips_binsli_h: |
1648 | case Intrinsic::mips_binsli_w: |
1649 | case Intrinsic::mips_binsli_d: { |
1650 | // binsli_x(IfClear, IfSet, nbits) -> (vselect LBitsMask, IfSet, IfClear) |
1651 | EVT VecTy = Op->getValueType(ResNo: 0); |
1652 | EVT EltTy = VecTy.getVectorElementType(); |
1653 | if (Op->getConstantOperandVal(Num: 3) >= EltTy.getSizeInBits()) |
1654 | report_fatal_error(reason: "Immediate out of range" ); |
1655 | APInt Mask = APInt::getHighBitsSet(numBits: EltTy.getSizeInBits(), |
1656 | hiBitsSet: Op->getConstantOperandVal(Num: 3) + 1); |
1657 | return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: VecTy, |
1658 | N1: DAG.getConstant(Val: Mask, DL, VT: VecTy, isTarget: true), |
1659 | N2: Op->getOperand(Num: 2), N3: Op->getOperand(Num: 1)); |
1660 | } |
1661 | case Intrinsic::mips_binsri_b: |
1662 | case Intrinsic::mips_binsri_h: |
1663 | case Intrinsic::mips_binsri_w: |
1664 | case Intrinsic::mips_binsri_d: { |
1665 | // binsri_x(IfClear, IfSet, nbits) -> (vselect RBitsMask, IfSet, IfClear) |
1666 | EVT VecTy = Op->getValueType(ResNo: 0); |
1667 | EVT EltTy = VecTy.getVectorElementType(); |
1668 | if (Op->getConstantOperandVal(Num: 3) >= EltTy.getSizeInBits()) |
1669 | report_fatal_error(reason: "Immediate out of range" ); |
1670 | APInt Mask = APInt::getLowBitsSet(numBits: EltTy.getSizeInBits(), |
1671 | loBitsSet: Op->getConstantOperandVal(Num: 3) + 1); |
1672 | return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: VecTy, |
1673 | N1: DAG.getConstant(Val: Mask, DL, VT: VecTy, isTarget: true), |
1674 | N2: Op->getOperand(Num: 2), N3: Op->getOperand(Num: 1)); |
1675 | } |
1676 | case Intrinsic::mips_bmnz_v: |
1677 | return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 3), |
1678 | N2: Op->getOperand(Num: 2), N3: Op->getOperand(Num: 1)); |
1679 | case Intrinsic::mips_bmnzi_b: |
1680 | return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: Op->getValueType(ResNo: 0), |
1681 | N1: lowerMSASplatImm(Op, ImmOp: 3, DAG), N2: Op->getOperand(Num: 2), |
1682 | N3: Op->getOperand(Num: 1)); |
1683 | case Intrinsic::mips_bmz_v: |
1684 | return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 3), |
1685 | N2: Op->getOperand(Num: 1), N3: Op->getOperand(Num: 2)); |
1686 | case Intrinsic::mips_bmzi_b: |
1687 | return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: Op->getValueType(ResNo: 0), |
1688 | N1: lowerMSASplatImm(Op, ImmOp: 3, DAG), N2: Op->getOperand(Num: 1), |
1689 | N3: Op->getOperand(Num: 2)); |
1690 | case Intrinsic::mips_bneg_b: |
1691 | case Intrinsic::mips_bneg_h: |
1692 | case Intrinsic::mips_bneg_w: |
1693 | case Intrinsic::mips_bneg_d: { |
1694 | EVT VecTy = Op->getValueType(ResNo: 0); |
1695 | SDValue One = DAG.getConstant(Val: 1, DL, VT: VecTy); |
1696 | |
1697 | return DAG.getNode(Opcode: ISD::XOR, DL, VT: VecTy, N1: Op->getOperand(Num: 1), |
1698 | N2: DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: One, |
1699 | N2: truncateVecElts(Op, DAG))); |
1700 | } |
1701 | case Intrinsic::mips_bnegi_b: |
1702 | case Intrinsic::mips_bnegi_h: |
1703 | case Intrinsic::mips_bnegi_w: |
1704 | case Intrinsic::mips_bnegi_d: |
1705 | return lowerMSABinaryBitImmIntr(Op, DAG, Opc: ISD::XOR, Imm: Op->getOperand(Num: 2), |
1706 | BigEndian: !Subtarget.isLittle()); |
1707 | case Intrinsic::mips_bnz_b: |
1708 | case Intrinsic::mips_bnz_h: |
1709 | case Intrinsic::mips_bnz_w: |
1710 | case Intrinsic::mips_bnz_d: |
1711 | return DAG.getNode(Opcode: MipsISD::VALL_NONZERO, DL, VT: Op->getValueType(ResNo: 0), |
1712 | Operand: Op->getOperand(Num: 1)); |
1713 | case Intrinsic::mips_bnz_v: |
1714 | return DAG.getNode(Opcode: MipsISD::VANY_NONZERO, DL, VT: Op->getValueType(ResNo: 0), |
1715 | Operand: Op->getOperand(Num: 1)); |
1716 | case Intrinsic::mips_bsel_v: |
1717 | // bsel_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear) |
1718 | return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: Op->getValueType(ResNo: 0), |
1719 | N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 3), |
1720 | N3: Op->getOperand(Num: 2)); |
1721 | case Intrinsic::mips_bseli_b: |
1722 | // bseli_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear) |
1723 | return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: Op->getValueType(ResNo: 0), |
1724 | N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 3, DAG), |
1725 | N3: Op->getOperand(Num: 2)); |
1726 | case Intrinsic::mips_bset_b: |
1727 | case Intrinsic::mips_bset_h: |
1728 | case Intrinsic::mips_bset_w: |
1729 | case Intrinsic::mips_bset_d: { |
1730 | EVT VecTy = Op->getValueType(ResNo: 0); |
1731 | SDValue One = DAG.getConstant(Val: 1, DL, VT: VecTy); |
1732 | |
1733 | return DAG.getNode(Opcode: ISD::OR, DL, VT: VecTy, N1: Op->getOperand(Num: 1), |
1734 | N2: DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: One, |
1735 | N2: truncateVecElts(Op, DAG))); |
1736 | } |
1737 | case Intrinsic::mips_bseti_b: |
1738 | case Intrinsic::mips_bseti_h: |
1739 | case Intrinsic::mips_bseti_w: |
1740 | case Intrinsic::mips_bseti_d: |
1741 | return lowerMSABinaryBitImmIntr(Op, DAG, Opc: ISD::OR, Imm: Op->getOperand(Num: 2), |
1742 | BigEndian: !Subtarget.isLittle()); |
1743 | case Intrinsic::mips_bz_b: |
1744 | case Intrinsic::mips_bz_h: |
1745 | case Intrinsic::mips_bz_w: |
1746 | case Intrinsic::mips_bz_d: |
1747 | return DAG.getNode(Opcode: MipsISD::VALL_ZERO, DL, VT: Op->getValueType(ResNo: 0), |
1748 | Operand: Op->getOperand(Num: 1)); |
1749 | case Intrinsic::mips_bz_v: |
1750 | return DAG.getNode(Opcode: MipsISD::VANY_ZERO, DL, VT: Op->getValueType(ResNo: 0), |
1751 | Operand: Op->getOperand(Num: 1)); |
1752 | case Intrinsic::mips_ceq_b: |
1753 | case Intrinsic::mips_ceq_h: |
1754 | case Intrinsic::mips_ceq_w: |
1755 | case Intrinsic::mips_ceq_d: |
1756 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1757 | RHS: Op->getOperand(Num: 2), Cond: ISD::SETEQ); |
1758 | case Intrinsic::mips_ceqi_b: |
1759 | case Intrinsic::mips_ceqi_h: |
1760 | case Intrinsic::mips_ceqi_w: |
1761 | case Intrinsic::mips_ceqi_d: |
1762 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1763 | RHS: lowerMSASplatImm(Op, ImmOp: 2, DAG, IsSigned: true), Cond: ISD::SETEQ); |
1764 | case Intrinsic::mips_cle_s_b: |
1765 | case Intrinsic::mips_cle_s_h: |
1766 | case Intrinsic::mips_cle_s_w: |
1767 | case Intrinsic::mips_cle_s_d: |
1768 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1769 | RHS: Op->getOperand(Num: 2), Cond: ISD::SETLE); |
1770 | case Intrinsic::mips_clei_s_b: |
1771 | case Intrinsic::mips_clei_s_h: |
1772 | case Intrinsic::mips_clei_s_w: |
1773 | case Intrinsic::mips_clei_s_d: |
1774 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1775 | RHS: lowerMSASplatImm(Op, ImmOp: 2, DAG, IsSigned: true), Cond: ISD::SETLE); |
1776 | case Intrinsic::mips_cle_u_b: |
1777 | case Intrinsic::mips_cle_u_h: |
1778 | case Intrinsic::mips_cle_u_w: |
1779 | case Intrinsic::mips_cle_u_d: |
1780 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1781 | RHS: Op->getOperand(Num: 2), Cond: ISD::SETULE); |
1782 | case Intrinsic::mips_clei_u_b: |
1783 | case Intrinsic::mips_clei_u_h: |
1784 | case Intrinsic::mips_clei_u_w: |
1785 | case Intrinsic::mips_clei_u_d: |
1786 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1787 | RHS: lowerMSASplatImm(Op, ImmOp: 2, DAG), Cond: ISD::SETULE); |
1788 | case Intrinsic::mips_clt_s_b: |
1789 | case Intrinsic::mips_clt_s_h: |
1790 | case Intrinsic::mips_clt_s_w: |
1791 | case Intrinsic::mips_clt_s_d: |
1792 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1793 | RHS: Op->getOperand(Num: 2), Cond: ISD::SETLT); |
1794 | case Intrinsic::mips_clti_s_b: |
1795 | case Intrinsic::mips_clti_s_h: |
1796 | case Intrinsic::mips_clti_s_w: |
1797 | case Intrinsic::mips_clti_s_d: |
1798 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1799 | RHS: lowerMSASplatImm(Op, ImmOp: 2, DAG, IsSigned: true), Cond: ISD::SETLT); |
1800 | case Intrinsic::mips_clt_u_b: |
1801 | case Intrinsic::mips_clt_u_h: |
1802 | case Intrinsic::mips_clt_u_w: |
1803 | case Intrinsic::mips_clt_u_d: |
1804 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1805 | RHS: Op->getOperand(Num: 2), Cond: ISD::SETULT); |
1806 | case Intrinsic::mips_clti_u_b: |
1807 | case Intrinsic::mips_clti_u_h: |
1808 | case Intrinsic::mips_clti_u_w: |
1809 | case Intrinsic::mips_clti_u_d: |
1810 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1811 | RHS: lowerMSASplatImm(Op, ImmOp: 2, DAG), Cond: ISD::SETULT); |
1812 | case Intrinsic::mips_copy_s_b: |
1813 | case Intrinsic::mips_copy_s_h: |
1814 | case Intrinsic::mips_copy_s_w: |
1815 | return lowerMSACopyIntr(Op, DAG, Opc: MipsISD::VEXTRACT_SEXT_ELT); |
1816 | case Intrinsic::mips_copy_s_d: |
1817 | if (Subtarget.hasMips64()) |
1818 | // Lower directly into VEXTRACT_SEXT_ELT since i64 is legal on Mips64. |
1819 | return lowerMSACopyIntr(Op, DAG, Opc: MipsISD::VEXTRACT_SEXT_ELT); |
1820 | else { |
1821 | // Lower into the generic EXTRACT_VECTOR_ELT node and let the type |
1822 | // legalizer and EXTRACT_VECTOR_ELT lowering sort it out. |
1823 | return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: SDLoc(Op), |
1824 | VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
1825 | N2: Op->getOperand(Num: 2)); |
1826 | } |
1827 | case Intrinsic::mips_copy_u_b: |
1828 | case Intrinsic::mips_copy_u_h: |
1829 | case Intrinsic::mips_copy_u_w: |
1830 | return lowerMSACopyIntr(Op, DAG, Opc: MipsISD::VEXTRACT_ZEXT_ELT); |
1831 | case Intrinsic::mips_copy_u_d: |
1832 | if (Subtarget.hasMips64()) |
1833 | // Lower directly into VEXTRACT_ZEXT_ELT since i64 is legal on Mips64. |
1834 | return lowerMSACopyIntr(Op, DAG, Opc: MipsISD::VEXTRACT_ZEXT_ELT); |
1835 | else { |
1836 | // Lower into the generic EXTRACT_VECTOR_ELT node and let the type |
1837 | // legalizer and EXTRACT_VECTOR_ELT lowering sort it out. |
1838 | // Note: When i64 is illegal, this results in copy_s.w instructions |
1839 | // instead of copy_u.w instructions. This makes no difference to the |
1840 | // behaviour since i64 is only illegal when the register file is 32-bit. |
1841 | return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: SDLoc(Op), |
1842 | VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
1843 | N2: Op->getOperand(Num: 2)); |
1844 | } |
1845 | case Intrinsic::mips_div_s_b: |
1846 | case Intrinsic::mips_div_s_h: |
1847 | case Intrinsic::mips_div_s_w: |
1848 | case Intrinsic::mips_div_s_d: |
1849 | return DAG.getNode(Opcode: ISD::SDIV, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
1850 | N2: Op->getOperand(Num: 2)); |
1851 | case Intrinsic::mips_div_u_b: |
1852 | case Intrinsic::mips_div_u_h: |
1853 | case Intrinsic::mips_div_u_w: |
1854 | case Intrinsic::mips_div_u_d: |
1855 | return DAG.getNode(Opcode: ISD::UDIV, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
1856 | N2: Op->getOperand(Num: 2)); |
1857 | case Intrinsic::mips_fadd_w: |
1858 | case Intrinsic::mips_fadd_d: |
1859 | // TODO: If intrinsics have fast-math-flags, propagate them. |
1860 | return DAG.getNode(Opcode: ISD::FADD, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
1861 | N2: Op->getOperand(Num: 2)); |
1862 | // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away |
1863 | case Intrinsic::mips_fceq_w: |
1864 | case Intrinsic::mips_fceq_d: |
1865 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1866 | RHS: Op->getOperand(Num: 2), Cond: ISD::SETOEQ); |
1867 | case Intrinsic::mips_fcle_w: |
1868 | case Intrinsic::mips_fcle_d: |
1869 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1870 | RHS: Op->getOperand(Num: 2), Cond: ISD::SETOLE); |
1871 | case Intrinsic::mips_fclt_w: |
1872 | case Intrinsic::mips_fclt_d: |
1873 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1874 | RHS: Op->getOperand(Num: 2), Cond: ISD::SETOLT); |
1875 | case Intrinsic::mips_fcne_w: |
1876 | case Intrinsic::mips_fcne_d: |
1877 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1878 | RHS: Op->getOperand(Num: 2), Cond: ISD::SETONE); |
1879 | case Intrinsic::mips_fcor_w: |
1880 | case Intrinsic::mips_fcor_d: |
1881 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1882 | RHS: Op->getOperand(Num: 2), Cond: ISD::SETO); |
1883 | case Intrinsic::mips_fcueq_w: |
1884 | case Intrinsic::mips_fcueq_d: |
1885 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1886 | RHS: Op->getOperand(Num: 2), Cond: ISD::SETUEQ); |
1887 | case Intrinsic::mips_fcule_w: |
1888 | case Intrinsic::mips_fcule_d: |
1889 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1890 | RHS: Op->getOperand(Num: 2), Cond: ISD::SETULE); |
1891 | case Intrinsic::mips_fcult_w: |
1892 | case Intrinsic::mips_fcult_d: |
1893 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1894 | RHS: Op->getOperand(Num: 2), Cond: ISD::SETULT); |
1895 | case Intrinsic::mips_fcun_w: |
1896 | case Intrinsic::mips_fcun_d: |
1897 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1898 | RHS: Op->getOperand(Num: 2), Cond: ISD::SETUO); |
1899 | case Intrinsic::mips_fcune_w: |
1900 | case Intrinsic::mips_fcune_d: |
1901 | return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1), |
1902 | RHS: Op->getOperand(Num: 2), Cond: ISD::SETUNE); |
1903 | case Intrinsic::mips_fdiv_w: |
1904 | case Intrinsic::mips_fdiv_d: |
1905 | // TODO: If intrinsics have fast-math-flags, propagate them. |
1906 | return DAG.getNode(Opcode: ISD::FDIV, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
1907 | N2: Op->getOperand(Num: 2)); |
1908 | case Intrinsic::mips_ffint_u_w: |
1909 | case Intrinsic::mips_ffint_u_d: |
1910 | return DAG.getNode(Opcode: ISD::UINT_TO_FP, DL, VT: Op->getValueType(ResNo: 0), |
1911 | Operand: Op->getOperand(Num: 1)); |
1912 | case Intrinsic::mips_ffint_s_w: |
1913 | case Intrinsic::mips_ffint_s_d: |
1914 | return DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT: Op->getValueType(ResNo: 0), |
1915 | Operand: Op->getOperand(Num: 1)); |
1916 | case Intrinsic::mips_fill_b: |
1917 | case Intrinsic::mips_fill_h: |
1918 | case Intrinsic::mips_fill_w: |
1919 | case Intrinsic::mips_fill_d: { |
1920 | EVT ResTy = Op->getValueType(ResNo: 0); |
1921 | SmallVector<SDValue, 16> Ops(ResTy.getVectorNumElements(), |
1922 | Op->getOperand(Num: 1)); |
1923 | |
1924 | // If ResTy is v2i64 then the type legalizer will break this node down into |
1925 | // an equivalent v4i32. |
1926 | return DAG.getBuildVector(VT: ResTy, DL, Ops); |
1927 | } |
1928 | case Intrinsic::mips_fexp2_w: |
1929 | case Intrinsic::mips_fexp2_d: { |
1930 | // TODO: If intrinsics have fast-math-flags, propagate them. |
1931 | EVT ResTy = Op->getValueType(ResNo: 0); |
1932 | return DAG.getNode( |
1933 | Opcode: ISD::FMUL, DL: SDLoc(Op), VT: ResTy, N1: Op->getOperand(Num: 1), |
1934 | N2: DAG.getNode(Opcode: ISD::FEXP2, DL: SDLoc(Op), VT: ResTy, Operand: Op->getOperand(Num: 2))); |
1935 | } |
1936 | case Intrinsic::mips_flog2_w: |
1937 | case Intrinsic::mips_flog2_d: |
1938 | return DAG.getNode(Opcode: ISD::FLOG2, DL, VT: Op->getValueType(ResNo: 0), Operand: Op->getOperand(Num: 1)); |
1939 | case Intrinsic::mips_fmadd_w: |
1940 | case Intrinsic::mips_fmadd_d: |
1941 | return DAG.getNode(Opcode: ISD::FMA, DL: SDLoc(Op), VT: Op->getValueType(ResNo: 0), |
1942 | N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2), N3: Op->getOperand(Num: 3)); |
1943 | case Intrinsic::mips_fmul_w: |
1944 | case Intrinsic::mips_fmul_d: |
1945 | // TODO: If intrinsics have fast-math-flags, propagate them. |
1946 | return DAG.getNode(Opcode: ISD::FMUL, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
1947 | N2: Op->getOperand(Num: 2)); |
1948 | case Intrinsic::mips_fmsub_w: |
1949 | case Intrinsic::mips_fmsub_d: { |
1950 | // TODO: If intrinsics have fast-math-flags, propagate them. |
1951 | return DAG.getNode(Opcode: MipsISD::FMS, DL: SDLoc(Op), VT: Op->getValueType(ResNo: 0), |
1952 | N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2), N3: Op->getOperand(Num: 3)); |
1953 | } |
1954 | case Intrinsic::mips_frint_w: |
1955 | case Intrinsic::mips_frint_d: |
1956 | return DAG.getNode(Opcode: ISD::FRINT, DL, VT: Op->getValueType(ResNo: 0), Operand: Op->getOperand(Num: 1)); |
1957 | case Intrinsic::mips_fsqrt_w: |
1958 | case Intrinsic::mips_fsqrt_d: |
1959 | return DAG.getNode(Opcode: ISD::FSQRT, DL, VT: Op->getValueType(ResNo: 0), Operand: Op->getOperand(Num: 1)); |
1960 | case Intrinsic::mips_fsub_w: |
1961 | case Intrinsic::mips_fsub_d: |
1962 | // TODO: If intrinsics have fast-math-flags, propagate them. |
1963 | return DAG.getNode(Opcode: ISD::FSUB, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
1964 | N2: Op->getOperand(Num: 2)); |
1965 | case Intrinsic::mips_ftrunc_u_w: |
1966 | case Intrinsic::mips_ftrunc_u_d: |
1967 | return DAG.getNode(Opcode: ISD::FP_TO_UINT, DL, VT: Op->getValueType(ResNo: 0), |
1968 | Operand: Op->getOperand(Num: 1)); |
1969 | case Intrinsic::mips_ftrunc_s_w: |
1970 | case Intrinsic::mips_ftrunc_s_d: |
1971 | return DAG.getNode(Opcode: ISD::FP_TO_SINT, DL, VT: Op->getValueType(ResNo: 0), |
1972 | Operand: Op->getOperand(Num: 1)); |
1973 | case Intrinsic::mips_ilvev_b: |
1974 | case Intrinsic::mips_ilvev_h: |
1975 | case Intrinsic::mips_ilvev_w: |
1976 | case Intrinsic::mips_ilvev_d: |
1977 | return DAG.getNode(Opcode: MipsISD::ILVEV, DL, VT: Op->getValueType(ResNo: 0), |
1978 | N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2)); |
1979 | case Intrinsic::mips_ilvl_b: |
1980 | case Intrinsic::mips_ilvl_h: |
1981 | case Intrinsic::mips_ilvl_w: |
1982 | case Intrinsic::mips_ilvl_d: |
1983 | return DAG.getNode(Opcode: MipsISD::ILVL, DL, VT: Op->getValueType(ResNo: 0), |
1984 | N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2)); |
1985 | case Intrinsic::mips_ilvod_b: |
1986 | case Intrinsic::mips_ilvod_h: |
1987 | case Intrinsic::mips_ilvod_w: |
1988 | case Intrinsic::mips_ilvod_d: |
1989 | return DAG.getNode(Opcode: MipsISD::ILVOD, DL, VT: Op->getValueType(ResNo: 0), |
1990 | N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2)); |
1991 | case Intrinsic::mips_ilvr_b: |
1992 | case Intrinsic::mips_ilvr_h: |
1993 | case Intrinsic::mips_ilvr_w: |
1994 | case Intrinsic::mips_ilvr_d: |
1995 | return DAG.getNode(Opcode: MipsISD::ILVR, DL, VT: Op->getValueType(ResNo: 0), |
1996 | N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2)); |
1997 | case Intrinsic::mips_insert_b: |
1998 | case Intrinsic::mips_insert_h: |
1999 | case Intrinsic::mips_insert_w: |
2000 | case Intrinsic::mips_insert_d: |
2001 | return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(Op), VT: Op->getValueType(ResNo: 0), |
2002 | N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 3), N3: Op->getOperand(Num: 2)); |
2003 | case Intrinsic::mips_insve_b: |
2004 | case Intrinsic::mips_insve_h: |
2005 | case Intrinsic::mips_insve_w: |
2006 | case Intrinsic::mips_insve_d: { |
2007 | // Report an error for out of range values. |
2008 | int64_t Max; |
2009 | switch (Intrinsic) { |
2010 | case Intrinsic::mips_insve_b: Max = 15; break; |
2011 | case Intrinsic::mips_insve_h: Max = 7; break; |
2012 | case Intrinsic::mips_insve_w: Max = 3; break; |
2013 | case Intrinsic::mips_insve_d: Max = 1; break; |
2014 | default: llvm_unreachable("Unmatched intrinsic" ); |
2015 | } |
2016 | int64_t Value = cast<ConstantSDNode>(Val: Op->getOperand(Num: 2))->getSExtValue(); |
2017 | if (Value < 0 || Value > Max) |
2018 | report_fatal_error(reason: "Immediate out of range" ); |
2019 | return DAG.getNode(Opcode: MipsISD::INSVE, DL, VT: Op->getValueType(ResNo: 0), |
2020 | N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2), N3: Op->getOperand(Num: 3), |
2021 | N4: DAG.getConstant(Val: 0, DL, VT: MVT::i32)); |
2022 | } |
2023 | case Intrinsic::mips_ldi_b: |
2024 | case Intrinsic::mips_ldi_h: |
2025 | case Intrinsic::mips_ldi_w: |
2026 | case Intrinsic::mips_ldi_d: |
2027 | return lowerMSASplatImm(Op, ImmOp: 1, DAG, IsSigned: true); |
2028 | case Intrinsic::mips_lsa: |
2029 | case Intrinsic::mips_dlsa: { |
2030 | EVT ResTy = Op->getValueType(ResNo: 0); |
2031 | return DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(Op), VT: ResTy, N1: Op->getOperand(Num: 1), |
2032 | N2: DAG.getNode(Opcode: ISD::SHL, DL: SDLoc(Op), VT: ResTy, |
2033 | N1: Op->getOperand(Num: 2), N2: Op->getOperand(Num: 3))); |
2034 | } |
2035 | case Intrinsic::mips_maddv_b: |
2036 | case Intrinsic::mips_maddv_h: |
2037 | case Intrinsic::mips_maddv_w: |
2038 | case Intrinsic::mips_maddv_d: { |
2039 | EVT ResTy = Op->getValueType(ResNo: 0); |
2040 | return DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(Op), VT: ResTy, N1: Op->getOperand(Num: 1), |
2041 | N2: DAG.getNode(Opcode: ISD::MUL, DL: SDLoc(Op), VT: ResTy, |
2042 | N1: Op->getOperand(Num: 2), N2: Op->getOperand(Num: 3))); |
2043 | } |
2044 | case Intrinsic::mips_max_s_b: |
2045 | case Intrinsic::mips_max_s_h: |
2046 | case Intrinsic::mips_max_s_w: |
2047 | case Intrinsic::mips_max_s_d: |
2048 | return DAG.getNode(Opcode: ISD::SMAX, DL, VT: Op->getValueType(ResNo: 0), |
2049 | N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2)); |
2050 | case Intrinsic::mips_max_u_b: |
2051 | case Intrinsic::mips_max_u_h: |
2052 | case Intrinsic::mips_max_u_w: |
2053 | case Intrinsic::mips_max_u_d: |
2054 | return DAG.getNode(Opcode: ISD::UMAX, DL, VT: Op->getValueType(ResNo: 0), |
2055 | N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2)); |
2056 | case Intrinsic::mips_maxi_s_b: |
2057 | case Intrinsic::mips_maxi_s_h: |
2058 | case Intrinsic::mips_maxi_s_w: |
2059 | case Intrinsic::mips_maxi_s_d: |
2060 | return DAG.getNode(Opcode: ISD::SMAX, DL, VT: Op->getValueType(ResNo: 0), |
2061 | N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG, IsSigned: true)); |
2062 | case Intrinsic::mips_maxi_u_b: |
2063 | case Intrinsic::mips_maxi_u_h: |
2064 | case Intrinsic::mips_maxi_u_w: |
2065 | case Intrinsic::mips_maxi_u_d: |
2066 | return DAG.getNode(Opcode: ISD::UMAX, DL, VT: Op->getValueType(ResNo: 0), |
2067 | N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG)); |
2068 | case Intrinsic::mips_min_s_b: |
2069 | case Intrinsic::mips_min_s_h: |
2070 | case Intrinsic::mips_min_s_w: |
2071 | case Intrinsic::mips_min_s_d: |
2072 | return DAG.getNode(Opcode: ISD::SMIN, DL, VT: Op->getValueType(ResNo: 0), |
2073 | N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2)); |
2074 | case Intrinsic::mips_min_u_b: |
2075 | case Intrinsic::mips_min_u_h: |
2076 | case Intrinsic::mips_min_u_w: |
2077 | case Intrinsic::mips_min_u_d: |
2078 | return DAG.getNode(Opcode: ISD::UMIN, DL, VT: Op->getValueType(ResNo: 0), |
2079 | N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2)); |
2080 | case Intrinsic::mips_mini_s_b: |
2081 | case Intrinsic::mips_mini_s_h: |
2082 | case Intrinsic::mips_mini_s_w: |
2083 | case Intrinsic::mips_mini_s_d: |
2084 | return DAG.getNode(Opcode: ISD::SMIN, DL, VT: Op->getValueType(ResNo: 0), |
2085 | N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG, IsSigned: true)); |
2086 | case Intrinsic::mips_mini_u_b: |
2087 | case Intrinsic::mips_mini_u_h: |
2088 | case Intrinsic::mips_mini_u_w: |
2089 | case Intrinsic::mips_mini_u_d: |
2090 | return DAG.getNode(Opcode: ISD::UMIN, DL, VT: Op->getValueType(ResNo: 0), |
2091 | N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG)); |
2092 | case Intrinsic::mips_mod_s_b: |
2093 | case Intrinsic::mips_mod_s_h: |
2094 | case Intrinsic::mips_mod_s_w: |
2095 | case Intrinsic::mips_mod_s_d: |
2096 | return DAG.getNode(Opcode: ISD::SREM, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
2097 | N2: Op->getOperand(Num: 2)); |
2098 | case Intrinsic::mips_mod_u_b: |
2099 | case Intrinsic::mips_mod_u_h: |
2100 | case Intrinsic::mips_mod_u_w: |
2101 | case Intrinsic::mips_mod_u_d: |
2102 | return DAG.getNode(Opcode: ISD::UREM, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
2103 | N2: Op->getOperand(Num: 2)); |
2104 | case Intrinsic::mips_mulv_b: |
2105 | case Intrinsic::mips_mulv_h: |
2106 | case Intrinsic::mips_mulv_w: |
2107 | case Intrinsic::mips_mulv_d: |
2108 | return DAG.getNode(Opcode: ISD::MUL, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
2109 | N2: Op->getOperand(Num: 2)); |
2110 | case Intrinsic::mips_msubv_b: |
2111 | case Intrinsic::mips_msubv_h: |
2112 | case Intrinsic::mips_msubv_w: |
2113 | case Intrinsic::mips_msubv_d: { |
2114 | EVT ResTy = Op->getValueType(ResNo: 0); |
2115 | return DAG.getNode(Opcode: ISD::SUB, DL: SDLoc(Op), VT: ResTy, N1: Op->getOperand(Num: 1), |
2116 | N2: DAG.getNode(Opcode: ISD::MUL, DL: SDLoc(Op), VT: ResTy, |
2117 | N1: Op->getOperand(Num: 2), N2: Op->getOperand(Num: 3))); |
2118 | } |
2119 | case Intrinsic::mips_nlzc_b: |
2120 | case Intrinsic::mips_nlzc_h: |
2121 | case Intrinsic::mips_nlzc_w: |
2122 | case Intrinsic::mips_nlzc_d: |
2123 | return DAG.getNode(Opcode: ISD::CTLZ, DL, VT: Op->getValueType(ResNo: 0), Operand: Op->getOperand(Num: 1)); |
2124 | case Intrinsic::mips_nor_v: { |
2125 | SDValue Res = DAG.getNode(Opcode: ISD::OR, DL, VT: Op->getValueType(ResNo: 0), |
2126 | N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2)); |
2127 | return DAG.getNOT(DL, Val: Res, VT: Res->getValueType(ResNo: 0)); |
2128 | } |
2129 | case Intrinsic::mips_nori_b: { |
2130 | SDValue Res = DAG.getNode(Opcode: ISD::OR, DL, VT: Op->getValueType(ResNo: 0), |
2131 | N1: Op->getOperand(Num: 1), |
2132 | N2: lowerMSASplatImm(Op, ImmOp: 2, DAG)); |
2133 | return DAG.getNOT(DL, Val: Res, VT: Res->getValueType(ResNo: 0)); |
2134 | } |
2135 | case Intrinsic::mips_or_v: |
2136 | return DAG.getNode(Opcode: ISD::OR, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
2137 | N2: Op->getOperand(Num: 2)); |
2138 | case Intrinsic::mips_ori_b: |
2139 | return DAG.getNode(Opcode: ISD::OR, DL, VT: Op->getValueType(ResNo: 0), |
2140 | N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG)); |
2141 | case Intrinsic::mips_pckev_b: |
2142 | case Intrinsic::mips_pckev_h: |
2143 | case Intrinsic::mips_pckev_w: |
2144 | case Intrinsic::mips_pckev_d: |
2145 | return DAG.getNode(Opcode: MipsISD::PCKEV, DL, VT: Op->getValueType(ResNo: 0), |
2146 | N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2)); |
2147 | case Intrinsic::mips_pckod_b: |
2148 | case Intrinsic::mips_pckod_h: |
2149 | case Intrinsic::mips_pckod_w: |
2150 | case Intrinsic::mips_pckod_d: |
2151 | return DAG.getNode(Opcode: MipsISD::PCKOD, DL, VT: Op->getValueType(ResNo: 0), |
2152 | N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2)); |
2153 | case Intrinsic::mips_pcnt_b: |
2154 | case Intrinsic::mips_pcnt_h: |
2155 | case Intrinsic::mips_pcnt_w: |
2156 | case Intrinsic::mips_pcnt_d: |
2157 | return DAG.getNode(Opcode: ISD::CTPOP, DL, VT: Op->getValueType(ResNo: 0), Operand: Op->getOperand(Num: 1)); |
2158 | case Intrinsic::mips_sat_s_b: |
2159 | case Intrinsic::mips_sat_s_h: |
2160 | case Intrinsic::mips_sat_s_w: |
2161 | case Intrinsic::mips_sat_s_d: |
2162 | case Intrinsic::mips_sat_u_b: |
2163 | case Intrinsic::mips_sat_u_h: |
2164 | case Intrinsic::mips_sat_u_w: |
2165 | case Intrinsic::mips_sat_u_d: { |
2166 | // Report an error for out of range values. |
2167 | int64_t Max; |
2168 | switch (Intrinsic) { |
2169 | case Intrinsic::mips_sat_s_b: |
2170 | case Intrinsic::mips_sat_u_b: Max = 7; break; |
2171 | case Intrinsic::mips_sat_s_h: |
2172 | case Intrinsic::mips_sat_u_h: Max = 15; break; |
2173 | case Intrinsic::mips_sat_s_w: |
2174 | case Intrinsic::mips_sat_u_w: Max = 31; break; |
2175 | case Intrinsic::mips_sat_s_d: |
2176 | case Intrinsic::mips_sat_u_d: Max = 63; break; |
2177 | default: llvm_unreachable("Unmatched intrinsic" ); |
2178 | } |
2179 | int64_t Value = cast<ConstantSDNode>(Val: Op->getOperand(Num: 2))->getSExtValue(); |
2180 | if (Value < 0 || Value > Max) |
2181 | report_fatal_error(reason: "Immediate out of range" ); |
2182 | return SDValue(); |
2183 | } |
2184 | case Intrinsic::mips_shf_b: |
2185 | case Intrinsic::mips_shf_h: |
2186 | case Intrinsic::mips_shf_w: { |
2187 | int64_t Value = cast<ConstantSDNode>(Val: Op->getOperand(Num: 2))->getSExtValue(); |
2188 | if (Value < 0 || Value > 255) |
2189 | report_fatal_error(reason: "Immediate out of range" ); |
2190 | return DAG.getNode(Opcode: MipsISD::SHF, DL, VT: Op->getValueType(ResNo: 0), |
2191 | N1: Op->getOperand(Num: 2), N2: Op->getOperand(Num: 1)); |
2192 | } |
2193 | case Intrinsic::mips_sldi_b: |
2194 | case Intrinsic::mips_sldi_h: |
2195 | case Intrinsic::mips_sldi_w: |
2196 | case Intrinsic::mips_sldi_d: { |
2197 | // Report an error for out of range values. |
2198 | int64_t Max; |
2199 | switch (Intrinsic) { |
2200 | case Intrinsic::mips_sldi_b: Max = 15; break; |
2201 | case Intrinsic::mips_sldi_h: Max = 7; break; |
2202 | case Intrinsic::mips_sldi_w: Max = 3; break; |
2203 | case Intrinsic::mips_sldi_d: Max = 1; break; |
2204 | default: llvm_unreachable("Unmatched intrinsic" ); |
2205 | } |
2206 | int64_t Value = cast<ConstantSDNode>(Val: Op->getOperand(Num: 3))->getSExtValue(); |
2207 | if (Value < 0 || Value > Max) |
2208 | report_fatal_error(reason: "Immediate out of range" ); |
2209 | return SDValue(); |
2210 | } |
2211 | case Intrinsic::mips_sll_b: |
2212 | case Intrinsic::mips_sll_h: |
2213 | case Intrinsic::mips_sll_w: |
2214 | case Intrinsic::mips_sll_d: |
2215 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
2216 | N2: truncateVecElts(Op, DAG)); |
2217 | case Intrinsic::mips_slli_b: |
2218 | case Intrinsic::mips_slli_h: |
2219 | case Intrinsic::mips_slli_w: |
2220 | case Intrinsic::mips_slli_d: |
2221 | return DAG.getNode(Opcode: ISD::SHL, DL, VT: Op->getValueType(ResNo: 0), |
2222 | N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG)); |
2223 | case Intrinsic::mips_splat_b: |
2224 | case Intrinsic::mips_splat_h: |
2225 | case Intrinsic::mips_splat_w: |
2226 | case Intrinsic::mips_splat_d: |
2227 | // We can't lower via VECTOR_SHUFFLE because it requires constant shuffle |
2228 | // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because |
2229 | // EXTRACT_VECTOR_ELT can't extract i64's on MIPS32. |
2230 | // Instead we lower to MipsISD::VSHF and match from there. |
2231 | return DAG.getNode(Opcode: MipsISD::VSHF, DL, VT: Op->getValueType(ResNo: 0), |
2232 | N1: lowerMSASplatZExt(Op, OpNr: 2, DAG), N2: Op->getOperand(Num: 1), |
2233 | N3: Op->getOperand(Num: 1)); |
2234 | case Intrinsic::mips_splati_b: |
2235 | case Intrinsic::mips_splati_h: |
2236 | case Intrinsic::mips_splati_w: |
2237 | case Intrinsic::mips_splati_d: |
2238 | return DAG.getNode(Opcode: MipsISD::VSHF, DL, VT: Op->getValueType(ResNo: 0), |
2239 | N1: lowerMSASplatImm(Op, ImmOp: 2, DAG), N2: Op->getOperand(Num: 1), |
2240 | N3: Op->getOperand(Num: 1)); |
2241 | case Intrinsic::mips_sra_b: |
2242 | case Intrinsic::mips_sra_h: |
2243 | case Intrinsic::mips_sra_w: |
2244 | case Intrinsic::mips_sra_d: |
2245 | return DAG.getNode(Opcode: ISD::SRA, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
2246 | N2: truncateVecElts(Op, DAG)); |
2247 | case Intrinsic::mips_srai_b: |
2248 | case Intrinsic::mips_srai_h: |
2249 | case Intrinsic::mips_srai_w: |
2250 | case Intrinsic::mips_srai_d: |
2251 | return DAG.getNode(Opcode: ISD::SRA, DL, VT: Op->getValueType(ResNo: 0), |
2252 | N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG)); |
2253 | case Intrinsic::mips_srari_b: |
2254 | case Intrinsic::mips_srari_h: |
2255 | case Intrinsic::mips_srari_w: |
2256 | case Intrinsic::mips_srari_d: { |
2257 | // Report an error for out of range values. |
2258 | int64_t Max; |
2259 | switch (Intrinsic) { |
2260 | case Intrinsic::mips_srari_b: Max = 7; break; |
2261 | case Intrinsic::mips_srari_h: Max = 15; break; |
2262 | case Intrinsic::mips_srari_w: Max = 31; break; |
2263 | case Intrinsic::mips_srari_d: Max = 63; break; |
2264 | default: llvm_unreachable("Unmatched intrinsic" ); |
2265 | } |
2266 | int64_t Value = cast<ConstantSDNode>(Val: Op->getOperand(Num: 2))->getSExtValue(); |
2267 | if (Value < 0 || Value > Max) |
2268 | report_fatal_error(reason: "Immediate out of range" ); |
2269 | return SDValue(); |
2270 | } |
2271 | case Intrinsic::mips_srl_b: |
2272 | case Intrinsic::mips_srl_h: |
2273 | case Intrinsic::mips_srl_w: |
2274 | case Intrinsic::mips_srl_d: |
2275 | return DAG.getNode(Opcode: ISD::SRL, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
2276 | N2: truncateVecElts(Op, DAG)); |
2277 | case Intrinsic::mips_srli_b: |
2278 | case Intrinsic::mips_srli_h: |
2279 | case Intrinsic::mips_srli_w: |
2280 | case Intrinsic::mips_srli_d: |
2281 | return DAG.getNode(Opcode: ISD::SRL, DL, VT: Op->getValueType(ResNo: 0), |
2282 | N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG)); |
2283 | case Intrinsic::mips_srlri_b: |
2284 | case Intrinsic::mips_srlri_h: |
2285 | case Intrinsic::mips_srlri_w: |
2286 | case Intrinsic::mips_srlri_d: { |
2287 | // Report an error for out of range values. |
2288 | int64_t Max; |
2289 | switch (Intrinsic) { |
2290 | case Intrinsic::mips_srlri_b: Max = 7; break; |
2291 | case Intrinsic::mips_srlri_h: Max = 15; break; |
2292 | case Intrinsic::mips_srlri_w: Max = 31; break; |
2293 | case Intrinsic::mips_srlri_d: Max = 63; break; |
2294 | default: llvm_unreachable("Unmatched intrinsic" ); |
2295 | } |
2296 | int64_t Value = cast<ConstantSDNode>(Val: Op->getOperand(Num: 2))->getSExtValue(); |
2297 | if (Value < 0 || Value > Max) |
2298 | report_fatal_error(reason: "Immediate out of range" ); |
2299 | return SDValue(); |
2300 | } |
2301 | case Intrinsic::mips_subv_b: |
2302 | case Intrinsic::mips_subv_h: |
2303 | case Intrinsic::mips_subv_w: |
2304 | case Intrinsic::mips_subv_d: |
2305 | return DAG.getNode(Opcode: ISD::SUB, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
2306 | N2: Op->getOperand(Num: 2)); |
2307 | case Intrinsic::mips_subvi_b: |
2308 | case Intrinsic::mips_subvi_h: |
2309 | case Intrinsic::mips_subvi_w: |
2310 | case Intrinsic::mips_subvi_d: |
2311 | return DAG.getNode(Opcode: ISD::SUB, DL, VT: Op->getValueType(ResNo: 0), |
2312 | N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG)); |
2313 | case Intrinsic::mips_vshf_b: |
2314 | case Intrinsic::mips_vshf_h: |
2315 | case Intrinsic::mips_vshf_w: |
2316 | case Intrinsic::mips_vshf_d: |
2317 | return DAG.getNode(Opcode: MipsISD::VSHF, DL, VT: Op->getValueType(ResNo: 0), |
2318 | N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2), N3: Op->getOperand(Num: 3)); |
2319 | case Intrinsic::mips_xor_v: |
2320 | return DAG.getNode(Opcode: ISD::XOR, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1), |
2321 | N2: Op->getOperand(Num: 2)); |
2322 | case Intrinsic::mips_xori_b: |
2323 | return DAG.getNode(Opcode: ISD::XOR, DL, VT: Op->getValueType(ResNo: 0), |
2324 | N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG)); |
2325 | case Intrinsic::thread_pointer: { |
2326 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
2327 | return DAG.getNode(Opcode: MipsISD::ThreadPointer, DL, VT: PtrVT); |
2328 | } |
2329 | } |
2330 | } |
2331 | |
2332 | static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, |
2333 | const MipsSubtarget &Subtarget) { |
2334 | SDLoc DL(Op); |
2335 | SDValue ChainIn = Op->getOperand(Num: 0); |
2336 | SDValue Address = Op->getOperand(Num: 2); |
2337 | SDValue Offset = Op->getOperand(Num: 3); |
2338 | EVT ResTy = Op->getValueType(ResNo: 0); |
2339 | EVT PtrTy = Address->getValueType(ResNo: 0); |
2340 | |
2341 | // For N64 addresses have the underlying type MVT::i64. This intrinsic |
2342 | // however takes an i32 signed constant offset. The actual type of the |
2343 | // intrinsic is a scaled signed i10. |
2344 | if (Subtarget.isABI_N64()) |
2345 | Offset = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: PtrTy, Operand: Offset); |
2346 | |
2347 | Address = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrTy, N1: Address, N2: Offset); |
2348 | return DAG.getLoad(VT: ResTy, dl: DL, Chain: ChainIn, Ptr: Address, PtrInfo: MachinePointerInfo(), |
2349 | Alignment: Align(16)); |
2350 | } |
2351 | |
2352 | SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, |
2353 | SelectionDAG &DAG) const { |
2354 | unsigned Intr = Op->getConstantOperandVal(Num: 1); |
2355 | switch (Intr) { |
2356 | default: |
2357 | return SDValue(); |
2358 | case Intrinsic::mips_extp: |
2359 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::EXTP); |
2360 | case Intrinsic::mips_extpdp: |
2361 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::EXTPDP); |
2362 | case Intrinsic::mips_extr_w: |
2363 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::EXTR_W); |
2364 | case Intrinsic::mips_extr_r_w: |
2365 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::EXTR_R_W); |
2366 | case Intrinsic::mips_extr_rs_w: |
2367 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::EXTR_RS_W); |
2368 | case Intrinsic::mips_extr_s_h: |
2369 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::EXTR_S_H); |
2370 | case Intrinsic::mips_mthlip: |
2371 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::MTHLIP); |
2372 | case Intrinsic::mips_mulsaq_s_w_ph: |
2373 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::MULSAQ_S_W_PH); |
2374 | case Intrinsic::mips_maq_s_w_phl: |
2375 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::MAQ_S_W_PHL); |
2376 | case Intrinsic::mips_maq_s_w_phr: |
2377 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::MAQ_S_W_PHR); |
2378 | case Intrinsic::mips_maq_sa_w_phl: |
2379 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::MAQ_SA_W_PHL); |
2380 | case Intrinsic::mips_maq_sa_w_phr: |
2381 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::MAQ_SA_W_PHR); |
2382 | case Intrinsic::mips_dpaq_s_w_ph: |
2383 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPAQ_S_W_PH); |
2384 | case Intrinsic::mips_dpsq_s_w_ph: |
2385 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPSQ_S_W_PH); |
2386 | case Intrinsic::mips_dpaq_sa_l_w: |
2387 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPAQ_SA_L_W); |
2388 | case Intrinsic::mips_dpsq_sa_l_w: |
2389 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPSQ_SA_L_W); |
2390 | case Intrinsic::mips_dpaqx_s_w_ph: |
2391 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPAQX_S_W_PH); |
2392 | case Intrinsic::mips_dpaqx_sa_w_ph: |
2393 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPAQX_SA_W_PH); |
2394 | case Intrinsic::mips_dpsqx_s_w_ph: |
2395 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPSQX_S_W_PH); |
2396 | case Intrinsic::mips_dpsqx_sa_w_ph: |
2397 | return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPSQX_SA_W_PH); |
2398 | case Intrinsic::mips_ld_b: |
2399 | case Intrinsic::mips_ld_h: |
2400 | case Intrinsic::mips_ld_w: |
2401 | case Intrinsic::mips_ld_d: |
2402 | return lowerMSALoadIntr(Op, DAG, Intr, Subtarget); |
2403 | } |
2404 | } |
2405 | |
2406 | static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, |
2407 | const MipsSubtarget &Subtarget) { |
2408 | SDLoc DL(Op); |
2409 | SDValue ChainIn = Op->getOperand(Num: 0); |
2410 | SDValue Value = Op->getOperand(Num: 2); |
2411 | SDValue Address = Op->getOperand(Num: 3); |
2412 | SDValue Offset = Op->getOperand(Num: 4); |
2413 | EVT PtrTy = Address->getValueType(ResNo: 0); |
2414 | |
2415 | // For N64 addresses have the underlying type MVT::i64. This intrinsic |
2416 | // however takes an i32 signed constant offset. The actual type of the |
2417 | // intrinsic is a scaled signed i10. |
2418 | if (Subtarget.isABI_N64()) |
2419 | Offset = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: PtrTy, Operand: Offset); |
2420 | |
2421 | Address = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrTy, N1: Address, N2: Offset); |
2422 | |
2423 | return DAG.getStore(Chain: ChainIn, dl: DL, Val: Value, Ptr: Address, PtrInfo: MachinePointerInfo(), |
2424 | Alignment: Align(16)); |
2425 | } |
2426 | |
2427 | SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op, |
2428 | SelectionDAG &DAG) const { |
2429 | unsigned Intr = Op->getConstantOperandVal(Num: 1); |
2430 | switch (Intr) { |
2431 | default: |
2432 | return SDValue(); |
2433 | case Intrinsic::mips_st_b: |
2434 | case Intrinsic::mips_st_h: |
2435 | case Intrinsic::mips_st_w: |
2436 | case Intrinsic::mips_st_d: |
2437 | return lowerMSAStoreIntr(Op, DAG, Intr, Subtarget); |
2438 | } |
2439 | } |
2440 | |
2441 | // Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT. |
2442 | // |
2443 | // The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We |
2444 | // choose to sign-extend but we could have equally chosen zero-extend. The |
2445 | // DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT |
2446 | // result into this node later (possibly changing it to a zero-extend in the |
2447 | // process). |
2448 | SDValue MipsSETargetLowering:: |
2449 | (SDValue Op, SelectionDAG &DAG) const { |
2450 | SDLoc DL(Op); |
2451 | EVT ResTy = Op->getValueType(ResNo: 0); |
2452 | SDValue Op0 = Op->getOperand(Num: 0); |
2453 | EVT VecTy = Op0->getValueType(ResNo: 0); |
2454 | |
2455 | if (!VecTy.is128BitVector()) |
2456 | return SDValue(); |
2457 | |
2458 | if (ResTy.isInteger()) { |
2459 | SDValue Op1 = Op->getOperand(Num: 1); |
2460 | EVT EltTy = VecTy.getVectorElementType(); |
2461 | return DAG.getNode(Opcode: MipsISD::VEXTRACT_SEXT_ELT, DL, VT: ResTy, N1: Op0, N2: Op1, |
2462 | N3: DAG.getValueType(EltTy)); |
2463 | } |
2464 | |
2465 | return Op; |
2466 | } |
2467 | |
2468 | static bool isConstantOrUndef(const SDValue Op) { |
2469 | if (Op->isUndef()) |
2470 | return true; |
2471 | if (isa<ConstantSDNode>(Val: Op)) |
2472 | return true; |
2473 | if (isa<ConstantFPSDNode>(Val: Op)) |
2474 | return true; |
2475 | return false; |
2476 | } |
2477 | |
2478 | static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) { |
2479 | for (unsigned i = 0; i < Op->getNumOperands(); ++i) |
2480 | if (isConstantOrUndef(Op: Op->getOperand(Num: i))) |
2481 | return true; |
2482 | return false; |
2483 | } |
2484 | |
2485 | // Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the |
2486 | // backend. |
2487 | // |
2488 | // Lowers according to the following rules: |
2489 | // - Constant splats are legal as-is as long as the SplatBitSize is a power of |
2490 | // 2 less than or equal to 64 and the value fits into a signed 10-bit |
2491 | // immediate |
2492 | // - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize |
2493 | // is a power of 2 less than or equal to 64 and the value does not fit into a |
2494 | // signed 10-bit immediate |
2495 | // - Non-constant splats are legal as-is. |
2496 | // - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT. |
2497 | // - All others are illegal and must be expanded. |
2498 | SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op, |
2499 | SelectionDAG &DAG) const { |
2500 | BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Val&: Op); |
2501 | EVT ResTy = Op->getValueType(ResNo: 0); |
2502 | SDLoc DL(Op); |
2503 | APInt SplatValue, SplatUndef; |
2504 | unsigned SplatBitSize; |
2505 | bool HasAnyUndefs; |
2506 | |
2507 | if (!Subtarget.hasMSA() || !ResTy.is128BitVector()) |
2508 | return SDValue(); |
2509 | |
2510 | if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, |
2511 | HasAnyUndefs, MinSplatBits: 8, |
2512 | isBigEndian: !Subtarget.isLittle()) && SplatBitSize <= 64) { |
2513 | // We can only cope with 8, 16, 32, or 64-bit elements |
2514 | if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 && |
2515 | SplatBitSize != 64) |
2516 | return SDValue(); |
2517 | |
2518 | // If the value isn't an integer type we will have to bitcast |
2519 | // from an integer type first. Also, if there are any undefs, we must |
2520 | // lower them to defined values first. |
2521 | if (ResTy.isInteger() && !HasAnyUndefs) |
2522 | return Op; |
2523 | |
2524 | EVT ViaVecTy; |
2525 | |
2526 | switch (SplatBitSize) { |
2527 | default: |
2528 | return SDValue(); |
2529 | case 8: |
2530 | ViaVecTy = MVT::v16i8; |
2531 | break; |
2532 | case 16: |
2533 | ViaVecTy = MVT::v8i16; |
2534 | break; |
2535 | case 32: |
2536 | ViaVecTy = MVT::v4i32; |
2537 | break; |
2538 | case 64: |
2539 | // There's no fill.d to fall back on for 64-bit values |
2540 | return SDValue(); |
2541 | } |
2542 | |
2543 | // SelectionDAG::getConstant will promote SplatValue appropriately. |
2544 | SDValue Result = DAG.getConstant(Val: SplatValue, DL, VT: ViaVecTy); |
2545 | |
2546 | // Bitcast to the type we originally wanted |
2547 | if (ViaVecTy != ResTy) |
2548 | Result = DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(Node), VT: ResTy, Operand: Result); |
2549 | |
2550 | return Result; |
2551 | } else if (DAG.isSplatValue(V: Op, /* AllowUndefs */ false)) |
2552 | return Op; |
2553 | else if (!isConstantOrUndefBUILD_VECTOR(Op: Node)) { |
2554 | // Use INSERT_VECTOR_ELT operations rather than expand to stores. |
2555 | // The resulting code is the same length as the expansion, but it doesn't |
2556 | // use memory operations |
2557 | EVT ResTy = Node->getValueType(ResNo: 0); |
2558 | |
2559 | assert(ResTy.isVector()); |
2560 | |
2561 | unsigned NumElts = ResTy.getVectorNumElements(); |
2562 | SDValue Vector = DAG.getUNDEF(VT: ResTy); |
2563 | for (unsigned i = 0; i < NumElts; ++i) { |
2564 | Vector = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: ResTy, N1: Vector, |
2565 | N2: Node->getOperand(Num: i), |
2566 | N3: DAG.getConstant(Val: i, DL, VT: MVT::i32)); |
2567 | } |
2568 | return Vector; |
2569 | } |
2570 | |
2571 | return SDValue(); |
2572 | } |
2573 | |
2574 | // Lower VECTOR_SHUFFLE into SHF (if possible). |
2575 | // |
2576 | // SHF splits the vector into blocks of four elements, then shuffles these |
2577 | // elements according to a <4 x i2> constant (encoded as an integer immediate). |
2578 | // |
2579 | // It is therefore possible to lower into SHF when the mask takes the form: |
2580 | // <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...> |
2581 | // When undef's appear they are treated as if they were whatever value is |
2582 | // necessary in order to fit the above forms. |
2583 | // |
2584 | // For example: |
2585 | // %2 = shufflevector <8 x i16> %0, <8 x i16> undef, |
2586 | // <8 x i32> <i32 3, i32 2, i32 1, i32 0, |
2587 | // i32 7, i32 6, i32 5, i32 4> |
2588 | // is lowered to: |
2589 | // (SHF_H $w0, $w1, 27) |
2590 | // where the 27 comes from: |
2591 | // 3 + (2 << 2) + (1 << 4) + (0 << 6) |
2592 | static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy, |
2593 | SmallVector<int, 16> Indices, |
2594 | SelectionDAG &DAG) { |
2595 | int SHFIndices[4] = { -1, -1, -1, -1 }; |
2596 | |
2597 | if (Indices.size() < 4) |
2598 | return SDValue(); |
2599 | |
2600 | for (unsigned i = 0; i < 4; ++i) { |
2601 | for (unsigned j = i; j < Indices.size(); j += 4) { |
2602 | int Idx = Indices[j]; |
2603 | |
2604 | // Convert from vector index to 4-element subvector index |
2605 | // If an index refers to an element outside of the subvector then give up |
2606 | if (Idx != -1) { |
2607 | Idx -= 4 * (j / 4); |
2608 | if (Idx < 0 || Idx >= 4) |
2609 | return SDValue(); |
2610 | } |
2611 | |
2612 | // If the mask has an undef, replace it with the current index. |
2613 | // Note that it might still be undef if the current index is also undef |
2614 | if (SHFIndices[i] == -1) |
2615 | SHFIndices[i] = Idx; |
2616 | |
2617 | // Check that non-undef values are the same as in the mask. If they |
2618 | // aren't then give up |
2619 | if (!(Idx == -1 || Idx == SHFIndices[i])) |
2620 | return SDValue(); |
2621 | } |
2622 | } |
2623 | |
2624 | // Calculate the immediate. Replace any remaining undefs with zero |
2625 | APInt Imm(32, 0); |
2626 | for (int i = 3; i >= 0; --i) { |
2627 | int Idx = SHFIndices[i]; |
2628 | |
2629 | if (Idx == -1) |
2630 | Idx = 0; |
2631 | |
2632 | Imm <<= 2; |
2633 | Imm |= Idx & 0x3; |
2634 | } |
2635 | |
2636 | SDLoc DL(Op); |
2637 | return DAG.getNode(Opcode: MipsISD::SHF, DL, VT: ResTy, |
2638 | N1: DAG.getTargetConstant(Val: Imm, DL, VT: MVT::i32), |
2639 | N2: Op->getOperand(Num: 0)); |
2640 | } |
2641 | |
2642 | /// Determine whether a range fits a regular pattern of values. |
2643 | /// This function accounts for the possibility of jumping over the End iterator. |
2644 | template <typename ValType> |
2645 | static bool |
2646 | fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin, |
2647 | unsigned CheckStride, |
2648 | typename SmallVectorImpl<ValType>::const_iterator End, |
2649 | ValType ExpectedIndex, unsigned ExpectedIndexStride) { |
2650 | auto &I = Begin; |
2651 | |
2652 | while (I != End) { |
2653 | if (*I != -1 && *I != ExpectedIndex) |
2654 | return false; |
2655 | ExpectedIndex += ExpectedIndexStride; |
2656 | |
2657 | // Incrementing past End is undefined behaviour so we must increment one |
2658 | // step at a time and check for End at each step. |
2659 | for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I) |
2660 | ; // Empty loop body. |
2661 | } |
2662 | return true; |
2663 | } |
2664 | |
2665 | // Determine whether VECTOR_SHUFFLE is a SPLATI. |
2666 | // |
2667 | // It is a SPLATI when the mask is: |
2668 | // <x, x, x, ...> |
2669 | // where x is any valid index. |
2670 | // |
2671 | // When undef's appear in the mask they are treated as if they were whatever |
2672 | // value is necessary in order to fit the above form. |
2673 | static bool isVECTOR_SHUFFLE_SPLATI(SDValue Op, EVT ResTy, |
2674 | SmallVector<int, 16> Indices, |
2675 | SelectionDAG &DAG) { |
2676 | assert((Indices.size() % 2) == 0); |
2677 | |
2678 | int SplatIndex = -1; |
2679 | for (const auto &V : Indices) { |
2680 | if (V != -1) { |
2681 | SplatIndex = V; |
2682 | break; |
2683 | } |
2684 | } |
2685 | |
2686 | return fitsRegularPattern<int>(Begin: Indices.begin(), CheckStride: 1, End: Indices.end(), ExpectedIndex: SplatIndex, |
2687 | ExpectedIndexStride: 0); |
2688 | } |
2689 | |
2690 | // Lower VECTOR_SHUFFLE into ILVEV (if possible). |
2691 | // |
2692 | // ILVEV interleaves the even elements from each vector. |
2693 | // |
2694 | // It is possible to lower into ILVEV when the mask consists of two of the |
2695 | // following forms interleaved: |
2696 | // <0, 2, 4, ...> |
2697 | // <n, n+2, n+4, ...> |
2698 | // where n is the number of elements in the vector. |
2699 | // For example: |
2700 | // <0, 0, 2, 2, 4, 4, ...> |
2701 | // <0, n, 2, n+2, 4, n+4, ...> |
2702 | // |
2703 | // When undef's appear in the mask they are treated as if they were whatever |
2704 | // value is necessary in order to fit the above forms. |
2705 | static SDValue lowerVECTOR_SHUFFLE_ILVEV(SDValue Op, EVT ResTy, |
2706 | SmallVector<int, 16> Indices, |
2707 | SelectionDAG &DAG) { |
2708 | assert((Indices.size() % 2) == 0); |
2709 | |
2710 | SDValue Wt; |
2711 | SDValue Ws; |
2712 | const auto &Begin = Indices.begin(); |
2713 | const auto &End = Indices.end(); |
2714 | |
2715 | // Check even elements are taken from the even elements of one half or the |
2716 | // other and pick an operand accordingly. |
2717 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 2)) |
2718 | Wt = Op->getOperand(Num: 0); |
2719 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Indices.size(), ExpectedIndexStride: 2)) |
2720 | Wt = Op->getOperand(Num: 1); |
2721 | else |
2722 | return SDValue(); |
2723 | |
2724 | // Check odd elements are taken from the even elements of one half or the |
2725 | // other and pick an operand accordingly. |
2726 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 2)) |
2727 | Ws = Op->getOperand(Num: 0); |
2728 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Indices.size(), ExpectedIndexStride: 2)) |
2729 | Ws = Op->getOperand(Num: 1); |
2730 | else |
2731 | return SDValue(); |
2732 | |
2733 | return DAG.getNode(Opcode: MipsISD::ILVEV, DL: SDLoc(Op), VT: ResTy, N1: Ws, N2: Wt); |
2734 | } |
2735 | |
2736 | // Lower VECTOR_SHUFFLE into ILVOD (if possible). |
2737 | // |
2738 | // ILVOD interleaves the odd elements from each vector. |
2739 | // |
2740 | // It is possible to lower into ILVOD when the mask consists of two of the |
2741 | // following forms interleaved: |
2742 | // <1, 3, 5, ...> |
2743 | // <n+1, n+3, n+5, ...> |
2744 | // where n is the number of elements in the vector. |
2745 | // For example: |
2746 | // <1, 1, 3, 3, 5, 5, ...> |
2747 | // <1, n+1, 3, n+3, 5, n+5, ...> |
2748 | // |
2749 | // When undef's appear in the mask they are treated as if they were whatever |
2750 | // value is necessary in order to fit the above forms. |
2751 | static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy, |
2752 | SmallVector<int, 16> Indices, |
2753 | SelectionDAG &DAG) { |
2754 | assert((Indices.size() % 2) == 0); |
2755 | |
2756 | SDValue Wt; |
2757 | SDValue Ws; |
2758 | const auto &Begin = Indices.begin(); |
2759 | const auto &End = Indices.end(); |
2760 | |
2761 | // Check even elements are taken from the odd elements of one half or the |
2762 | // other and pick an operand accordingly. |
2763 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 1, ExpectedIndexStride: 2)) |
2764 | Wt = Op->getOperand(Num: 0); |
2765 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Indices.size() + 1, ExpectedIndexStride: 2)) |
2766 | Wt = Op->getOperand(Num: 1); |
2767 | else |
2768 | return SDValue(); |
2769 | |
2770 | // Check odd elements are taken from the odd elements of one half or the |
2771 | // other and pick an operand accordingly. |
2772 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 1, ExpectedIndexStride: 2)) |
2773 | Ws = Op->getOperand(Num: 0); |
2774 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Indices.size() + 1, ExpectedIndexStride: 2)) |
2775 | Ws = Op->getOperand(Num: 1); |
2776 | else |
2777 | return SDValue(); |
2778 | |
2779 | return DAG.getNode(Opcode: MipsISD::ILVOD, DL: SDLoc(Op), VT: ResTy, N1: Ws, N2: Wt); |
2780 | } |
2781 | |
2782 | // Lower VECTOR_SHUFFLE into ILVR (if possible). |
2783 | // |
2784 | // ILVR interleaves consecutive elements from the right (lowest-indexed) half of |
2785 | // each vector. |
2786 | // |
2787 | // It is possible to lower into ILVR when the mask consists of two of the |
2788 | // following forms interleaved: |
2789 | // <0, 1, 2, ...> |
2790 | // <n, n+1, n+2, ...> |
2791 | // where n is the number of elements in the vector. |
2792 | // For example: |
2793 | // <0, 0, 1, 1, 2, 2, ...> |
2794 | // <0, n, 1, n+1, 2, n+2, ...> |
2795 | // |
2796 | // When undef's appear in the mask they are treated as if they were whatever |
2797 | // value is necessary in order to fit the above forms. |
2798 | static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy, |
2799 | SmallVector<int, 16> Indices, |
2800 | SelectionDAG &DAG) { |
2801 | assert((Indices.size() % 2) == 0); |
2802 | |
2803 | SDValue Wt; |
2804 | SDValue Ws; |
2805 | const auto &Begin = Indices.begin(); |
2806 | const auto &End = Indices.end(); |
2807 | |
2808 | // Check even elements are taken from the right (lowest-indexed) elements of |
2809 | // one half or the other and pick an operand accordingly. |
2810 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 1)) |
2811 | Wt = Op->getOperand(Num: 0); |
2812 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Indices.size(), ExpectedIndexStride: 1)) |
2813 | Wt = Op->getOperand(Num: 1); |
2814 | else |
2815 | return SDValue(); |
2816 | |
2817 | // Check odd elements are taken from the right (lowest-indexed) elements of |
2818 | // one half or the other and pick an operand accordingly. |
2819 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 1)) |
2820 | Ws = Op->getOperand(Num: 0); |
2821 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Indices.size(), ExpectedIndexStride: 1)) |
2822 | Ws = Op->getOperand(Num: 1); |
2823 | else |
2824 | return SDValue(); |
2825 | |
2826 | return DAG.getNode(Opcode: MipsISD::ILVR, DL: SDLoc(Op), VT: ResTy, N1: Ws, N2: Wt); |
2827 | } |
2828 | |
2829 | // Lower VECTOR_SHUFFLE into ILVL (if possible). |
2830 | // |
2831 | // ILVL interleaves consecutive elements from the left (highest-indexed) half |
2832 | // of each vector. |
2833 | // |
2834 | // It is possible to lower into ILVL when the mask consists of two of the |
2835 | // following forms interleaved: |
2836 | // <x, x+1, x+2, ...> |
2837 | // <n+x, n+x+1, n+x+2, ...> |
2838 | // where n is the number of elements in the vector and x is half n. |
2839 | // For example: |
2840 | // <x, x, x+1, x+1, x+2, x+2, ...> |
2841 | // <x, n+x, x+1, n+x+1, x+2, n+x+2, ...> |
2842 | // |
2843 | // When undef's appear in the mask they are treated as if they were whatever |
2844 | // value is necessary in order to fit the above forms. |
2845 | static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy, |
2846 | SmallVector<int, 16> Indices, |
2847 | SelectionDAG &DAG) { |
2848 | assert((Indices.size() % 2) == 0); |
2849 | |
2850 | unsigned HalfSize = Indices.size() / 2; |
2851 | SDValue Wt; |
2852 | SDValue Ws; |
2853 | const auto &Begin = Indices.begin(); |
2854 | const auto &End = Indices.end(); |
2855 | |
2856 | // Check even elements are taken from the left (highest-indexed) elements of |
2857 | // one half or the other and pick an operand accordingly. |
2858 | if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1)) |
2859 | Wt = Op->getOperand(Num: 0); |
2860 | else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Indices.size() + HalfSize, ExpectedIndexStride: 1)) |
2861 | Wt = Op->getOperand(Num: 1); |
2862 | else |
2863 | return SDValue(); |
2864 | |
2865 | // Check odd elements are taken from the left (highest-indexed) elements of |
2866 | // one half or the other and pick an operand accordingly. |
2867 | if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1)) |
2868 | Ws = Op->getOperand(Num: 0); |
2869 | else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Indices.size() + HalfSize, |
2870 | ExpectedIndexStride: 1)) |
2871 | Ws = Op->getOperand(Num: 1); |
2872 | else |
2873 | return SDValue(); |
2874 | |
2875 | return DAG.getNode(Opcode: MipsISD::ILVL, DL: SDLoc(Op), VT: ResTy, N1: Ws, N2: Wt); |
2876 | } |
2877 | |
2878 | // Lower VECTOR_SHUFFLE into PCKEV (if possible). |
2879 | // |
2880 | // PCKEV copies the even elements of each vector into the result vector. |
2881 | // |
2882 | // It is possible to lower into PCKEV when the mask consists of two of the |
2883 | // following forms concatenated: |
2884 | // <0, 2, 4, ...> |
2885 | // <n, n+2, n+4, ...> |
2886 | // where n is the number of elements in the vector. |
2887 | // For example: |
2888 | // <0, 2, 4, ..., 0, 2, 4, ...> |
2889 | // <0, 2, 4, ..., n, n+2, n+4, ...> |
2890 | // |
2891 | // When undef's appear in the mask they are treated as if they were whatever |
2892 | // value is necessary in order to fit the above forms. |
2893 | static SDValue lowerVECTOR_SHUFFLE_PCKEV(SDValue Op, EVT ResTy, |
2894 | SmallVector<int, 16> Indices, |
2895 | SelectionDAG &DAG) { |
2896 | assert((Indices.size() % 2) == 0); |
2897 | |
2898 | SDValue Wt; |
2899 | SDValue Ws; |
2900 | const auto &Begin = Indices.begin(); |
2901 | const auto &Mid = Indices.begin() + Indices.size() / 2; |
2902 | const auto &End = Indices.end(); |
2903 | |
2904 | if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: 0, ExpectedIndexStride: 2)) |
2905 | Wt = Op->getOperand(Num: 0); |
2906 | else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: Indices.size(), ExpectedIndexStride: 2)) |
2907 | Wt = Op->getOperand(Num: 1); |
2908 | else |
2909 | return SDValue(); |
2910 | |
2911 | if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: 0, ExpectedIndexStride: 2)) |
2912 | Ws = Op->getOperand(Num: 0); |
2913 | else if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: Indices.size(), ExpectedIndexStride: 2)) |
2914 | Ws = Op->getOperand(Num: 1); |
2915 | else |
2916 | return SDValue(); |
2917 | |
2918 | return DAG.getNode(Opcode: MipsISD::PCKEV, DL: SDLoc(Op), VT: ResTy, N1: Ws, N2: Wt); |
2919 | } |
2920 | |
2921 | // Lower VECTOR_SHUFFLE into PCKOD (if possible). |
2922 | // |
2923 | // PCKOD copies the odd elements of each vector into the result vector. |
2924 | // |
2925 | // It is possible to lower into PCKOD when the mask consists of two of the |
2926 | // following forms concatenated: |
2927 | // <1, 3, 5, ...> |
2928 | // <n+1, n+3, n+5, ...> |
2929 | // where n is the number of elements in the vector. |
2930 | // For example: |
2931 | // <1, 3, 5, ..., 1, 3, 5, ...> |
2932 | // <1, 3, 5, ..., n+1, n+3, n+5, ...> |
2933 | // |
2934 | // When undef's appear in the mask they are treated as if they were whatever |
2935 | // value is necessary in order to fit the above forms. |
2936 | static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy, |
2937 | SmallVector<int, 16> Indices, |
2938 | SelectionDAG &DAG) { |
2939 | assert((Indices.size() % 2) == 0); |
2940 | |
2941 | SDValue Wt; |
2942 | SDValue Ws; |
2943 | const auto &Begin = Indices.begin(); |
2944 | const auto &Mid = Indices.begin() + Indices.size() / 2; |
2945 | const auto &End = Indices.end(); |
2946 | |
2947 | if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: 1, ExpectedIndexStride: 2)) |
2948 | Wt = Op->getOperand(Num: 0); |
2949 | else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: Indices.size() + 1, ExpectedIndexStride: 2)) |
2950 | Wt = Op->getOperand(Num: 1); |
2951 | else |
2952 | return SDValue(); |
2953 | |
2954 | if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: 1, ExpectedIndexStride: 2)) |
2955 | Ws = Op->getOperand(Num: 0); |
2956 | else if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: Indices.size() + 1, ExpectedIndexStride: 2)) |
2957 | Ws = Op->getOperand(Num: 1); |
2958 | else |
2959 | return SDValue(); |
2960 | |
2961 | return DAG.getNode(Opcode: MipsISD::PCKOD, DL: SDLoc(Op), VT: ResTy, N1: Ws, N2: Wt); |
2962 | } |
2963 | |
2964 | // Lower VECTOR_SHUFFLE into VSHF. |
2965 | // |
2966 | // This mostly consists of converting the shuffle indices in Indices into a |
2967 | // BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is |
2968 | // also code to eliminate unused operands of the VECTOR_SHUFFLE. For example, |
2969 | // if the type is v8i16 and all the indices are less than 8 then the second |
2970 | // operand is unused and can be replaced with anything. We choose to replace it |
2971 | // with the used operand since this reduces the number of instructions overall. |
2972 | // |
2973 | // NOTE: SPLATI shuffle masks may contain UNDEFs, since isSPLATI() treats |
2974 | // UNDEFs as same as SPLATI index. |
2975 | // For other instances we use the last valid index if UNDEF is |
2976 | // encountered. |
2977 | static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy, |
2978 | const SmallVector<int, 16> &Indices, |
2979 | const bool isSPLATI, |
2980 | SelectionDAG &DAG) { |
2981 | SmallVector<SDValue, 16> Ops; |
2982 | SDValue Op0; |
2983 | SDValue Op1; |
2984 | EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger(); |
2985 | EVT MaskEltTy = MaskVecTy.getVectorElementType(); |
2986 | bool Using1stVec = false; |
2987 | bool Using2ndVec = false; |
2988 | SDLoc DL(Op); |
2989 | int ResTyNumElts = ResTy.getVectorNumElements(); |
2990 | |
2991 | assert(Indices[0] >= 0 && |
2992 | "shuffle mask starts with an UNDEF, which is not expected" ); |
2993 | |
2994 | for (int i = 0; i < ResTyNumElts; ++i) { |
2995 | // Idx == -1 means UNDEF |
2996 | int Idx = Indices[i]; |
2997 | |
2998 | if (0 <= Idx && Idx < ResTyNumElts) |
2999 | Using1stVec = true; |
3000 | if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2) |
3001 | Using2ndVec = true; |
3002 | } |
3003 | int LastValidIndex = 0; |
3004 | for (size_t i = 0; i < Indices.size(); i++) { |
3005 | int Idx = Indices[i]; |
3006 | if (Idx < 0) { |
3007 | // Continue using splati index or use the last valid index. |
3008 | Idx = isSPLATI ? Indices[0] : LastValidIndex; |
3009 | } else { |
3010 | LastValidIndex = Idx; |
3011 | } |
3012 | Ops.push_back(Elt: DAG.getTargetConstant(Val: Idx, DL, VT: MaskEltTy)); |
3013 | } |
3014 | |
3015 | SDValue MaskVec = DAG.getBuildVector(VT: MaskVecTy, DL, Ops); |
3016 | |
3017 | if (Using1stVec && Using2ndVec) { |
3018 | Op0 = Op->getOperand(Num: 0); |
3019 | Op1 = Op->getOperand(Num: 1); |
3020 | } else if (Using1stVec) |
3021 | Op0 = Op1 = Op->getOperand(Num: 0); |
3022 | else if (Using2ndVec) |
3023 | Op0 = Op1 = Op->getOperand(Num: 1); |
3024 | else |
3025 | llvm_unreachable("shuffle vector mask references neither vector operand?" ); |
3026 | |
3027 | // VECTOR_SHUFFLE concatenates the vectors in an vectorwise fashion. |
3028 | // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11> |
3029 | // VSHF concatenates the vectors in a bitwise fashion: |
3030 | // <0b00, 0b01> + <0b10, 0b11> -> |
3031 | // 0b0100 + 0b1110 -> 0b01001110 |
3032 | // <0b10, 0b11, 0b00, 0b01> |
3033 | // We must therefore swap the operands to get the correct result. |
3034 | return DAG.getNode(Opcode: MipsISD::VSHF, DL, VT: ResTy, N1: MaskVec, N2: Op1, N3: Op0); |
3035 | } |
3036 | |
3037 | // Lower VECTOR_SHUFFLE into one of a number of instructions depending on the |
3038 | // indices in the shuffle. |
3039 | SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, |
3040 | SelectionDAG &DAG) const { |
3041 | ShuffleVectorSDNode *Node = cast<ShuffleVectorSDNode>(Val&: Op); |
3042 | EVT ResTy = Op->getValueType(ResNo: 0); |
3043 | |
3044 | if (!ResTy.is128BitVector()) |
3045 | return SDValue(); |
3046 | |
3047 | int ResTyNumElts = ResTy.getVectorNumElements(); |
3048 | SmallVector<int, 16> Indices; |
3049 | |
3050 | for (int i = 0; i < ResTyNumElts; ++i) |
3051 | Indices.push_back(Elt: Node->getMaskElt(Idx: i)); |
3052 | |
3053 | // splati.[bhwd] is preferable to the others but is matched from |
3054 | // MipsISD::VSHF. |
3055 | if (isVECTOR_SHUFFLE_SPLATI(Op, ResTy, Indices, DAG)) |
3056 | return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, isSPLATI: true, DAG); |
3057 | SDValue Result; |
3058 | if ((Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG))) |
3059 | return Result; |
3060 | if ((Result = lowerVECTOR_SHUFFLE_ILVOD(Op, ResTy, Indices, DAG))) |
3061 | return Result; |
3062 | if ((Result = lowerVECTOR_SHUFFLE_ILVL(Op, ResTy, Indices, DAG))) |
3063 | return Result; |
3064 | if ((Result = lowerVECTOR_SHUFFLE_ILVR(Op, ResTy, Indices, DAG))) |
3065 | return Result; |
3066 | if ((Result = lowerVECTOR_SHUFFLE_PCKEV(Op, ResTy, Indices, DAG))) |
3067 | return Result; |
3068 | if ((Result = lowerVECTOR_SHUFFLE_PCKOD(Op, ResTy, Indices, DAG))) |
3069 | return Result; |
3070 | if ((Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG))) |
3071 | return Result; |
3072 | return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, isSPLATI: false, DAG); |
3073 | } |
3074 | |
3075 | MachineBasicBlock * |
3076 | MipsSETargetLowering::emitBPOSGE32(MachineInstr &MI, |
3077 | MachineBasicBlock *BB) const { |
3078 | // $bb: |
3079 | // bposge32_pseudo $vr0 |
3080 | // => |
3081 | // $bb: |
3082 | // bposge32 $tbb |
3083 | // $fbb: |
3084 | // li $vr2, 0 |
3085 | // b $sink |
3086 | // $tbb: |
3087 | // li $vr1, 1 |
3088 | // $sink: |
3089 | // $vr0 = phi($vr2, $fbb, $vr1, $tbb) |
3090 | |
3091 | MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); |
3092 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
3093 | const TargetRegisterClass *RC = &Mips::GPR32RegClass; |
3094 | DebugLoc DL = MI.getDebugLoc(); |
3095 | const BasicBlock *LLVM_BB = BB->getBasicBlock(); |
3096 | MachineFunction::iterator It = std::next(x: MachineFunction::iterator(BB)); |
3097 | MachineFunction *F = BB->getParent(); |
3098 | MachineBasicBlock *FBB = F->CreateMachineBasicBlock(BB: LLVM_BB); |
3099 | MachineBasicBlock *TBB = F->CreateMachineBasicBlock(BB: LLVM_BB); |
3100 | MachineBasicBlock *Sink = F->CreateMachineBasicBlock(BB: LLVM_BB); |
3101 | F->insert(MBBI: It, MBB: FBB); |
3102 | F->insert(MBBI: It, MBB: TBB); |
3103 | F->insert(MBBI: It, MBB: Sink); |
3104 | |
3105 | // Transfer the remainder of BB and its successor edges to Sink. |
3106 | Sink->splice(Where: Sink->begin(), Other: BB, From: std::next(x: MachineBasicBlock::iterator(MI)), |
3107 | To: BB->end()); |
3108 | Sink->transferSuccessorsAndUpdatePHIs(FromMBB: BB); |
3109 | |
3110 | // Add successors. |
3111 | BB->addSuccessor(Succ: FBB); |
3112 | BB->addSuccessor(Succ: TBB); |
3113 | FBB->addSuccessor(Succ: Sink); |
3114 | TBB->addSuccessor(Succ: Sink); |
3115 | |
3116 | // Insert the real bposge32 instruction to $BB. |
3117 | BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: Mips::BPOSGE32)).addMBB(MBB: TBB); |
3118 | // Insert the real bposge32c instruction to $BB. |
3119 | BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: Mips::BPOSGE32C_MMR3)).addMBB(MBB: TBB); |
3120 | |
3121 | // Fill $FBB. |
3122 | Register VR2 = RegInfo.createVirtualRegister(RegClass: RC); |
3123 | BuildMI(BB&: *FBB, I: FBB->end(), MIMD: DL, MCID: TII->get(Opcode: Mips::ADDiu), DestReg: VR2) |
3124 | .addReg(RegNo: Mips::ZERO).addImm(Val: 0); |
3125 | BuildMI(BB&: *FBB, I: FBB->end(), MIMD: DL, MCID: TII->get(Opcode: Mips::B)).addMBB(MBB: Sink); |
3126 | |
3127 | // Fill $TBB. |
3128 | Register VR1 = RegInfo.createVirtualRegister(RegClass: RC); |
3129 | BuildMI(BB&: *TBB, I: TBB->end(), MIMD: DL, MCID: TII->get(Opcode: Mips::ADDiu), DestReg: VR1) |
3130 | .addReg(RegNo: Mips::ZERO).addImm(Val: 1); |
3131 | |
3132 | // Insert phi function to $Sink. |
3133 | BuildMI(BB&: *Sink, I: Sink->begin(), MIMD: DL, MCID: TII->get(Opcode: Mips::PHI), |
3134 | DestReg: MI.getOperand(i: 0).getReg()) |
3135 | .addReg(RegNo: VR2) |
3136 | .addMBB(MBB: FBB) |
3137 | .addReg(RegNo: VR1) |
3138 | .addMBB(MBB: TBB); |
3139 | |
3140 | MI.eraseFromParent(); // The pseudo instruction is gone now. |
3141 | return Sink; |
3142 | } |
3143 | |
3144 | MachineBasicBlock *MipsSETargetLowering::emitMSACBranchPseudo( |
3145 | MachineInstr &MI, MachineBasicBlock *BB, unsigned BranchOp) const { |
3146 | // $bb: |
3147 | // vany_nonzero $rd, $ws |
3148 | // => |
3149 | // $bb: |
3150 | // bnz.b $ws, $tbb |
3151 | // b $fbb |
3152 | // $fbb: |
3153 | // li $rd1, 0 |
3154 | // b $sink |
3155 | // $tbb: |
3156 | // li $rd2, 1 |
3157 | // $sink: |
3158 | // $rd = phi($rd1, $fbb, $rd2, $tbb) |
3159 | |
3160 | MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); |
3161 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
3162 | const TargetRegisterClass *RC = &Mips::GPR32RegClass; |
3163 | DebugLoc DL = MI.getDebugLoc(); |
3164 | const BasicBlock *LLVM_BB = BB->getBasicBlock(); |
3165 | MachineFunction::iterator It = std::next(x: MachineFunction::iterator(BB)); |
3166 | MachineFunction *F = BB->getParent(); |
3167 | MachineBasicBlock *FBB = F->CreateMachineBasicBlock(BB: LLVM_BB); |
3168 | MachineBasicBlock *TBB = F->CreateMachineBasicBlock(BB: LLVM_BB); |
3169 | MachineBasicBlock *Sink = F->CreateMachineBasicBlock(BB: LLVM_BB); |
3170 | F->insert(MBBI: It, MBB: FBB); |
3171 | F->insert(MBBI: It, MBB: TBB); |
3172 | F->insert(MBBI: It, MBB: Sink); |
3173 | |
3174 | // Transfer the remainder of BB and its successor edges to Sink. |
3175 | Sink->splice(Where: Sink->begin(), Other: BB, From: std::next(x: MachineBasicBlock::iterator(MI)), |
3176 | To: BB->end()); |
3177 | Sink->transferSuccessorsAndUpdatePHIs(FromMBB: BB); |
3178 | |
3179 | // Add successors. |
3180 | BB->addSuccessor(Succ: FBB); |
3181 | BB->addSuccessor(Succ: TBB); |
3182 | FBB->addSuccessor(Succ: Sink); |
3183 | TBB->addSuccessor(Succ: Sink); |
3184 | |
3185 | // Insert the real bnz.b instruction to $BB. |
3186 | BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: BranchOp)) |
3187 | .addReg(RegNo: MI.getOperand(i: 1).getReg()) |
3188 | .addMBB(MBB: TBB); |
3189 | |
3190 | // Fill $FBB. |
3191 | Register RD1 = RegInfo.createVirtualRegister(RegClass: RC); |
3192 | BuildMI(BB&: *FBB, I: FBB->end(), MIMD: DL, MCID: TII->get(Opcode: Mips::ADDiu), DestReg: RD1) |
3193 | .addReg(RegNo: Mips::ZERO).addImm(Val: 0); |
3194 | BuildMI(BB&: *FBB, I: FBB->end(), MIMD: DL, MCID: TII->get(Opcode: Mips::B)).addMBB(MBB: Sink); |
3195 | |
3196 | // Fill $TBB. |
3197 | Register RD2 = RegInfo.createVirtualRegister(RegClass: RC); |
3198 | BuildMI(BB&: *TBB, I: TBB->end(), MIMD: DL, MCID: TII->get(Opcode: Mips::ADDiu), DestReg: RD2) |
3199 | .addReg(RegNo: Mips::ZERO).addImm(Val: 1); |
3200 | |
3201 | // Insert phi function to $Sink. |
3202 | BuildMI(BB&: *Sink, I: Sink->begin(), MIMD: DL, MCID: TII->get(Opcode: Mips::PHI), |
3203 | DestReg: MI.getOperand(i: 0).getReg()) |
3204 | .addReg(RegNo: RD1) |
3205 | .addMBB(MBB: FBB) |
3206 | .addReg(RegNo: RD2) |
3207 | .addMBB(MBB: TBB); |
3208 | |
3209 | MI.eraseFromParent(); // The pseudo instruction is gone now. |
3210 | return Sink; |
3211 | } |
3212 | |
3213 | // Emit the COPY_FW pseudo instruction. |
3214 | // |
3215 | // copy_fw_pseudo $fd, $ws, n |
3216 | // => |
3217 | // copy_u_w $rt, $ws, $n |
3218 | // mtc1 $rt, $fd |
3219 | // |
3220 | // When n is zero, the equivalent operation can be performed with (potentially) |
3221 | // zero instructions due to register overlaps. This optimization is never valid |
3222 | // for lane 1 because it would require FR=0 mode which isn't supported by MSA. |
3223 | MachineBasicBlock * |
3224 | MipsSETargetLowering::emitCOPY_FW(MachineInstr &MI, |
3225 | MachineBasicBlock *BB) const { |
3226 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
3227 | MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); |
3228 | DebugLoc DL = MI.getDebugLoc(); |
3229 | Register Fd = MI.getOperand(i: 0).getReg(); |
3230 | Register Ws = MI.getOperand(i: 1).getReg(); |
3231 | unsigned Lane = MI.getOperand(i: 2).getImm(); |
3232 | |
3233 | if (Lane == 0) { |
3234 | unsigned Wt = Ws; |
3235 | if (!Subtarget.useOddSPReg()) { |
3236 | // We must copy to an even-numbered MSA register so that the |
3237 | // single-precision sub-register is also guaranteed to be even-numbered. |
3238 | Wt = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128WEvensRegClass); |
3239 | |
3240 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::COPY), DestReg: Wt).addReg(RegNo: Ws); |
3241 | } |
3242 | |
3243 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::COPY), DestReg: Fd).addReg(RegNo: Wt, flags: 0, SubReg: Mips::sub_lo); |
3244 | } else { |
3245 | Register Wt = RegInfo.createVirtualRegister( |
3246 | RegClass: Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass |
3247 | : &Mips::MSA128WEvensRegClass); |
3248 | |
3249 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SPLATI_W), DestReg: Wt).addReg(RegNo: Ws).addImm(Val: Lane); |
3250 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::COPY), DestReg: Fd).addReg(RegNo: Wt, flags: 0, SubReg: Mips::sub_lo); |
3251 | } |
3252 | |
3253 | MI.eraseFromParent(); // The pseudo instruction is gone now. |
3254 | return BB; |
3255 | } |
3256 | |
3257 | // Emit the COPY_FD pseudo instruction. |
3258 | // |
3259 | // copy_fd_pseudo $fd, $ws, n |
3260 | // => |
3261 | // splati.d $wt, $ws, $n |
3262 | // copy $fd, $wt:sub_64 |
3263 | // |
3264 | // When n is zero, the equivalent operation can be performed with (potentially) |
3265 | // zero instructions due to register overlaps. This optimization is always |
3266 | // valid because FR=1 mode which is the only supported mode in MSA. |
3267 | MachineBasicBlock * |
3268 | MipsSETargetLowering::emitCOPY_FD(MachineInstr &MI, |
3269 | MachineBasicBlock *BB) const { |
3270 | assert(Subtarget.isFP64bit()); |
3271 | |
3272 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
3273 | MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); |
3274 | Register Fd = MI.getOperand(i: 0).getReg(); |
3275 | Register Ws = MI.getOperand(i: 1).getReg(); |
3276 | unsigned Lane = MI.getOperand(i: 2).getImm() * 2; |
3277 | DebugLoc DL = MI.getDebugLoc(); |
3278 | |
3279 | if (Lane == 0) |
3280 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::COPY), DestReg: Fd).addReg(RegNo: Ws, flags: 0, SubReg: Mips::sub_64); |
3281 | else { |
3282 | Register Wt = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128DRegClass); |
3283 | |
3284 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SPLATI_D), DestReg: Wt).addReg(RegNo: Ws).addImm(Val: 1); |
3285 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::COPY), DestReg: Fd).addReg(RegNo: Wt, flags: 0, SubReg: Mips::sub_64); |
3286 | } |
3287 | |
3288 | MI.eraseFromParent(); // The pseudo instruction is gone now. |
3289 | return BB; |
3290 | } |
3291 | |
3292 | // Emit the INSERT_FW pseudo instruction. |
3293 | // |
3294 | // insert_fw_pseudo $wd, $wd_in, $n, $fs |
3295 | // => |
3296 | // subreg_to_reg $wt:sub_lo, $fs |
3297 | // insve_w $wd[$n], $wd_in, $wt[0] |
3298 | MachineBasicBlock * |
3299 | MipsSETargetLowering::emitINSERT_FW(MachineInstr &MI, |
3300 | MachineBasicBlock *BB) const { |
3301 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
3302 | MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); |
3303 | DebugLoc DL = MI.getDebugLoc(); |
3304 | Register Wd = MI.getOperand(i: 0).getReg(); |
3305 | Register Wd_in = MI.getOperand(i: 1).getReg(); |
3306 | unsigned Lane = MI.getOperand(i: 2).getImm(); |
3307 | Register Fs = MI.getOperand(i: 3).getReg(); |
3308 | Register Wt = RegInfo.createVirtualRegister( |
3309 | RegClass: Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass |
3310 | : &Mips::MSA128WEvensRegClass); |
3311 | |
3312 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SUBREG_TO_REG), DestReg: Wt) |
3313 | .addImm(Val: 0) |
3314 | .addReg(RegNo: Fs) |
3315 | .addImm(Val: Mips::sub_lo); |
3316 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::INSVE_W), DestReg: Wd) |
3317 | .addReg(RegNo: Wd_in) |
3318 | .addImm(Val: Lane) |
3319 | .addReg(RegNo: Wt) |
3320 | .addImm(Val: 0); |
3321 | |
3322 | MI.eraseFromParent(); // The pseudo instruction is gone now. |
3323 | return BB; |
3324 | } |
3325 | |
3326 | // Emit the INSERT_FD pseudo instruction. |
3327 | // |
3328 | // insert_fd_pseudo $wd, $fs, n |
3329 | // => |
3330 | // subreg_to_reg $wt:sub_64, $fs |
3331 | // insve_d $wd[$n], $wd_in, $wt[0] |
3332 | MachineBasicBlock * |
3333 | MipsSETargetLowering::emitINSERT_FD(MachineInstr &MI, |
3334 | MachineBasicBlock *BB) const { |
3335 | assert(Subtarget.isFP64bit()); |
3336 | |
3337 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
3338 | MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); |
3339 | DebugLoc DL = MI.getDebugLoc(); |
3340 | Register Wd = MI.getOperand(i: 0).getReg(); |
3341 | Register Wd_in = MI.getOperand(i: 1).getReg(); |
3342 | unsigned Lane = MI.getOperand(i: 2).getImm(); |
3343 | Register Fs = MI.getOperand(i: 3).getReg(); |
3344 | Register Wt = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128DRegClass); |
3345 | |
3346 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SUBREG_TO_REG), DestReg: Wt) |
3347 | .addImm(Val: 0) |
3348 | .addReg(RegNo: Fs) |
3349 | .addImm(Val: Mips::sub_64); |
3350 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::INSVE_D), DestReg: Wd) |
3351 | .addReg(RegNo: Wd_in) |
3352 | .addImm(Val: Lane) |
3353 | .addReg(RegNo: Wt) |
3354 | .addImm(Val: 0); |
3355 | |
3356 | MI.eraseFromParent(); // The pseudo instruction is gone now. |
3357 | return BB; |
3358 | } |
3359 | |
3360 | // Emit the INSERT_([BHWD]|F[WD])_VIDX pseudo instruction. |
3361 | // |
3362 | // For integer: |
3363 | // (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $rs) |
3364 | // => |
3365 | // (SLL $lanetmp1, $lane, <log2size) |
3366 | // (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1) |
3367 | // (INSERT_[BHWD], $wdtmp2, $wdtmp1, 0, $rs) |
3368 | // (NEG $lanetmp2, $lanetmp1) |
3369 | // (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2) |
3370 | // |
3371 | // For floating point: |
3372 | // (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $fs) |
3373 | // => |
3374 | // (SUBREG_TO_REG $wt, $fs, <subreg>) |
3375 | // (SLL $lanetmp1, $lane, <log2size) |
3376 | // (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1) |
3377 | // (INSVE_[WD], $wdtmp2, 0, $wdtmp1, 0) |
3378 | // (NEG $lanetmp2, $lanetmp1) |
3379 | // (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2) |
3380 | MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX( |
3381 | MachineInstr &MI, MachineBasicBlock *BB, unsigned EltSizeInBytes, |
3382 | bool IsFP) const { |
3383 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
3384 | MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); |
3385 | DebugLoc DL = MI.getDebugLoc(); |
3386 | Register Wd = MI.getOperand(i: 0).getReg(); |
3387 | Register SrcVecReg = MI.getOperand(i: 1).getReg(); |
3388 | Register LaneReg = MI.getOperand(i: 2).getReg(); |
3389 | Register SrcValReg = MI.getOperand(i: 3).getReg(); |
3390 | |
3391 | const TargetRegisterClass *VecRC = nullptr; |
3392 | // FIXME: This should be true for N32 too. |
3393 | const TargetRegisterClass *GPRRC = |
3394 | Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; |
3395 | unsigned SubRegIdx = Subtarget.isABI_N64() ? Mips::sub_32 : 0; |
3396 | unsigned ShiftOp = Subtarget.isABI_N64() ? Mips::DSLL : Mips::SLL; |
3397 | unsigned EltLog2Size; |
3398 | unsigned InsertOp = 0; |
3399 | unsigned InsveOp = 0; |
3400 | switch (EltSizeInBytes) { |
3401 | default: |
3402 | llvm_unreachable("Unexpected size" ); |
3403 | case 1: |
3404 | EltLog2Size = 0; |
3405 | InsertOp = Mips::INSERT_B; |
3406 | InsveOp = Mips::INSVE_B; |
3407 | VecRC = &Mips::MSA128BRegClass; |
3408 | break; |
3409 | case 2: |
3410 | EltLog2Size = 1; |
3411 | InsertOp = Mips::INSERT_H; |
3412 | InsveOp = Mips::INSVE_H; |
3413 | VecRC = &Mips::MSA128HRegClass; |
3414 | break; |
3415 | case 4: |
3416 | EltLog2Size = 2; |
3417 | InsertOp = Mips::INSERT_W; |
3418 | InsveOp = Mips::INSVE_W; |
3419 | VecRC = &Mips::MSA128WRegClass; |
3420 | break; |
3421 | case 8: |
3422 | EltLog2Size = 3; |
3423 | InsertOp = Mips::INSERT_D; |
3424 | InsveOp = Mips::INSVE_D; |
3425 | VecRC = &Mips::MSA128DRegClass; |
3426 | break; |
3427 | } |
3428 | |
3429 | if (IsFP) { |
3430 | Register Wt = RegInfo.createVirtualRegister(RegClass: VecRC); |
3431 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SUBREG_TO_REG), DestReg: Wt) |
3432 | .addImm(Val: 0) |
3433 | .addReg(RegNo: SrcValReg) |
3434 | .addImm(Val: EltSizeInBytes == 8 ? Mips::sub_64 : Mips::sub_lo); |
3435 | SrcValReg = Wt; |
3436 | } |
3437 | |
3438 | // Convert the lane index into a byte index |
3439 | if (EltSizeInBytes != 1) { |
3440 | Register LaneTmp1 = RegInfo.createVirtualRegister(RegClass: GPRRC); |
3441 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: ShiftOp), DestReg: LaneTmp1) |
3442 | .addReg(RegNo: LaneReg) |
3443 | .addImm(Val: EltLog2Size); |
3444 | LaneReg = LaneTmp1; |
3445 | } |
3446 | |
3447 | // Rotate bytes around so that the desired lane is element zero |
3448 | Register WdTmp1 = RegInfo.createVirtualRegister(RegClass: VecRC); |
3449 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SLD_B), DestReg: WdTmp1) |
3450 | .addReg(RegNo: SrcVecReg) |
3451 | .addReg(RegNo: SrcVecReg) |
3452 | .addReg(RegNo: LaneReg, flags: 0, SubReg: SubRegIdx); |
3453 | |
3454 | Register WdTmp2 = RegInfo.createVirtualRegister(RegClass: VecRC); |
3455 | if (IsFP) { |
3456 | // Use insve.df to insert to element zero |
3457 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: InsveOp), DestReg: WdTmp2) |
3458 | .addReg(RegNo: WdTmp1) |
3459 | .addImm(Val: 0) |
3460 | .addReg(RegNo: SrcValReg) |
3461 | .addImm(Val: 0); |
3462 | } else { |
3463 | // Use insert.df to insert to element zero |
3464 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: InsertOp), DestReg: WdTmp2) |
3465 | .addReg(RegNo: WdTmp1) |
3466 | .addReg(RegNo: SrcValReg) |
3467 | .addImm(Val: 0); |
3468 | } |
3469 | |
3470 | // Rotate elements the rest of the way for a full rotation. |
3471 | // sld.df inteprets $rt modulo the number of columns so we only need to negate |
3472 | // the lane index to do this. |
3473 | Register LaneTmp2 = RegInfo.createVirtualRegister(RegClass: GPRRC); |
3474 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Subtarget.isABI_N64() ? Mips::DSUB : Mips::SUB), |
3475 | DestReg: LaneTmp2) |
3476 | .addReg(RegNo: Subtarget.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO) |
3477 | .addReg(RegNo: LaneReg); |
3478 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SLD_B), DestReg: Wd) |
3479 | .addReg(RegNo: WdTmp2) |
3480 | .addReg(RegNo: WdTmp2) |
3481 | .addReg(RegNo: LaneTmp2, flags: 0, SubReg: SubRegIdx); |
3482 | |
3483 | MI.eraseFromParent(); // The pseudo instruction is gone now. |
3484 | return BB; |
3485 | } |
3486 | |
3487 | // Emit the FILL_FW pseudo instruction. |
3488 | // |
3489 | // fill_fw_pseudo $wd, $fs |
3490 | // => |
3491 | // implicit_def $wt1 |
3492 | // insert_subreg $wt2:subreg_lo, $wt1, $fs |
3493 | // splati.w $wd, $wt2[0] |
3494 | MachineBasicBlock * |
3495 | MipsSETargetLowering::emitFILL_FW(MachineInstr &MI, |
3496 | MachineBasicBlock *BB) const { |
3497 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
3498 | MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); |
3499 | DebugLoc DL = MI.getDebugLoc(); |
3500 | Register Wd = MI.getOperand(i: 0).getReg(); |
3501 | Register Fs = MI.getOperand(i: 1).getReg(); |
3502 | Register Wt1 = RegInfo.createVirtualRegister( |
3503 | RegClass: Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass |
3504 | : &Mips::MSA128WEvensRegClass); |
3505 | Register Wt2 = RegInfo.createVirtualRegister( |
3506 | RegClass: Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass |
3507 | : &Mips::MSA128WEvensRegClass); |
3508 | |
3509 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::IMPLICIT_DEF), DestReg: Wt1); |
3510 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::INSERT_SUBREG), DestReg: Wt2) |
3511 | .addReg(RegNo: Wt1) |
3512 | .addReg(RegNo: Fs) |
3513 | .addImm(Val: Mips::sub_lo); |
3514 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SPLATI_W), DestReg: Wd).addReg(RegNo: Wt2).addImm(Val: 0); |
3515 | |
3516 | MI.eraseFromParent(); // The pseudo instruction is gone now. |
3517 | return BB; |
3518 | } |
3519 | |
3520 | // Emit the FILL_FD pseudo instruction. |
3521 | // |
3522 | // fill_fd_pseudo $wd, $fs |
3523 | // => |
3524 | // implicit_def $wt1 |
3525 | // insert_subreg $wt2:subreg_64, $wt1, $fs |
3526 | // splati.d $wd, $wt2[0] |
3527 | MachineBasicBlock * |
3528 | MipsSETargetLowering::emitFILL_FD(MachineInstr &MI, |
3529 | MachineBasicBlock *BB) const { |
3530 | assert(Subtarget.isFP64bit()); |
3531 | |
3532 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
3533 | MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); |
3534 | DebugLoc DL = MI.getDebugLoc(); |
3535 | Register Wd = MI.getOperand(i: 0).getReg(); |
3536 | Register Fs = MI.getOperand(i: 1).getReg(); |
3537 | Register Wt1 = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128DRegClass); |
3538 | Register Wt2 = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128DRegClass); |
3539 | |
3540 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::IMPLICIT_DEF), DestReg: Wt1); |
3541 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::INSERT_SUBREG), DestReg: Wt2) |
3542 | .addReg(RegNo: Wt1) |
3543 | .addReg(RegNo: Fs) |
3544 | .addImm(Val: Mips::sub_64); |
3545 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SPLATI_D), DestReg: Wd).addReg(RegNo: Wt2).addImm(Val: 0); |
3546 | |
3547 | MI.eraseFromParent(); // The pseudo instruction is gone now. |
3548 | return BB; |
3549 | } |
3550 | |
3551 | // Emit the ST_F16_PSEDUO instruction to store a f16 value from an MSA |
3552 | // register. |
3553 | // |
3554 | // STF16 MSA128F16:$wd, mem_simm10:$addr |
3555 | // => |
3556 | // copy_u.h $rtemp,$wd[0] |
3557 | // sh $rtemp, $addr |
3558 | // |
3559 | // Safety: We can't use st.h & co as they would over write the memory after |
3560 | // the destination. It would require half floats be allocated 16 bytes(!) of |
3561 | // space. |
3562 | MachineBasicBlock * |
3563 | MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI, |
3564 | MachineBasicBlock *BB) const { |
3565 | |
3566 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
3567 | MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); |
3568 | DebugLoc DL = MI.getDebugLoc(); |
3569 | Register Ws = MI.getOperand(i: 0).getReg(); |
3570 | Register Rt = MI.getOperand(i: 1).getReg(); |
3571 | const MachineMemOperand &MMO = **MI.memoperands_begin(); |
3572 | unsigned Imm = MMO.getOffset(); |
3573 | |
3574 | // Caution: A load via the GOT can expand to a GPR32 operand, a load via |
3575 | // spill and reload can expand as a GPR64 operand. Examine the |
3576 | // operand in detail and default to ABI. |
3577 | const TargetRegisterClass *RC = |
3578 | MI.getOperand(i: 1).isReg() ? RegInfo.getRegClass(Reg: MI.getOperand(i: 1).getReg()) |
3579 | : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass |
3580 | : &Mips::GPR64RegClass); |
3581 | const bool UsingMips32 = RC == &Mips::GPR32RegClass; |
3582 | Register Rs = RegInfo.createVirtualRegister(RegClass: &Mips::GPR32RegClass); |
3583 | |
3584 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::COPY_U_H), DestReg: Rs).addReg(RegNo: Ws).addImm(Val: 0); |
3585 | if(!UsingMips32) { |
3586 | Register Tmp = RegInfo.createVirtualRegister(RegClass: &Mips::GPR64RegClass); |
3587 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SUBREG_TO_REG), DestReg: Tmp) |
3588 | .addImm(Val: 0) |
3589 | .addReg(RegNo: Rs) |
3590 | .addImm(Val: Mips::sub_32); |
3591 | Rs = Tmp; |
3592 | } |
3593 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: UsingMips32 ? Mips::SH : Mips::SH64)) |
3594 | .addReg(RegNo: Rs) |
3595 | .addReg(RegNo: Rt) |
3596 | .addImm(Val: Imm) |
3597 | .addMemOperand(MMO: BB->getParent()->getMachineMemOperand( |
3598 | MMO: &MMO, Offset: MMO.getOffset(), Size: MMO.getSize())); |
3599 | |
3600 | MI.eraseFromParent(); |
3601 | return BB; |
3602 | } |
3603 | |
3604 | // Emit the LD_F16_PSEDUO instruction to load a f16 value into an MSA register. |
3605 | // |
3606 | // LD_F16 MSA128F16:$wd, mem_simm10:$addr |
3607 | // => |
3608 | // lh $rtemp, $addr |
3609 | // fill.h $wd, $rtemp |
3610 | // |
3611 | // Safety: We can't use ld.h & co as they over-read from the source. |
3612 | // Additionally, if the address is not modulo 16, 2 cases can occur: |
3613 | // a) Segmentation fault as the load instruction reads from a memory page |
3614 | // memory it's not supposed to. |
3615 | // b) The load crosses an implementation specific boundary, requiring OS |
3616 | // intervention. |
3617 | MachineBasicBlock * |
3618 | MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI, |
3619 | MachineBasicBlock *BB) const { |
3620 | |
3621 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
3622 | MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); |
3623 | DebugLoc DL = MI.getDebugLoc(); |
3624 | Register Wd = MI.getOperand(i: 0).getReg(); |
3625 | |
3626 | // Caution: A load via the GOT can expand to a GPR32 operand, a load via |
3627 | // spill and reload can expand as a GPR64 operand. Examine the |
3628 | // operand in detail and default to ABI. |
3629 | const TargetRegisterClass *RC = |
3630 | MI.getOperand(i: 1).isReg() ? RegInfo.getRegClass(Reg: MI.getOperand(i: 1).getReg()) |
3631 | : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass |
3632 | : &Mips::GPR64RegClass); |
3633 | |
3634 | const bool UsingMips32 = RC == &Mips::GPR32RegClass; |
3635 | Register Rt = RegInfo.createVirtualRegister(RegClass: RC); |
3636 | |
3637 | MachineInstrBuilder MIB = |
3638 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: UsingMips32 ? Mips::LH : Mips::LH64), DestReg: Rt); |
3639 | for (const MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI.operands())) |
3640 | MIB.add(MO); |
3641 | |
3642 | if(!UsingMips32) { |
3643 | Register Tmp = RegInfo.createVirtualRegister(RegClass: &Mips::GPR32RegClass); |
3644 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::COPY), DestReg: Tmp).addReg(RegNo: Rt, flags: 0, SubReg: Mips::sub_32); |
3645 | Rt = Tmp; |
3646 | } |
3647 | |
3648 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FILL_H), DestReg: Wd).addReg(RegNo: Rt); |
3649 | |
3650 | MI.eraseFromParent(); |
3651 | return BB; |
3652 | } |
3653 | |
3654 | // Emit the FPROUND_PSEUDO instruction. |
3655 | // |
3656 | // Round an FGR64Opnd, FGR32Opnd to an f16. |
3657 | // |
3658 | // Safety: Cycle the operand through the GPRs so the result always ends up |
3659 | // the correct MSA register. |
3660 | // |
3661 | // FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fs |
3662 | // / FGR64Opnd:$Fs and MSA128F16:$Wd to the same physical register |
3663 | // (which they can be, as the MSA registers are defined to alias the |
3664 | // FPU's 64 bit and 32 bit registers) the result can be accessed using |
3665 | // the correct register class. That requires operands be tie-able across |
3666 | // register classes which have a sub/super register class relationship. |
3667 | // |
3668 | // For FPG32Opnd: |
3669 | // |
3670 | // FPROUND MSA128F16:$wd, FGR32Opnd:$fs |
3671 | // => |
3672 | // mfc1 $rtemp, $fs |
3673 | // fill.w $rtemp, $wtemp |
3674 | // fexdo.w $wd, $wtemp, $wtemp |
3675 | // |
3676 | // For FPG64Opnd on mips32r2+: |
3677 | // |
3678 | // FPROUND MSA128F16:$wd, FGR64Opnd:$fs |
3679 | // => |
3680 | // mfc1 $rtemp, $fs |
3681 | // fill.w $rtemp, $wtemp |
3682 | // mfhc1 $rtemp2, $fs |
3683 | // insert.w $wtemp[1], $rtemp2 |
3684 | // insert.w $wtemp[3], $rtemp2 |
3685 | // fexdo.w $wtemp2, $wtemp, $wtemp |
3686 | // fexdo.h $wd, $temp2, $temp2 |
3687 | // |
3688 | // For FGR64Opnd on mips64r2+: |
3689 | // |
3690 | // FPROUND MSA128F16:$wd, FGR64Opnd:$fs |
3691 | // => |
3692 | // dmfc1 $rtemp, $fs |
3693 | // fill.d $rtemp, $wtemp |
3694 | // fexdo.w $wtemp2, $wtemp, $wtemp |
3695 | // fexdo.h $wd, $wtemp2, $wtemp2 |
3696 | // |
3697 | // Safety note: As $wtemp is UNDEF, we may provoke a spurious exception if the |
3698 | // undef bits are "just right" and the exception enable bits are |
3699 | // set. By using fill.w to replicate $fs into all elements over |
3700 | // insert.w for one element, we avoid that potiential case. If |
3701 | // fexdo.[hw] causes an exception in, the exception is valid and it |
3702 | // occurs for all elements. |
3703 | MachineBasicBlock * |
3704 | MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI, |
3705 | MachineBasicBlock *BB, |
3706 | bool IsFGR64) const { |
3707 | |
3708 | // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous |
3709 | // here. It's technically doable to support MIPS32 here, but the ISA forbids |
3710 | // it. |
3711 | assert(Subtarget.hasMSA() && Subtarget.hasMips32r2()); |
3712 | |
3713 | bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64; |
3714 | bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64; |
3715 | |
3716 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
3717 | DebugLoc DL = MI.getDebugLoc(); |
3718 | Register Wd = MI.getOperand(i: 0).getReg(); |
3719 | Register Fs = MI.getOperand(i: 1).getReg(); |
3720 | |
3721 | MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); |
3722 | Register Wtemp = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128WRegClass); |
3723 | const TargetRegisterClass *GPRRC = |
3724 | IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; |
3725 | unsigned MFC1Opc = IsFGR64onMips64 |
3726 | ? Mips::DMFC1 |
3727 | : (IsFGR64onMips32 ? Mips::MFC1_D64 : Mips::MFC1); |
3728 | unsigned FILLOpc = IsFGR64onMips64 ? Mips::FILL_D : Mips::FILL_W; |
3729 | |
3730 | // Perform the register class copy as mentioned above. |
3731 | Register Rtemp = RegInfo.createVirtualRegister(RegClass: GPRRC); |
3732 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: MFC1Opc), DestReg: Rtemp).addReg(RegNo: Fs); |
3733 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: FILLOpc), DestReg: Wtemp).addReg(RegNo: Rtemp); |
3734 | unsigned WPHI = Wtemp; |
3735 | |
3736 | if (IsFGR64onMips32) { |
3737 | Register Rtemp2 = RegInfo.createVirtualRegister(RegClass: GPRRC); |
3738 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::MFHC1_D64), DestReg: Rtemp2).addReg(RegNo: Fs); |
3739 | Register Wtemp2 = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128WRegClass); |
3740 | Register Wtemp3 = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128WRegClass); |
3741 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::INSERT_W), DestReg: Wtemp2) |
3742 | .addReg(RegNo: Wtemp) |
3743 | .addReg(RegNo: Rtemp2) |
3744 | .addImm(Val: 1); |
3745 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::INSERT_W), DestReg: Wtemp3) |
3746 | .addReg(RegNo: Wtemp2) |
3747 | .addReg(RegNo: Rtemp2) |
3748 | .addImm(Val: 3); |
3749 | WPHI = Wtemp3; |
3750 | } |
3751 | |
3752 | if (IsFGR64) { |
3753 | Register Wtemp2 = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128WRegClass); |
3754 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FEXDO_W), DestReg: Wtemp2) |
3755 | .addReg(RegNo: WPHI) |
3756 | .addReg(RegNo: WPHI); |
3757 | WPHI = Wtemp2; |
3758 | } |
3759 | |
3760 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FEXDO_H), DestReg: Wd).addReg(RegNo: WPHI).addReg(RegNo: WPHI); |
3761 | |
3762 | MI.eraseFromParent(); |
3763 | return BB; |
3764 | } |
3765 | |
3766 | // Emit the FPEXTEND_PSEUDO instruction. |
3767 | // |
3768 | // Expand an f16 to either a FGR32Opnd or FGR64Opnd. |
3769 | // |
3770 | // Safety: Cycle the result through the GPRs so the result always ends up |
3771 | // the correct floating point register. |
3772 | // |
3773 | // FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fd |
3774 | // / FGR64Opnd:$Fd and MSA128F16:$Ws to the same physical register |
3775 | // (which they can be, as the MSA registers are defined to alias the |
3776 | // FPU's 64 bit and 32 bit registers) the result can be accessed using |
3777 | // the correct register class. That requires operands be tie-able across |
3778 | // register classes which have a sub/super register class relationship. I |
3779 | // haven't checked. |
3780 | // |
3781 | // For FGR32Opnd: |
3782 | // |
3783 | // FPEXTEND FGR32Opnd:$fd, MSA128F16:$ws |
3784 | // => |
3785 | // fexupr.w $wtemp, $ws |
3786 | // copy_s.w $rtemp, $ws[0] |
3787 | // mtc1 $rtemp, $fd |
3788 | // |
3789 | // For FGR64Opnd on Mips64: |
3790 | // |
3791 | // FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws |
3792 | // => |
3793 | // fexupr.w $wtemp, $ws |
3794 | // fexupr.d $wtemp2, $wtemp |
3795 | // copy_s.d $rtemp, $wtemp2s[0] |
3796 | // dmtc1 $rtemp, $fd |
3797 | // |
3798 | // For FGR64Opnd on Mips32: |
3799 | // |
3800 | // FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws |
3801 | // => |
3802 | // fexupr.w $wtemp, $ws |
3803 | // fexupr.d $wtemp2, $wtemp |
3804 | // copy_s.w $rtemp, $wtemp2[0] |
3805 | // mtc1 $rtemp, $ftemp |
3806 | // copy_s.w $rtemp2, $wtemp2[1] |
3807 | // $fd = mthc1 $rtemp2, $ftemp |
3808 | MachineBasicBlock * |
3809 | MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI, |
3810 | MachineBasicBlock *BB, |
3811 | bool IsFGR64) const { |
3812 | |
3813 | // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous |
3814 | // here. It's technically doable to support MIPS32 here, but the ISA forbids |
3815 | // it. |
3816 | assert(Subtarget.hasMSA() && Subtarget.hasMips32r2()); |
3817 | |
3818 | bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64; |
3819 | bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64; |
3820 | |
3821 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
3822 | DebugLoc DL = MI.getDebugLoc(); |
3823 | Register Fd = MI.getOperand(i: 0).getReg(); |
3824 | Register Ws = MI.getOperand(i: 1).getReg(); |
3825 | |
3826 | MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); |
3827 | const TargetRegisterClass *GPRRC = |
3828 | IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; |
3829 | unsigned MTC1Opc = IsFGR64onMips64 |
3830 | ? Mips::DMTC1 |
3831 | : (IsFGR64onMips32 ? Mips::MTC1_D64 : Mips::MTC1); |
3832 | Register COPYOpc = IsFGR64onMips64 ? Mips::COPY_S_D : Mips::COPY_S_W; |
3833 | |
3834 | Register Wtemp = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128WRegClass); |
3835 | Register WPHI = Wtemp; |
3836 | |
3837 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FEXUPR_W), DestReg: Wtemp).addReg(RegNo: Ws); |
3838 | if (IsFGR64) { |
3839 | WPHI = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128DRegClass); |
3840 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FEXUPR_D), DestReg: WPHI).addReg(RegNo: Wtemp); |
3841 | } |
3842 | |
3843 | // Perform the safety regclass copy mentioned above. |
3844 | Register Rtemp = RegInfo.createVirtualRegister(RegClass: GPRRC); |
3845 | Register FPRPHI = IsFGR64onMips32 |
3846 | ? RegInfo.createVirtualRegister(RegClass: &Mips::FGR64RegClass) |
3847 | : Fd; |
3848 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: COPYOpc), DestReg: Rtemp).addReg(RegNo: WPHI).addImm(Val: 0); |
3849 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: MTC1Opc), DestReg: FPRPHI).addReg(RegNo: Rtemp); |
3850 | |
3851 | if (IsFGR64onMips32) { |
3852 | Register Rtemp2 = RegInfo.createVirtualRegister(RegClass: GPRRC); |
3853 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::COPY_S_W), DestReg: Rtemp2) |
3854 | .addReg(RegNo: WPHI) |
3855 | .addImm(Val: 1); |
3856 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::MTHC1_D64), DestReg: Fd) |
3857 | .addReg(RegNo: FPRPHI) |
3858 | .addReg(RegNo: Rtemp2); |
3859 | } |
3860 | |
3861 | MI.eraseFromParent(); |
3862 | return BB; |
3863 | } |
3864 | |
3865 | // Emit the FEXP2_W_1 pseudo instructions. |
3866 | // |
3867 | // fexp2_w_1_pseudo $wd, $wt |
3868 | // => |
3869 | // ldi.w $ws, 1 |
3870 | // fexp2.w $wd, $ws, $wt |
3871 | MachineBasicBlock * |
3872 | MipsSETargetLowering::emitFEXP2_W_1(MachineInstr &MI, |
3873 | MachineBasicBlock *BB) const { |
3874 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
3875 | MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); |
3876 | const TargetRegisterClass *RC = &Mips::MSA128WRegClass; |
3877 | Register Ws1 = RegInfo.createVirtualRegister(RegClass: RC); |
3878 | Register Ws2 = RegInfo.createVirtualRegister(RegClass: RC); |
3879 | DebugLoc DL = MI.getDebugLoc(); |
3880 | |
3881 | // Splat 1.0 into a vector |
3882 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::LDI_W), DestReg: Ws1).addImm(Val: 1); |
3883 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FFINT_U_W), DestReg: Ws2).addReg(RegNo: Ws1); |
3884 | |
3885 | // Emit 1.0 * fexp2(Wt) |
3886 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FEXP2_W), DestReg: MI.getOperand(i: 0).getReg()) |
3887 | .addReg(RegNo: Ws2) |
3888 | .addReg(RegNo: MI.getOperand(i: 1).getReg()); |
3889 | |
3890 | MI.eraseFromParent(); // The pseudo instruction is gone now. |
3891 | return BB; |
3892 | } |
3893 | |
3894 | // Emit the FEXP2_D_1 pseudo instructions. |
3895 | // |
3896 | // fexp2_d_1_pseudo $wd, $wt |
3897 | // => |
3898 | // ldi.d $ws, 1 |
3899 | // fexp2.d $wd, $ws, $wt |
3900 | MachineBasicBlock * |
3901 | MipsSETargetLowering::emitFEXP2_D_1(MachineInstr &MI, |
3902 | MachineBasicBlock *BB) const { |
3903 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
3904 | MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); |
3905 | const TargetRegisterClass *RC = &Mips::MSA128DRegClass; |
3906 | Register Ws1 = RegInfo.createVirtualRegister(RegClass: RC); |
3907 | Register Ws2 = RegInfo.createVirtualRegister(RegClass: RC); |
3908 | DebugLoc DL = MI.getDebugLoc(); |
3909 | |
3910 | // Splat 1.0 into a vector |
3911 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::LDI_D), DestReg: Ws1).addImm(Val: 1); |
3912 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FFINT_U_D), DestReg: Ws2).addReg(RegNo: Ws1); |
3913 | |
3914 | // Emit 1.0 * fexp2(Wt) |
3915 | BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FEXP2_D), DestReg: MI.getOperand(i: 0).getReg()) |
3916 | .addReg(RegNo: Ws2) |
3917 | .addReg(RegNo: MI.getOperand(i: 1).getReg()); |
3918 | |
3919 | MI.eraseFromParent(); // The pseudo instruction is gone now. |
3920 | return BB; |
3921 | } |
3922 | |