1//===- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Subclass of MipsTargetLowering specialized for mips32/64.
10//
11//===----------------------------------------------------------------------===//
12
13#include "MipsSEISelLowering.h"
14#include "MipsMachineFunction.h"
15#include "MipsRegisterInfo.h"
16#include "MipsSubtarget.h"
17#include "llvm/ADT/APInt.h"
18#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/SmallVector.h"
20#include "llvm/CodeGen/CallingConvLower.h"
21#include "llvm/CodeGen/ISDOpcodes.h"
22#include "llvm/CodeGen/MachineBasicBlock.h"
23#include "llvm/CodeGen/MachineFunction.h"
24#include "llvm/CodeGen/MachineInstr.h"
25#include "llvm/CodeGen/MachineInstrBuilder.h"
26#include "llvm/CodeGen/MachineMemOperand.h"
27#include "llvm/CodeGen/MachineRegisterInfo.h"
28#include "llvm/CodeGen/SelectionDAG.h"
29#include "llvm/CodeGen/SelectionDAGNodes.h"
30#include "llvm/CodeGen/TargetInstrInfo.h"
31#include "llvm/CodeGen/TargetLowering.h"
32#include "llvm/CodeGen/TargetSubtargetInfo.h"
33#include "llvm/CodeGen/ValueTypes.h"
34#include "llvm/CodeGenTypes/MachineValueType.h"
35#include "llvm/IR/DebugLoc.h"
36#include "llvm/IR/Intrinsics.h"
37#include "llvm/IR/IntrinsicsMips.h"
38#include "llvm/Support/Casting.h"
39#include "llvm/Support/CommandLine.h"
40#include "llvm/Support/Debug.h"
41#include "llvm/Support/ErrorHandling.h"
42#include "llvm/Support/raw_ostream.h"
43#include "llvm/TargetParser/Triple.h"
44#include <algorithm>
45#include <cassert>
46#include <cstddef>
47#include <cstdint>
48#include <iterator>
49#include <utility>
50
51using namespace llvm;
52
53#define DEBUG_TYPE "mips-isel"
54
55static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(Val: false),
56 cl::desc("Expand double precision loads and "
57 "stores to their single precision "
58 "counterparts"));
59
60// Widen the v2 vectors to the register width, i.e. v2i16 -> v8i16,
61// v2i32 -> v4i32, etc, to ensure the correct rail size is used, i.e.
62// INST.h for v16, INST.w for v32, INST.d for v64.
63TargetLoweringBase::LegalizeTypeAction
64MipsSETargetLowering::getPreferredVectorAction(MVT VT) const {
65 if (this->Subtarget.hasMSA()) {
66 switch (VT.SimpleTy) {
67 // Leave v2i1 vectors to be promoted to larger ones.
68 // Other i1 types will be promoted by default.
69 case MVT::v2i1:
70 return TypePromoteInteger;
71 break;
72 // 16-bit vector types (v2 and longer)
73 case MVT::v2i8:
74 // 32-bit vector types (v2 and longer)
75 case MVT::v2i16:
76 case MVT::v4i8:
77 // 64-bit vector types (v2 and longer)
78 case MVT::v2i32:
79 case MVT::v4i16:
80 case MVT::v8i8:
81 return TypeWidenVector;
82 break;
83 // Only word (.w) and doubleword (.d) are available for floating point
84 // vectors. That means floating point vectors should be either v2f64
85 // or v4f32.
86 // Here we only explicitly widen the f32 types - f16 will be promoted
87 // by default.
88 case MVT::v2f32:
89 case MVT::v3f32:
90 return TypeWidenVector;
91 // v2i64 is already 128-bit wide.
92 default:
93 break;
94 }
95 }
96 return TargetLoweringBase::getPreferredVectorAction(VT);
97}
98
99MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
100 const MipsSubtarget &STI)
101 : MipsTargetLowering(TM, STI) {
102 // Set up the register classes
103 addRegisterClass(VT: MVT::i32, RC: &Mips::GPR32RegClass);
104
105 if (Subtarget.isGP64bit())
106 addRegisterClass(VT: MVT::i64, RC: &Mips::GPR64RegClass);
107
108 if (Subtarget.hasDSP() || Subtarget.hasMSA()) {
109 // Expand all truncating stores and extending loads.
110 for (MVT VT0 : MVT::fixedlen_vector_valuetypes()) {
111 for (MVT VT1 : MVT::fixedlen_vector_valuetypes()) {
112 setTruncStoreAction(ValVT: VT0, MemVT: VT1, Action: Expand);
113 setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: VT0, MemVT: VT1, Action: Expand);
114 setLoadExtAction(ExtType: ISD::ZEXTLOAD, ValVT: VT0, MemVT: VT1, Action: Expand);
115 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: VT0, MemVT: VT1, Action: Expand);
116 }
117 }
118 }
119
120 if (Subtarget.hasDSP()) {
121 MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8};
122
123 for (const auto &VecTy : VecTys) {
124 addRegisterClass(VT: VecTy, RC: &Mips::DSPRRegClass);
125
126 // Expand all builtin opcodes.
127 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
128 setOperationAction(Op: Opc, VT: VecTy, Action: Expand);
129
130 setOperationAction(Op: ISD::ADD, VT: VecTy, Action: Legal);
131 setOperationAction(Op: ISD::SUB, VT: VecTy, Action: Legal);
132 setOperationAction(Op: ISD::LOAD, VT: VecTy, Action: Legal);
133 setOperationAction(Op: ISD::STORE, VT: VecTy, Action: Legal);
134 setOperationAction(Op: ISD::BITCAST, VT: VecTy, Action: Legal);
135 }
136
137 setTargetDAGCombine(
138 {ISD::SHL, ISD::SRA, ISD::SRL, ISD::SETCC, ISD::VSELECT});
139
140 if (Subtarget.hasMips32r2()) {
141 setOperationAction(Op: ISD::ADDC, VT: MVT::i32, Action: Legal);
142 setOperationAction(Op: ISD::ADDE, VT: MVT::i32, Action: Legal);
143 }
144 }
145
146 if (Subtarget.hasDSPR2())
147 setOperationAction(Op: ISD::MUL, VT: MVT::v2i16, Action: Legal);
148
149 if (Subtarget.hasMSA()) {
150 addMSAIntType(Ty: MVT::v16i8, RC: &Mips::MSA128BRegClass);
151 addMSAIntType(Ty: MVT::v8i16, RC: &Mips::MSA128HRegClass);
152 addMSAIntType(Ty: MVT::v4i32, RC: &Mips::MSA128WRegClass);
153 addMSAIntType(Ty: MVT::v2i64, RC: &Mips::MSA128DRegClass);
154 addMSAFloatType(Ty: MVT::v8f16, RC: &Mips::MSA128HRegClass);
155 addMSAFloatType(Ty: MVT::v4f32, RC: &Mips::MSA128WRegClass);
156 addMSAFloatType(Ty: MVT::v2f64, RC: &Mips::MSA128DRegClass);
157
158 // We're using soft promotion for f16, but msa has some instructions for
159 // conversion to/from f16. Mark those conversions as custom so we can take
160 // advantage of these instructions.
161 for (MVT VT : {MVT::f32, MVT::f64}) {
162 setOperationAction(Op: ISD::FP16_TO_FP, VT, Action: Custom);
163 setOperationAction(Op: ISD::FP_TO_FP16, VT, Action: Custom);
164 }
165
166 setTargetDAGCombine(
167 {ISD::AND, ISD::OR, ISD::SRA, ISD::VSELECT, ISD::XOR, ISD::FP_TO_UINT});
168 }
169
170 if (!Subtarget.useSoftFloat()) {
171 addRegisterClass(VT: MVT::f32, RC: &Mips::FGR32RegClass);
172
173 // When dealing with single precision only, use libcalls
174 if (!Subtarget.isSingleFloat()) {
175 if (Subtarget.isFP64bit())
176 addRegisterClass(VT: MVT::f64, RC: &Mips::FGR64RegClass);
177 else
178 addRegisterClass(VT: MVT::f64, RC: &Mips::AFGR64RegClass);
179 }
180
181 for (auto Op : {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
182 ISD::STRICT_FDIV, ISD::STRICT_FSQRT}) {
183 setOperationAction(Op, VT: MVT::f32, Action: Legal);
184 setOperationAction(Op, VT: MVT::f64, Action: Legal);
185 }
186 }
187
188 // Targets with 64bits integer registers, but no 64bit floating point register
189 // do not support conversion between them
190 if (Subtarget.isGP64bit() && Subtarget.isSingleFloat() &&
191 !Subtarget.useSoftFloat()) {
192 setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::i64, Action: Expand);
193 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i64, Action: Expand);
194 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i64, Action: Expand);
195 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i64, Action: Expand);
196 }
197
198 setOperationAction(Op: ISD::SMUL_LOHI, VT: MVT::i32, Action: Custom);
199 setOperationAction(Op: ISD::UMUL_LOHI, VT: MVT::i32, Action: Custom);
200 setOperationAction(Op: ISD::MULHS, VT: MVT::i32, Action: Custom);
201 setOperationAction(Op: ISD::MULHU, VT: MVT::i32, Action: Custom);
202
203 if (Subtarget.hasCnMips())
204 setOperationAction(Op: ISD::MUL, VT: MVT::i64, Action: Legal);
205 else if (Subtarget.isR5900()) {
206 // R5900 doesn't have DMULT/DMULTU/DDIV/DDIVU - expand to 32-bit ops
207 setOperationAction(Op: ISD::MUL, VT: MVT::i64, Action: Expand);
208 setOperationAction(Op: ISD::SMUL_LOHI, VT: MVT::i64, Action: Expand);
209 setOperationAction(Op: ISD::UMUL_LOHI, VT: MVT::i64, Action: Expand);
210 setOperationAction(Op: ISD::MULHS, VT: MVT::i64, Action: Expand);
211 setOperationAction(Op: ISD::MULHU, VT: MVT::i64, Action: Expand);
212 setOperationAction(Op: ISD::SDIVREM, VT: MVT::i64, Action: Expand);
213 setOperationAction(Op: ISD::UDIVREM, VT: MVT::i64, Action: Expand);
214 } else if (Subtarget.isGP64bit())
215 setOperationAction(Op: ISD::MUL, VT: MVT::i64, Action: Custom);
216
217 if (Subtarget.isGP64bit() && !Subtarget.isR5900()) {
218 setOperationAction(Op: ISD::SMUL_LOHI, VT: MVT::i64, Action: Custom);
219 setOperationAction(Op: ISD::UMUL_LOHI, VT: MVT::i64, Action: Custom);
220 setOperationAction(Op: ISD::MULHS, VT: MVT::i64, Action: Custom);
221 setOperationAction(Op: ISD::MULHU, VT: MVT::i64, Action: Custom);
222 setOperationAction(Op: ISD::SDIVREM, VT: MVT::i64, Action: Custom);
223 setOperationAction(Op: ISD::UDIVREM, VT: MVT::i64, Action: Custom);
224 }
225
226 setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::i64, Action: Custom);
227 setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::i64, Action: Custom);
228
229 setOperationAction(Op: ISD::SDIVREM, VT: MVT::i32, Action: Custom);
230 setOperationAction(Op: ISD::UDIVREM, VT: MVT::i32, Action: Custom);
231 setOperationAction(Op: ISD::ATOMIC_FENCE, VT: MVT::Other, Action: Custom);
232 if (Subtarget.hasMips32r6()) {
233 setOperationAction(Op: ISD::LOAD, VT: MVT::i32, Action: Legal);
234 setOperationAction(Op: ISD::STORE, VT: MVT::i32, Action: Legal);
235 } else {
236 setOperationAction(Op: ISD::LOAD, VT: MVT::i32, Action: Custom);
237 setOperationAction(Op: ISD::STORE, VT: MVT::i32, Action: Custom);
238 }
239
240 setTargetDAGCombine(ISD::MUL);
241
242 setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::Other, Action: Custom);
243 setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::Other, Action: Custom);
244 setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::Other, Action: Custom);
245
246 if (Subtarget.hasMips32r2() && !Subtarget.useSoftFloat() &&
247 !Subtarget.hasMips64()) {
248 setOperationAction(Op: ISD::BITCAST, VT: MVT::i64, Action: Custom);
249 }
250
251 if (NoDPLoadStore) {
252 setOperationAction(Op: ISD::LOAD, VT: MVT::f64, Action: Custom);
253 setOperationAction(Op: ISD::STORE, VT: MVT::f64, Action: Custom);
254 }
255
256 if (Subtarget.hasMips32r6()) {
257 // MIPS32r6 replaces the accumulator-based multiplies with a three register
258 // instruction
259 setOperationAction(Op: ISD::SMUL_LOHI, VT: MVT::i32, Action: Expand);
260 setOperationAction(Op: ISD::UMUL_LOHI, VT: MVT::i32, Action: Expand);
261 setOperationAction(Op: ISD::MUL, VT: MVT::i32, Action: Legal);
262 setOperationAction(Op: ISD::MULHS, VT: MVT::i32, Action: Legal);
263 setOperationAction(Op: ISD::MULHU, VT: MVT::i32, Action: Legal);
264
265 // MIPS32r6 replaces the accumulator-based division/remainder with separate
266 // three register division and remainder instructions.
267 setOperationAction(Op: ISD::SDIVREM, VT: MVT::i32, Action: Expand);
268 setOperationAction(Op: ISD::UDIVREM, VT: MVT::i32, Action: Expand);
269 setOperationAction(Op: ISD::SDIV, VT: MVT::i32, Action: Legal);
270 setOperationAction(Op: ISD::UDIV, VT: MVT::i32, Action: Legal);
271 setOperationAction(Op: ISD::SREM, VT: MVT::i32, Action: Legal);
272 setOperationAction(Op: ISD::UREM, VT: MVT::i32, Action: Legal);
273
274 // MIPS32r6 replaces conditional moves with an equivalent that removes the
275 // need for three GPR read ports.
276 setOperationAction(Op: ISD::SETCC, VT: MVT::i32, Action: Legal);
277 setOperationAction(Op: ISD::SELECT, VT: MVT::i32, Action: Legal);
278 setOperationAction(Op: ISD::SELECT_CC, VT: MVT::i32, Action: Expand);
279
280 setOperationAction(Op: ISD::SETCC, VT: MVT::f32, Action: Legal);
281 setOperationAction(Op: ISD::SELECT, VT: MVT::f32, Action: Legal);
282 setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f32, Action: Expand);
283
284 assert(Subtarget.isFP64bit() && "FR=1 is required for MIPS32r6");
285 setOperationAction(Op: ISD::SETCC, VT: MVT::f64, Action: Legal);
286 setOperationAction(Op: ISD::SELECT, VT: MVT::f64, Action: Legal);
287 setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f64, Action: Expand);
288
289 setOperationAction(Op: ISD::BRCOND, VT: MVT::Other, Action: Legal);
290
291 // Floating point > and >= are supported via < and <=
292 setCondCodeAction(CCs: ISD::SETOGE, VT: MVT::f32, Action: Expand);
293 setCondCodeAction(CCs: ISD::SETOGT, VT: MVT::f32, Action: Expand);
294 setCondCodeAction(CCs: ISD::SETUGE, VT: MVT::f32, Action: Expand);
295 setCondCodeAction(CCs: ISD::SETUGT, VT: MVT::f32, Action: Expand);
296 setCondCodeAction(CCs: ISD::SETONE, VT: MVT::f32, Action: Expand);
297 setCondCodeAction(CCs: ISD::SETO, VT: MVT::f32, Action: Expand);
298 setCondCodeAction(CCs: ISD::SETUNE, VT: MVT::f32, Action: Expand);
299 setCondCodeAction(CCs: ISD::SETNE, VT: MVT::f32, Action: Expand);
300
301 setCondCodeAction(CCs: ISD::SETOGE, VT: MVT::f64, Action: Expand);
302 setCondCodeAction(CCs: ISD::SETOGT, VT: MVT::f64, Action: Expand);
303 setCondCodeAction(CCs: ISD::SETUGE, VT: MVT::f64, Action: Expand);
304 setCondCodeAction(CCs: ISD::SETUGT, VT: MVT::f64, Action: Expand);
305 setCondCodeAction(CCs: ISD::SETONE, VT: MVT::f64, Action: Expand);
306 setCondCodeAction(CCs: ISD::SETO, VT: MVT::f64, Action: Expand);
307 setCondCodeAction(CCs: ISD::SETUNE, VT: MVT::f64, Action: Expand);
308 setCondCodeAction(CCs: ISD::SETNE, VT: MVT::f64, Action: Expand);
309 }
310
311 if (Subtarget.hasMips64r6()) {
312 // MIPS64r6 replaces the accumulator-based multiplies with a three register
313 // instruction
314 setOperationAction(Op: ISD::SMUL_LOHI, VT: MVT::i64, Action: Expand);
315 setOperationAction(Op: ISD::UMUL_LOHI, VT: MVT::i64, Action: Expand);
316 setOperationAction(Op: ISD::MUL, VT: MVT::i64, Action: Legal);
317 setOperationAction(Op: ISD::MULHS, VT: MVT::i64, Action: Legal);
318 setOperationAction(Op: ISD::MULHU, VT: MVT::i64, Action: Legal);
319
320 // MIPS32r6 replaces the accumulator-based division/remainder with separate
321 // three register division and remainder instructions.
322 setOperationAction(Op: ISD::SDIVREM, VT: MVT::i64, Action: Expand);
323 setOperationAction(Op: ISD::UDIVREM, VT: MVT::i64, Action: Expand);
324 setOperationAction(Op: ISD::SDIV, VT: MVT::i64, Action: Legal);
325 setOperationAction(Op: ISD::UDIV, VT: MVT::i64, Action: Legal);
326 setOperationAction(Op: ISD::SREM, VT: MVT::i64, Action: Legal);
327 setOperationAction(Op: ISD::UREM, VT: MVT::i64, Action: Legal);
328
329 // MIPS64r6 replaces conditional moves with an equivalent that removes the
330 // need for three GPR read ports.
331 setOperationAction(Op: ISD::SETCC, VT: MVT::i64, Action: Legal);
332 setOperationAction(Op: ISD::SELECT, VT: MVT::i64, Action: Legal);
333 setOperationAction(Op: ISD::SELECT_CC, VT: MVT::i64, Action: Expand);
334 }
335
336 if (Subtarget.isR5900()) {
337 // R5900 FPU only supports 4 compare conditions: C.F, C.EQ, C.OLT, C.OLE
338 // (and their inversions via bc1t/bc1f). Expand all conditions that would
339 // require C.UN, C.UEQ, C.ULT, or C.ULE instructions (not available on
340 // R5900). The legalizer resolves these via operand swapping, condition
341 // inversion, and decomposition into supported conditions.
342 setCondCodeAction(CCs: ISD::SETOGT, VT: MVT::f32, Action: Expand);
343 setCondCodeAction(CCs: ISD::SETOGE, VT: MVT::f32, Action: Expand);
344 setCondCodeAction(CCs: ISD::SETGT, VT: MVT::f32, Action: Expand);
345 setCondCodeAction(CCs: ISD::SETGE, VT: MVT::f32, Action: Expand);
346 setCondCodeAction(CCs: ISD::SETULT, VT: MVT::f32, Action: Expand);
347 setCondCodeAction(CCs: ISD::SETULE, VT: MVT::f32, Action: Expand);
348 setCondCodeAction(CCs: ISD::SETUO, VT: MVT::f32, Action: Expand);
349 setCondCodeAction(CCs: ISD::SETO, VT: MVT::f32, Action: Expand);
350 setCondCodeAction(CCs: ISD::SETONE, VT: MVT::f32, Action: Expand);
351 setCondCodeAction(CCs: ISD::SETUEQ, VT: MVT::f32, Action: Expand);
352 setCondCodeAction(CCs: ISD::SETNE, VT: MVT::f32, Action: Expand);
353
354 // R5900 FPU does not support IEEE 754 special values (NaN, infinity). Use
355 // custom lowering to decide per-instruction: hardware when nnan+ninf flags
356 // guarantee no NaN or infinity, software libcall otherwise.
357 setOperationAction(Op: ISD::FADD, VT: MVT::f32, Action: Custom);
358 setOperationAction(Op: ISD::FSUB, VT: MVT::f32, Action: Custom);
359 setOperationAction(Op: ISD::FMUL, VT: MVT::f32, Action: Custom);
360 setOperationAction(Op: ISD::FDIV, VT: MVT::f32, Action: Custom);
361 setOperationAction(Op: ISD::FSQRT, VT: MVT::f32, Action: Custom);
362 }
363
364 computeRegisterProperties(TRI: Subtarget.getRegisterInfo());
365}
366
367const MipsTargetLowering *
368llvm::createMipsSETargetLowering(const MipsTargetMachine &TM,
369 const MipsSubtarget &STI) {
370 return new MipsSETargetLowering(TM, STI);
371}
372
373const TargetRegisterClass *
374MipsSETargetLowering::getRepRegClassFor(MVT VT) const {
375 if (VT == MVT::Untyped)
376 return Subtarget.hasDSP() ? &Mips::ACC64DSPRegClass : &Mips::ACC64RegClass;
377
378 return TargetLowering::getRepRegClassFor(VT);
379}
380
381// Enable MSA support for the given integer type and Register class.
382void MipsSETargetLowering::
383addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
384 addRegisterClass(VT: Ty, RC);
385
386 // Expand all builtin opcodes.
387 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
388 setOperationAction(Op: Opc, VT: Ty, Action: Expand);
389
390 setOperationAction(Op: ISD::BITCAST, VT: Ty, Action: Legal);
391 setOperationAction(Op: ISD::LOAD, VT: Ty, Action: Legal);
392 setOperationAction(Op: ISD::STORE, VT: Ty, Action: Legal);
393 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: Ty, Action: Custom);
394 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: Ty, Action: Legal);
395 setOperationAction(Op: ISD::BUILD_VECTOR, VT: Ty, Action: Custom);
396 setOperationAction(Op: ISD::UNDEF, VT: Ty, Action: Legal);
397
398 setOperationAction(Op: ISD::ADD, VT: Ty, Action: Legal);
399 setOperationAction(Op: ISD::AND, VT: Ty, Action: Legal);
400 setOperationAction(Op: ISD::CTLZ, VT: Ty, Action: Legal);
401 setOperationAction(Op: ISD::CTPOP, VT: Ty, Action: Legal);
402 setOperationAction(Op: ISD::MUL, VT: Ty, Action: Legal);
403 setOperationAction(Op: ISD::OR, VT: Ty, Action: Legal);
404 setOperationAction(Op: ISD::SDIV, VT: Ty, Action: Legal);
405 setOperationAction(Op: ISD::SREM, VT: Ty, Action: Legal);
406 setOperationAction(Op: ISD::SHL, VT: Ty, Action: Legal);
407 setOperationAction(Op: ISD::SRA, VT: Ty, Action: Legal);
408 setOperationAction(Op: ISD::SRL, VT: Ty, Action: Legal);
409 setOperationAction(Op: ISD::SUB, VT: Ty, Action: Legal);
410 setOperationAction(Op: ISD::SMAX, VT: Ty, Action: Legal);
411 setOperationAction(Op: ISD::SMIN, VT: Ty, Action: Legal);
412 setOperationAction(Op: ISD::UDIV, VT: Ty, Action: Legal);
413 setOperationAction(Op: ISD::UREM, VT: Ty, Action: Legal);
414 setOperationAction(Op: ISD::UMAX, VT: Ty, Action: Legal);
415 setOperationAction(Op: ISD::UMIN, VT: Ty, Action: Legal);
416 setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: Ty, Action: Custom);
417 setOperationAction(Op: ISD::VSELECT, VT: Ty, Action: Legal);
418 setOperationAction(Op: ISD::XOR, VT: Ty, Action: Legal);
419
420 if (Ty == MVT::v4i32 || Ty == MVT::v2i64) {
421 setOperationAction(Op: ISD::FP_TO_SINT, VT: Ty, Action: Legal);
422 setOperationAction(Op: ISD::FP_TO_UINT, VT: Ty, Action: Legal);
423 setOperationAction(Op: ISD::SINT_TO_FP, VT: Ty, Action: Legal);
424 setOperationAction(Op: ISD::UINT_TO_FP, VT: Ty, Action: Legal);
425 }
426
427 setOperationAction(Op: ISD::SETCC, VT: Ty, Action: Legal);
428 setCondCodeAction(CCs: ISD::SETNE, VT: Ty, Action: Expand);
429 setCondCodeAction(CCs: ISD::SETGE, VT: Ty, Action: Expand);
430 setCondCodeAction(CCs: ISD::SETGT, VT: Ty, Action: Expand);
431 setCondCodeAction(CCs: ISD::SETUGE, VT: Ty, Action: Expand);
432 setCondCodeAction(CCs: ISD::SETUGT, VT: Ty, Action: Expand);
433}
434
435// Enable MSA support for the given floating-point type and Register class.
436void MipsSETargetLowering::
437addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
438 addRegisterClass(VT: Ty, RC);
439
440 // Expand all builtin opcodes.
441 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
442 setOperationAction(Op: Opc, VT: Ty, Action: Expand);
443
444 setOperationAction(Op: ISD::LOAD, VT: Ty, Action: Legal);
445 setOperationAction(Op: ISD::STORE, VT: Ty, Action: Legal);
446 setOperationAction(Op: ISD::BITCAST, VT: Ty, Action: Legal);
447 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: Ty, Action: Legal);
448 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: Ty, Action: Legal);
449 setOperationAction(Op: ISD::BUILD_VECTOR, VT: Ty, Action: Custom);
450
451 if (Ty != MVT::v8f16) {
452 setOperationAction(Op: ISD::FABS, VT: Ty, Action: Legal);
453 setOperationAction(Op: ISD::FADD, VT: Ty, Action: Legal);
454 setOperationAction(Op: ISD::FDIV, VT: Ty, Action: Legal);
455 setOperationAction(Op: ISD::FEXP2, VT: Ty, Action: Legal);
456 setOperationAction(Op: ISD::FLOG2, VT: Ty, Action: Legal);
457 setOperationAction(Op: ISD::FMA, VT: Ty, Action: Legal);
458 setOperationAction(Op: ISD::FMUL, VT: Ty, Action: Legal);
459 setOperationAction(Op: ISD::FRINT, VT: Ty, Action: Legal);
460 setOperationAction(Op: ISD::FSQRT, VT: Ty, Action: Legal);
461 setOperationAction(Op: ISD::FSUB, VT: Ty, Action: Legal);
462 setOperationAction(Op: ISD::VSELECT, VT: Ty, Action: Legal);
463
464 setOperationAction(Op: ISD::SETCC, VT: Ty, Action: Legal);
465 setCondCodeAction(CCs: ISD::SETOGE, VT: Ty, Action: Expand);
466 setCondCodeAction(CCs: ISD::SETOGT, VT: Ty, Action: Expand);
467 setCondCodeAction(CCs: ISD::SETUGE, VT: Ty, Action: Expand);
468 setCondCodeAction(CCs: ISD::SETUGT, VT: Ty, Action: Expand);
469 setCondCodeAction(CCs: ISD::SETGE, VT: Ty, Action: Expand);
470 setCondCodeAction(CCs: ISD::SETGT, VT: Ty, Action: Expand);
471 }
472}
473
474SDValue MipsSETargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
475 if(!Subtarget.hasMips32r6())
476 return MipsTargetLowering::LowerOperation(Op, DAG);
477
478 EVT ResTy = Op->getValueType(ResNo: 0);
479 SDLoc DL(Op);
480
481 // Although MTC1_D64 takes an i32 and writes an f64, the upper 32 bits of the
482 // floating point register are undefined. Not really an issue as sel.d, which
483 // is produced from an FSELECT node, only looks at bit 0.
484 SDValue Tmp = DAG.getNode(Opcode: MipsISD::MTC1_D64, DL, VT: MVT::f64, Operand: Op->getOperand(Num: 0));
485 return DAG.getNode(Opcode: MipsISD::FSELECT, DL, VT: ResTy, N1: Tmp, N2: Op->getOperand(Num: 1),
486 N3: Op->getOperand(Num: 2));
487}
488
489// Lower FP16_TO_FP (the soft-promote-half representation of an f16 -> f32/f64
490// conversion).
491SDValue MipsSETargetLowering::lowerFP16_TO_FP(SDValue Op,
492 SelectionDAG &DAG) const {
493 SDLoc DL(Op);
494 EVT ResTy = Op.getValueType();
495 assert((ResTy == MVT::f32 || ResTy == MVT::f64) && "Unexpected FP16_TO_FP");
496
497 // The operand type is i32 because i16 isn't actually legal on MIPS.
498 SDValue In = Op.getOperand(i: 0);
499 assert(In.getValueType() == MVT::i32 && "Unexpected FP16_TO_FP operand type");
500
501 // Splat into a v8i16 (the 32-bit In value is truncated to the lower 16 bits).
502 SDValue Splatted = DAG.getSplatBuildVector(VT: MVT::v8i16, DL, Op: In);
503
504 // Bitcast from v8i16 to v8f16.
505 SDValue HVec = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::v8f16, Operand: Splatted);
506
507 // Convert from v8f16 to v4f32.
508 SDValue F32Vec = DAG.getNode(
509 Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: MVT::v4f32,
510 N1: DAG.getConstant(Val: Intrinsic::mips_fexupr_w, DL, VT: MVT::i32), N2: HVec);
511 SDValue Res;
512 if (ResTy == MVT::f32) {
513 // Every lane has the converted value, just read it from lane 0.
514 Res = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: MVT::f32, N1: F32Vec,
515 N2: DAG.getVectorIdxConstant(Val: 0, DL));
516 } else {
517 // Convert from v4f32 to v2f64.
518 SDValue F64Vec = DAG.getNode(
519 Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: MVT::v2f64,
520 N1: DAG.getConstant(Val: Intrinsic::mips_fexupr_d, DL, VT: MVT::i32), N2: F32Vec);
521 // Every lane has the converted value, just read it from lane 0.
522 Res = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: MVT::f64, N1: F64Vec,
523 N2: DAG.getVectorIdxConstant(Val: 0, DL));
524 }
525
526 return Res;
527}
528
529// Lower FP_TO_FP16 (the soft-promote-half representation of an f32/f64 -> f16
530// conversion)
531SDValue MipsSETargetLowering::lowerFP_TO_FP16(SDValue Op,
532 SelectionDAG &DAG) const {
533 SDLoc DL(Op);
534 EVT ResTy = Op.getValueType();
535 SDValue In = Op.getOperand(i: 0);
536 assert((In.getValueType() == MVT::f32 || In.getValueType() == MVT::f64) &&
537 "Unexpected FP_TO_FP16");
538
539 SDValue F32Vec;
540 if (In.getValueType() == MVT::f64) {
541 // Splat f64 to v2f64, then convert to v4f32.
542 SDValue F64Vec = DAG.getSplatBuildVector(VT: MVT::v2f64, DL, Op: In);
543 F32Vec = DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: MVT::v4f32,
544 N1: DAG.getConstant(Val: Intrinsic::mips_fexdo_w, DL, VT: MVT::i32),
545 N2: F64Vec, N3: F64Vec);
546 } else {
547 // Splat f32 to v4f32.
548 F32Vec = DAG.getSplatBuildVector(VT: MVT::v4f32, DL, Op: In);
549 }
550
551 // Then convert from v4f32 to v8f16.
552 SDValue HVec = DAG.getNode(
553 Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: MVT::v8f16,
554 N1: DAG.getConstant(Val: Intrinsic::mips_fexdo_h, DL, VT: MVT::i32), N2: F32Vec, N3: F32Vec);
555
556 // Finally cast to v8i16 (f16 is soft-promoted).
557 SDValue IVec = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::v8i16, Operand: HVec);
558 SDValue Res = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: ResTy, N1: IVec,
559 N2: DAG.getVectorIdxConstant(Val: 0, DL));
560
561 return Res;
562}
563
564bool MipsSETargetLowering::allowsMisalignedMemoryAccesses(
565 EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
566 MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;
567
568 if (Subtarget.systemSupportsUnalignedAccess()) {
569 // MIPS32r6/MIPS64r6 is required to support unaligned access. It's
570 // implementation defined whether this is handled by hardware, software, or
571 // a hybrid of the two but it's expected that most implementations will
572 // handle the majority of cases in hardware.
573 if (Fast)
574 *Fast = 1;
575 return true;
576 } else if (Subtarget.hasMips32r6()) {
577 return false;
578 }
579
580 switch (SVT) {
581 case MVT::i64:
582 case MVT::i32:
583 if (Fast)
584 *Fast = 1;
585 return true;
586 default:
587 return false;
588 }
589}
590
591SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
592 SelectionDAG &DAG) const {
593 switch(Op.getOpcode()) {
594 case ISD::LOAD: return lowerLOAD(Op, DAG);
595 case ISD::STORE: return lowerSTORE(Op, DAG);
596 case ISD::SMUL_LOHI: return lowerMulDiv(Op, NewOpc: MipsISD::Mult, HasLo: true, HasHi: true, DAG);
597 case ISD::UMUL_LOHI: return lowerMulDiv(Op, NewOpc: MipsISD::Multu, HasLo: true, HasHi: true, DAG);
598 case ISD::MULHS: return lowerMulDiv(Op, NewOpc: MipsISD::Mult, HasLo: false, HasHi: true, DAG);
599 case ISD::MULHU: return lowerMulDiv(Op, NewOpc: MipsISD::Multu, HasLo: false, HasHi: true, DAG);
600 case ISD::MUL: return lowerMulDiv(Op, NewOpc: MipsISD::Mult, HasLo: true, HasHi: false, DAG);
601 case ISD::SDIVREM: return lowerMulDiv(Op, NewOpc: MipsISD::DivRem, HasLo: true, HasHi: true, DAG);
602 case ISD::UDIVREM: return lowerMulDiv(Op, NewOpc: MipsISD::DivRemU, HasLo: true, HasHi: true,
603 DAG);
604 case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG);
605 case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG);
606 case ISD::INTRINSIC_VOID: return lowerINTRINSIC_VOID(Op, DAG);
607 case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG);
608 case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG);
609 case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, DAG);
610 case ISD::SELECT:
611 return lowerSELECT(Op, DAG);
612 case ISD::FP16_TO_FP:
613 case ISD::STRICT_FP16_TO_FP:
614 return lowerFP16_TO_FP(Op, DAG);
615 case ISD::FP_TO_FP16:
616 case ISD::STRICT_FP_TO_FP16:
617 return lowerFP_TO_FP16(Op, DAG);
618 case ISD::BITCAST: return lowerBITCAST(Op, DAG);
619 case ISD::FADD:
620 return lowerR5900FPOp(Op, DAG, LC: RTLIB::ADD_F32);
621 case ISD::FSUB:
622 return lowerR5900FPOp(Op, DAG, LC: RTLIB::SUB_F32);
623 case ISD::FMUL:
624 return lowerR5900FPOp(Op, DAG, LC: RTLIB::MUL_F32);
625 case ISD::FDIV:
626 return lowerR5900FPOp(Op, DAG, LC: RTLIB::DIV_F32);
627 case ISD::FSQRT:
628 return lowerR5900FPOp(Op, DAG, LC: RTLIB::SQRT_F32);
629 }
630
631 return MipsTargetLowering::LowerOperation(Op, DAG);
632}
633
634SDValue MipsSETargetLowering::lowerR5900FPOp(SDValue Op, SelectionDAG &DAG,
635 RTLIB::Libcall LC) const {
636 assert(Subtarget.isR5900());
637 SDNodeFlags Flags = Op->getFlags();
638
639 if (Flags.hasNoNaNs() && Flags.hasNoInfs()) {
640 // Use the hardware FPU instruction if the operation is guaranteed to have
641 // no NaN or infinity inputs/outputs (nnan+ninf flags).
642 return Op;
643 }
644
645 // Fall back to a software libcall for IEEE correctness.
646 SDLoc DL(Op);
647 MVT VT = Op.getSimpleValueType();
648 SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
649 TargetLowering::MakeLibCallOptions CallOptions;
650 auto [Result, Chain] = makeLibCall(DAG, LC, RetVT: VT, Ops, CallOptions, dl: DL);
651 return Result;
652}
653
654// Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT
655//
656// Performs the following transformations:
657// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its
658// sign/zero-extension is completely overwritten by the new one performed by
659// the ISD::AND.
660// - Removes redundant zero extensions performed by an ISD::AND.
661static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
662 TargetLowering::DAGCombinerInfo &DCI,
663 const MipsSubtarget &Subtarget) {
664 if (!Subtarget.hasMSA())
665 return SDValue();
666
667 SDValue Op0 = N->getOperand(Num: 0);
668 SDValue Op1 = N->getOperand(Num: 1);
669 unsigned Op0Opcode = Op0->getOpcode();
670
671 // (and (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d)
672 // where $d + 1 == 2^n and n == 32
673 // or $d + 1 == 2^n and n <= 32 and ZExt
674 // -> (MipsVExtractZExt $a, $b, $c)
675 if (Op0Opcode == MipsISD::VEXTRACT_SEXT_ELT ||
676 Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT) {
677 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Val&: Op1);
678
679 if (!Mask)
680 return SDValue();
681
682 int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2();
683
684 if (Log2IfPositive <= 0)
685 return SDValue(); // Mask+1 is not a power of 2
686
687 SDValue Op0Op2 = Op0->getOperand(Num: 2);
688 EVT ExtendTy = cast<VTSDNode>(Val&: Op0Op2)->getVT();
689 unsigned ExtendTySize = ExtendTy.getSizeInBits();
690 unsigned Log2 = Log2IfPositive;
691
692 if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && Log2 >= ExtendTySize) ||
693 Log2 == ExtendTySize) {
694 SDValue Ops[] = { Op0->getOperand(Num: 0), Op0->getOperand(Num: 1), Op0Op2 };
695 return DAG.getNode(Opcode: MipsISD::VEXTRACT_ZEXT_ELT, DL: SDLoc(Op0),
696 VTList: Op0->getVTList(),
697 Ops: ArrayRef(Ops, Op0->getNumOperands()));
698 }
699 }
700
701 return SDValue();
702}
703
704// Determine if the specified node is a constant vector splat.
705//
706// Returns true and sets Imm if:
707// * N is a ISD::BUILD_VECTOR representing a constant splat
708//
709// This function is quite similar to MipsSEDAGToDAGISel::selectVSplat. The
710// differences are that it assumes the MSA has already been checked and the
711// arbitrary requirement for a maximum of 32-bit integers isn't applied (and
712// must not be in order for binsri.d to be selectable).
713static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) {
714 BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(Val: N.getNode());
715
716 if (!Node)
717 return false;
718
719 APInt SplatValue, SplatUndef;
720 unsigned SplatBitSize;
721 bool HasAnyUndefs;
722
723 if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
724 MinSplatBits: 8, isBigEndian: !IsLittleEndian))
725 return false;
726
727 Imm = SplatValue;
728
729 return true;
730}
731
732// Test whether the given node is an all-ones build_vector.
733static bool isVectorAllOnes(SDValue N) {
734 // Look through bitcasts. Endianness doesn't matter because we are looking
735 // for an all-ones value.
736 if (N->getOpcode() == ISD::BITCAST)
737 N = N->getOperand(Num: 0);
738
739 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Val&: N);
740
741 if (!BVN)
742 return false;
743
744 APInt SplatValue, SplatUndef;
745 unsigned SplatBitSize;
746 bool HasAnyUndefs;
747
748 // Endianness doesn't matter in this context because we are looking for
749 // an all-ones value.
750 if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs))
751 return SplatValue.isAllOnes();
752
753 return false;
754}
755
756// Test whether N is the bitwise inverse of OfNode.
757static bool isBitwiseInverse(SDValue N, SDValue OfNode) {
758 if (N->getOpcode() != ISD::XOR)
759 return false;
760
761 if (isVectorAllOnes(N: N->getOperand(Num: 0)))
762 return N->getOperand(Num: 1) == OfNode;
763
764 if (isVectorAllOnes(N: N->getOperand(Num: 1)))
765 return N->getOperand(Num: 0) == OfNode;
766
767 return false;
768}
769
770// Perform combines where ISD::OR is the root node.
771//
772// Performs the following transformations:
773// - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b)
774// where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit
775// vector type.
776static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
777 TargetLowering::DAGCombinerInfo &DCI,
778 const MipsSubtarget &Subtarget) {
779 if (!Subtarget.hasMSA())
780 return SDValue();
781
782 EVT Ty = N->getValueType(ResNo: 0);
783
784 if (!Ty.is128BitVector())
785 return SDValue();
786
787 SDValue Op0 = N->getOperand(Num: 0);
788 SDValue Op1 = N->getOperand(Num: 1);
789
790 if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) {
791 SDValue Op0Op0 = Op0->getOperand(Num: 0);
792 SDValue Op0Op1 = Op0->getOperand(Num: 1);
793 SDValue Op1Op0 = Op1->getOperand(Num: 0);
794 SDValue Op1Op1 = Op1->getOperand(Num: 1);
795 bool IsLittleEndian = !Subtarget.isLittle();
796
797 SDValue IfSet, IfClr, Cond;
798 bool IsConstantMask = false;
799 APInt Mask, InvMask;
800
801 // If Op0Op0 is an appropriate mask, try to find it's inverse in either
802 // Op1Op0, or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes, while
803 // looking.
804 // IfClr will be set if we find a valid match.
805 if (isVSplat(N: Op0Op0, Imm&: Mask, IsLittleEndian)) {
806 Cond = Op0Op0;
807 IfSet = Op0Op1;
808
809 if (isVSplat(N: Op1Op0, Imm&: InvMask, IsLittleEndian) &&
810 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
811 IfClr = Op1Op1;
812 else if (isVSplat(N: Op1Op1, Imm&: InvMask, IsLittleEndian) &&
813 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
814 IfClr = Op1Op0;
815
816 IsConstantMask = true;
817 }
818
819 // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the same
820 // thing again using this mask.
821 // IfClr will be set if we find a valid match.
822 if (!IfClr.getNode() && isVSplat(N: Op0Op1, Imm&: Mask, IsLittleEndian)) {
823 Cond = Op0Op1;
824 IfSet = Op0Op0;
825
826 if (isVSplat(N: Op1Op0, Imm&: InvMask, IsLittleEndian) &&
827 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
828 IfClr = Op1Op1;
829 else if (isVSplat(N: Op1Op1, Imm&: InvMask, IsLittleEndian) &&
830 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
831 IfClr = Op1Op0;
832
833 IsConstantMask = true;
834 }
835
836 // If IfClr is not yet set, try looking for a non-constant match.
837 // IfClr will be set if we find a valid match amongst the eight
838 // possibilities.
839 if (!IfClr.getNode()) {
840 if (isBitwiseInverse(N: Op0Op0, OfNode: Op1Op0)) {
841 Cond = Op1Op0;
842 IfSet = Op1Op1;
843 IfClr = Op0Op1;
844 } else if (isBitwiseInverse(N: Op0Op1, OfNode: Op1Op0)) {
845 Cond = Op1Op0;
846 IfSet = Op1Op1;
847 IfClr = Op0Op0;
848 } else if (isBitwiseInverse(N: Op0Op0, OfNode: Op1Op1)) {
849 Cond = Op1Op1;
850 IfSet = Op1Op0;
851 IfClr = Op0Op1;
852 } else if (isBitwiseInverse(N: Op0Op1, OfNode: Op1Op1)) {
853 Cond = Op1Op1;
854 IfSet = Op1Op0;
855 IfClr = Op0Op0;
856 } else if (isBitwiseInverse(N: Op1Op0, OfNode: Op0Op0)) {
857 Cond = Op0Op0;
858 IfSet = Op0Op1;
859 IfClr = Op1Op1;
860 } else if (isBitwiseInverse(N: Op1Op1, OfNode: Op0Op0)) {
861 Cond = Op0Op0;
862 IfSet = Op0Op1;
863 IfClr = Op1Op0;
864 } else if (isBitwiseInverse(N: Op1Op0, OfNode: Op0Op1)) {
865 Cond = Op0Op1;
866 IfSet = Op0Op0;
867 IfClr = Op1Op1;
868 } else if (isBitwiseInverse(N: Op1Op1, OfNode: Op0Op1)) {
869 Cond = Op0Op1;
870 IfSet = Op0Op0;
871 IfClr = Op1Op0;
872 }
873 }
874
875 // At this point, IfClr will be set if we have a valid match.
876 if (!IfClr.getNode())
877 return SDValue();
878
879 assert(Cond.getNode() && IfSet.getNode());
880
881 // Fold degenerate cases.
882 if (IsConstantMask) {
883 if (Mask.isAllOnes())
884 return IfSet;
885 else if (Mask == 0)
886 return IfClr;
887 }
888
889 // Transform the DAG into an equivalent VSELECT.
890 return DAG.getNode(Opcode: ISD::VSELECT, DL: SDLoc(N), VT: Ty, N1: Cond, N2: IfSet, N3: IfClr);
891 }
892
893 return SDValue();
894}
895
896static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT,
897 SelectionDAG &DAG,
898 const MipsSubtarget &Subtarget) {
899 // Estimate the number of operations the below transform will turn a
900 // constant multiply into. The number is approximately equal to the minimal
901 // number of powers of two that constant can be broken down to by adding
902 // or subtracting them.
903 //
904 // If we have taken more than 12[1] / 8[2] steps to attempt the
905 // optimization for a native sized value, it is more than likely that this
906 // optimization will make things worse.
907 //
908 // [1] MIPS64 requires 6 instructions at most to materialize any constant,
909 // multiplication requires at least 4 cycles, but another cycle (or two)
910 // to retrieve the result from the HI/LO registers.
911 //
912 // [2] For MIPS32, more than 8 steps is expensive as the constant could be
913 // materialized in 2 instructions, multiplication requires at least 4
914 // cycles, but another cycle (or two) to retrieve the result from the
915 // HI/LO registers.
916 //
917 // TODO:
918 // - MaxSteps needs to consider the `VT` of the constant for the current
919 // target.
920 // - Consider to perform this optimization after type legalization.
921 // That allows to remove a workaround for types not supported natively.
922 // - Take in account `-Os, -Oz` flags because this optimization
923 // increases code size.
924 unsigned MaxSteps = Subtarget.isABI_O32() ? 8 : 12;
925
926 SmallVector<APInt, 16> WorkStack(1, C);
927 unsigned Steps = 0;
928 unsigned BitWidth = C.getBitWidth();
929
930 while (!WorkStack.empty()) {
931 APInt Val = WorkStack.pop_back_val();
932
933 if (Val == 0 || Val == 1)
934 continue;
935
936 if (Steps >= MaxSteps)
937 return false;
938
939 if (Val.isPowerOf2()) {
940 ++Steps;
941 continue;
942 }
943
944 APInt Floor = APInt(BitWidth, 1) << Val.logBase2();
945 APInt Ceil = Val.isNegative() ? APInt(BitWidth, 0)
946 : APInt(BitWidth, 1) << C.ceilLogBase2();
947 if ((Val - Floor).ule(RHS: Ceil - Val)) {
948 WorkStack.push_back(Elt: Floor);
949 WorkStack.push_back(Elt: Val - Floor);
950 } else {
951 WorkStack.push_back(Elt: Ceil);
952 WorkStack.push_back(Elt: Ceil - Val);
953 }
954
955 ++Steps;
956 }
957
958 // If the value being multiplied is not supported natively, we have to pay
959 // an additional legalization cost, conservatively assume an increase in the
960 // cost of 3 instructions per step. This values for this heuristic were
961 // determined experimentally.
962 unsigned RegisterSize = DAG.getTargetLoweringInfo()
963 .getRegisterType(Context&: *DAG.getContext(), VT)
964 .getSizeInBits();
965 Steps *= (VT.getSizeInBits() != RegisterSize) * 3;
966 if (Steps > 27)
967 return false;
968
969 return true;
970}
971
972static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT,
973 EVT ShiftTy, SelectionDAG &DAG) {
974 // Return 0.
975 if (C == 0)
976 return DAG.getConstant(Val: 0, DL, VT);
977
978 // Return x.
979 if (C == 1)
980 return X;
981
982 // If c is power of 2, return (shl x, log2(c)).
983 if (C.isPowerOf2())
984 return DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X,
985 N2: DAG.getConstant(Val: C.logBase2(), DL, VT: ShiftTy));
986
987 unsigned BitWidth = C.getBitWidth();
988 APInt Floor = APInt(BitWidth, 1) << C.logBase2();
989 APInt Ceil = C.isNegative() ? APInt(BitWidth, 0) :
990 APInt(BitWidth, 1) << C.ceilLogBase2();
991
992 // If |c - floor_c| <= |c - ceil_c|,
993 // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))),
994 // return (add constMult(x, floor_c), constMult(x, c - floor_c)).
995 if ((C - Floor).ule(RHS: Ceil - C)) {
996 SDValue Op0 = genConstMult(X, C: Floor, DL, VT, ShiftTy, DAG);
997 SDValue Op1 = genConstMult(X, C: C - Floor, DL, VT, ShiftTy, DAG);
998 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op0, N2: Op1);
999 }
1000
1001 // If |c - floor_c| > |c - ceil_c|,
1002 // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)).
1003 SDValue Op0 = genConstMult(X, C: Ceil, DL, VT, ShiftTy, DAG);
1004 SDValue Op1 = genConstMult(X, C: Ceil - C, DL, VT, ShiftTy, DAG);
1005 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op0, N2: Op1);
1006}
1007
1008static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
1009 const TargetLowering::DAGCombinerInfo &DCI,
1010 const MipsSETargetLowering *TL,
1011 const MipsSubtarget &Subtarget) {
1012 EVT VT = N->getValueType(ResNo: 0);
1013
1014 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1)))
1015 if (!VT.isVector() && shouldTransformMulToShiftsAddsSubs(
1016 C: C->getAPIntValue(), VT, DAG, Subtarget))
1017 return genConstMult(X: N->getOperand(Num: 0), C: C->getAPIntValue(), DL: SDLoc(N), VT,
1018 ShiftTy: TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT),
1019 DAG);
1020
1021 return SDValue(N, 0);
1022}
1023
1024static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty,
1025 SelectionDAG &DAG,
1026 const MipsSubtarget &Subtarget) {
1027 // See if this is a vector splat immediate node.
1028 APInt SplatValue, SplatUndef;
1029 unsigned SplatBitSize;
1030 bool HasAnyUndefs;
1031 unsigned EltSize = Ty.getScalarSizeInBits();
1032 BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Val: N->getOperand(Num: 1));
1033
1034 if (!Subtarget.hasDSP())
1035 return SDValue();
1036
1037 if (!BV ||
1038 !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
1039 MinSplatBits: EltSize, isBigEndian: !Subtarget.isLittle()) ||
1040 (SplatBitSize != EltSize) ||
1041 (SplatValue.getZExtValue() >= EltSize))
1042 return SDValue();
1043
1044 SDLoc DL(N);
1045 return DAG.getNode(Opcode: Opc, DL, VT: Ty, N1: N->getOperand(Num: 0),
1046 N2: DAG.getConstant(Val: SplatValue.getZExtValue(), DL, VT: MVT::i32));
1047}
1048
1049static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG,
1050 TargetLowering::DAGCombinerInfo &DCI,
1051 const MipsSubtarget &Subtarget) {
1052 EVT Ty = N->getValueType(ResNo: 0);
1053
1054 if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
1055 return SDValue();
1056
1057 return performDSPShiftCombine(Opc: MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget);
1058}
1059
1060// Fold sign-extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold
1061// constant splats into MipsISD::SHRA_DSP for DSPr2.
1062//
1063// Performs the following transformations:
1064// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its
1065// sign/zero-extension is completely overwritten by the new one performed by
1066// the ISD::SRA and ISD::SHL nodes.
1067// - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL
1068// sequence.
1069//
1070// See performDSPShiftCombine for more information about the transformation
1071// used for DSPr2.
1072static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
1073 TargetLowering::DAGCombinerInfo &DCI,
1074 const MipsSubtarget &Subtarget) {
1075 EVT Ty = N->getValueType(ResNo: 0);
1076
1077 if (Subtarget.hasMSA()) {
1078 SDValue Op0 = N->getOperand(Num: 0);
1079 SDValue Op1 = N->getOperand(Num: 1);
1080
1081 // (sra (shl (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d)
1082 // where $d + sizeof($c) == 32
1083 // or $d + sizeof($c) <= 32 and SExt
1084 // -> (MipsVExtractSExt $a, $b, $c)
1085 if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(Num: 1)) {
1086 SDValue Op0Op0 = Op0->getOperand(Num: 0);
1087 ConstantSDNode *ShAmount = dyn_cast<ConstantSDNode>(Val&: Op1);
1088
1089 if (!ShAmount)
1090 return SDValue();
1091
1092 if (Op0Op0->getOpcode() != MipsISD::VEXTRACT_SEXT_ELT &&
1093 Op0Op0->getOpcode() != MipsISD::VEXTRACT_ZEXT_ELT)
1094 return SDValue();
1095
1096 EVT ExtendTy = cast<VTSDNode>(Val: Op0Op0->getOperand(Num: 2))->getVT();
1097 unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits();
1098
1099 if (TotalBits == 32 ||
1100 (Op0Op0->getOpcode() == MipsISD::VEXTRACT_SEXT_ELT &&
1101 TotalBits <= 32)) {
1102 SDValue Ops[] = { Op0Op0->getOperand(Num: 0), Op0Op0->getOperand(Num: 1),
1103 Op0Op0->getOperand(Num: 2) };
1104 return DAG.getNode(Opcode: MipsISD::VEXTRACT_SEXT_ELT, DL: SDLoc(Op0Op0),
1105 VTList: Op0Op0->getVTList(),
1106 Ops: ArrayRef(Ops, Op0Op0->getNumOperands()));
1107 }
1108 }
1109 }
1110
1111 if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget.hasDSPR2()))
1112 return SDValue();
1113
1114 return performDSPShiftCombine(Opc: MipsISD::SHRA_DSP, N, Ty, DAG, Subtarget);
1115}
1116
1117
1118static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
1119 TargetLowering::DAGCombinerInfo &DCI,
1120 const MipsSubtarget &Subtarget) {
1121 EVT Ty = N->getValueType(ResNo: 0);
1122
1123 if (((Ty != MVT::v2i16) || !Subtarget.hasDSPR2()) && (Ty != MVT::v4i8))
1124 return SDValue();
1125
1126 return performDSPShiftCombine(Opc: MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget);
1127}
1128
1129static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC) {
1130 bool IsV216 = (Ty == MVT::v2i16);
1131
1132 switch (CC) {
1133 case ISD::SETEQ:
1134 case ISD::SETNE: return true;
1135 case ISD::SETLT:
1136 case ISD::SETLE:
1137 case ISD::SETGT:
1138 case ISD::SETGE: return IsV216;
1139 case ISD::SETULT:
1140 case ISD::SETULE:
1141 case ISD::SETUGT:
1142 case ISD::SETUGE: return !IsV216;
1143 default: return false;
1144 }
1145}
1146
1147static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
1148 EVT Ty = N->getValueType(ResNo: 0);
1149
1150 if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
1151 return SDValue();
1152
1153 if (!isLegalDSPCondCode(Ty, CC: cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get()))
1154 return SDValue();
1155
1156 return DAG.getNode(Opcode: MipsISD::SETCC_DSP, DL: SDLoc(N), VT: Ty, N1: N->getOperand(Num: 0),
1157 N2: N->getOperand(Num: 1), N3: N->getOperand(Num: 2));
1158}
1159
1160static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) {
1161 EVT Ty = N->getValueType(ResNo: 0);
1162
1163 if (Ty == MVT::v2i16 || Ty == MVT::v4i8) {
1164 SDValue SetCC = N->getOperand(Num: 0);
1165
1166 if (SetCC.getOpcode() != MipsISD::SETCC_DSP)
1167 return SDValue();
1168
1169 return DAG.getNode(Opcode: MipsISD::SELECT_CC_DSP, DL: SDLoc(N), VT: Ty,
1170 N1: SetCC.getOperand(i: 0), N2: SetCC.getOperand(i: 1),
1171 N3: N->getOperand(Num: 1), N4: N->getOperand(Num: 2), N5: SetCC.getOperand(i: 2));
1172 }
1173
1174 return SDValue();
1175}
1176
1177static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
1178 const MipsSubtarget &Subtarget) {
1179 EVT Ty = N->getValueType(ResNo: 0);
1180
1181 if (Subtarget.hasMSA() && Ty.is128BitVector() && Ty.isInteger()) {
1182 // Try the following combines:
1183 // (xor (or $a, $b), (build_vector allones))
1184 // (xor (or $a, $b), (bitcast (build_vector allones)))
1185 SDValue Op0 = N->getOperand(Num: 0);
1186 SDValue Op1 = N->getOperand(Num: 1);
1187 SDValue NotOp;
1188
1189 if (ISD::isBuildVectorAllOnes(N: Op0.getNode()))
1190 NotOp = Op1;
1191 else if (ISD::isBuildVectorAllOnes(N: Op1.getNode()))
1192 NotOp = Op0;
1193 else
1194 return SDValue();
1195
1196 if (NotOp->getOpcode() == ISD::OR)
1197 return DAG.getNode(Opcode: MipsISD::VNOR, DL: SDLoc(N), VT: Ty, N1: NotOp->getOperand(Num: 0),
1198 N2: NotOp->getOperand(Num: 1));
1199 }
1200
1201 return SDValue();
1202}
1203
1204// Convert (fp_to_uint (fp16_to_fp x)) into (fp_to_sint (fp16_to_fp x)).
1205static SDValue performFP_TO_UINTCombine(SDNode *N, SelectionDAG &DAG) {
1206 SDValue Src = N->getOperand(Num: 0);
1207 EVT VT = N->getValueType(ResNo: 0);
1208
1209 // Use a trick from TargetLowering::expandFP_TO_UINT: we know that every
1210 // integer value that can be represented by f16 is <= 65504, i.e. a signed
1211 // integer of 17 bits or more can represent all values and fptoui and fptosi
1212 // are equivalent.
1213 //
1214 // NOTE: the result of fptoui is poison when the value does not fit in the
1215 // destination type (e.g. because it is negative).
1216 if (Src.getOpcode() != ISD::FP16_TO_FP || VT.getScalarSizeInBits() < 17)
1217 return SDValue();
1218 return DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: SDLoc(N), VT, Operand: Src);
1219}
1220
1221SDValue
1222MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
1223 SelectionDAG &DAG = DCI.DAG;
1224 SDValue Val;
1225
1226 switch (N->getOpcode()) {
1227 case ISD::AND:
1228 Val = performANDCombine(N, DAG, DCI, Subtarget);
1229 break;
1230 case ISD::OR:
1231 Val = performORCombine(N, DAG, DCI, Subtarget);
1232 break;
1233 case ISD::MUL:
1234 return performMULCombine(N, DAG, DCI, TL: this, Subtarget);
1235 case ISD::SHL:
1236 Val = performSHLCombine(N, DAG, DCI, Subtarget);
1237 break;
1238 case ISD::SRA:
1239 return performSRACombine(N, DAG, DCI, Subtarget);
1240 case ISD::SRL:
1241 return performSRLCombine(N, DAG, DCI, Subtarget);
1242 case ISD::VSELECT:
1243 return performVSELECTCombine(N, DAG);
1244 case ISD::XOR:
1245 Val = performXORCombine(N, DAG, Subtarget);
1246 break;
1247 case ISD::SETCC:
1248 Val = performSETCCCombine(N, DAG);
1249 break;
1250 case ISD::FP_TO_UINT:
1251 Val = performFP_TO_UINTCombine(N, DAG);
1252 break;
1253 }
1254
1255 if (Val.getNode()) {
1256 LLVM_DEBUG(dbgs() << "\nMipsSE DAG Combine:\n";
1257 N->printrWithDepth(dbgs(), &DAG); dbgs() << "\n=> \n";
1258 Val.getNode()->printrWithDepth(dbgs(), &DAG); dbgs() << "\n");
1259 return Val;
1260 }
1261
1262 return MipsTargetLowering::PerformDAGCombine(N, DCI);
1263}
1264
1265MachineBasicBlock *
1266MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
1267 MachineBasicBlock *BB) const {
1268 switch (MI.getOpcode()) {
1269 default:
1270 return MipsTargetLowering::EmitInstrWithCustomInserter(MI, MBB: BB);
1271 case Mips::BPOSGE32_PSEUDO:
1272 return emitBPOSGE32(MI, BB);
1273 case Mips::SNZ_B_PSEUDO:
1274 return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BNZ_B);
1275 case Mips::SNZ_H_PSEUDO:
1276 return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BNZ_H);
1277 case Mips::SNZ_W_PSEUDO:
1278 return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BNZ_W);
1279 case Mips::SNZ_D_PSEUDO:
1280 return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BNZ_D);
1281 case Mips::SNZ_V_PSEUDO:
1282 return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BNZ_V);
1283 case Mips::SZ_B_PSEUDO:
1284 return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BZ_B);
1285 case Mips::SZ_H_PSEUDO:
1286 return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BZ_H);
1287 case Mips::SZ_W_PSEUDO:
1288 return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BZ_W);
1289 case Mips::SZ_D_PSEUDO:
1290 return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BZ_D);
1291 case Mips::SZ_V_PSEUDO:
1292 return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BZ_V);
1293 case Mips::COPY_FW_PSEUDO:
1294 return emitCOPY_FW(MI, BB);
1295 case Mips::COPY_FD_PSEUDO:
1296 return emitCOPY_FD(MI, BB);
1297 case Mips::INSERT_FW_PSEUDO:
1298 return emitINSERT_FW(MI, BB);
1299 case Mips::INSERT_FD_PSEUDO:
1300 return emitINSERT_FD(MI, BB);
1301 case Mips::INSERT_B_VIDX_PSEUDO:
1302 case Mips::INSERT_B_VIDX64_PSEUDO:
1303 return emitINSERT_DF_VIDX(MI, BB, EltSizeInBytes: 1, IsFP: false);
1304 case Mips::INSERT_H_VIDX_PSEUDO:
1305 case Mips::INSERT_H_VIDX64_PSEUDO:
1306 return emitINSERT_DF_VIDX(MI, BB, EltSizeInBytes: 2, IsFP: false);
1307 case Mips::INSERT_W_VIDX_PSEUDO:
1308 case Mips::INSERT_W_VIDX64_PSEUDO:
1309 return emitINSERT_DF_VIDX(MI, BB, EltSizeInBytes: 4, IsFP: false);
1310 case Mips::INSERT_D_VIDX_PSEUDO:
1311 case Mips::INSERT_D_VIDX64_PSEUDO:
1312 return emitINSERT_DF_VIDX(MI, BB, EltSizeInBytes: 8, IsFP: false);
1313 case Mips::INSERT_FW_VIDX_PSEUDO:
1314 case Mips::INSERT_FW_VIDX64_PSEUDO:
1315 return emitINSERT_DF_VIDX(MI, BB, EltSizeInBytes: 4, IsFP: true);
1316 case Mips::INSERT_FD_VIDX_PSEUDO:
1317 case Mips::INSERT_FD_VIDX64_PSEUDO:
1318 return emitINSERT_DF_VIDX(MI, BB, EltSizeInBytes: 8, IsFP: true);
1319 case Mips::FILL_FW_PSEUDO:
1320 return emitFILL_FW(MI, BB);
1321 case Mips::FILL_FD_PSEUDO:
1322 return emitFILL_FD(MI, BB);
1323 case Mips::FEXP2_W_1_PSEUDO:
1324 return emitFEXP2_W_1(MI, BB);
1325 case Mips::FEXP2_D_1_PSEUDO:
1326 return emitFEXP2_D_1(MI, BB);
1327 }
1328}
1329
1330bool MipsSETargetLowering::isEligibleForTailCallOptimization(
1331 const CCState &CCInfo, unsigned NextStackOffset,
1332 const MipsFunctionInfo &FI) const {
1333 // Exception has to be cleared with eret.
1334 if (FI.isISR())
1335 return false;
1336
1337 // Return false if either the callee or caller has a byval argument.
1338 if (CCInfo.getInRegsParamsCount() > 0 || FI.hasByvalArg())
1339 return false;
1340
1341 // Return true if the callee's argument area is no larger than the caller's.
1342 return NextStackOffset <= FI.getIncomingArgSize();
1343}
1344
1345void MipsSETargetLowering::
1346getOpndList(SmallVectorImpl<SDValue> &Ops,
1347 std::deque<std::pair<unsigned, SDValue>> &RegsToPass,
1348 bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
1349 bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee,
1350 SDValue Chain) const {
1351 Ops.push_back(Elt: Callee);
1352 MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal,
1353 InternalLinkage, IsCallReloc, CLI, Callee,
1354 Chain);
1355}
1356
1357SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1358 LoadSDNode &Nd = *cast<LoadSDNode>(Val&: Op);
1359
1360 if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
1361 return MipsTargetLowering::lowerLOAD(Op, DAG);
1362
1363 // Replace a double precision load with two i32 loads and a buildpair64.
1364 SDLoc DL(Op);
1365 SDValue Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
1366 EVT PtrVT = Ptr.getValueType();
1367
1368 // i32 load from lower address.
1369 SDValue Lo = DAG.getLoad(VT: MVT::i32, dl: DL, Chain, Ptr, PtrInfo: MachinePointerInfo(),
1370 Alignment: Nd.getAlign(), MMOFlags: Nd.getMemOperand()->getFlags());
1371
1372 // i32 load from higher address.
1373 Ptr = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: Ptr, N2: DAG.getConstant(Val: 4, DL, VT: PtrVT));
1374 SDValue Hi = DAG.getLoad(
1375 VT: MVT::i32, dl: DL, Chain: Lo.getValue(R: 1), Ptr, PtrInfo: MachinePointerInfo(),
1376 Alignment: commonAlignment(A: Nd.getAlign(), Offset: 4), MMOFlags: Nd.getMemOperand()->getFlags());
1377
1378 if (!Subtarget.isLittle())
1379 std::swap(a&: Lo, b&: Hi);
1380
1381 SDValue BP = DAG.getNode(Opcode: MipsISD::BuildPairF64, DL, VT: MVT::f64, N1: Lo, N2: Hi);
1382 SDValue Ops[2] = {BP, Hi.getValue(R: 1)};
1383 return DAG.getMergeValues(Ops, dl: DL);
1384}
1385
1386SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1387 StoreSDNode &Nd = *cast<StoreSDNode>(Val&: Op);
1388
1389 if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
1390 return MipsTargetLowering::lowerSTORE(Op, DAG);
1391
1392 // Replace a double precision store with two extractelement64s and i32 stores.
1393 SDLoc DL(Op);
1394 SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
1395 EVT PtrVT = Ptr.getValueType();
1396 SDValue Lo = DAG.getNode(Opcode: MipsISD::ExtractElementF64, DL, VT: MVT::i32,
1397 N1: Val, N2: DAG.getConstant(Val: 0, DL, VT: MVT::i32));
1398 SDValue Hi = DAG.getNode(Opcode: MipsISD::ExtractElementF64, DL, VT: MVT::i32,
1399 N1: Val, N2: DAG.getConstant(Val: 1, DL, VT: MVT::i32));
1400
1401 if (!Subtarget.isLittle())
1402 std::swap(a&: Lo, b&: Hi);
1403
1404 // i32 store to lower address.
1405 Chain = DAG.getStore(Chain, dl: DL, Val: Lo, Ptr, PtrInfo: MachinePointerInfo(), Alignment: Nd.getAlign(),
1406 MMOFlags: Nd.getMemOperand()->getFlags(), AAInfo: Nd.getAAInfo());
1407
1408 // i32 store to higher address.
1409 Ptr = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: Ptr, N2: DAG.getConstant(Val: 4, DL, VT: PtrVT));
1410 return DAG.getStore(Chain, dl: DL, Val: Hi, Ptr, PtrInfo: MachinePointerInfo(),
1411 Alignment: commonAlignment(A: Nd.getAlign(), Offset: 4),
1412 MMOFlags: Nd.getMemOperand()->getFlags(), AAInfo: Nd.getAAInfo());
1413}
1414
1415SDValue MipsSETargetLowering::lowerBITCAST(SDValue Op,
1416 SelectionDAG &DAG) const {
1417 SDLoc DL(Op);
1418 MVT Src = Op.getOperand(i: 0).getValueType().getSimpleVT();
1419 MVT Dest = Op.getValueType().getSimpleVT();
1420
1421 // Bitcast i64 to double.
1422 if (Src == MVT::i64 && Dest == MVT::f64) {
1423 SDValue Lo, Hi;
1424 std::tie(args&: Lo, args&: Hi) =
1425 DAG.SplitScalar(N: Op.getOperand(i: 0), DL, LoVT: MVT::i32, HiVT: MVT::i32);
1426 return DAG.getNode(Opcode: MipsISD::BuildPairF64, DL, VT: MVT::f64, N1: Lo, N2: Hi);
1427 }
1428
1429 // Bitcast double to i64.
1430 if (Src == MVT::f64 && Dest == MVT::i64) {
1431 // Skip lower bitcast when operand0 has converted float results to integer
1432 // which was done by function SoftenFloatResult.
1433 if (getTypeAction(Context&: *DAG.getContext(), VT: Op.getOperand(i: 0).getValueType()) ==
1434 TargetLowering::TypeSoftenFloat)
1435 return SDValue();
1436 SDValue Lo =
1437 DAG.getNode(Opcode: MipsISD::ExtractElementF64, DL, VT: MVT::i32, N1: Op.getOperand(i: 0),
1438 N2: DAG.getConstant(Val: 0, DL, VT: MVT::i32));
1439 SDValue Hi =
1440 DAG.getNode(Opcode: MipsISD::ExtractElementF64, DL, VT: MVT::i32, N1: Op.getOperand(i: 0),
1441 N2: DAG.getConstant(Val: 1, DL, VT: MVT::i32));
1442 return DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: MVT::i64, N1: Lo, N2: Hi);
1443 }
1444
1445 // Skip other cases of bitcast and use default lowering.
1446 return SDValue();
1447}
1448
1449SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
1450 bool HasLo, bool HasHi,
1451 SelectionDAG &DAG) const {
1452 // MIPS32r6/MIPS64r6 removed accumulator based multiplies.
1453 assert(!Subtarget.hasMips32r6());
1454
1455 EVT Ty = Op.getOperand(i: 0).getValueType();
1456 SDLoc DL(Op);
1457 SDValue Mult = DAG.getNode(Opcode: NewOpc, DL, VT: MVT::Untyped,
1458 N1: Op.getOperand(i: 0), N2: Op.getOperand(i: 1));
1459 SDValue Lo, Hi;
1460
1461 if (HasLo)
1462 Lo = DAG.getNode(Opcode: MipsISD::MFLO, DL, VT: Ty, Operand: Mult);
1463 if (HasHi)
1464 Hi = DAG.getNode(Opcode: MipsISD::MFHI, DL, VT: Ty, Operand: Mult);
1465
1466 if (!HasLo || !HasHi)
1467 return HasLo ? Lo : Hi;
1468
1469 SDValue Vals[] = { Lo, Hi };
1470 return DAG.getMergeValues(Ops: Vals, dl: DL);
1471}
1472
1473static SDValue initAccumulator(SDValue In, const SDLoc &DL, SelectionDAG &DAG) {
1474 SDValue InLo, InHi;
1475 std::tie(args&: InLo, args&: InHi) = DAG.SplitScalar(N: In, DL, LoVT: MVT::i32, HiVT: MVT::i32);
1476 return DAG.getNode(Opcode: MipsISD::MTLOHI, DL, VT: MVT::Untyped, N1: InLo, N2: InHi);
1477}
1478
1479static SDValue extractLOHI(SDValue Op, const SDLoc &DL, SelectionDAG &DAG) {
1480 SDValue Lo = DAG.getNode(Opcode: MipsISD::MFLO, DL, VT: MVT::i32, Operand: Op);
1481 SDValue Hi = DAG.getNode(Opcode: MipsISD::MFHI, DL, VT: MVT::i32, Operand: Op);
1482 return DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: MVT::i64, N1: Lo, N2: Hi);
1483}
1484
1485// This function expands mips intrinsic nodes which have 64-bit input operands
1486// or output values.
1487//
1488// out64 = intrinsic-node in64
1489// =>
1490// lo = copy (extract-element (in64, 0))
1491// hi = copy (extract-element (in64, 1))
1492// mips-specific-node
1493// v0 = copy lo
1494// v1 = copy hi
1495// out64 = merge-values (v0, v1)
1496//
1497static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
1498 SDLoc DL(Op);
1499 bool HasChainIn = Op->getOperand(Num: 0).getValueType() == MVT::Other;
1500 SmallVector<SDValue, 3> Ops;
1501 unsigned OpNo = 0;
1502
1503 // See if Op has a chain input.
1504 if (HasChainIn)
1505 Ops.push_back(Elt: Op->getOperand(Num: OpNo++));
1506
1507 // The next operand is the intrinsic opcode.
1508 assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant);
1509
1510 // See if the next operand has type i64.
1511 SDValue Opnd = Op->getOperand(Num: ++OpNo), In64;
1512
1513 if (Opnd.getValueType() == MVT::i64)
1514 In64 = initAccumulator(In: Opnd, DL, DAG);
1515 else
1516 Ops.push_back(Elt: Opnd);
1517
1518 // Push the remaining operands.
1519 for (++OpNo ; OpNo < Op->getNumOperands(); ++OpNo)
1520 Ops.push_back(Elt: Op->getOperand(Num: OpNo));
1521
1522 // Add In64 to the end of the list.
1523 if (In64.getNode())
1524 Ops.push_back(Elt: In64);
1525
1526 // Scan output.
1527 SmallVector<EVT, 2> ResTys;
1528
1529 for (EVT Ty : Op->values())
1530 ResTys.push_back(Elt: (Ty == MVT::i64) ? MVT::Untyped : Ty);
1531
1532 // Create node.
1533 SDValue Val = DAG.getNode(Opcode: Opc, DL, ResultTys: ResTys, Ops);
1534 SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Op: Val, DL, DAG) : Val;
1535
1536 if (!HasChainIn)
1537 return Out;
1538
1539 assert(Val->getValueType(1) == MVT::Other);
1540 SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) };
1541 return DAG.getMergeValues(Ops: Vals, dl: DL);
1542}
1543
1544// Lower an MSA copy intrinsic into the specified SelectionDAG node
1545static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
1546 SDLoc DL(Op);
1547 SDValue Vec = Op->getOperand(Num: 1);
1548 SDValue Idx = Op->getOperand(Num: 2);
1549 EVT ResTy = Op->getValueType(ResNo: 0);
1550 EVT EltTy = Vec->getValueType(ResNo: 0).getVectorElementType();
1551
1552 SDValue Result = DAG.getNode(Opcode: Opc, DL, VT: ResTy, N1: Vec, N2: Idx,
1553 N3: DAG.getValueType(EltTy));
1554
1555 return Result;
1556}
1557
1558static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) {
1559 EVT ResVecTy = Op->getValueType(ResNo: 0);
1560 EVT ViaVecTy = ResVecTy;
1561 bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian();
1562 SDLoc DL(Op);
1563
1564 // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and
1565 // LaneB is the lower 32-bits. Otherwise LaneA and LaneB are alternating
1566 // lanes.
1567 SDValue LaneA = Op->getOperand(Num: OpNr);
1568 SDValue LaneB;
1569
1570 if (ResVecTy == MVT::v2i64) {
1571 // In case of the index being passed as an immediate value, set the upper
1572 // lane to 0 so that the splati.d instruction can be matched.
1573 if (isa<ConstantSDNode>(Val: LaneA))
1574 LaneB = DAG.getConstant(Val: 0, DL, VT: MVT::i32);
1575 // Having the index passed in a register, set the upper lane to the same
1576 // value as the lower - this results in the BUILD_VECTOR node not being
1577 // expanded through stack. This way we are able to pattern match the set of
1578 // nodes created here to splat.d.
1579 else
1580 LaneB = LaneA;
1581 ViaVecTy = MVT::v4i32;
1582 if(BigEndian)
1583 std::swap(a&: LaneA, b&: LaneB);
1584 } else
1585 LaneB = LaneA;
1586
1587 SDValue Ops[16] = { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB,
1588 LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB };
1589
1590 SDValue Result = DAG.getBuildVector(
1591 VT: ViaVecTy, DL, Ops: ArrayRef(Ops, ViaVecTy.getVectorNumElements()));
1592
1593 if (ViaVecTy != ResVecTy) {
1594 SDValue One = DAG.getConstant(Val: 1, DL, VT: ViaVecTy);
1595 Result = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ResVecTy,
1596 Operand: DAG.getNode(Opcode: ISD::AND, DL, VT: ViaVecTy, N1: Result, N2: One));
1597 }
1598
1599 return Result;
1600}
1601
1602static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG,
1603 bool IsSigned = false) {
1604 auto *CImm = cast<ConstantSDNode>(Val: Op->getOperand(Num: ImmOp));
1605 return DAG.getConstant(
1606 Val: APInt(Op->getValueType(ResNo: 0).getScalarType().getSizeInBits(),
1607 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
1608 DL: SDLoc(Op), VT: Op->getValueType(ResNo: 0));
1609}
1610
1611static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue,
1612 bool BigEndian, SelectionDAG &DAG) {
1613 EVT ViaVecTy = VecTy;
1614 SDValue SplatValueA = SplatValue;
1615 SDValue SplatValueB = SplatValue;
1616 SDLoc DL(SplatValue);
1617
1618 if (VecTy == MVT::v2i64) {
1619 // v2i64 BUILD_VECTOR must be performed via v4i32 so split into i32's.
1620 ViaVecTy = MVT::v4i32;
1621
1622 SplatValueA = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: SplatValue);
1623 SplatValueB = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, N1: SplatValue,
1624 N2: DAG.getConstant(Val: 32, DL, VT: MVT::i32));
1625 SplatValueB = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: SplatValueB);
1626 }
1627
1628 // We currently hold the parts in little endian order. Swap them if
1629 // necessary.
1630 if (BigEndian)
1631 std::swap(a&: SplatValueA, b&: SplatValueB);
1632
1633 SDValue Ops[16] = { SplatValueA, SplatValueB, SplatValueA, SplatValueB,
1634 SplatValueA, SplatValueB, SplatValueA, SplatValueB,
1635 SplatValueA, SplatValueB, SplatValueA, SplatValueB,
1636 SplatValueA, SplatValueB, SplatValueA, SplatValueB };
1637
1638 SDValue Result = DAG.getBuildVector(
1639 VT: ViaVecTy, DL, Ops: ArrayRef(Ops, ViaVecTy.getVectorNumElements()));
1640
1641 if (VecTy != ViaVecTy)
1642 Result = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: VecTy, Operand: Result);
1643
1644 return Result;
1645}
1646
1647static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG,
1648 unsigned Opc, SDValue Imm,
1649 bool BigEndian) {
1650 EVT VecTy = Op->getValueType(ResNo: 0);
1651 SDValue Exp2Imm;
1652 SDLoc DL(Op);
1653
1654 // The DAG Combiner can't constant fold bitcasted vectors yet so we must do it
1655 // here for now.
1656 if (VecTy == MVT::v2i64) {
1657 if (ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(Val&: Imm)) {
1658 APInt BitImm = APInt(64, 1) << CImm->getAPIntValue();
1659
1660 SDValue BitImmHiOp = DAG.getConstant(Val: BitImm.lshr(shiftAmt: 32).trunc(width: 32), DL,
1661 VT: MVT::i32);
1662 SDValue BitImmLoOp = DAG.getConstant(Val: BitImm.trunc(width: 32), DL, VT: MVT::i32);
1663
1664 if (BigEndian)
1665 std::swap(a&: BitImmLoOp, b&: BitImmHiOp);
1666
1667 Exp2Imm = DAG.getNode(
1668 Opcode: ISD::BITCAST, DL, VT: MVT::v2i64,
1669 Operand: DAG.getBuildVector(VT: MVT::v4i32, DL,
1670 Ops: {BitImmLoOp, BitImmHiOp, BitImmLoOp, BitImmHiOp}));
1671 }
1672 }
1673
1674 if (!Exp2Imm.getNode()) {
1675 // We couldnt constant fold, do a vector shift instead
1676
1677 // Extend i32 to i64 if necessary. Sign or zero extend doesn't matter since
1678 // only values 0-63 are valid.
1679 if (VecTy == MVT::v2i64)
1680 Imm = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, Operand: Imm);
1681
1682 Exp2Imm = getBuildVectorSplat(VecTy, SplatValue: Imm, BigEndian, DAG);
1683
1684 Exp2Imm = DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: DAG.getConstant(Val: 1, DL, VT: VecTy),
1685 N2: Exp2Imm);
1686 }
1687
1688 return DAG.getNode(Opcode: Opc, DL, VT: VecTy, N1: Op->getOperand(Num: 1), N2: Exp2Imm);
1689}
1690
1691static SDValue truncateVecElts(SDValue Op, SelectionDAG &DAG) {
1692 SDLoc DL(Op);
1693 EVT ResTy = Op->getValueType(ResNo: 0);
1694 SDValue Vec = Op->getOperand(Num: 2);
1695 bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian();
1696 MVT ResEltTy = ResTy == MVT::v2i64 ? MVT::i64 : MVT::i32;
1697 SDValue ConstValue = DAG.getConstant(Val: Vec.getScalarValueSizeInBits() - 1,
1698 DL, VT: ResEltTy);
1699 SDValue SplatVec = getBuildVectorSplat(VecTy: ResTy, SplatValue: ConstValue, BigEndian, DAG);
1700
1701 return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Vec, N2: SplatVec);
1702}
1703
1704static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) {
1705 EVT ResTy = Op->getValueType(ResNo: 0);
1706 SDLoc DL(Op);
1707 SDValue One = DAG.getConstant(Val: 1, DL, VT: ResTy);
1708 SDValue Bit = DAG.getNode(Opcode: ISD::SHL, DL, VT: ResTy, N1: One, N2: truncateVecElts(Op, DAG));
1709
1710 return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Op->getOperand(Num: 1),
1711 N2: DAG.getNOT(DL, Val: Bit, VT: ResTy));
1712}
1713
1714static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) {
1715 SDLoc DL(Op);
1716 EVT ResTy = Op->getValueType(ResNo: 0);
1717 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1)
1718 << Op->getConstantOperandAPInt(Num: 2);
1719 SDValue BitMask = DAG.getConstant(Val: ~BitImm, DL, VT: ResTy);
1720
1721 return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Op->getOperand(Num: 1), N2: BitMask);
1722}
1723
1724SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
1725 SelectionDAG &DAG) const {
1726 SDLoc DL(Op);
1727 unsigned Intrinsic = Op->getConstantOperandVal(Num: 0);
1728 switch (Intrinsic) {
1729 default:
1730 return SDValue();
1731 case Intrinsic::mips_shilo:
1732 return lowerDSPIntr(Op, DAG, Opc: MipsISD::SHILO);
1733 case Intrinsic::mips_dpau_h_qbl:
1734 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPAU_H_QBL);
1735 case Intrinsic::mips_dpau_h_qbr:
1736 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPAU_H_QBR);
1737 case Intrinsic::mips_dpsu_h_qbl:
1738 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPSU_H_QBL);
1739 case Intrinsic::mips_dpsu_h_qbr:
1740 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPSU_H_QBR);
1741 case Intrinsic::mips_dpa_w_ph:
1742 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPA_W_PH);
1743 case Intrinsic::mips_dps_w_ph:
1744 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPS_W_PH);
1745 case Intrinsic::mips_dpax_w_ph:
1746 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPAX_W_PH);
1747 case Intrinsic::mips_dpsx_w_ph:
1748 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPSX_W_PH);
1749 case Intrinsic::mips_mulsa_w_ph:
1750 return lowerDSPIntr(Op, DAG, Opc: MipsISD::MULSA_W_PH);
1751 case Intrinsic::mips_mult:
1752 return lowerDSPIntr(Op, DAG, Opc: MipsISD::Mult);
1753 case Intrinsic::mips_multu:
1754 return lowerDSPIntr(Op, DAG, Opc: MipsISD::Multu);
1755 case Intrinsic::mips_madd:
1756 return lowerDSPIntr(Op, DAG, Opc: MipsISD::MAdd);
1757 case Intrinsic::mips_maddu:
1758 return lowerDSPIntr(Op, DAG, Opc: MipsISD::MAddu);
1759 case Intrinsic::mips_msub:
1760 return lowerDSPIntr(Op, DAG, Opc: MipsISD::MSub);
1761 case Intrinsic::mips_msubu:
1762 return lowerDSPIntr(Op, DAG, Opc: MipsISD::MSubu);
1763 case Intrinsic::mips_addv_b:
1764 case Intrinsic::mips_addv_h:
1765 case Intrinsic::mips_addv_w:
1766 case Intrinsic::mips_addv_d:
1767 return DAG.getNode(Opcode: ISD::ADD, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
1768 N2: Op->getOperand(Num: 2));
1769 case Intrinsic::mips_addvi_b:
1770 case Intrinsic::mips_addvi_h:
1771 case Intrinsic::mips_addvi_w:
1772 case Intrinsic::mips_addvi_d:
1773 return DAG.getNode(Opcode: ISD::ADD, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
1774 N2: lowerMSASplatImm(Op, ImmOp: 2, DAG));
1775 case Intrinsic::mips_and_v:
1776 return DAG.getNode(Opcode: ISD::AND, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
1777 N2: Op->getOperand(Num: 2));
1778 case Intrinsic::mips_andi_b:
1779 return DAG.getNode(Opcode: ISD::AND, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
1780 N2: lowerMSASplatImm(Op, ImmOp: 2, DAG));
1781 case Intrinsic::mips_bclr_b:
1782 case Intrinsic::mips_bclr_h:
1783 case Intrinsic::mips_bclr_w:
1784 case Intrinsic::mips_bclr_d:
1785 return lowerMSABitClear(Op, DAG);
1786 case Intrinsic::mips_bclri_b:
1787 case Intrinsic::mips_bclri_h:
1788 case Intrinsic::mips_bclri_w:
1789 case Intrinsic::mips_bclri_d:
1790 return lowerMSABitClearImm(Op, DAG);
1791 case Intrinsic::mips_binsli_b:
1792 case Intrinsic::mips_binsli_h:
1793 case Intrinsic::mips_binsli_w:
1794 case Intrinsic::mips_binsli_d: {
1795 // binsli_x(IfClear, IfSet, nbits) -> (vselect LBitsMask, IfSet, IfClear)
1796 EVT VecTy = Op->getValueType(ResNo: 0);
1797 EVT EltTy = VecTy.getVectorElementType();
1798 if (Op->getConstantOperandVal(Num: 3) >= EltTy.getSizeInBits())
1799 report_fatal_error(reason: "Immediate out of range");
1800 APInt Mask = APInt::getHighBitsSet(numBits: EltTy.getSizeInBits(),
1801 hiBitsSet: Op->getConstantOperandVal(Num: 3) + 1);
1802 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: VecTy,
1803 N1: DAG.getConstant(Val: Mask, DL, VT: VecTy, isTarget: true),
1804 N2: Op->getOperand(Num: 2), N3: Op->getOperand(Num: 1));
1805 }
1806 case Intrinsic::mips_binsri_b:
1807 case Intrinsic::mips_binsri_h:
1808 case Intrinsic::mips_binsri_w:
1809 case Intrinsic::mips_binsri_d: {
1810 // binsri_x(IfClear, IfSet, nbits) -> (vselect RBitsMask, IfSet, IfClear)
1811 EVT VecTy = Op->getValueType(ResNo: 0);
1812 EVT EltTy = VecTy.getVectorElementType();
1813 if (Op->getConstantOperandVal(Num: 3) >= EltTy.getSizeInBits())
1814 report_fatal_error(reason: "Immediate out of range");
1815 APInt Mask = APInt::getLowBitsSet(numBits: EltTy.getSizeInBits(),
1816 loBitsSet: Op->getConstantOperandVal(Num: 3) + 1);
1817 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: VecTy,
1818 N1: DAG.getConstant(Val: Mask, DL, VT: VecTy, isTarget: true),
1819 N2: Op->getOperand(Num: 2), N3: Op->getOperand(Num: 1));
1820 }
1821 case Intrinsic::mips_bmnz_v:
1822 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 3),
1823 N2: Op->getOperand(Num: 2), N3: Op->getOperand(Num: 1));
1824 case Intrinsic::mips_bmnzi_b:
1825 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: Op->getValueType(ResNo: 0),
1826 N1: lowerMSASplatImm(Op, ImmOp: 3, DAG), N2: Op->getOperand(Num: 2),
1827 N3: Op->getOperand(Num: 1));
1828 case Intrinsic::mips_bmz_v:
1829 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 3),
1830 N2: Op->getOperand(Num: 1), N3: Op->getOperand(Num: 2));
1831 case Intrinsic::mips_bmzi_b:
1832 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: Op->getValueType(ResNo: 0),
1833 N1: lowerMSASplatImm(Op, ImmOp: 3, DAG), N2: Op->getOperand(Num: 1),
1834 N3: Op->getOperand(Num: 2));
1835 case Intrinsic::mips_bneg_b:
1836 case Intrinsic::mips_bneg_h:
1837 case Intrinsic::mips_bneg_w:
1838 case Intrinsic::mips_bneg_d: {
1839 EVT VecTy = Op->getValueType(ResNo: 0);
1840 SDValue One = DAG.getConstant(Val: 1, DL, VT: VecTy);
1841
1842 return DAG.getNode(Opcode: ISD::XOR, DL, VT: VecTy, N1: Op->getOperand(Num: 1),
1843 N2: DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: One,
1844 N2: truncateVecElts(Op, DAG)));
1845 }
1846 case Intrinsic::mips_bnegi_b:
1847 case Intrinsic::mips_bnegi_h:
1848 case Intrinsic::mips_bnegi_w:
1849 case Intrinsic::mips_bnegi_d:
1850 return lowerMSABinaryBitImmIntr(Op, DAG, Opc: ISD::XOR, Imm: Op->getOperand(Num: 2),
1851 BigEndian: !Subtarget.isLittle());
1852 case Intrinsic::mips_bnz_b:
1853 case Intrinsic::mips_bnz_h:
1854 case Intrinsic::mips_bnz_w:
1855 case Intrinsic::mips_bnz_d:
1856 return DAG.getNode(Opcode: MipsISD::VALL_NONZERO, DL, VT: Op->getValueType(ResNo: 0),
1857 Operand: Op->getOperand(Num: 1));
1858 case Intrinsic::mips_bnz_v:
1859 return DAG.getNode(Opcode: MipsISD::VANY_NONZERO, DL, VT: Op->getValueType(ResNo: 0),
1860 Operand: Op->getOperand(Num: 1));
1861 case Intrinsic::mips_bsel_v:
1862 // bsel_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear)
1863 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: Op->getValueType(ResNo: 0),
1864 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 3),
1865 N3: Op->getOperand(Num: 2));
1866 case Intrinsic::mips_bseli_b:
1867 // bseli_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear)
1868 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: Op->getValueType(ResNo: 0),
1869 N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 3, DAG),
1870 N3: Op->getOperand(Num: 2));
1871 case Intrinsic::mips_bset_b:
1872 case Intrinsic::mips_bset_h:
1873 case Intrinsic::mips_bset_w:
1874 case Intrinsic::mips_bset_d: {
1875 EVT VecTy = Op->getValueType(ResNo: 0);
1876 SDValue One = DAG.getConstant(Val: 1, DL, VT: VecTy);
1877
1878 return DAG.getNode(Opcode: ISD::OR, DL, VT: VecTy, N1: Op->getOperand(Num: 1),
1879 N2: DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: One,
1880 N2: truncateVecElts(Op, DAG)));
1881 }
1882 case Intrinsic::mips_bseti_b:
1883 case Intrinsic::mips_bseti_h:
1884 case Intrinsic::mips_bseti_w:
1885 case Intrinsic::mips_bseti_d:
1886 return lowerMSABinaryBitImmIntr(Op, DAG, Opc: ISD::OR, Imm: Op->getOperand(Num: 2),
1887 BigEndian: !Subtarget.isLittle());
1888 case Intrinsic::mips_bz_b:
1889 case Intrinsic::mips_bz_h:
1890 case Intrinsic::mips_bz_w:
1891 case Intrinsic::mips_bz_d:
1892 return DAG.getNode(Opcode: MipsISD::VALL_ZERO, DL, VT: Op->getValueType(ResNo: 0),
1893 Operand: Op->getOperand(Num: 1));
1894 case Intrinsic::mips_bz_v:
1895 return DAG.getNode(Opcode: MipsISD::VANY_ZERO, DL, VT: Op->getValueType(ResNo: 0),
1896 Operand: Op->getOperand(Num: 1));
1897 case Intrinsic::mips_ceq_b:
1898 case Intrinsic::mips_ceq_h:
1899 case Intrinsic::mips_ceq_w:
1900 case Intrinsic::mips_ceq_d:
1901 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1902 RHS: Op->getOperand(Num: 2), Cond: ISD::SETEQ);
1903 case Intrinsic::mips_ceqi_b:
1904 case Intrinsic::mips_ceqi_h:
1905 case Intrinsic::mips_ceqi_w:
1906 case Intrinsic::mips_ceqi_d:
1907 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1908 RHS: lowerMSASplatImm(Op, ImmOp: 2, DAG, IsSigned: true), Cond: ISD::SETEQ);
1909 case Intrinsic::mips_cle_s_b:
1910 case Intrinsic::mips_cle_s_h:
1911 case Intrinsic::mips_cle_s_w:
1912 case Intrinsic::mips_cle_s_d:
1913 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1914 RHS: Op->getOperand(Num: 2), Cond: ISD::SETLE);
1915 case Intrinsic::mips_clei_s_b:
1916 case Intrinsic::mips_clei_s_h:
1917 case Intrinsic::mips_clei_s_w:
1918 case Intrinsic::mips_clei_s_d:
1919 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1920 RHS: lowerMSASplatImm(Op, ImmOp: 2, DAG, IsSigned: true), Cond: ISD::SETLE);
1921 case Intrinsic::mips_cle_u_b:
1922 case Intrinsic::mips_cle_u_h:
1923 case Intrinsic::mips_cle_u_w:
1924 case Intrinsic::mips_cle_u_d:
1925 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1926 RHS: Op->getOperand(Num: 2), Cond: ISD::SETULE);
1927 case Intrinsic::mips_clei_u_b:
1928 case Intrinsic::mips_clei_u_h:
1929 case Intrinsic::mips_clei_u_w:
1930 case Intrinsic::mips_clei_u_d:
1931 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1932 RHS: lowerMSASplatImm(Op, ImmOp: 2, DAG), Cond: ISD::SETULE);
1933 case Intrinsic::mips_clt_s_b:
1934 case Intrinsic::mips_clt_s_h:
1935 case Intrinsic::mips_clt_s_w:
1936 case Intrinsic::mips_clt_s_d:
1937 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1938 RHS: Op->getOperand(Num: 2), Cond: ISD::SETLT);
1939 case Intrinsic::mips_clti_s_b:
1940 case Intrinsic::mips_clti_s_h:
1941 case Intrinsic::mips_clti_s_w:
1942 case Intrinsic::mips_clti_s_d:
1943 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1944 RHS: lowerMSASplatImm(Op, ImmOp: 2, DAG, IsSigned: true), Cond: ISD::SETLT);
1945 case Intrinsic::mips_clt_u_b:
1946 case Intrinsic::mips_clt_u_h:
1947 case Intrinsic::mips_clt_u_w:
1948 case Intrinsic::mips_clt_u_d:
1949 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1950 RHS: Op->getOperand(Num: 2), Cond: ISD::SETULT);
1951 case Intrinsic::mips_clti_u_b:
1952 case Intrinsic::mips_clti_u_h:
1953 case Intrinsic::mips_clti_u_w:
1954 case Intrinsic::mips_clti_u_d:
1955 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1956 RHS: lowerMSASplatImm(Op, ImmOp: 2, DAG), Cond: ISD::SETULT);
1957 case Intrinsic::mips_copy_s_b:
1958 case Intrinsic::mips_copy_s_h:
1959 case Intrinsic::mips_copy_s_w:
1960 return lowerMSACopyIntr(Op, DAG, Opc: MipsISD::VEXTRACT_SEXT_ELT);
1961 case Intrinsic::mips_copy_s_d:
1962 if (Subtarget.hasMips64())
1963 // Lower directly into VEXTRACT_SEXT_ELT since i64 is legal on Mips64.
1964 return lowerMSACopyIntr(Op, DAG, Opc: MipsISD::VEXTRACT_SEXT_ELT);
1965 else {
1966 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type
1967 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
1968 return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: SDLoc(Op),
1969 VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
1970 N2: Op->getOperand(Num: 2));
1971 }
1972 case Intrinsic::mips_copy_u_b:
1973 case Intrinsic::mips_copy_u_h:
1974 case Intrinsic::mips_copy_u_w:
1975 return lowerMSACopyIntr(Op, DAG, Opc: MipsISD::VEXTRACT_ZEXT_ELT);
1976 case Intrinsic::mips_copy_u_d:
1977 if (Subtarget.hasMips64())
1978 // Lower directly into VEXTRACT_ZEXT_ELT since i64 is legal on Mips64.
1979 return lowerMSACopyIntr(Op, DAG, Opc: MipsISD::VEXTRACT_ZEXT_ELT);
1980 else {
1981 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type
1982 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
1983 // Note: When i64 is illegal, this results in copy_s.w instructions
1984 // instead of copy_u.w instructions. This makes no difference to the
1985 // behaviour since i64 is only illegal when the register file is 32-bit.
1986 return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: SDLoc(Op),
1987 VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
1988 N2: Op->getOperand(Num: 2));
1989 }
1990 case Intrinsic::mips_div_s_b:
1991 case Intrinsic::mips_div_s_h:
1992 case Intrinsic::mips_div_s_w:
1993 case Intrinsic::mips_div_s_d:
1994 return DAG.getNode(Opcode: ISD::SDIV, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
1995 N2: Op->getOperand(Num: 2));
1996 case Intrinsic::mips_div_u_b:
1997 case Intrinsic::mips_div_u_h:
1998 case Intrinsic::mips_div_u_w:
1999 case Intrinsic::mips_div_u_d:
2000 return DAG.getNode(Opcode: ISD::UDIV, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
2001 N2: Op->getOperand(Num: 2));
2002 case Intrinsic::mips_fadd_w:
2003 case Intrinsic::mips_fadd_d:
2004 // TODO: If intrinsics have fast-math-flags, propagate them.
2005 return DAG.getNode(Opcode: ISD::FADD, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
2006 N2: Op->getOperand(Num: 2));
2007 // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away
2008 case Intrinsic::mips_fceq_w:
2009 case Intrinsic::mips_fceq_d:
2010 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
2011 RHS: Op->getOperand(Num: 2), Cond: ISD::SETOEQ);
2012 case Intrinsic::mips_fcle_w:
2013 case Intrinsic::mips_fcle_d:
2014 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
2015 RHS: Op->getOperand(Num: 2), Cond: ISD::SETOLE);
2016 case Intrinsic::mips_fclt_w:
2017 case Intrinsic::mips_fclt_d:
2018 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
2019 RHS: Op->getOperand(Num: 2), Cond: ISD::SETOLT);
2020 case Intrinsic::mips_fcne_w:
2021 case Intrinsic::mips_fcne_d:
2022 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
2023 RHS: Op->getOperand(Num: 2), Cond: ISD::SETONE);
2024 case Intrinsic::mips_fcor_w:
2025 case Intrinsic::mips_fcor_d:
2026 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
2027 RHS: Op->getOperand(Num: 2), Cond: ISD::SETO);
2028 case Intrinsic::mips_fcueq_w:
2029 case Intrinsic::mips_fcueq_d:
2030 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
2031 RHS: Op->getOperand(Num: 2), Cond: ISD::SETUEQ);
2032 case Intrinsic::mips_fcule_w:
2033 case Intrinsic::mips_fcule_d:
2034 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
2035 RHS: Op->getOperand(Num: 2), Cond: ISD::SETULE);
2036 case Intrinsic::mips_fcult_w:
2037 case Intrinsic::mips_fcult_d:
2038 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
2039 RHS: Op->getOperand(Num: 2), Cond: ISD::SETULT);
2040 case Intrinsic::mips_fcun_w:
2041 case Intrinsic::mips_fcun_d:
2042 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
2043 RHS: Op->getOperand(Num: 2), Cond: ISD::SETUO);
2044 case Intrinsic::mips_fcune_w:
2045 case Intrinsic::mips_fcune_d:
2046 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
2047 RHS: Op->getOperand(Num: 2), Cond: ISD::SETUNE);
2048 case Intrinsic::mips_fdiv_w:
2049 case Intrinsic::mips_fdiv_d:
2050 // TODO: If intrinsics have fast-math-flags, propagate them.
2051 return DAG.getNode(Opcode: ISD::FDIV, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
2052 N2: Op->getOperand(Num: 2));
2053 case Intrinsic::mips_ffint_u_w:
2054 case Intrinsic::mips_ffint_u_d:
2055 return DAG.getNode(Opcode: ISD::UINT_TO_FP, DL, VT: Op->getValueType(ResNo: 0),
2056 Operand: Op->getOperand(Num: 1));
2057 case Intrinsic::mips_ffint_s_w:
2058 case Intrinsic::mips_ffint_s_d:
2059 return DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT: Op->getValueType(ResNo: 0),
2060 Operand: Op->getOperand(Num: 1));
2061 case Intrinsic::mips_fill_b:
2062 case Intrinsic::mips_fill_h:
2063 case Intrinsic::mips_fill_w:
2064 case Intrinsic::mips_fill_d: {
2065 EVT ResTy = Op->getValueType(ResNo: 0);
2066 SmallVector<SDValue, 16> Ops(ResTy.getVectorNumElements(),
2067 Op->getOperand(Num: 1));
2068
2069 // If ResTy is v2i64 then the type legalizer will break this node down into
2070 // an equivalent v4i32.
2071 return DAG.getBuildVector(VT: ResTy, DL, Ops);
2072 }
2073 case Intrinsic::mips_fexp2_w:
2074 case Intrinsic::mips_fexp2_d: {
2075 // TODO: If intrinsics have fast-math-flags, propagate them.
2076 EVT ResTy = Op->getValueType(ResNo: 0);
2077 return DAG.getNode(
2078 Opcode: ISD::FMUL, DL: SDLoc(Op), VT: ResTy, N1: Op->getOperand(Num: 1),
2079 N2: DAG.getNode(Opcode: ISD::FEXP2, DL: SDLoc(Op), VT: ResTy, Operand: Op->getOperand(Num: 2)));
2080 }
2081 case Intrinsic::mips_flog2_w:
2082 case Intrinsic::mips_flog2_d:
2083 return DAG.getNode(Opcode: ISD::FLOG2, DL, VT: Op->getValueType(ResNo: 0), Operand: Op->getOperand(Num: 1));
2084 case Intrinsic::mips_fmadd_w:
2085 case Intrinsic::mips_fmadd_d:
2086 return DAG.getNode(Opcode: ISD::FMA, DL: SDLoc(Op), VT: Op->getValueType(ResNo: 0),
2087 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2), N3: Op->getOperand(Num: 3));
2088 case Intrinsic::mips_fmul_w:
2089 case Intrinsic::mips_fmul_d:
2090 // TODO: If intrinsics have fast-math-flags, propagate them.
2091 return DAG.getNode(Opcode: ISD::FMUL, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
2092 N2: Op->getOperand(Num: 2));
2093 case Intrinsic::mips_fmsub_w:
2094 case Intrinsic::mips_fmsub_d: {
2095 // TODO: If intrinsics have fast-math-flags, propagate them.
2096 return DAG.getNode(Opcode: MipsISD::FMS, DL: SDLoc(Op), VT: Op->getValueType(ResNo: 0),
2097 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2), N3: Op->getOperand(Num: 3));
2098 }
2099 case Intrinsic::mips_frint_w:
2100 case Intrinsic::mips_frint_d:
2101 return DAG.getNode(Opcode: ISD::FRINT, DL, VT: Op->getValueType(ResNo: 0), Operand: Op->getOperand(Num: 1));
2102 case Intrinsic::mips_fsqrt_w:
2103 case Intrinsic::mips_fsqrt_d:
2104 return DAG.getNode(Opcode: ISD::FSQRT, DL, VT: Op->getValueType(ResNo: 0), Operand: Op->getOperand(Num: 1));
2105 case Intrinsic::mips_fsub_w:
2106 case Intrinsic::mips_fsub_d:
2107 // TODO: If intrinsics have fast-math-flags, propagate them.
2108 return DAG.getNode(Opcode: ISD::FSUB, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
2109 N2: Op->getOperand(Num: 2));
2110 case Intrinsic::mips_ftrunc_u_w:
2111 case Intrinsic::mips_ftrunc_u_d:
2112 return DAG.getNode(Opcode: ISD::FP_TO_UINT, DL, VT: Op->getValueType(ResNo: 0),
2113 Operand: Op->getOperand(Num: 1));
2114 case Intrinsic::mips_ftrunc_s_w:
2115 case Intrinsic::mips_ftrunc_s_d:
2116 return DAG.getNode(Opcode: ISD::FP_TO_SINT, DL, VT: Op->getValueType(ResNo: 0),
2117 Operand: Op->getOperand(Num: 1));
2118 case Intrinsic::mips_ilvev_b:
2119 case Intrinsic::mips_ilvev_h:
2120 case Intrinsic::mips_ilvev_w:
2121 case Intrinsic::mips_ilvev_d:
2122 return DAG.getNode(Opcode: MipsISD::ILVEV, DL, VT: Op->getValueType(ResNo: 0),
2123 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2));
2124 case Intrinsic::mips_ilvl_b:
2125 case Intrinsic::mips_ilvl_h:
2126 case Intrinsic::mips_ilvl_w:
2127 case Intrinsic::mips_ilvl_d:
2128 return DAG.getNode(Opcode: MipsISD::ILVL, DL, VT: Op->getValueType(ResNo: 0),
2129 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2));
2130 case Intrinsic::mips_ilvod_b:
2131 case Intrinsic::mips_ilvod_h:
2132 case Intrinsic::mips_ilvod_w:
2133 case Intrinsic::mips_ilvod_d:
2134 return DAG.getNode(Opcode: MipsISD::ILVOD, DL, VT: Op->getValueType(ResNo: 0),
2135 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2));
2136 case Intrinsic::mips_ilvr_b:
2137 case Intrinsic::mips_ilvr_h:
2138 case Intrinsic::mips_ilvr_w:
2139 case Intrinsic::mips_ilvr_d:
2140 return DAG.getNode(Opcode: MipsISD::ILVR, DL, VT: Op->getValueType(ResNo: 0),
2141 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2));
2142 case Intrinsic::mips_insert_b:
2143 case Intrinsic::mips_insert_h:
2144 case Intrinsic::mips_insert_w:
2145 case Intrinsic::mips_insert_d:
2146 return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(Op), VT: Op->getValueType(ResNo: 0),
2147 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 3), N3: Op->getOperand(Num: 2));
2148 case Intrinsic::mips_insve_b:
2149 case Intrinsic::mips_insve_h:
2150 case Intrinsic::mips_insve_w:
2151 case Intrinsic::mips_insve_d: {
2152 // Report an error for out of range values.
2153 int64_t Max;
2154 switch (Intrinsic) {
2155 case Intrinsic::mips_insve_b: Max = 15; break;
2156 case Intrinsic::mips_insve_h: Max = 7; break;
2157 case Intrinsic::mips_insve_w: Max = 3; break;
2158 case Intrinsic::mips_insve_d: Max = 1; break;
2159 default: llvm_unreachable("Unmatched intrinsic");
2160 }
2161 int64_t Value = cast<ConstantSDNode>(Val: Op->getOperand(Num: 2))->getSExtValue();
2162 if (Value < 0 || Value > Max)
2163 report_fatal_error(reason: "Immediate out of range");
2164 return DAG.getNode(Opcode: MipsISD::INSVE, DL, VT: Op->getValueType(ResNo: 0),
2165 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2), N3: Op->getOperand(Num: 3),
2166 N4: DAG.getConstant(Val: 0, DL, VT: MVT::i32));
2167 }
2168 case Intrinsic::mips_ldi_b:
2169 case Intrinsic::mips_ldi_h:
2170 case Intrinsic::mips_ldi_w:
2171 case Intrinsic::mips_ldi_d:
2172 return lowerMSASplatImm(Op, ImmOp: 1, DAG, IsSigned: true);
2173 case Intrinsic::mips_lsa:
2174 case Intrinsic::mips_dlsa: {
2175 EVT ResTy = Op->getValueType(ResNo: 0);
2176 return DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(Op), VT: ResTy, N1: Op->getOperand(Num: 1),
2177 N2: DAG.getNode(Opcode: ISD::SHL, DL: SDLoc(Op), VT: ResTy,
2178 N1: Op->getOperand(Num: 2), N2: Op->getOperand(Num: 3)));
2179 }
2180 case Intrinsic::mips_maddv_b:
2181 case Intrinsic::mips_maddv_h:
2182 case Intrinsic::mips_maddv_w:
2183 case Intrinsic::mips_maddv_d: {
2184 EVT ResTy = Op->getValueType(ResNo: 0);
2185 return DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(Op), VT: ResTy, N1: Op->getOperand(Num: 1),
2186 N2: DAG.getNode(Opcode: ISD::MUL, DL: SDLoc(Op), VT: ResTy,
2187 N1: Op->getOperand(Num: 2), N2: Op->getOperand(Num: 3)));
2188 }
2189 case Intrinsic::mips_max_s_b:
2190 case Intrinsic::mips_max_s_h:
2191 case Intrinsic::mips_max_s_w:
2192 case Intrinsic::mips_max_s_d:
2193 return DAG.getNode(Opcode: ISD::SMAX, DL, VT: Op->getValueType(ResNo: 0),
2194 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2));
2195 case Intrinsic::mips_max_u_b:
2196 case Intrinsic::mips_max_u_h:
2197 case Intrinsic::mips_max_u_w:
2198 case Intrinsic::mips_max_u_d:
2199 return DAG.getNode(Opcode: ISD::UMAX, DL, VT: Op->getValueType(ResNo: 0),
2200 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2));
2201 case Intrinsic::mips_maxi_s_b:
2202 case Intrinsic::mips_maxi_s_h:
2203 case Intrinsic::mips_maxi_s_w:
2204 case Intrinsic::mips_maxi_s_d:
2205 return DAG.getNode(Opcode: ISD::SMAX, DL, VT: Op->getValueType(ResNo: 0),
2206 N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG, IsSigned: true));
2207 case Intrinsic::mips_maxi_u_b:
2208 case Intrinsic::mips_maxi_u_h:
2209 case Intrinsic::mips_maxi_u_w:
2210 case Intrinsic::mips_maxi_u_d:
2211 return DAG.getNode(Opcode: ISD::UMAX, DL, VT: Op->getValueType(ResNo: 0),
2212 N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG));
2213 case Intrinsic::mips_min_s_b:
2214 case Intrinsic::mips_min_s_h:
2215 case Intrinsic::mips_min_s_w:
2216 case Intrinsic::mips_min_s_d:
2217 return DAG.getNode(Opcode: ISD::SMIN, DL, VT: Op->getValueType(ResNo: 0),
2218 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2));
2219 case Intrinsic::mips_min_u_b:
2220 case Intrinsic::mips_min_u_h:
2221 case Intrinsic::mips_min_u_w:
2222 case Intrinsic::mips_min_u_d:
2223 return DAG.getNode(Opcode: ISD::UMIN, DL, VT: Op->getValueType(ResNo: 0),
2224 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2));
2225 case Intrinsic::mips_mini_s_b:
2226 case Intrinsic::mips_mini_s_h:
2227 case Intrinsic::mips_mini_s_w:
2228 case Intrinsic::mips_mini_s_d:
2229 return DAG.getNode(Opcode: ISD::SMIN, DL, VT: Op->getValueType(ResNo: 0),
2230 N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG, IsSigned: true));
2231 case Intrinsic::mips_mini_u_b:
2232 case Intrinsic::mips_mini_u_h:
2233 case Intrinsic::mips_mini_u_w:
2234 case Intrinsic::mips_mini_u_d:
2235 return DAG.getNode(Opcode: ISD::UMIN, DL, VT: Op->getValueType(ResNo: 0),
2236 N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG));
2237 case Intrinsic::mips_mod_s_b:
2238 case Intrinsic::mips_mod_s_h:
2239 case Intrinsic::mips_mod_s_w:
2240 case Intrinsic::mips_mod_s_d:
2241 return DAG.getNode(Opcode: ISD::SREM, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
2242 N2: Op->getOperand(Num: 2));
2243 case Intrinsic::mips_mod_u_b:
2244 case Intrinsic::mips_mod_u_h:
2245 case Intrinsic::mips_mod_u_w:
2246 case Intrinsic::mips_mod_u_d:
2247 return DAG.getNode(Opcode: ISD::UREM, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
2248 N2: Op->getOperand(Num: 2));
2249 case Intrinsic::mips_mulv_b:
2250 case Intrinsic::mips_mulv_h:
2251 case Intrinsic::mips_mulv_w:
2252 case Intrinsic::mips_mulv_d:
2253 return DAG.getNode(Opcode: ISD::MUL, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
2254 N2: Op->getOperand(Num: 2));
2255 case Intrinsic::mips_msubv_b:
2256 case Intrinsic::mips_msubv_h:
2257 case Intrinsic::mips_msubv_w:
2258 case Intrinsic::mips_msubv_d: {
2259 EVT ResTy = Op->getValueType(ResNo: 0);
2260 return DAG.getNode(Opcode: ISD::SUB, DL: SDLoc(Op), VT: ResTy, N1: Op->getOperand(Num: 1),
2261 N2: DAG.getNode(Opcode: ISD::MUL, DL: SDLoc(Op), VT: ResTy,
2262 N1: Op->getOperand(Num: 2), N2: Op->getOperand(Num: 3)));
2263 }
2264 case Intrinsic::mips_nlzc_b:
2265 case Intrinsic::mips_nlzc_h:
2266 case Intrinsic::mips_nlzc_w:
2267 case Intrinsic::mips_nlzc_d:
2268 return DAG.getNode(Opcode: ISD::CTLZ, DL, VT: Op->getValueType(ResNo: 0), Operand: Op->getOperand(Num: 1));
2269 case Intrinsic::mips_nor_v: {
2270 SDValue Res = DAG.getNode(Opcode: ISD::OR, DL, VT: Op->getValueType(ResNo: 0),
2271 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2));
2272 return DAG.getNOT(DL, Val: Res, VT: Res->getValueType(ResNo: 0));
2273 }
2274 case Intrinsic::mips_nori_b: {
2275 SDValue Res = DAG.getNode(Opcode: ISD::OR, DL, VT: Op->getValueType(ResNo: 0),
2276 N1: Op->getOperand(Num: 1),
2277 N2: lowerMSASplatImm(Op, ImmOp: 2, DAG));
2278 return DAG.getNOT(DL, Val: Res, VT: Res->getValueType(ResNo: 0));
2279 }
2280 case Intrinsic::mips_or_v:
2281 return DAG.getNode(Opcode: ISD::OR, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
2282 N2: Op->getOperand(Num: 2));
2283 case Intrinsic::mips_ori_b:
2284 return DAG.getNode(Opcode: ISD::OR, DL, VT: Op->getValueType(ResNo: 0),
2285 N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG));
2286 case Intrinsic::mips_pckev_b:
2287 case Intrinsic::mips_pckev_h:
2288 case Intrinsic::mips_pckev_w:
2289 case Intrinsic::mips_pckev_d:
2290 return DAG.getNode(Opcode: MipsISD::PCKEV, DL, VT: Op->getValueType(ResNo: 0),
2291 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2));
2292 case Intrinsic::mips_pckod_b:
2293 case Intrinsic::mips_pckod_h:
2294 case Intrinsic::mips_pckod_w:
2295 case Intrinsic::mips_pckod_d:
2296 return DAG.getNode(Opcode: MipsISD::PCKOD, DL, VT: Op->getValueType(ResNo: 0),
2297 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2));
2298 case Intrinsic::mips_pcnt_b:
2299 case Intrinsic::mips_pcnt_h:
2300 case Intrinsic::mips_pcnt_w:
2301 case Intrinsic::mips_pcnt_d:
2302 return DAG.getNode(Opcode: ISD::CTPOP, DL, VT: Op->getValueType(ResNo: 0), Operand: Op->getOperand(Num: 1));
2303 case Intrinsic::mips_sat_s_b:
2304 case Intrinsic::mips_sat_s_h:
2305 case Intrinsic::mips_sat_s_w:
2306 case Intrinsic::mips_sat_s_d:
2307 case Intrinsic::mips_sat_u_b:
2308 case Intrinsic::mips_sat_u_h:
2309 case Intrinsic::mips_sat_u_w:
2310 case Intrinsic::mips_sat_u_d: {
2311 // Report an error for out of range values.
2312 int64_t Max;
2313 switch (Intrinsic) {
2314 case Intrinsic::mips_sat_s_b:
2315 case Intrinsic::mips_sat_u_b: Max = 7; break;
2316 case Intrinsic::mips_sat_s_h:
2317 case Intrinsic::mips_sat_u_h: Max = 15; break;
2318 case Intrinsic::mips_sat_s_w:
2319 case Intrinsic::mips_sat_u_w: Max = 31; break;
2320 case Intrinsic::mips_sat_s_d:
2321 case Intrinsic::mips_sat_u_d: Max = 63; break;
2322 default: llvm_unreachable("Unmatched intrinsic");
2323 }
2324 int64_t Value = cast<ConstantSDNode>(Val: Op->getOperand(Num: 2))->getSExtValue();
2325 if (Value < 0 || Value > Max)
2326 report_fatal_error(reason: "Immediate out of range");
2327 return SDValue();
2328 }
2329 case Intrinsic::mips_shf_b:
2330 case Intrinsic::mips_shf_h:
2331 case Intrinsic::mips_shf_w: {
2332 int64_t Value = cast<ConstantSDNode>(Val: Op->getOperand(Num: 2))->getSExtValue();
2333 if (Value < 0 || Value > 255)
2334 report_fatal_error(reason: "Immediate out of range");
2335 return DAG.getNode(Opcode: MipsISD::SHF, DL, VT: Op->getValueType(ResNo: 0),
2336 N1: Op->getOperand(Num: 2), N2: Op->getOperand(Num: 1));
2337 }
2338 case Intrinsic::mips_sldi_b:
2339 case Intrinsic::mips_sldi_h:
2340 case Intrinsic::mips_sldi_w:
2341 case Intrinsic::mips_sldi_d: {
2342 // Report an error for out of range values.
2343 int64_t Max;
2344 switch (Intrinsic) {
2345 case Intrinsic::mips_sldi_b: Max = 15; break;
2346 case Intrinsic::mips_sldi_h: Max = 7; break;
2347 case Intrinsic::mips_sldi_w: Max = 3; break;
2348 case Intrinsic::mips_sldi_d: Max = 1; break;
2349 default: llvm_unreachable("Unmatched intrinsic");
2350 }
2351 int64_t Value = cast<ConstantSDNode>(Val: Op->getOperand(Num: 3))->getSExtValue();
2352 if (Value < 0 || Value > Max)
2353 report_fatal_error(reason: "Immediate out of range");
2354 return SDValue();
2355 }
2356 case Intrinsic::mips_sll_b:
2357 case Intrinsic::mips_sll_h:
2358 case Intrinsic::mips_sll_w:
2359 case Intrinsic::mips_sll_d:
2360 return DAG.getNode(Opcode: ISD::SHL, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
2361 N2: truncateVecElts(Op, DAG));
2362 case Intrinsic::mips_slli_b:
2363 case Intrinsic::mips_slli_h:
2364 case Intrinsic::mips_slli_w:
2365 case Intrinsic::mips_slli_d:
2366 return DAG.getNode(Opcode: ISD::SHL, DL, VT: Op->getValueType(ResNo: 0),
2367 N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG));
2368 case Intrinsic::mips_splat_b:
2369 case Intrinsic::mips_splat_h:
2370 case Intrinsic::mips_splat_w:
2371 case Intrinsic::mips_splat_d:
2372 // We can't lower via VECTOR_SHUFFLE because it requires constant shuffle
2373 // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because
2374 // EXTRACT_VECTOR_ELT can't extract i64's on MIPS32.
2375 // Instead we lower to MipsISD::VSHF and match from there.
2376 return DAG.getNode(Opcode: MipsISD::VSHF, DL, VT: Op->getValueType(ResNo: 0),
2377 N1: lowerMSASplatZExt(Op, OpNr: 2, DAG), N2: Op->getOperand(Num: 1),
2378 N3: Op->getOperand(Num: 1));
2379 case Intrinsic::mips_splati_b:
2380 case Intrinsic::mips_splati_h:
2381 case Intrinsic::mips_splati_w:
2382 case Intrinsic::mips_splati_d:
2383 return DAG.getNode(Opcode: MipsISD::VSHF, DL, VT: Op->getValueType(ResNo: 0),
2384 N1: lowerMSASplatImm(Op, ImmOp: 2, DAG), N2: Op->getOperand(Num: 1),
2385 N3: Op->getOperand(Num: 1));
2386 case Intrinsic::mips_sra_b:
2387 case Intrinsic::mips_sra_h:
2388 case Intrinsic::mips_sra_w:
2389 case Intrinsic::mips_sra_d:
2390 return DAG.getNode(Opcode: ISD::SRA, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
2391 N2: truncateVecElts(Op, DAG));
2392 case Intrinsic::mips_srai_b:
2393 case Intrinsic::mips_srai_h:
2394 case Intrinsic::mips_srai_w:
2395 case Intrinsic::mips_srai_d:
2396 return DAG.getNode(Opcode: ISD::SRA, DL, VT: Op->getValueType(ResNo: 0),
2397 N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG));
2398 case Intrinsic::mips_srari_b:
2399 case Intrinsic::mips_srari_h:
2400 case Intrinsic::mips_srari_w:
2401 case Intrinsic::mips_srari_d: {
2402 // Report an error for out of range values.
2403 int64_t Max;
2404 switch (Intrinsic) {
2405 case Intrinsic::mips_srari_b: Max = 7; break;
2406 case Intrinsic::mips_srari_h: Max = 15; break;
2407 case Intrinsic::mips_srari_w: Max = 31; break;
2408 case Intrinsic::mips_srari_d: Max = 63; break;
2409 default: llvm_unreachable("Unmatched intrinsic");
2410 }
2411 int64_t Value = cast<ConstantSDNode>(Val: Op->getOperand(Num: 2))->getSExtValue();
2412 if (Value < 0 || Value > Max)
2413 report_fatal_error(reason: "Immediate out of range");
2414 return SDValue();
2415 }
2416 case Intrinsic::mips_srl_b:
2417 case Intrinsic::mips_srl_h:
2418 case Intrinsic::mips_srl_w:
2419 case Intrinsic::mips_srl_d:
2420 return DAG.getNode(Opcode: ISD::SRL, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
2421 N2: truncateVecElts(Op, DAG));
2422 case Intrinsic::mips_srli_b:
2423 case Intrinsic::mips_srli_h:
2424 case Intrinsic::mips_srli_w:
2425 case Intrinsic::mips_srli_d:
2426 return DAG.getNode(Opcode: ISD::SRL, DL, VT: Op->getValueType(ResNo: 0),
2427 N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG));
2428 case Intrinsic::mips_srlri_b:
2429 case Intrinsic::mips_srlri_h:
2430 case Intrinsic::mips_srlri_w:
2431 case Intrinsic::mips_srlri_d: {
2432 // Report an error for out of range values.
2433 int64_t Max;
2434 switch (Intrinsic) {
2435 case Intrinsic::mips_srlri_b: Max = 7; break;
2436 case Intrinsic::mips_srlri_h: Max = 15; break;
2437 case Intrinsic::mips_srlri_w: Max = 31; break;
2438 case Intrinsic::mips_srlri_d: Max = 63; break;
2439 default: llvm_unreachable("Unmatched intrinsic");
2440 }
2441 int64_t Value = cast<ConstantSDNode>(Val: Op->getOperand(Num: 2))->getSExtValue();
2442 if (Value < 0 || Value > Max)
2443 report_fatal_error(reason: "Immediate out of range");
2444 return SDValue();
2445 }
2446 case Intrinsic::mips_subv_b:
2447 case Intrinsic::mips_subv_h:
2448 case Intrinsic::mips_subv_w:
2449 case Intrinsic::mips_subv_d:
2450 return DAG.getNode(Opcode: ISD::SUB, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
2451 N2: Op->getOperand(Num: 2));
2452 case Intrinsic::mips_subvi_b:
2453 case Intrinsic::mips_subvi_h:
2454 case Intrinsic::mips_subvi_w:
2455 case Intrinsic::mips_subvi_d:
2456 return DAG.getNode(Opcode: ISD::SUB, DL, VT: Op->getValueType(ResNo: 0),
2457 N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG));
2458 case Intrinsic::mips_vshf_b:
2459 case Intrinsic::mips_vshf_h:
2460 case Intrinsic::mips_vshf_w:
2461 case Intrinsic::mips_vshf_d:
2462 return DAG.getNode(Opcode: MipsISD::VSHF, DL, VT: Op->getValueType(ResNo: 0),
2463 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2), N3: Op->getOperand(Num: 3));
2464 case Intrinsic::mips_xor_v:
2465 return DAG.getNode(Opcode: ISD::XOR, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
2466 N2: Op->getOperand(Num: 2));
2467 case Intrinsic::mips_xori_b:
2468 return DAG.getNode(Opcode: ISD::XOR, DL, VT: Op->getValueType(ResNo: 0),
2469 N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG));
2470 case Intrinsic::thread_pointer: {
2471 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
2472 return DAG.getNode(Opcode: MipsISD::ThreadPointer, DL, VT: PtrVT);
2473 }
2474 }
2475}
2476
2477static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr,
2478 const MipsSubtarget &Subtarget) {
2479 SDLoc DL(Op);
2480 SDValue ChainIn = Op->getOperand(Num: 0);
2481 SDValue Address = Op->getOperand(Num: 2);
2482 SDValue Offset = Op->getOperand(Num: 3);
2483 EVT ResTy = Op->getValueType(ResNo: 0);
2484 EVT PtrTy = Address->getValueType(ResNo: 0);
2485
2486 // For N64 addresses have the underlying type MVT::i64. This intrinsic
2487 // however takes an i32 signed constant offset. The actual type of the
2488 // intrinsic is a scaled signed i10.
2489 if (Subtarget.isABI_N64())
2490 Offset = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: PtrTy, Operand: Offset);
2491
2492 Address = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrTy, N1: Address, N2: Offset);
2493 return DAG.getLoad(VT: ResTy, dl: DL, Chain: ChainIn, Ptr: Address, PtrInfo: MachinePointerInfo(),
2494 Alignment: Align(16));
2495}
2496
2497SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
2498 SelectionDAG &DAG) const {
2499 unsigned Intr = Op->getConstantOperandVal(Num: 1);
2500 switch (Intr) {
2501 default:
2502 return SDValue();
2503 case Intrinsic::mips_extp:
2504 return lowerDSPIntr(Op, DAG, Opc: MipsISD::EXTP);
2505 case Intrinsic::mips_extpdp:
2506 return lowerDSPIntr(Op, DAG, Opc: MipsISD::EXTPDP);
2507 case Intrinsic::mips_extr_w:
2508 return lowerDSPIntr(Op, DAG, Opc: MipsISD::EXTR_W);
2509 case Intrinsic::mips_extr_r_w:
2510 return lowerDSPIntr(Op, DAG, Opc: MipsISD::EXTR_R_W);
2511 case Intrinsic::mips_extr_rs_w:
2512 return lowerDSPIntr(Op, DAG, Opc: MipsISD::EXTR_RS_W);
2513 case Intrinsic::mips_extr_s_h:
2514 return lowerDSPIntr(Op, DAG, Opc: MipsISD::EXTR_S_H);
2515 case Intrinsic::mips_mthlip:
2516 return lowerDSPIntr(Op, DAG, Opc: MipsISD::MTHLIP);
2517 case Intrinsic::mips_mulsaq_s_w_ph:
2518 return lowerDSPIntr(Op, DAG, Opc: MipsISD::MULSAQ_S_W_PH);
2519 case Intrinsic::mips_maq_s_w_phl:
2520 return lowerDSPIntr(Op, DAG, Opc: MipsISD::MAQ_S_W_PHL);
2521 case Intrinsic::mips_maq_s_w_phr:
2522 return lowerDSPIntr(Op, DAG, Opc: MipsISD::MAQ_S_W_PHR);
2523 case Intrinsic::mips_maq_sa_w_phl:
2524 return lowerDSPIntr(Op, DAG, Opc: MipsISD::MAQ_SA_W_PHL);
2525 case Intrinsic::mips_maq_sa_w_phr:
2526 return lowerDSPIntr(Op, DAG, Opc: MipsISD::MAQ_SA_W_PHR);
2527 case Intrinsic::mips_dpaq_s_w_ph:
2528 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPAQ_S_W_PH);
2529 case Intrinsic::mips_dpsq_s_w_ph:
2530 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPSQ_S_W_PH);
2531 case Intrinsic::mips_dpaq_sa_l_w:
2532 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPAQ_SA_L_W);
2533 case Intrinsic::mips_dpsq_sa_l_w:
2534 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPSQ_SA_L_W);
2535 case Intrinsic::mips_dpaqx_s_w_ph:
2536 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPAQX_S_W_PH);
2537 case Intrinsic::mips_dpaqx_sa_w_ph:
2538 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPAQX_SA_W_PH);
2539 case Intrinsic::mips_dpsqx_s_w_ph:
2540 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPSQX_S_W_PH);
2541 case Intrinsic::mips_dpsqx_sa_w_ph:
2542 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPSQX_SA_W_PH);
2543 case Intrinsic::mips_ld_b:
2544 case Intrinsic::mips_ld_h:
2545 case Intrinsic::mips_ld_w:
2546 case Intrinsic::mips_ld_d:
2547 return lowerMSALoadIntr(Op, DAG, Intr, Subtarget);
2548 }
2549}
2550
2551static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr,
2552 const MipsSubtarget &Subtarget) {
2553 SDLoc DL(Op);
2554 SDValue ChainIn = Op->getOperand(Num: 0);
2555 SDValue Value = Op->getOperand(Num: 2);
2556 SDValue Address = Op->getOperand(Num: 3);
2557 SDValue Offset = Op->getOperand(Num: 4);
2558 EVT PtrTy = Address->getValueType(ResNo: 0);
2559
2560 // For N64 addresses have the underlying type MVT::i64. This intrinsic
2561 // however takes an i32 signed constant offset. The actual type of the
2562 // intrinsic is a scaled signed i10.
2563 if (Subtarget.isABI_N64())
2564 Offset = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: PtrTy, Operand: Offset);
2565
2566 Address = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrTy, N1: Address, N2: Offset);
2567
2568 return DAG.getStore(Chain: ChainIn, dl: DL, Val: Value, Ptr: Address, PtrInfo: MachinePointerInfo(),
2569 Alignment: Align(16));
2570}
2571
2572SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op,
2573 SelectionDAG &DAG) const {
2574 unsigned Intr = Op->getConstantOperandVal(Num: 1);
2575 switch (Intr) {
2576 default:
2577 return SDValue();
2578 case Intrinsic::mips_st_b:
2579 case Intrinsic::mips_st_h:
2580 case Intrinsic::mips_st_w:
2581 case Intrinsic::mips_st_d:
2582 return lowerMSAStoreIntr(Op, DAG, Intr, Subtarget);
2583 }
2584}
2585
2586// Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT.
2587//
2588// The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We
2589// choose to sign-extend but we could have equally chosen zero-extend. The
2590// DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT
2591// result into this node later (possibly changing it to a zero-extend in the
2592// process).
2593SDValue MipsSETargetLowering::
2594lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
2595 SDLoc DL(Op);
2596 EVT ResTy = Op->getValueType(ResNo: 0);
2597 SDValue Op0 = Op->getOperand(Num: 0);
2598 EVT VecTy = Op0->getValueType(ResNo: 0);
2599
2600 if (!VecTy.is128BitVector())
2601 return SDValue();
2602
2603 if (ResTy.isInteger()) {
2604 SDValue Op1 = Op->getOperand(Num: 1);
2605 EVT EltTy = VecTy.getVectorElementType();
2606 return DAG.getNode(Opcode: MipsISD::VEXTRACT_SEXT_ELT, DL, VT: ResTy, N1: Op0, N2: Op1,
2607 N3: DAG.getValueType(EltTy));
2608 }
2609
2610 return Op;
2611}
2612
2613static bool isConstantOrUndef(const SDValue Op) {
2614 if (Op->isUndef())
2615 return true;
2616 if (isa<ConstantSDNode>(Val: Op))
2617 return true;
2618 if (isa<ConstantFPSDNode>(Val: Op))
2619 return true;
2620 return false;
2621}
2622
2623static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
2624 for (unsigned i = 0; i < Op->getNumOperands(); ++i)
2625 if (isConstantOrUndef(Op: Op->getOperand(Num: i)))
2626 return true;
2627 return false;
2628}
2629
2630// Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the
2631// backend.
2632//
2633// Lowers according to the following rules:
2634// - Constant splats are legal as-is as long as the SplatBitSize is a power of
2635// 2 less than or equal to 64 and the value fits into a signed 10-bit
2636// immediate
2637// - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize
2638// is a power of 2 less than or equal to 64 and the value does not fit into a
2639// signed 10-bit immediate
2640// - Non-constant splats are legal as-is.
2641// - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT.
2642// - All others are illegal and must be expanded.
2643SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
2644 SelectionDAG &DAG) const {
2645 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Val&: Op);
2646 EVT ResTy = Op->getValueType(ResNo: 0);
2647 SDLoc DL(Op);
2648 APInt SplatValue, SplatUndef;
2649 unsigned SplatBitSize;
2650 bool HasAnyUndefs;
2651
2652 if (!Subtarget.hasMSA() || !ResTy.is128BitVector())
2653 return SDValue();
2654
2655 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
2656 HasAnyUndefs, MinSplatBits: 8,
2657 isBigEndian: !Subtarget.isLittle()) && SplatBitSize <= 64) {
2658 // We can only cope with 8, 16, 32, or 64-bit elements
2659 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
2660 SplatBitSize != 64)
2661 return SDValue();
2662
2663 // If the value isn't an integer type we will have to bitcast
2664 // from an integer type first. Also, if there are any undefs, we must
2665 // lower them to defined values first.
2666 if (ResTy.isInteger() && !HasAnyUndefs)
2667 return Op;
2668
2669 EVT ViaVecTy;
2670
2671 switch (SplatBitSize) {
2672 default:
2673 return SDValue();
2674 case 8:
2675 ViaVecTy = MVT::v16i8;
2676 break;
2677 case 16:
2678 ViaVecTy = MVT::v8i16;
2679 break;
2680 case 32:
2681 ViaVecTy = MVT::v4i32;
2682 break;
2683 case 64:
2684 // There's no fill.d to fall back on for 64-bit values
2685 return SDValue();
2686 }
2687
2688 // SelectionDAG::getConstant will promote SplatValue appropriately.
2689 SDValue Result = DAG.getConstant(Val: SplatValue, DL, VT: ViaVecTy);
2690
2691 // Bitcast to the type we originally wanted
2692 if (ViaVecTy != ResTy)
2693 Result = DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(Node), VT: ResTy, Operand: Result);
2694
2695 return Result;
2696 } else if (DAG.isSplatValue(V: Op, /* AllowUndefs */ false))
2697 return Op;
2698 else if (!isConstantOrUndefBUILD_VECTOR(Op: Node)) {
2699 // Use INSERT_VECTOR_ELT operations rather than expand to stores.
2700 // The resulting code is the same length as the expansion, but it doesn't
2701 // use memory operations
2702 EVT ResTy = Node->getValueType(ResNo: 0);
2703
2704 assert(ResTy.isVector());
2705
2706 unsigned NumElts = ResTy.getVectorNumElements();
2707 SDValue Vector = DAG.getUNDEF(VT: ResTy);
2708 for (unsigned i = 0; i < NumElts; ++i) {
2709 Vector = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: ResTy, N1: Vector,
2710 N2: Node->getOperand(Num: i),
2711 N3: DAG.getConstant(Val: i, DL, VT: MVT::i32));
2712 }
2713 return Vector;
2714 }
2715
2716 return SDValue();
2717}
2718
2719// Lower VECTOR_SHUFFLE into SHF (if possible).
2720//
2721// SHF splits the vector into blocks of four elements, then shuffles these
2722// elements according to a <4 x i2> constant (encoded as an integer immediate).
2723//
2724// It is therefore possible to lower into SHF when the mask takes the form:
2725// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
2726// When undef's appear they are treated as if they were whatever value is
2727// necessary in order to fit the above forms.
2728//
2729// For example:
2730// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
2731// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
2732// i32 7, i32 6, i32 5, i32 4>
2733// is lowered to:
2734// (SHF_H $w0, $w1, 27)
2735// where the 27 comes from:
2736// 3 + (2 << 2) + (1 << 4) + (0 << 6)
2737static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy,
2738 SmallVector<int, 16> Indices,
2739 SelectionDAG &DAG) {
2740 int SHFIndices[4] = { -1, -1, -1, -1 };
2741
2742 if (Indices.size() < 4)
2743 return SDValue();
2744
2745 for (unsigned i = 0; i < 4; ++i) {
2746 for (unsigned j = i; j < Indices.size(); j += 4) {
2747 int Idx = Indices[j];
2748
2749 // Convert from vector index to 4-element subvector index
2750 // If an index refers to an element outside of the subvector then give up
2751 if (Idx != -1) {
2752 Idx -= 4 * (j / 4);
2753 if (Idx < 0 || Idx >= 4)
2754 return SDValue();
2755 }
2756
2757 // If the mask has an undef, replace it with the current index.
2758 // Note that it might still be undef if the current index is also undef
2759 if (SHFIndices[i] == -1)
2760 SHFIndices[i] = Idx;
2761
2762 // Check that non-undef values are the same as in the mask. If they
2763 // aren't then give up
2764 if (!(Idx == -1 || Idx == SHFIndices[i]))
2765 return SDValue();
2766 }
2767 }
2768
2769 // Calculate the immediate. Replace any remaining undefs with zero
2770 APInt Imm(32, 0);
2771 for (int i = 3; i >= 0; --i) {
2772 int Idx = SHFIndices[i];
2773
2774 if (Idx == -1)
2775 Idx = 0;
2776
2777 Imm <<= 2;
2778 Imm |= Idx & 0x3;
2779 }
2780
2781 SDLoc DL(Op);
2782 return DAG.getNode(Opcode: MipsISD::SHF, DL, VT: ResTy,
2783 N1: DAG.getTargetConstant(Val: Imm, DL, VT: MVT::i32),
2784 N2: Op->getOperand(Num: 0));
2785}
2786
2787/// Determine whether a range fits a regular pattern of values.
2788/// This function accounts for the possibility of jumping over the End iterator.
2789template <typename ValType>
2790static bool
2791fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
2792 unsigned CheckStride,
2793 typename SmallVectorImpl<ValType>::const_iterator End,
2794 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
2795 auto &I = Begin;
2796
2797 while (I != End) {
2798 if (*I != -1 && *I != ExpectedIndex)
2799 return false;
2800 ExpectedIndex += ExpectedIndexStride;
2801
2802 // Incrementing past End is undefined behaviour so we must increment one
2803 // step at a time and check for End at each step.
2804 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
2805 ; // Empty loop body.
2806 }
2807 return true;
2808}
2809
2810// Determine whether VECTOR_SHUFFLE is a SPLATI.
2811//
2812// It is a SPLATI when the mask is:
2813// <x, x, x, ...>
2814// where x is any valid index.
2815//
2816// When undef's appear in the mask they are treated as if they were whatever
2817// value is necessary in order to fit the above form.
2818static bool isVECTOR_SHUFFLE_SPLATI(SDValue Op, EVT ResTy,
2819 SmallVector<int, 16> Indices,
2820 SelectionDAG &DAG) {
2821 assert((Indices.size() % 2) == 0);
2822
2823 int SplatIndex = -1;
2824 for (const auto &V : Indices) {
2825 if (V != -1) {
2826 SplatIndex = V;
2827 break;
2828 }
2829 }
2830
2831 return fitsRegularPattern<int>(Begin: Indices.begin(), CheckStride: 1, End: Indices.end(), ExpectedIndex: SplatIndex,
2832 ExpectedIndexStride: 0);
2833}
2834
2835// Lower VECTOR_SHUFFLE into ILVEV (if possible).
2836//
2837// ILVEV interleaves the even elements from each vector.
2838//
2839// It is possible to lower into ILVEV when the mask consists of two of the
2840// following forms interleaved:
2841// <0, 2, 4, ...>
2842// <n, n+2, n+4, ...>
2843// where n is the number of elements in the vector.
2844// For example:
2845// <0, 0, 2, 2, 4, 4, ...>
2846// <0, n, 2, n+2, 4, n+4, ...>
2847//
2848// When undef's appear in the mask they are treated as if they were whatever
2849// value is necessary in order to fit the above forms.
2850static SDValue lowerVECTOR_SHUFFLE_ILVEV(SDValue Op, EVT ResTy,
2851 SmallVector<int, 16> Indices,
2852 SelectionDAG &DAG) {
2853 assert((Indices.size() % 2) == 0);
2854
2855 SDValue Wt;
2856 SDValue Ws;
2857 const auto &Begin = Indices.begin();
2858 const auto &End = Indices.end();
2859
2860 // Check even elements are taken from the even elements of one half or the
2861 // other and pick an operand accordingly.
2862 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 2))
2863 Wt = Op->getOperand(Num: 0);
2864 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Indices.size(), ExpectedIndexStride: 2))
2865 Wt = Op->getOperand(Num: 1);
2866 else
2867 return SDValue();
2868
2869 // Check odd elements are taken from the even elements of one half or the
2870 // other and pick an operand accordingly.
2871 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 2))
2872 Ws = Op->getOperand(Num: 0);
2873 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Indices.size(), ExpectedIndexStride: 2))
2874 Ws = Op->getOperand(Num: 1);
2875 else
2876 return SDValue();
2877
2878 return DAG.getNode(Opcode: MipsISD::ILVEV, DL: SDLoc(Op), VT: ResTy, N1: Ws, N2: Wt);
2879}
2880
2881// Lower VECTOR_SHUFFLE into ILVOD (if possible).
2882//
2883// ILVOD interleaves the odd elements from each vector.
2884//
2885// It is possible to lower into ILVOD when the mask consists of two of the
2886// following forms interleaved:
2887// <1, 3, 5, ...>
2888// <n+1, n+3, n+5, ...>
2889// where n is the number of elements in the vector.
2890// For example:
2891// <1, 1, 3, 3, 5, 5, ...>
2892// <1, n+1, 3, n+3, 5, n+5, ...>
2893//
2894// When undef's appear in the mask they are treated as if they were whatever
2895// value is necessary in order to fit the above forms.
2896static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy,
2897 SmallVector<int, 16> Indices,
2898 SelectionDAG &DAG) {
2899 assert((Indices.size() % 2) == 0);
2900
2901 SDValue Wt;
2902 SDValue Ws;
2903 const auto &Begin = Indices.begin();
2904 const auto &End = Indices.end();
2905
2906 // Check even elements are taken from the odd elements of one half or the
2907 // other and pick an operand accordingly.
2908 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 1, ExpectedIndexStride: 2))
2909 Wt = Op->getOperand(Num: 0);
2910 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Indices.size() + 1, ExpectedIndexStride: 2))
2911 Wt = Op->getOperand(Num: 1);
2912 else
2913 return SDValue();
2914
2915 // Check odd elements are taken from the odd elements of one half or the
2916 // other and pick an operand accordingly.
2917 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 1, ExpectedIndexStride: 2))
2918 Ws = Op->getOperand(Num: 0);
2919 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Indices.size() + 1, ExpectedIndexStride: 2))
2920 Ws = Op->getOperand(Num: 1);
2921 else
2922 return SDValue();
2923
2924 return DAG.getNode(Opcode: MipsISD::ILVOD, DL: SDLoc(Op), VT: ResTy, N1: Ws, N2: Wt);
2925}
2926
2927// Lower VECTOR_SHUFFLE into ILVR (if possible).
2928//
2929// ILVR interleaves consecutive elements from the right (lowest-indexed) half of
2930// each vector.
2931//
2932// It is possible to lower into ILVR when the mask consists of two of the
2933// following forms interleaved:
2934// <0, 1, 2, ...>
2935// <n, n+1, n+2, ...>
2936// where n is the number of elements in the vector.
2937// For example:
2938// <0, 0, 1, 1, 2, 2, ...>
2939// <0, n, 1, n+1, 2, n+2, ...>
2940//
2941// When undef's appear in the mask they are treated as if they were whatever
2942// value is necessary in order to fit the above forms.
2943static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy,
2944 SmallVector<int, 16> Indices,
2945 SelectionDAG &DAG) {
2946 assert((Indices.size() % 2) == 0);
2947
2948 SDValue Wt;
2949 SDValue Ws;
2950 const auto &Begin = Indices.begin();
2951 const auto &End = Indices.end();
2952
2953 // Check even elements are taken from the right (lowest-indexed) elements of
2954 // one half or the other and pick an operand accordingly.
2955 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 1))
2956 Wt = Op->getOperand(Num: 0);
2957 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Indices.size(), ExpectedIndexStride: 1))
2958 Wt = Op->getOperand(Num: 1);
2959 else
2960 return SDValue();
2961
2962 // Check odd elements are taken from the right (lowest-indexed) elements of
2963 // one half or the other and pick an operand accordingly.
2964 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 1))
2965 Ws = Op->getOperand(Num: 0);
2966 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Indices.size(), ExpectedIndexStride: 1))
2967 Ws = Op->getOperand(Num: 1);
2968 else
2969 return SDValue();
2970
2971 return DAG.getNode(Opcode: MipsISD::ILVR, DL: SDLoc(Op), VT: ResTy, N1: Ws, N2: Wt);
2972}
2973
2974// Lower VECTOR_SHUFFLE into ILVL (if possible).
2975//
2976// ILVL interleaves consecutive elements from the left (highest-indexed) half
2977// of each vector.
2978//
2979// It is possible to lower into ILVL when the mask consists of two of the
2980// following forms interleaved:
2981// <x, x+1, x+2, ...>
2982// <n+x, n+x+1, n+x+2, ...>
2983// where n is the number of elements in the vector and x is half n.
2984// For example:
2985// <x, x, x+1, x+1, x+2, x+2, ...>
2986// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
2987//
2988// When undef's appear in the mask they are treated as if they were whatever
2989// value is necessary in order to fit the above forms.
2990static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy,
2991 SmallVector<int, 16> Indices,
2992 SelectionDAG &DAG) {
2993 assert((Indices.size() % 2) == 0);
2994
2995 unsigned HalfSize = Indices.size() / 2;
2996 SDValue Wt;
2997 SDValue Ws;
2998 const auto &Begin = Indices.begin();
2999 const auto &End = Indices.end();
3000
3001 // Check even elements are taken from the left (highest-indexed) elements of
3002 // one half or the other and pick an operand accordingly.
3003 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1))
3004 Wt = Op->getOperand(Num: 0);
3005 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Indices.size() + HalfSize, ExpectedIndexStride: 1))
3006 Wt = Op->getOperand(Num: 1);
3007 else
3008 return SDValue();
3009
3010 // Check odd elements are taken from the left (highest-indexed) elements of
3011 // one half or the other and pick an operand accordingly.
3012 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1))
3013 Ws = Op->getOperand(Num: 0);
3014 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Indices.size() + HalfSize,
3015 ExpectedIndexStride: 1))
3016 Ws = Op->getOperand(Num: 1);
3017 else
3018 return SDValue();
3019
3020 return DAG.getNode(Opcode: MipsISD::ILVL, DL: SDLoc(Op), VT: ResTy, N1: Ws, N2: Wt);
3021}
3022
3023// Lower VECTOR_SHUFFLE into PCKEV (if possible).
3024//
3025// PCKEV copies the even elements of each vector into the result vector.
3026//
3027// It is possible to lower into PCKEV when the mask consists of two of the
3028// following forms concatenated:
3029// <0, 2, 4, ...>
3030// <n, n+2, n+4, ...>
3031// where n is the number of elements in the vector.
3032// For example:
3033// <0, 2, 4, ..., 0, 2, 4, ...>
3034// <0, 2, 4, ..., n, n+2, n+4, ...>
3035//
3036// When undef's appear in the mask they are treated as if they were whatever
3037// value is necessary in order to fit the above forms.
3038static SDValue lowerVECTOR_SHUFFLE_PCKEV(SDValue Op, EVT ResTy,
3039 SmallVector<int, 16> Indices,
3040 SelectionDAG &DAG) {
3041 assert((Indices.size() % 2) == 0);
3042
3043 SDValue Wt;
3044 SDValue Ws;
3045 const auto &Begin = Indices.begin();
3046 const auto &Mid = Indices.begin() + Indices.size() / 2;
3047 const auto &End = Indices.end();
3048
3049 if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: 0, ExpectedIndexStride: 2))
3050 Wt = Op->getOperand(Num: 0);
3051 else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: Indices.size(), ExpectedIndexStride: 2))
3052 Wt = Op->getOperand(Num: 1);
3053 else
3054 return SDValue();
3055
3056 if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: 0, ExpectedIndexStride: 2))
3057 Ws = Op->getOperand(Num: 0);
3058 else if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: Indices.size(), ExpectedIndexStride: 2))
3059 Ws = Op->getOperand(Num: 1);
3060 else
3061 return SDValue();
3062
3063 return DAG.getNode(Opcode: MipsISD::PCKEV, DL: SDLoc(Op), VT: ResTy, N1: Ws, N2: Wt);
3064}
3065
3066// Lower VECTOR_SHUFFLE into PCKOD (if possible).
3067//
3068// PCKOD copies the odd elements of each vector into the result vector.
3069//
3070// It is possible to lower into PCKOD when the mask consists of two of the
3071// following forms concatenated:
3072// <1, 3, 5, ...>
3073// <n+1, n+3, n+5, ...>
3074// where n is the number of elements in the vector.
3075// For example:
3076// <1, 3, 5, ..., 1, 3, 5, ...>
3077// <1, 3, 5, ..., n+1, n+3, n+5, ...>
3078//
3079// When undef's appear in the mask they are treated as if they were whatever
3080// value is necessary in order to fit the above forms.
3081static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy,
3082 SmallVector<int, 16> Indices,
3083 SelectionDAG &DAG) {
3084 assert((Indices.size() % 2) == 0);
3085
3086 SDValue Wt;
3087 SDValue Ws;
3088 const auto &Begin = Indices.begin();
3089 const auto &Mid = Indices.begin() + Indices.size() / 2;
3090 const auto &End = Indices.end();
3091
3092 if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: 1, ExpectedIndexStride: 2))
3093 Wt = Op->getOperand(Num: 0);
3094 else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: Indices.size() + 1, ExpectedIndexStride: 2))
3095 Wt = Op->getOperand(Num: 1);
3096 else
3097 return SDValue();
3098
3099 if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: 1, ExpectedIndexStride: 2))
3100 Ws = Op->getOperand(Num: 0);
3101 else if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: Indices.size() + 1, ExpectedIndexStride: 2))
3102 Ws = Op->getOperand(Num: 1);
3103 else
3104 return SDValue();
3105
3106 return DAG.getNode(Opcode: MipsISD::PCKOD, DL: SDLoc(Op), VT: ResTy, N1: Ws, N2: Wt);
3107}
3108
3109// Lower VECTOR_SHUFFLE into VSHF.
3110//
3111// This mostly consists of converting the shuffle indices in Indices into a
3112// BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is
3113// also code to eliminate unused operands of the VECTOR_SHUFFLE. For example,
3114// if the type is v8i16 and all the indices are less than 8 then the second
3115// operand is unused and can be replaced with anything. We choose to replace it
3116// with the used operand since this reduces the number of instructions overall.
3117//
3118// NOTE: SPLATI shuffle masks may contain UNDEFs, since isSPLATI() treats
3119// UNDEFs as same as SPLATI index.
3120// For other instances we use the last valid index if UNDEF is
3121// encountered.
3122static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy,
3123 const SmallVector<int, 16> &Indices,
3124 const bool isSPLATI,
3125 SelectionDAG &DAG) {
3126 SmallVector<SDValue, 16> Ops;
3127 SDValue Op0;
3128 SDValue Op1;
3129 EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger();
3130 EVT MaskEltTy = MaskVecTy.getVectorElementType();
3131 bool Using1stVec = false;
3132 bool Using2ndVec = false;
3133 SDLoc DL(Op);
3134 int ResTyNumElts = ResTy.getVectorNumElements();
3135
3136 for (int i = 0; i < ResTyNumElts; ++i) {
3137 // Idx == -1 means UNDEF/poison
3138 int Idx = Indices[i];
3139
3140 if (0 <= Idx && Idx < ResTyNumElts)
3141 Using1stVec = true;
3142 if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2)
3143 Using2ndVec = true;
3144 }
3145
3146 // Find the first non-undef index. This index is used as a default when there
3147 // is a leading UNDEF/poison.
3148 int SplatIndex = 0;
3149 for (int Idx : Indices)
3150 if (Idx >= 0) {
3151 SplatIndex = Idx;
3152 break;
3153 }
3154
3155 int LastValidIndex = SplatIndex;
3156 for (size_t i = 0; i < Indices.size(); i++) {
3157 int Idx = Indices[i];
3158 if (Idx < 0) {
3159 // Continue using splati index or use the last valid index.
3160 Idx = isSPLATI ? SplatIndex : LastValidIndex;
3161 } else {
3162 LastValidIndex = Idx;
3163 }
3164 Ops.push_back(Elt: DAG.getTargetConstant(Val: Idx, DL, VT: MaskEltTy));
3165 }
3166
3167 SDValue MaskVec = DAG.getBuildVector(VT: MaskVecTy, DL, Ops);
3168
3169 if (Using1stVec && Using2ndVec) {
3170 Op0 = Op->getOperand(Num: 0);
3171 Op1 = Op->getOperand(Num: 1);
3172 } else if (Using1stVec)
3173 Op0 = Op1 = Op->getOperand(Num: 0);
3174 else if (Using2ndVec)
3175 Op0 = Op1 = Op->getOperand(Num: 1);
3176 else
3177 llvm_unreachable("shuffle vector mask references neither vector operand?");
3178
3179 // VECTOR_SHUFFLE concatenates the vectors in an vectorwise fashion.
3180 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
3181 // VSHF concatenates the vectors in a bitwise fashion:
3182 // <0b00, 0b01> + <0b10, 0b11> ->
3183 // 0b0100 + 0b1110 -> 0b01001110
3184 // <0b10, 0b11, 0b00, 0b01>
3185 // We must therefore swap the operands to get the correct result.
3186 return DAG.getNode(Opcode: MipsISD::VSHF, DL, VT: ResTy, N1: MaskVec, N2: Op1, N3: Op0);
3187}
3188
3189// Lower VECTOR_SHUFFLE into one of a number of instructions depending on the
3190// indices in the shuffle.
3191SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
3192 SelectionDAG &DAG) const {
3193 ShuffleVectorSDNode *Node = cast<ShuffleVectorSDNode>(Val&: Op);
3194 EVT ResTy = Op->getValueType(ResNo: 0);
3195
3196 if (!ResTy.is128BitVector())
3197 return SDValue();
3198
3199 int ResTyNumElts = ResTy.getVectorNumElements();
3200 SmallVector<int, 16> Indices;
3201
3202 for (int i = 0; i < ResTyNumElts; ++i)
3203 Indices.push_back(Elt: Node->getMaskElt(Idx: i));
3204
3205 // splati.[bhwd] is preferable to the others but is matched from
3206 // MipsISD::VSHF.
3207 if (isVECTOR_SHUFFLE_SPLATI(Op, ResTy, Indices, DAG))
3208 return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, isSPLATI: true, DAG);
3209 SDValue Result;
3210 if ((Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG)))
3211 return Result;
3212 if ((Result = lowerVECTOR_SHUFFLE_ILVOD(Op, ResTy, Indices, DAG)))
3213 return Result;
3214 if ((Result = lowerVECTOR_SHUFFLE_ILVL(Op, ResTy, Indices, DAG)))
3215 return Result;
3216 if ((Result = lowerVECTOR_SHUFFLE_ILVR(Op, ResTy, Indices, DAG)))
3217 return Result;
3218 if ((Result = lowerVECTOR_SHUFFLE_PCKEV(Op, ResTy, Indices, DAG)))
3219 return Result;
3220 if ((Result = lowerVECTOR_SHUFFLE_PCKOD(Op, ResTy, Indices, DAG)))
3221 return Result;
3222 if ((Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG)))
3223 return Result;
3224 return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, isSPLATI: false, DAG);
3225}
3226
3227MachineBasicBlock *
3228MipsSETargetLowering::emitBPOSGE32(MachineInstr &MI,
3229 MachineBasicBlock *BB) const {
3230 // $bb:
3231 // bposge32_pseudo $vr0
3232 // =>
3233 // $bb:
3234 // bposge32 $tbb
3235 // $fbb:
3236 // li $vr2, 0
3237 // b $sink
3238 // $tbb:
3239 // li $vr1, 1
3240 // $sink:
3241 // $vr0 = phi($vr2, $fbb, $vr1, $tbb)
3242
3243 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3244 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3245 const TargetRegisterClass *RC = &Mips::GPR32RegClass;
3246 DebugLoc DL = MI.getDebugLoc();
3247 const BasicBlock *LLVM_BB = BB->getBasicBlock();
3248 MachineFunction::iterator It = std::next(x: MachineFunction::iterator(BB));
3249 MachineFunction *F = BB->getParent();
3250 MachineBasicBlock *FBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
3251 MachineBasicBlock *TBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
3252 MachineBasicBlock *Sink = F->CreateMachineBasicBlock(BB: LLVM_BB);
3253 F->insert(MBBI: It, MBB: FBB);
3254 F->insert(MBBI: It, MBB: TBB);
3255 F->insert(MBBI: It, MBB: Sink);
3256
3257 // Transfer the remainder of BB and its successor edges to Sink.
3258 Sink->splice(Where: Sink->begin(), Other: BB, From: std::next(x: MachineBasicBlock::iterator(MI)),
3259 To: BB->end());
3260 Sink->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
3261
3262 // Add successors.
3263 BB->addSuccessor(Succ: FBB);
3264 BB->addSuccessor(Succ: TBB);
3265 FBB->addSuccessor(Succ: Sink);
3266 TBB->addSuccessor(Succ: Sink);
3267
3268 // Insert the real bposge32 instruction to $BB.
3269 BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: Mips::BPOSGE32)).addMBB(MBB: TBB);
3270 // Insert the real bposge32c instruction to $BB.
3271 BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: Mips::BPOSGE32C_MMR3)).addMBB(MBB: TBB);
3272
3273 // Fill $FBB.
3274 Register VR2 = RegInfo.createVirtualRegister(RegClass: RC);
3275 BuildMI(BB&: *FBB, I: FBB->end(), MIMD: DL, MCID: TII->get(Opcode: Mips::ADDiu), DestReg: VR2)
3276 .addReg(RegNo: Mips::ZERO).addImm(Val: 0);
3277 BuildMI(BB&: *FBB, I: FBB->end(), MIMD: DL, MCID: TII->get(Opcode: Mips::B)).addMBB(MBB: Sink);
3278
3279 // Fill $TBB.
3280 Register VR1 = RegInfo.createVirtualRegister(RegClass: RC);
3281 BuildMI(BB&: *TBB, I: TBB->end(), MIMD: DL, MCID: TII->get(Opcode: Mips::ADDiu), DestReg: VR1)
3282 .addReg(RegNo: Mips::ZERO).addImm(Val: 1);
3283
3284 // Insert phi function to $Sink.
3285 BuildMI(BB&: *Sink, I: Sink->begin(), MIMD: DL, MCID: TII->get(Opcode: Mips::PHI),
3286 DestReg: MI.getOperand(i: 0).getReg())
3287 .addReg(RegNo: VR2)
3288 .addMBB(MBB: FBB)
3289 .addReg(RegNo: VR1)
3290 .addMBB(MBB: TBB);
3291
3292 MI.eraseFromParent(); // The pseudo instruction is gone now.
3293 return Sink;
3294}
3295
3296MachineBasicBlock *MipsSETargetLowering::emitMSACBranchPseudo(
3297 MachineInstr &MI, MachineBasicBlock *BB, unsigned BranchOp) const {
3298 // $bb:
3299 // vany_nonzero $rd, $ws
3300 // =>
3301 // $bb:
3302 // bnz.b $ws, $tbb
3303 // b $fbb
3304 // $fbb:
3305 // li $rd1, 0
3306 // b $sink
3307 // $tbb:
3308 // li $rd2, 1
3309 // $sink:
3310 // $rd = phi($rd1, $fbb, $rd2, $tbb)
3311
3312 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3313 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3314 const TargetRegisterClass *RC = &Mips::GPR32RegClass;
3315 DebugLoc DL = MI.getDebugLoc();
3316 const BasicBlock *LLVM_BB = BB->getBasicBlock();
3317 MachineFunction::iterator It = std::next(x: MachineFunction::iterator(BB));
3318 MachineFunction *F = BB->getParent();
3319 MachineBasicBlock *FBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
3320 MachineBasicBlock *TBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
3321 MachineBasicBlock *Sink = F->CreateMachineBasicBlock(BB: LLVM_BB);
3322 F->insert(MBBI: It, MBB: FBB);
3323 F->insert(MBBI: It, MBB: TBB);
3324 F->insert(MBBI: It, MBB: Sink);
3325
3326 // Transfer the remainder of BB and its successor edges to Sink.
3327 Sink->splice(Where: Sink->begin(), Other: BB, From: std::next(x: MachineBasicBlock::iterator(MI)),
3328 To: BB->end());
3329 Sink->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
3330
3331 // Add successors.
3332 BB->addSuccessor(Succ: FBB);
3333 BB->addSuccessor(Succ: TBB);
3334 FBB->addSuccessor(Succ: Sink);
3335 TBB->addSuccessor(Succ: Sink);
3336
3337 // Insert the real bnz.b instruction to $BB.
3338 BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: BranchOp))
3339 .addReg(RegNo: MI.getOperand(i: 1).getReg())
3340 .addMBB(MBB: TBB);
3341
3342 // Fill $FBB.
3343 Register RD1 = RegInfo.createVirtualRegister(RegClass: RC);
3344 BuildMI(BB&: *FBB, I: FBB->end(), MIMD: DL, MCID: TII->get(Opcode: Mips::ADDiu), DestReg: RD1)
3345 .addReg(RegNo: Mips::ZERO).addImm(Val: 0);
3346 BuildMI(BB&: *FBB, I: FBB->end(), MIMD: DL, MCID: TII->get(Opcode: Mips::B)).addMBB(MBB: Sink);
3347
3348 // Fill $TBB.
3349 Register RD2 = RegInfo.createVirtualRegister(RegClass: RC);
3350 BuildMI(BB&: *TBB, I: TBB->end(), MIMD: DL, MCID: TII->get(Opcode: Mips::ADDiu), DestReg: RD2)
3351 .addReg(RegNo: Mips::ZERO).addImm(Val: 1);
3352
3353 // Insert phi function to $Sink.
3354 BuildMI(BB&: *Sink, I: Sink->begin(), MIMD: DL, MCID: TII->get(Opcode: Mips::PHI),
3355 DestReg: MI.getOperand(i: 0).getReg())
3356 .addReg(RegNo: RD1)
3357 .addMBB(MBB: FBB)
3358 .addReg(RegNo: RD2)
3359 .addMBB(MBB: TBB);
3360
3361 MI.eraseFromParent(); // The pseudo instruction is gone now.
3362 return Sink;
3363}
3364
3365// Emit the COPY_FW pseudo instruction.
3366//
3367// copy_fw_pseudo $fd, $ws, n
3368// =>
3369// copy_u_w $rt, $ws, $n
3370// mtc1 $rt, $fd
3371//
3372// When n is zero, the equivalent operation can be performed with (potentially)
3373// zero instructions due to register overlaps. This optimization is never valid
3374// for lane 1 because it would require FR=0 mode which isn't supported by MSA.
3375MachineBasicBlock *
3376MipsSETargetLowering::emitCOPY_FW(MachineInstr &MI,
3377 MachineBasicBlock *BB) const {
3378 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3379 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3380 DebugLoc DL = MI.getDebugLoc();
3381 Register Fd = MI.getOperand(i: 0).getReg();
3382 Register Ws = MI.getOperand(i: 1).getReg();
3383 unsigned Lane = MI.getOperand(i: 2).getImm();
3384
3385 if (Lane == 0) {
3386 unsigned Wt = Ws;
3387 if (!Subtarget.useOddSPReg()) {
3388 // We must copy to an even-numbered MSA register so that the
3389 // single-precision sub-register is also guaranteed to be even-numbered.
3390 Wt = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128WEvensRegClass);
3391
3392 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::COPY), DestReg: Wt).addReg(RegNo: Ws);
3393 }
3394
3395 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::COPY), DestReg: Fd).addReg(RegNo: Wt, Flags: {}, SubReg: Mips::sub_lo);
3396 } else {
3397 Register Wt = RegInfo.createVirtualRegister(
3398 RegClass: Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
3399 : &Mips::MSA128WEvensRegClass);
3400
3401 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SPLATI_W), DestReg: Wt).addReg(RegNo: Ws).addImm(Val: Lane);
3402 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::COPY), DestReg: Fd).addReg(RegNo: Wt, Flags: {}, SubReg: Mips::sub_lo);
3403 }
3404
3405 MI.eraseFromParent(); // The pseudo instruction is gone now.
3406 return BB;
3407}
3408
3409// Emit the COPY_FD pseudo instruction.
3410//
3411// copy_fd_pseudo $fd, $ws, n
3412// =>
3413// splati.d $wt, $ws, $n
3414// copy $fd, $wt:sub_64
3415//
3416// When n is zero, the equivalent operation can be performed with (potentially)
3417// zero instructions due to register overlaps. This optimization is always
3418// valid because FR=1 mode which is the only supported mode in MSA.
3419MachineBasicBlock *
3420MipsSETargetLowering::emitCOPY_FD(MachineInstr &MI,
3421 MachineBasicBlock *BB) const {
3422 assert(Subtarget.isFP64bit());
3423
3424 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3425 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3426 Register Fd = MI.getOperand(i: 0).getReg();
3427 Register Ws = MI.getOperand(i: 1).getReg();
3428 unsigned Lane = MI.getOperand(i: 2).getImm() * 2;
3429 DebugLoc DL = MI.getDebugLoc();
3430
3431 if (Lane == 0)
3432 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::COPY), DestReg: Fd).addReg(RegNo: Ws, Flags: {}, SubReg: Mips::sub_64);
3433 else {
3434 Register Wt = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128DRegClass);
3435
3436 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SPLATI_D), DestReg: Wt).addReg(RegNo: Ws).addImm(Val: 1);
3437 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::COPY), DestReg: Fd).addReg(RegNo: Wt, Flags: {}, SubReg: Mips::sub_64);
3438 }
3439
3440 MI.eraseFromParent(); // The pseudo instruction is gone now.
3441 return BB;
3442}
3443
3444// Emit the INSERT_FW pseudo instruction.
3445//
3446// insert_fw_pseudo $wd, $wd_in, $n, $fs
3447// =>
3448// subreg_to_reg $wt:sub_lo, $fs
3449// insve_w $wd[$n], $wd_in, $wt[0]
3450MachineBasicBlock *
3451MipsSETargetLowering::emitINSERT_FW(MachineInstr &MI,
3452 MachineBasicBlock *BB) const {
3453 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3454 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3455 DebugLoc DL = MI.getDebugLoc();
3456 Register Wd = MI.getOperand(i: 0).getReg();
3457 Register Wd_in = MI.getOperand(i: 1).getReg();
3458 unsigned Lane = MI.getOperand(i: 2).getImm();
3459 Register Fs = MI.getOperand(i: 3).getReg();
3460 Register Wt = RegInfo.createVirtualRegister(
3461 RegClass: Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
3462 : &Mips::MSA128WEvensRegClass);
3463
3464 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SUBREG_TO_REG), DestReg: Wt)
3465 .addReg(RegNo: Fs)
3466 .addImm(Val: Mips::sub_lo);
3467 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::INSVE_W), DestReg: Wd)
3468 .addReg(RegNo: Wd_in)
3469 .addImm(Val: Lane)
3470 .addReg(RegNo: Wt)
3471 .addImm(Val: 0);
3472
3473 MI.eraseFromParent(); // The pseudo instruction is gone now.
3474 return BB;
3475}
3476
3477// Emit the INSERT_FD pseudo instruction.
3478//
3479// insert_fd_pseudo $wd, $fs, n
3480// =>
3481// subreg_to_reg $wt:sub_64, $fs
3482// insve_d $wd[$n], $wd_in, $wt[0]
3483MachineBasicBlock *
3484MipsSETargetLowering::emitINSERT_FD(MachineInstr &MI,
3485 MachineBasicBlock *BB) const {
3486 assert(Subtarget.isFP64bit());
3487
3488 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3489 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3490 DebugLoc DL = MI.getDebugLoc();
3491 Register Wd = MI.getOperand(i: 0).getReg();
3492 Register Wd_in = MI.getOperand(i: 1).getReg();
3493 unsigned Lane = MI.getOperand(i: 2).getImm();
3494 Register Fs = MI.getOperand(i: 3).getReg();
3495 Register Wt = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128DRegClass);
3496
3497 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SUBREG_TO_REG), DestReg: Wt)
3498 .addReg(RegNo: Fs)
3499 .addImm(Val: Mips::sub_64);
3500 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::INSVE_D), DestReg: Wd)
3501 .addReg(RegNo: Wd_in)
3502 .addImm(Val: Lane)
3503 .addReg(RegNo: Wt)
3504 .addImm(Val: 0);
3505
3506 MI.eraseFromParent(); // The pseudo instruction is gone now.
3507 return BB;
3508}
3509
3510// Emit the INSERT_([BHWD]|F[WD])_VIDX pseudo instruction.
3511//
3512// For integer:
3513// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $rs)
3514// =>
3515// (SLL $lanetmp1, $lane, <log2size)
3516// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
3517// (INSERT_[BHWD], $wdtmp2, $wdtmp1, 0, $rs)
3518// (NEG $lanetmp2, $lanetmp1)
3519// (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
3520//
3521// For floating point:
3522// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $fs)
3523// =>
3524// (SUBREG_TO_REG $wt, $fs, <subreg>)
3525// (SLL $lanetmp1, $lane, <log2size)
3526// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
3527// (INSVE_[WD], $wdtmp2, 0, $wdtmp1, 0)
3528// (NEG $lanetmp2, $lanetmp1)
3529// (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
3530MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX(
3531 MachineInstr &MI, MachineBasicBlock *BB, unsigned EltSizeInBytes,
3532 bool IsFP) const {
3533 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3534 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3535 DebugLoc DL = MI.getDebugLoc();
3536 Register Wd = MI.getOperand(i: 0).getReg();
3537 Register SrcVecReg = MI.getOperand(i: 1).getReg();
3538 Register LaneReg = MI.getOperand(i: 2).getReg();
3539 Register SrcValReg = MI.getOperand(i: 3).getReg();
3540
3541 const TargetRegisterClass *VecRC = nullptr;
3542 // FIXME: This should be true for N32 too.
3543 const TargetRegisterClass *GPRRC =
3544 Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
3545 unsigned SubRegIdx = Subtarget.isABI_N64() ? Mips::sub_32 : 0;
3546 unsigned ShiftOp = Subtarget.isABI_N64() ? Mips::DSLL : Mips::SLL;
3547 unsigned EltLog2Size;
3548 unsigned InsertOp = 0;
3549 unsigned InsveOp = 0;
3550 switch (EltSizeInBytes) {
3551 default:
3552 llvm_unreachable("Unexpected size");
3553 case 1:
3554 EltLog2Size = 0;
3555 InsertOp = Mips::INSERT_B;
3556 InsveOp = Mips::INSVE_B;
3557 VecRC = &Mips::MSA128BRegClass;
3558 break;
3559 case 2:
3560 EltLog2Size = 1;
3561 InsertOp = Mips::INSERT_H;
3562 InsveOp = Mips::INSVE_H;
3563 VecRC = &Mips::MSA128HRegClass;
3564 break;
3565 case 4:
3566 EltLog2Size = 2;
3567 InsertOp = Mips::INSERT_W;
3568 InsveOp = Mips::INSVE_W;
3569 VecRC = &Mips::MSA128WRegClass;
3570 break;
3571 case 8:
3572 EltLog2Size = 3;
3573 InsertOp = Mips::INSERT_D;
3574 InsveOp = Mips::INSVE_D;
3575 VecRC = &Mips::MSA128DRegClass;
3576 break;
3577 }
3578
3579 if (IsFP) {
3580 Register Wt = RegInfo.createVirtualRegister(RegClass: VecRC);
3581 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SUBREG_TO_REG), DestReg: Wt)
3582 .addReg(RegNo: SrcValReg)
3583 .addImm(Val: EltSizeInBytes == 8 ? Mips::sub_64 : Mips::sub_lo);
3584 SrcValReg = Wt;
3585 }
3586
3587 // Convert the lane index into a byte index
3588 if (EltSizeInBytes != 1) {
3589 Register LaneTmp1 = RegInfo.createVirtualRegister(RegClass: GPRRC);
3590 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: ShiftOp), DestReg: LaneTmp1)
3591 .addReg(RegNo: LaneReg)
3592 .addImm(Val: EltLog2Size);
3593 LaneReg = LaneTmp1;
3594 }
3595
3596 // Rotate bytes around so that the desired lane is element zero
3597 Register WdTmp1 = RegInfo.createVirtualRegister(RegClass: VecRC);
3598 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SLD_B), DestReg: WdTmp1)
3599 .addReg(RegNo: SrcVecReg)
3600 .addReg(RegNo: SrcVecReg)
3601 .addReg(RegNo: LaneReg, Flags: {}, SubReg: SubRegIdx);
3602
3603 Register WdTmp2 = RegInfo.createVirtualRegister(RegClass: VecRC);
3604 if (IsFP) {
3605 // Use insve.df to insert to element zero
3606 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: InsveOp), DestReg: WdTmp2)
3607 .addReg(RegNo: WdTmp1)
3608 .addImm(Val: 0)
3609 .addReg(RegNo: SrcValReg)
3610 .addImm(Val: 0);
3611 } else {
3612 // Use insert.df to insert to element zero
3613 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: InsertOp), DestReg: WdTmp2)
3614 .addReg(RegNo: WdTmp1)
3615 .addReg(RegNo: SrcValReg)
3616 .addImm(Val: 0);
3617 }
3618
3619 // Rotate elements the rest of the way for a full rotation.
3620 // sld.df inteprets $rt modulo the number of columns so we only need to negate
3621 // the lane index to do this.
3622 Register LaneTmp2 = RegInfo.createVirtualRegister(RegClass: GPRRC);
3623 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Subtarget.isABI_N64() ? Mips::DSUB : Mips::SUB),
3624 DestReg: LaneTmp2)
3625 .addReg(RegNo: Subtarget.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO)
3626 .addReg(RegNo: LaneReg);
3627 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SLD_B), DestReg: Wd)
3628 .addReg(RegNo: WdTmp2)
3629 .addReg(RegNo: WdTmp2)
3630 .addReg(RegNo: LaneTmp2, Flags: {}, SubReg: SubRegIdx);
3631
3632 MI.eraseFromParent(); // The pseudo instruction is gone now.
3633 return BB;
3634}
3635
3636// Emit the FILL_FW pseudo instruction.
3637//
3638// fill_fw_pseudo $wd, $fs
3639// =>
3640// implicit_def $wt1
3641// insert_subreg $wt2:subreg_lo, $wt1, $fs
3642// splati.w $wd, $wt2[0]
3643MachineBasicBlock *
3644MipsSETargetLowering::emitFILL_FW(MachineInstr &MI,
3645 MachineBasicBlock *BB) const {
3646 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3647 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3648 DebugLoc DL = MI.getDebugLoc();
3649 Register Wd = MI.getOperand(i: 0).getReg();
3650 Register Fs = MI.getOperand(i: 1).getReg();
3651 Register Wt1 = RegInfo.createVirtualRegister(
3652 RegClass: Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
3653 : &Mips::MSA128WEvensRegClass);
3654 Register Wt2 = RegInfo.createVirtualRegister(
3655 RegClass: Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
3656 : &Mips::MSA128WEvensRegClass);
3657
3658 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::IMPLICIT_DEF), DestReg: Wt1);
3659 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::INSERT_SUBREG), DestReg: Wt2)
3660 .addReg(RegNo: Wt1)
3661 .addReg(RegNo: Fs)
3662 .addImm(Val: Mips::sub_lo);
3663 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SPLATI_W), DestReg: Wd).addReg(RegNo: Wt2).addImm(Val: 0);
3664
3665 MI.eraseFromParent(); // The pseudo instruction is gone now.
3666 return BB;
3667}
3668
3669// Emit the FILL_FD pseudo instruction.
3670//
3671// fill_fd_pseudo $wd, $fs
3672// =>
3673// implicit_def $wt1
3674// insert_subreg $wt2:subreg_64, $wt1, $fs
3675// splati.d $wd, $wt2[0]
3676MachineBasicBlock *
3677MipsSETargetLowering::emitFILL_FD(MachineInstr &MI,
3678 MachineBasicBlock *BB) const {
3679 assert(Subtarget.isFP64bit());
3680
3681 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3682 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3683 DebugLoc DL = MI.getDebugLoc();
3684 Register Wd = MI.getOperand(i: 0).getReg();
3685 Register Fs = MI.getOperand(i: 1).getReg();
3686 Register Wt1 = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128DRegClass);
3687 Register Wt2 = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128DRegClass);
3688
3689 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::IMPLICIT_DEF), DestReg: Wt1);
3690 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::INSERT_SUBREG), DestReg: Wt2)
3691 .addReg(RegNo: Wt1)
3692 .addReg(RegNo: Fs)
3693 .addImm(Val: Mips::sub_64);
3694 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SPLATI_D), DestReg: Wd).addReg(RegNo: Wt2).addImm(Val: 0);
3695
3696 MI.eraseFromParent(); // The pseudo instruction is gone now.
3697 return BB;
3698}
3699
3700// Emit the FEXP2_W_1 pseudo instructions.
3701//
3702// fexp2_w_1_pseudo $wd, $wt
3703// =>
3704// ldi.w $ws, 1
3705// fexp2.w $wd, $ws, $wt
3706MachineBasicBlock *
3707MipsSETargetLowering::emitFEXP2_W_1(MachineInstr &MI,
3708 MachineBasicBlock *BB) const {
3709 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3710 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3711 const TargetRegisterClass *RC = &Mips::MSA128WRegClass;
3712 Register Ws1 = RegInfo.createVirtualRegister(RegClass: RC);
3713 Register Ws2 = RegInfo.createVirtualRegister(RegClass: RC);
3714 DebugLoc DL = MI.getDebugLoc();
3715
3716 // Splat 1.0 into a vector
3717 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::LDI_W), DestReg: Ws1).addImm(Val: 1);
3718 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FFINT_U_W), DestReg: Ws2).addReg(RegNo: Ws1);
3719
3720 // Emit 1.0 * fexp2(Wt)
3721 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FEXP2_W), DestReg: MI.getOperand(i: 0).getReg())
3722 .addReg(RegNo: Ws2)
3723 .addReg(RegNo: MI.getOperand(i: 1).getReg());
3724
3725 MI.eraseFromParent(); // The pseudo instruction is gone now.
3726 return BB;
3727}
3728
3729// Emit the FEXP2_D_1 pseudo instructions.
3730//
3731// fexp2_d_1_pseudo $wd, $wt
3732// =>
3733// ldi.d $ws, 1
3734// fexp2.d $wd, $ws, $wt
3735MachineBasicBlock *
3736MipsSETargetLowering::emitFEXP2_D_1(MachineInstr &MI,
3737 MachineBasicBlock *BB) const {
3738 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3739 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3740 const TargetRegisterClass *RC = &Mips::MSA128DRegClass;
3741 Register Ws1 = RegInfo.createVirtualRegister(RegClass: RC);
3742 Register Ws2 = RegInfo.createVirtualRegister(RegClass: RC);
3743 DebugLoc DL = MI.getDebugLoc();
3744
3745 // Splat 1.0 into a vector
3746 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::LDI_D), DestReg: Ws1).addImm(Val: 1);
3747 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FFINT_U_D), DestReg: Ws2).addReg(RegNo: Ws1);
3748
3749 // Emit 1.0 * fexp2(Wt)
3750 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FEXP2_D), DestReg: MI.getOperand(i: 0).getReg())
3751 .addReg(RegNo: Ws2)
3752 .addReg(RegNo: MI.getOperand(i: 1).getReg());
3753
3754 MI.eraseFromParent(); // The pseudo instruction is gone now.
3755 return BB;
3756}
3757