1//===- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Subclass of MipsTargetLowering specialized for mips32/64.
10//
11//===----------------------------------------------------------------------===//
12
13#include "MipsSEISelLowering.h"
14#include "MipsMachineFunction.h"
15#include "MipsRegisterInfo.h"
16#include "MipsSubtarget.h"
17#include "llvm/ADT/APInt.h"
18#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/SmallVector.h"
20#include "llvm/CodeGen/CallingConvLower.h"
21#include "llvm/CodeGen/ISDOpcodes.h"
22#include "llvm/CodeGen/MachineBasicBlock.h"
23#include "llvm/CodeGen/MachineFunction.h"
24#include "llvm/CodeGen/MachineInstr.h"
25#include "llvm/CodeGen/MachineInstrBuilder.h"
26#include "llvm/CodeGen/MachineMemOperand.h"
27#include "llvm/CodeGen/MachineRegisterInfo.h"
28#include "llvm/CodeGen/SelectionDAG.h"
29#include "llvm/CodeGen/SelectionDAGNodes.h"
30#include "llvm/CodeGen/TargetInstrInfo.h"
31#include "llvm/CodeGen/TargetLowering.h"
32#include "llvm/CodeGen/TargetSubtargetInfo.h"
33#include "llvm/CodeGen/ValueTypes.h"
34#include "llvm/CodeGenTypes/MachineValueType.h"
35#include "llvm/IR/DebugLoc.h"
36#include "llvm/IR/Intrinsics.h"
37#include "llvm/IR/IntrinsicsMips.h"
38#include "llvm/Support/Casting.h"
39#include "llvm/Support/CommandLine.h"
40#include "llvm/Support/Debug.h"
41#include "llvm/Support/ErrorHandling.h"
42#include "llvm/Support/raw_ostream.h"
43#include "llvm/TargetParser/Triple.h"
44#include <algorithm>
45#include <cassert>
46#include <cstddef>
47#include <cstdint>
48#include <iterator>
49#include <utility>
50
51using namespace llvm;
52
53#define DEBUG_TYPE "mips-isel"
54
// Debug/workaround flag: when set, f64 loads and stores are given Custom
// lowering in the constructor below (see the NoDPLoadStore check there)
// instead of using double-precision load/store instructions directly.
static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(Val: false),
                                   cl::desc("Expand double precision loads and "
                                            "stores to their single precision "
                                            "counterparts"));
59
60// Widen the v2 vectors to the register width, i.e. v2i16 -> v8i16,
61// v2i32 -> v4i32, etc, to ensure the correct rail size is used, i.e.
62// INST.h for v16, INST.w for v32, INST.d for v64.
63TargetLoweringBase::LegalizeTypeAction
64MipsSETargetLowering::getPreferredVectorAction(MVT VT) const {
65 if (this->Subtarget.hasMSA()) {
66 switch (VT.SimpleTy) {
67 // Leave v2i1 vectors to be promoted to larger ones.
68 // Other i1 types will be promoted by default.
69 case MVT::v2i1:
70 return TypePromoteInteger;
71 break;
72 // 16-bit vector types (v2 and longer)
73 case MVT::v2i8:
74 // 32-bit vector types (v2 and longer)
75 case MVT::v2i16:
76 case MVT::v4i8:
77 // 64-bit vector types (v2 and longer)
78 case MVT::v2i32:
79 case MVT::v4i16:
80 case MVT::v8i8:
81 return TypeWidenVector;
82 break;
83 // Only word (.w) and doubleword (.d) are available for floating point
84 // vectors. That means floating point vectors should be either v2f64
85 // or v4f32.
86 // Here we only explicitly widen the f32 types - f16 will be promoted
87 // by default.
88 case MVT::v2f32:
89 case MVT::v3f32:
90 return TypeWidenVector;
91 // v2i64 is already 128-bit wide.
92 default:
93 break;
94 }
95 }
96 return TargetLoweringBase::getPreferredVectorAction(VT);
97}
98
/// Construct the "SE" (mips32/mips64) lowering: registers the legal register
/// classes for this subtarget's feature set (GPR, FPU, DSP, MSA) and
/// configures the per-node legalization actions and DAG combines.
MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
                                           const MipsSubtarget &STI)
    : MipsTargetLowering(TM, STI) {
  // Set up the register classes
  addRegisterClass(VT: MVT::i32, RC: &Mips::GPR32RegClass);

  if (Subtarget.isGP64bit())
    addRegisterClass(VT: MVT::i64, RC: &Mips::GPR64RegClass);

  if (Subtarget.hasDSP() || Subtarget.hasMSA()) {
    // Expand all truncating stores and extending loads for every pair of
    // fixed-length vector types; legal combinations are re-enabled later.
    for (MVT VT0 : MVT::fixedlen_vector_valuetypes()) {
      for (MVT VT1 : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(ValVT: VT0, MemVT: VT1, Action: Expand);
        setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: VT0, MemVT: VT1, Action: Expand);
        setLoadExtAction(ExtType: ISD::ZEXTLOAD, ValVT: VT0, MemVT: VT1, Action: Expand);
        setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: VT0, MemVT: VT1, Action: Expand);
      }
    }
  }

  if (Subtarget.hasDSP()) {
    // The DSP ASE provides 32-bit SIMD on these two vector types.
    MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8};

    for (const auto &VecTy : VecTys) {
      addRegisterClass(VT: VecTy, RC: &Mips::DSPRRegClass);

      // Expand all builtin opcodes, then re-enable the supported subset.
      for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
        setOperationAction(Op: Opc, VT: VecTy, Action: Expand);

      setOperationAction(Op: ISD::ADD, VT: VecTy, Action: Legal);
      setOperationAction(Op: ISD::SUB, VT: VecTy, Action: Legal);
      setOperationAction(Op: ISD::LOAD, VT: VecTy, Action: Legal);
      setOperationAction(Op: ISD::STORE, VT: VecTy, Action: Legal);
      setOperationAction(Op: ISD::BITCAST, VT: VecTy, Action: Legal);
    }

    setTargetDAGCombine(
        {ISD::SHL, ISD::SRA, ISD::SRL, ISD::SETCC, ISD::VSELECT});

    if (Subtarget.hasMips32r2()) {
      setOperationAction(Op: ISD::ADDC, VT: MVT::i32, Action: Legal);
      setOperationAction(Op: ISD::ADDE, VT: MVT::i32, Action: Legal);
    }
  }

  if (Subtarget.hasDSPR2())
    setOperationAction(Op: ISD::MUL, VT: MVT::v2i16, Action: Legal);

  if (Subtarget.hasMSA()) {
    // Register the four integer and three floating-point 128-bit MSA types;
    // the helpers set up the common per-type legalization.
    addMSAIntType(Ty: MVT::v16i8, RC: &Mips::MSA128BRegClass);
    addMSAIntType(Ty: MVT::v8i16, RC: &Mips::MSA128HRegClass);
    addMSAIntType(Ty: MVT::v4i32, RC: &Mips::MSA128WRegClass);
    addMSAIntType(Ty: MVT::v2i64, RC: &Mips::MSA128DRegClass);
    addMSAFloatType(Ty: MVT::v8f16, RC: &Mips::MSA128HRegClass);
    addMSAFloatType(Ty: MVT::v4f32, RC: &Mips::MSA128WRegClass);
    addMSAFloatType(Ty: MVT::v2f64, RC: &Mips::MSA128DRegClass);

    // f16 is a storage-only type, always promote it to f32.
    addRegisterClass(VT: MVT::f16, RC: &Mips::MSA128HRegClass);
    setOperationAction(Op: ISD::SETCC, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::BR_CC, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::SELECT, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FADD, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FSUB, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FMUL, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FDIV, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FREM, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FMA, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FNEG, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FABS, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FCEIL, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FCOPYSIGN, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FCOS, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FP_EXTEND, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FFLOOR, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FNEARBYINT, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FPOW, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FPOWI, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FRINT, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FSIN, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FSINCOS, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FSQRT, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FEXP, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FEXP2, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FLOG, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FLOG2, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FLOG10, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FROUND, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FTRUNC, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FMINNUM, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FMAXNUM, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FMINIMUM, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FMAXIMUM, VT: MVT::f16, Action: Promote);

    setTargetDAGCombine({ISD::AND, ISD::OR, ISD::SRA, ISD::VSELECT, ISD::XOR});
  }

  if (!Subtarget.useSoftFloat()) {
    addRegisterClass(VT: MVT::f32, RC: &Mips::FGR32RegClass);

    // When dealing with single precision only, use libcalls
    if (!Subtarget.isSingleFloat()) {
      if (Subtarget.isFP64bit())
        addRegisterClass(VT: MVT::f64, RC: &Mips::FGR64RegClass);
      else
        addRegisterClass(VT: MVT::f64, RC: &Mips::AFGR64RegClass);
    }

    // Basic strict-FP arithmetic maps directly onto the FPU instructions.
    for (auto Op : {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
                    ISD::STRICT_FDIV, ISD::STRICT_FSQRT}) {
      setOperationAction(Op, VT: MVT::f32, Action: Legal);
      setOperationAction(Op, VT: MVT::f64, Action: Legal);
    }
  }

  // Targets with 64bits integer registers, but no 64bit floating point register
  // do not support conversion between them
  if (Subtarget.isGP64bit() && Subtarget.isSingleFloat() &&
      !Subtarget.useSoftFloat()) {
    setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::i64, Action: Expand);
    setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i64, Action: Expand);
    setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i64, Action: Expand);
    setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i64, Action: Expand);
  }

  // 32-bit multiply/divide forms are lowered to accumulator-based MipsISD
  // nodes; see lowerMulDiv via LowerOperation below.
  setOperationAction(Op: ISD::SMUL_LOHI, VT: MVT::i32, Action: Custom);
  setOperationAction(Op: ISD::UMUL_LOHI, VT: MVT::i32, Action: Custom);
  setOperationAction(Op: ISD::MULHS, VT: MVT::i32, Action: Custom);
  setOperationAction(Op: ISD::MULHU, VT: MVT::i32, Action: Custom);

  if (Subtarget.hasCnMips())
    setOperationAction(Op: ISD::MUL, VT: MVT::i64, Action: Legal);
  else if (Subtarget.isR5900()) {
    // R5900 doesn't have DMULT/DMULTU/DDIV/DDIVU - expand to 32-bit ops
    setOperationAction(Op: ISD::MUL, VT: MVT::i64, Action: Expand);
    setOperationAction(Op: ISD::SMUL_LOHI, VT: MVT::i64, Action: Expand);
    setOperationAction(Op: ISD::UMUL_LOHI, VT: MVT::i64, Action: Expand);
    setOperationAction(Op: ISD::MULHS, VT: MVT::i64, Action: Expand);
    setOperationAction(Op: ISD::MULHU, VT: MVT::i64, Action: Expand);
    setOperationAction(Op: ISD::SDIVREM, VT: MVT::i64, Action: Expand);
    setOperationAction(Op: ISD::UDIVREM, VT: MVT::i64, Action: Expand);
  } else if (Subtarget.isGP64bit())
    setOperationAction(Op: ISD::MUL, VT: MVT::i64, Action: Custom);

  if (Subtarget.isGP64bit() && !Subtarget.isR5900()) {
    setOperationAction(Op: ISD::SMUL_LOHI, VT: MVT::i64, Action: Custom);
    setOperationAction(Op: ISD::UMUL_LOHI, VT: MVT::i64, Action: Custom);
    setOperationAction(Op: ISD::MULHS, VT: MVT::i64, Action: Custom);
    setOperationAction(Op: ISD::MULHU, VT: MVT::i64, Action: Custom);
    setOperationAction(Op: ISD::SDIVREM, VT: MVT::i64, Action: Custom);
    setOperationAction(Op: ISD::UDIVREM, VT: MVT::i64, Action: Custom);
  }

  setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::i64, Action: Custom);
  setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::i64, Action: Custom);

  setOperationAction(Op: ISD::SDIVREM, VT: MVT::i32, Action: Custom);
  setOperationAction(Op: ISD::UDIVREM, VT: MVT::i32, Action: Custom);
  setOperationAction(Op: ISD::ATOMIC_FENCE, VT: MVT::Other, Action: Custom);
  // Pre-R6 i32 load/store go through lowerLOAD/lowerSTORE; on R6 they are
  // used as-is.
  if (Subtarget.hasMips32r6()) {
    setOperationAction(Op: ISD::LOAD, VT: MVT::i32, Action: Legal);
    setOperationAction(Op: ISD::STORE, VT: MVT::i32, Action: Legal);
  } else {
    setOperationAction(Op: ISD::LOAD, VT: MVT::i32, Action: Custom);
    setOperationAction(Op: ISD::STORE, VT: MVT::i32, Action: Custom);
  }

  setTargetDAGCombine(ISD::MUL);

  setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::Other, Action: Custom);
  setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::Other, Action: Custom);
  setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::Other, Action: Custom);

  if (Subtarget.hasMips32r2() && !Subtarget.useSoftFloat() &&
      !Subtarget.hasMips64()) {
    setOperationAction(Op: ISD::BITCAST, VT: MVT::i64, Action: Custom);
  }

  if (NoDPLoadStore) {
    setOperationAction(Op: ISD::LOAD, VT: MVT::f64, Action: Custom);
    setOperationAction(Op: ISD::STORE, VT: MVT::f64, Action: Custom);
  }

  if (Subtarget.hasMips32r6()) {
    // MIPS32r6 replaces the accumulator-based multiplies with a three register
    // instruction
    setOperationAction(Op: ISD::SMUL_LOHI, VT: MVT::i32, Action: Expand);
    setOperationAction(Op: ISD::UMUL_LOHI, VT: MVT::i32, Action: Expand);
    setOperationAction(Op: ISD::MUL, VT: MVT::i32, Action: Legal);
    setOperationAction(Op: ISD::MULHS, VT: MVT::i32, Action: Legal);
    setOperationAction(Op: ISD::MULHU, VT: MVT::i32, Action: Legal);

    // MIPS32r6 replaces the accumulator-based division/remainder with separate
    // three register division and remainder instructions.
    setOperationAction(Op: ISD::SDIVREM, VT: MVT::i32, Action: Expand);
    setOperationAction(Op: ISD::UDIVREM, VT: MVT::i32, Action: Expand);
    setOperationAction(Op: ISD::SDIV, VT: MVT::i32, Action: Legal);
    setOperationAction(Op: ISD::UDIV, VT: MVT::i32, Action: Legal);
    setOperationAction(Op: ISD::SREM, VT: MVT::i32, Action: Legal);
    setOperationAction(Op: ISD::UREM, VT: MVT::i32, Action: Legal);

    // MIPS32r6 replaces conditional moves with an equivalent that removes the
    // need for three GPR read ports.
    setOperationAction(Op: ISD::SETCC, VT: MVT::i32, Action: Legal);
    setOperationAction(Op: ISD::SELECT, VT: MVT::i32, Action: Legal);
    setOperationAction(Op: ISD::SELECT_CC, VT: MVT::i32, Action: Expand);

    setOperationAction(Op: ISD::SETCC, VT: MVT::f32, Action: Legal);
    setOperationAction(Op: ISD::SELECT, VT: MVT::f32, Action: Legal);
    setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f32, Action: Expand);

    assert(Subtarget.isFP64bit() && "FR=1 is required for MIPS32r6");
    setOperationAction(Op: ISD::SETCC, VT: MVT::f64, Action: Legal);
    setOperationAction(Op: ISD::SELECT, VT: MVT::f64, Action: Legal);
    setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f64, Action: Expand);

    setOperationAction(Op: ISD::BRCOND, VT: MVT::Other, Action: Legal);

    // Floating point > and >= are supported via < and <=
    setCondCodeAction(CCs: ISD::SETOGE, VT: MVT::f32, Action: Expand);
    setCondCodeAction(CCs: ISD::SETOGT, VT: MVT::f32, Action: Expand);
    setCondCodeAction(CCs: ISD::SETUGE, VT: MVT::f32, Action: Expand);
    setCondCodeAction(CCs: ISD::SETUGT, VT: MVT::f32, Action: Expand);
    setCondCodeAction(CCs: ISD::SETONE, VT: MVT::f32, Action: Expand);
    setCondCodeAction(CCs: ISD::SETO, VT: MVT::f32, Action: Expand);
    setCondCodeAction(CCs: ISD::SETUNE, VT: MVT::f32, Action: Expand);
    setCondCodeAction(CCs: ISD::SETNE, VT: MVT::f32, Action: Expand);

    setCondCodeAction(CCs: ISD::SETOGE, VT: MVT::f64, Action: Expand);
    setCondCodeAction(CCs: ISD::SETOGT, VT: MVT::f64, Action: Expand);
    setCondCodeAction(CCs: ISD::SETUGE, VT: MVT::f64, Action: Expand);
    setCondCodeAction(CCs: ISD::SETUGT, VT: MVT::f64, Action: Expand);
    setCondCodeAction(CCs: ISD::SETONE, VT: MVT::f64, Action: Expand);
    setCondCodeAction(CCs: ISD::SETO, VT: MVT::f64, Action: Expand);
    setCondCodeAction(CCs: ISD::SETUNE, VT: MVT::f64, Action: Expand);
    setCondCodeAction(CCs: ISD::SETNE, VT: MVT::f64, Action: Expand);
  }

  if (Subtarget.hasMips64r6()) {
    // MIPS64r6 replaces the accumulator-based multiplies with a three register
    // instruction
    setOperationAction(Op: ISD::SMUL_LOHI, VT: MVT::i64, Action: Expand);
    setOperationAction(Op: ISD::UMUL_LOHI, VT: MVT::i64, Action: Expand);
    setOperationAction(Op: ISD::MUL, VT: MVT::i64, Action: Legal);
    setOperationAction(Op: ISD::MULHS, VT: MVT::i64, Action: Legal);
    setOperationAction(Op: ISD::MULHU, VT: MVT::i64, Action: Legal);

    // MIPS64r6 replaces the accumulator-based division/remainder with separate
    // three register division and remainder instructions.
    setOperationAction(Op: ISD::SDIVREM, VT: MVT::i64, Action: Expand);
    setOperationAction(Op: ISD::UDIVREM, VT: MVT::i64, Action: Expand);
    setOperationAction(Op: ISD::SDIV, VT: MVT::i64, Action: Legal);
    setOperationAction(Op: ISD::UDIV, VT: MVT::i64, Action: Legal);
    setOperationAction(Op: ISD::SREM, VT: MVT::i64, Action: Legal);
    setOperationAction(Op: ISD::UREM, VT: MVT::i64, Action: Legal);

    // MIPS64r6 replaces conditional moves with an equivalent that removes the
    // need for three GPR read ports.
    setOperationAction(Op: ISD::SETCC, VT: MVT::i64, Action: Legal);
    setOperationAction(Op: ISD::SELECT, VT: MVT::i64, Action: Legal);
    setOperationAction(Op: ISD::SELECT_CC, VT: MVT::i64, Action: Expand);
  }

  computeRegisterProperties(TRI: Subtarget.getRegisterInfo());
}
367
368const MipsTargetLowering *
369llvm::createMipsSETargetLowering(const MipsTargetMachine &TM,
370 const MipsSubtarget &STI) {
371 return new MipsSETargetLowering(TM, STI);
372}
373
374const TargetRegisterClass *
375MipsSETargetLowering::getRepRegClassFor(MVT VT) const {
376 if (VT == MVT::Untyped)
377 return Subtarget.hasDSP() ? &Mips::ACC64DSPRegClass : &Mips::ACC64RegClass;
378
379 return TargetLowering::getRepRegClassFor(VT);
380}
381
382// Enable MSA support for the given integer type and Register class.
383void MipsSETargetLowering::
384addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
385 addRegisterClass(VT: Ty, RC);
386
387 // Expand all builtin opcodes.
388 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
389 setOperationAction(Op: Opc, VT: Ty, Action: Expand);
390
391 setOperationAction(Op: ISD::BITCAST, VT: Ty, Action: Legal);
392 setOperationAction(Op: ISD::LOAD, VT: Ty, Action: Legal);
393 setOperationAction(Op: ISD::STORE, VT: Ty, Action: Legal);
394 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: Ty, Action: Custom);
395 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: Ty, Action: Legal);
396 setOperationAction(Op: ISD::BUILD_VECTOR, VT: Ty, Action: Custom);
397 setOperationAction(Op: ISD::UNDEF, VT: Ty, Action: Legal);
398
399 setOperationAction(Op: ISD::ADD, VT: Ty, Action: Legal);
400 setOperationAction(Op: ISD::AND, VT: Ty, Action: Legal);
401 setOperationAction(Op: ISD::CTLZ, VT: Ty, Action: Legal);
402 setOperationAction(Op: ISD::CTPOP, VT: Ty, Action: Legal);
403 setOperationAction(Op: ISD::MUL, VT: Ty, Action: Legal);
404 setOperationAction(Op: ISD::OR, VT: Ty, Action: Legal);
405 setOperationAction(Op: ISD::SDIV, VT: Ty, Action: Legal);
406 setOperationAction(Op: ISD::SREM, VT: Ty, Action: Legal);
407 setOperationAction(Op: ISD::SHL, VT: Ty, Action: Legal);
408 setOperationAction(Op: ISD::SRA, VT: Ty, Action: Legal);
409 setOperationAction(Op: ISD::SRL, VT: Ty, Action: Legal);
410 setOperationAction(Op: ISD::SUB, VT: Ty, Action: Legal);
411 setOperationAction(Op: ISD::SMAX, VT: Ty, Action: Legal);
412 setOperationAction(Op: ISD::SMIN, VT: Ty, Action: Legal);
413 setOperationAction(Op: ISD::UDIV, VT: Ty, Action: Legal);
414 setOperationAction(Op: ISD::UREM, VT: Ty, Action: Legal);
415 setOperationAction(Op: ISD::UMAX, VT: Ty, Action: Legal);
416 setOperationAction(Op: ISD::UMIN, VT: Ty, Action: Legal);
417 setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: Ty, Action: Custom);
418 setOperationAction(Op: ISD::VSELECT, VT: Ty, Action: Legal);
419 setOperationAction(Op: ISD::XOR, VT: Ty, Action: Legal);
420
421 if (Ty == MVT::v4i32 || Ty == MVT::v2i64) {
422 setOperationAction(Op: ISD::FP_TO_SINT, VT: Ty, Action: Legal);
423 setOperationAction(Op: ISD::FP_TO_UINT, VT: Ty, Action: Legal);
424 setOperationAction(Op: ISD::SINT_TO_FP, VT: Ty, Action: Legal);
425 setOperationAction(Op: ISD::UINT_TO_FP, VT: Ty, Action: Legal);
426 }
427
428 setOperationAction(Op: ISD::SETCC, VT: Ty, Action: Legal);
429 setCondCodeAction(CCs: ISD::SETNE, VT: Ty, Action: Expand);
430 setCondCodeAction(CCs: ISD::SETGE, VT: Ty, Action: Expand);
431 setCondCodeAction(CCs: ISD::SETGT, VT: Ty, Action: Expand);
432 setCondCodeAction(CCs: ISD::SETUGE, VT: Ty, Action: Expand);
433 setCondCodeAction(CCs: ISD::SETUGT, VT: Ty, Action: Expand);
434}
435
436// Enable MSA support for the given floating-point type and Register class.
437void MipsSETargetLowering::
438addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
439 addRegisterClass(VT: Ty, RC);
440
441 // Expand all builtin opcodes.
442 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
443 setOperationAction(Op: Opc, VT: Ty, Action: Expand);
444
445 setOperationAction(Op: ISD::LOAD, VT: Ty, Action: Legal);
446 setOperationAction(Op: ISD::STORE, VT: Ty, Action: Legal);
447 setOperationAction(Op: ISD::BITCAST, VT: Ty, Action: Legal);
448 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: Ty, Action: Legal);
449 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: Ty, Action: Legal);
450 setOperationAction(Op: ISD::BUILD_VECTOR, VT: Ty, Action: Custom);
451
452 if (Ty != MVT::v8f16) {
453 setOperationAction(Op: ISD::FABS, VT: Ty, Action: Legal);
454 setOperationAction(Op: ISD::FADD, VT: Ty, Action: Legal);
455 setOperationAction(Op: ISD::FDIV, VT: Ty, Action: Legal);
456 setOperationAction(Op: ISD::FEXP2, VT: Ty, Action: Legal);
457 setOperationAction(Op: ISD::FLOG2, VT: Ty, Action: Legal);
458 setOperationAction(Op: ISD::FMA, VT: Ty, Action: Legal);
459 setOperationAction(Op: ISD::FMUL, VT: Ty, Action: Legal);
460 setOperationAction(Op: ISD::FRINT, VT: Ty, Action: Legal);
461 setOperationAction(Op: ISD::FSQRT, VT: Ty, Action: Legal);
462 setOperationAction(Op: ISD::FSUB, VT: Ty, Action: Legal);
463 setOperationAction(Op: ISD::VSELECT, VT: Ty, Action: Legal);
464
465 setOperationAction(Op: ISD::SETCC, VT: Ty, Action: Legal);
466 setCondCodeAction(CCs: ISD::SETOGE, VT: Ty, Action: Expand);
467 setCondCodeAction(CCs: ISD::SETOGT, VT: Ty, Action: Expand);
468 setCondCodeAction(CCs: ISD::SETUGE, VT: Ty, Action: Expand);
469 setCondCodeAction(CCs: ISD::SETUGT, VT: Ty, Action: Expand);
470 setCondCodeAction(CCs: ISD::SETGE, VT: Ty, Action: Expand);
471 setCondCodeAction(CCs: ISD::SETGT, VT: Ty, Action: Expand);
472 }
473}
474
475SDValue MipsSETargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
476 if(!Subtarget.hasMips32r6())
477 return MipsTargetLowering::LowerOperation(Op, DAG);
478
479 EVT ResTy = Op->getValueType(ResNo: 0);
480 SDLoc DL(Op);
481
482 // Although MTC1_D64 takes an i32 and writes an f64, the upper 32 bits of the
483 // floating point register are undefined. Not really an issue as sel.d, which
484 // is produced from an FSELECT node, only looks at bit 0.
485 SDValue Tmp = DAG.getNode(Opcode: MipsISD::MTC1_D64, DL, VT: MVT::f64, Operand: Op->getOperand(Num: 0));
486 return DAG.getNode(Opcode: MipsISD::FSELECT, DL, VT: ResTy, N1: Tmp, N2: Op->getOperand(Num: 1),
487 N3: Op->getOperand(Num: 2));
488}
489
490bool MipsSETargetLowering::allowsMisalignedMemoryAccesses(
491 EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
492 MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;
493
494 if (Subtarget.systemSupportsUnalignedAccess()) {
495 // MIPS32r6/MIPS64r6 is required to support unaligned access. It's
496 // implementation defined whether this is handled by hardware, software, or
497 // a hybrid of the two but it's expected that most implementations will
498 // handle the majority of cases in hardware.
499 if (Fast)
500 *Fast = 1;
501 return true;
502 } else if (Subtarget.hasMips32r6()) {
503 return false;
504 }
505
506 switch (SVT) {
507 case MVT::i64:
508 case MVT::i32:
509 if (Fast)
510 *Fast = 1;
511 return true;
512 default:
513 return false;
514 }
515}
516
517SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
518 SelectionDAG &DAG) const {
519 switch(Op.getOpcode()) {
520 case ISD::LOAD: return lowerLOAD(Op, DAG);
521 case ISD::STORE: return lowerSTORE(Op, DAG);
522 case ISD::SMUL_LOHI: return lowerMulDiv(Op, NewOpc: MipsISD::Mult, HasLo: true, HasHi: true, DAG);
523 case ISD::UMUL_LOHI: return lowerMulDiv(Op, NewOpc: MipsISD::Multu, HasLo: true, HasHi: true, DAG);
524 case ISD::MULHS: return lowerMulDiv(Op, NewOpc: MipsISD::Mult, HasLo: false, HasHi: true, DAG);
525 case ISD::MULHU: return lowerMulDiv(Op, NewOpc: MipsISD::Multu, HasLo: false, HasHi: true, DAG);
526 case ISD::MUL: return lowerMulDiv(Op, NewOpc: MipsISD::Mult, HasLo: true, HasHi: false, DAG);
527 case ISD::SDIVREM: return lowerMulDiv(Op, NewOpc: MipsISD::DivRem, HasLo: true, HasHi: true, DAG);
528 case ISD::UDIVREM: return lowerMulDiv(Op, NewOpc: MipsISD::DivRemU, HasLo: true, HasHi: true,
529 DAG);
530 case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG);
531 case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG);
532 case ISD::INTRINSIC_VOID: return lowerINTRINSIC_VOID(Op, DAG);
533 case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG);
534 case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG);
535 case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, DAG);
536 case ISD::SELECT: return lowerSELECT(Op, DAG);
537 case ISD::BITCAST: return lowerBITCAST(Op, DAG);
538 }
539
540 return MipsTargetLowering::LowerOperation(Op, DAG);
541}
542
// Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT
//
// Performs the following transformations:
// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its
//   sign/zero-extension is completely overwritten by the new one performed by
//   the ISD::AND.
// - Removes redundant zero extensions performed by an ISD::AND.
static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  // The targeted nodes only exist when MSA is in use.
  if (!Subtarget.hasMSA())
    return SDValue();

  SDValue Op0 = N->getOperand(Num: 0);
  SDValue Op1 = N->getOperand(Num: 1);
  unsigned Op0Opcode = Op0->getOpcode();

  // (and (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d)
  // where $d + 1 == 2^n and n == 32
  // or $d + 1 == 2^n and n <= 32 and ZExt
  // -> (MipsVExtractZExt $a, $b, $c)
  if (Op0Opcode == MipsISD::VEXTRACT_SEXT_ELT ||
      Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT) {
    // Only constant masks are foldable.
    ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Val&: Op1);

    if (!Mask)
      return SDValue();

    // Mask must be of the form 2^n - 1, i.e. the AND keeps the low n bits.
    int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2();

    if (Log2IfPositive <= 0)
      return SDValue(); // Mask+1 is not a power of 2

    // Operand 2 is a VTSDNode carrying the width of the extension performed
    // by the extract node.
    SDValue Op0Op2 = Op0->getOperand(Num: 2);
    EVT ExtendTy = cast<VTSDNode>(Val&: Op0Op2)->getVT();
    unsigned ExtendTySize = ExtendTy.getSizeInBits();
    unsigned Log2 = Log2IfPositive;

    // A zero-extension masked to at least its own width is redundant; a
    // sign-extension masked to exactly its width becomes a zero-extension.
    if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && Log2 >= ExtendTySize) ||
        Log2 == ExtendTySize) {
      SDValue Ops[] = { Op0->getOperand(Num: 0), Op0->getOperand(Num: 1), Op0Op2 };
      return DAG.getNode(Opcode: MipsISD::VEXTRACT_ZEXT_ELT, DL: SDLoc(Op0),
                         VTList: Op0->getVTList(),
                         Ops: ArrayRef(Ops, Op0->getNumOperands()));
    }
  }

  return SDValue();
}
592
593// Determine if the specified node is a constant vector splat.
594//
595// Returns true and sets Imm if:
596// * N is a ISD::BUILD_VECTOR representing a constant splat
597//
598// This function is quite similar to MipsSEDAGToDAGISel::selectVSplat. The
599// differences are that it assumes the MSA has already been checked and the
600// arbitrary requirement for a maximum of 32-bit integers isn't applied (and
601// must not be in order for binsri.d to be selectable).
602static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) {
603 BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(Val: N.getNode());
604
605 if (!Node)
606 return false;
607
608 APInt SplatValue, SplatUndef;
609 unsigned SplatBitSize;
610 bool HasAnyUndefs;
611
612 if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
613 MinSplatBits: 8, isBigEndian: !IsLittleEndian))
614 return false;
615
616 Imm = SplatValue;
617
618 return true;
619}
620
621// Test whether the given node is an all-ones build_vector.
622static bool isVectorAllOnes(SDValue N) {
623 // Look through bitcasts. Endianness doesn't matter because we are looking
624 // for an all-ones value.
625 if (N->getOpcode() == ISD::BITCAST)
626 N = N->getOperand(Num: 0);
627
628 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Val&: N);
629
630 if (!BVN)
631 return false;
632
633 APInt SplatValue, SplatUndef;
634 unsigned SplatBitSize;
635 bool HasAnyUndefs;
636
637 // Endianness doesn't matter in this context because we are looking for
638 // an all-ones value.
639 if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs))
640 return SplatValue.isAllOnes();
641
642 return false;
643}
644
645// Test whether N is the bitwise inverse of OfNode.
646static bool isBitwiseInverse(SDValue N, SDValue OfNode) {
647 if (N->getOpcode() != ISD::XOR)
648 return false;
649
650 if (isVectorAllOnes(N: N->getOperand(Num: 0)))
651 return N->getOperand(Num: 1) == OfNode;
652
653 if (isVectorAllOnes(N: N->getOperand(Num: 1)))
654 return N->getOperand(Num: 0) == OfNode;
655
656 return false;
657}
658
// Perform combines where ISD::OR is the root node.
//
// Performs the following transformations:
// - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b)
//   where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit
//   vector type.
static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
                                TargetLowering::DAGCombinerInfo &DCI,
                                const MipsSubtarget &Subtarget) {
  // The VSELECT produced below is only profitable/legal with MSA.
  if (!Subtarget.hasMSA())
    return SDValue();

  EVT Ty = N->getValueType(ResNo: 0);

  // Only 128-bit (MSA-sized) vectors are handled.
  if (!Ty.is128BitVector())
    return SDValue();

  SDValue Op0 = N->getOperand(Num: 0);
  SDValue Op1 = N->getOperand(Num: 1);

  if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) {
    SDValue Op0Op0 = Op0->getOperand(Num: 0);
    SDValue Op0Op1 = Op0->getOperand(Num: 1);
    SDValue Op1Op0 = Op1->getOperand(Num: 0);
    SDValue Op1Op1 = Op1->getOperand(Num: 1);
    // NOTE(review): IsLittleEndian is deliberately the inverse of
    // Subtarget.isLittle() here — presumably to match the bit ordering that
    // isVSplat/isConstantSplat expects; confirm before changing.
    bool IsLittleEndian = !Subtarget.isLittle();

    SDValue IfSet, IfClr, Cond;
    bool IsConstantMask = false;
    APInt Mask, InvMask;

    // If Op0Op0 is an appropriate mask, try to find its inverse in either
    // Op1Op0, or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes, while
    // looking.
    // IfClr will be set if we find a valid match.
    if (isVSplat(N: Op0Op0, Imm&: Mask, IsLittleEndian)) {
      Cond = Op0Op0;
      IfSet = Op0Op1;

      if (isVSplat(N: Op1Op0, Imm&: InvMask, IsLittleEndian) &&
          Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op1;
      else if (isVSplat(N: Op1Op1, Imm&: InvMask, IsLittleEndian) &&
               Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op0;

      IsConstantMask = true;
    }

    // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the same
    // thing again using this mask.
    // IfClr will be set if we find a valid match.
    if (!IfClr.getNode() && isVSplat(N: Op0Op1, Imm&: Mask, IsLittleEndian)) {
      Cond = Op0Op1;
      IfSet = Op0Op0;

      if (isVSplat(N: Op1Op0, Imm&: InvMask, IsLittleEndian) &&
          Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op1;
      else if (isVSplat(N: Op1Op1, Imm&: InvMask, IsLittleEndian) &&
               Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op0;

      IsConstantMask = true;
    }

    // If IfClr is not yet set, try looking for a non-constant match.
    // IfClr will be set if we find a valid match amongst the eight
    // possibilities (each AND operand paired against each operand of the
    // other AND, with the inverse on either side).
    if (!IfClr.getNode()) {
      if (isBitwiseInverse(N: Op0Op0, OfNode: Op1Op0)) {
        Cond = Op1Op0;
        IfSet = Op1Op1;
        IfClr = Op0Op1;
      } else if (isBitwiseInverse(N: Op0Op1, OfNode: Op1Op0)) {
        Cond = Op1Op0;
        IfSet = Op1Op1;
        IfClr = Op0Op0;
      } else if (isBitwiseInverse(N: Op0Op0, OfNode: Op1Op1)) {
        Cond = Op1Op1;
        IfSet = Op1Op0;
        IfClr = Op0Op1;
      } else if (isBitwiseInverse(N: Op0Op1, OfNode: Op1Op1)) {
        Cond = Op1Op1;
        IfSet = Op1Op0;
        IfClr = Op0Op0;
      } else if (isBitwiseInverse(N: Op1Op0, OfNode: Op0Op0)) {
        Cond = Op0Op0;
        IfSet = Op0Op1;
        IfClr = Op1Op1;
      } else if (isBitwiseInverse(N: Op1Op1, OfNode: Op0Op0)) {
        Cond = Op0Op0;
        IfSet = Op0Op1;
        IfClr = Op1Op0;
      } else if (isBitwiseInverse(N: Op1Op0, OfNode: Op0Op1)) {
        Cond = Op0Op1;
        IfSet = Op0Op0;
        IfClr = Op1Op1;
      } else if (isBitwiseInverse(N: Op1Op1, OfNode: Op0Op1)) {
        Cond = Op0Op1;
        IfSet = Op0Op0;
        IfClr = Op1Op0;
      }
    }

    // At this point, IfClr will be set if we have a valid match.
    if (!IfClr.getNode())
      return SDValue();

    assert(Cond.getNode() && IfSet.getNode());

    // Fold degenerate cases.
    if (IsConstantMask) {
      if (Mask.isAllOnes())
        return IfSet;
      else if (Mask == 0)
        return IfClr;
    }

    // Transform the DAG into an equivalent VSELECT.
    return DAG.getNode(Opcode: ISD::VSELECT, DL: SDLoc(N), VT: Ty, N1: Cond, N2: IfSet, N3: IfClr);
  }

  return SDValue();
}
784
785static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT,
786 SelectionDAG &DAG,
787 const MipsSubtarget &Subtarget) {
788 // Estimate the number of operations the below transform will turn a
789 // constant multiply into. The number is approximately equal to the minimal
790 // number of powers of two that constant can be broken down to by adding
791 // or subtracting them.
792 //
793 // If we have taken more than 12[1] / 8[2] steps to attempt the
794 // optimization for a native sized value, it is more than likely that this
795 // optimization will make things worse.
796 //
797 // [1] MIPS64 requires 6 instructions at most to materialize any constant,
798 // multiplication requires at least 4 cycles, but another cycle (or two)
799 // to retrieve the result from the HI/LO registers.
800 //
801 // [2] For MIPS32, more than 8 steps is expensive as the constant could be
802 // materialized in 2 instructions, multiplication requires at least 4
803 // cycles, but another cycle (or two) to retrieve the result from the
804 // HI/LO registers.
805 //
806 // TODO:
807 // - MaxSteps needs to consider the `VT` of the constant for the current
808 // target.
809 // - Consider to perform this optimization after type legalization.
810 // That allows to remove a workaround for types not supported natively.
811 // - Take in account `-Os, -Oz` flags because this optimization
812 // increases code size.
813 unsigned MaxSteps = Subtarget.isABI_O32() ? 8 : 12;
814
815 SmallVector<APInt, 16> WorkStack(1, C);
816 unsigned Steps = 0;
817 unsigned BitWidth = C.getBitWidth();
818
819 while (!WorkStack.empty()) {
820 APInt Val = WorkStack.pop_back_val();
821
822 if (Val == 0 || Val == 1)
823 continue;
824
825 if (Steps >= MaxSteps)
826 return false;
827
828 if (Val.isPowerOf2()) {
829 ++Steps;
830 continue;
831 }
832
833 APInt Floor = APInt(BitWidth, 1) << Val.logBase2();
834 APInt Ceil = Val.isNegative() ? APInt(BitWidth, 0)
835 : APInt(BitWidth, 1) << C.ceilLogBase2();
836 if ((Val - Floor).ule(RHS: Ceil - Val)) {
837 WorkStack.push_back(Elt: Floor);
838 WorkStack.push_back(Elt: Val - Floor);
839 } else {
840 WorkStack.push_back(Elt: Ceil);
841 WorkStack.push_back(Elt: Ceil - Val);
842 }
843
844 ++Steps;
845 }
846
847 // If the value being multiplied is not supported natively, we have to pay
848 // an additional legalization cost, conservatively assume an increase in the
849 // cost of 3 instructions per step. This values for this heuristic were
850 // determined experimentally.
851 unsigned RegisterSize = DAG.getTargetLoweringInfo()
852 .getRegisterType(Context&: *DAG.getContext(), VT)
853 .getSizeInBits();
854 Steps *= (VT.getSizeInBits() != RegisterSize) * 3;
855 if (Steps > 27)
856 return false;
857
858 return true;
859}
860
861static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT,
862 EVT ShiftTy, SelectionDAG &DAG) {
863 // Return 0.
864 if (C == 0)
865 return DAG.getConstant(Val: 0, DL, VT);
866
867 // Return x.
868 if (C == 1)
869 return X;
870
871 // If c is power of 2, return (shl x, log2(c)).
872 if (C.isPowerOf2())
873 return DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X,
874 N2: DAG.getConstant(Val: C.logBase2(), DL, VT: ShiftTy));
875
876 unsigned BitWidth = C.getBitWidth();
877 APInt Floor = APInt(BitWidth, 1) << C.logBase2();
878 APInt Ceil = C.isNegative() ? APInt(BitWidth, 0) :
879 APInt(BitWidth, 1) << C.ceilLogBase2();
880
881 // If |c - floor_c| <= |c - ceil_c|,
882 // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))),
883 // return (add constMult(x, floor_c), constMult(x, c - floor_c)).
884 if ((C - Floor).ule(RHS: Ceil - C)) {
885 SDValue Op0 = genConstMult(X, C: Floor, DL, VT, ShiftTy, DAG);
886 SDValue Op1 = genConstMult(X, C: C - Floor, DL, VT, ShiftTy, DAG);
887 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op0, N2: Op1);
888 }
889
890 // If |c - floor_c| > |c - ceil_c|,
891 // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)).
892 SDValue Op0 = genConstMult(X, C: Ceil, DL, VT, ShiftTy, DAG);
893 SDValue Op1 = genConstMult(X, C: Ceil - C, DL, VT, ShiftTy, DAG);
894 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op0, N2: Op1);
895}
896
897static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
898 const TargetLowering::DAGCombinerInfo &DCI,
899 const MipsSETargetLowering *TL,
900 const MipsSubtarget &Subtarget) {
901 EVT VT = N->getValueType(ResNo: 0);
902
903 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1)))
904 if (!VT.isVector() && shouldTransformMulToShiftsAddsSubs(
905 C: C->getAPIntValue(), VT, DAG, Subtarget))
906 return genConstMult(X: N->getOperand(Num: 0), C: C->getAPIntValue(), DL: SDLoc(N), VT,
907 ShiftTy: TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT),
908 DAG);
909
910 return SDValue(N, 0);
911}
912
913static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty,
914 SelectionDAG &DAG,
915 const MipsSubtarget &Subtarget) {
916 // See if this is a vector splat immediate node.
917 APInt SplatValue, SplatUndef;
918 unsigned SplatBitSize;
919 bool HasAnyUndefs;
920 unsigned EltSize = Ty.getScalarSizeInBits();
921 BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Val: N->getOperand(Num: 1));
922
923 if (!Subtarget.hasDSP())
924 return SDValue();
925
926 if (!BV ||
927 !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
928 MinSplatBits: EltSize, isBigEndian: !Subtarget.isLittle()) ||
929 (SplatBitSize != EltSize) ||
930 (SplatValue.getZExtValue() >= EltSize))
931 return SDValue();
932
933 SDLoc DL(N);
934 return DAG.getNode(Opcode: Opc, DL, VT: Ty, N1: N->getOperand(Num: 0),
935 N2: DAG.getConstant(Val: SplatValue.getZExtValue(), DL, VT: MVT::i32));
936}
937
938static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG,
939 TargetLowering::DAGCombinerInfo &DCI,
940 const MipsSubtarget &Subtarget) {
941 EVT Ty = N->getValueType(ResNo: 0);
942
943 if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
944 return SDValue();
945
946 return performDSPShiftCombine(Opc: MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget);
947}
948
949// Fold sign-extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold
950// constant splats into MipsISD::SHRA_DSP for DSPr2.
951//
952// Performs the following transformations:
953// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its
954// sign/zero-extension is completely overwritten by the new one performed by
955// the ISD::SRA and ISD::SHL nodes.
956// - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL
957// sequence.
958//
959// See performDSPShiftCombine for more information about the transformation
960// used for DSPr2.
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(ResNo: 0);

  if (Subtarget.hasMSA()) {
    SDValue Op0 = N->getOperand(Num: 0);
    SDValue Op1 = N->getOperand(Num: 1);

    // (sra (shl (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d)
    // where $d + sizeof($c) == 32
    // or $d + sizeof($c) <= 32 and SExt
    // -> (MipsVExtractSExt $a, $b, $c)
    if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(Num: 1)) {
      SDValue Op0Op0 = Op0->getOperand(Num: 0);
      ConstantSDNode *ShAmount = dyn_cast<ConstantSDNode>(Val&: Op1);

      // The common shift amount of the SHL/SRA pair must be a constant.
      if (!ShAmount)
        return SDValue();

      // Only an extract-and-extend node can absorb the sign-extension.
      if (Op0Op0->getOpcode() != MipsISD::VEXTRACT_SEXT_ELT &&
          Op0Op0->getOpcode() != MipsISD::VEXTRACT_ZEXT_ELT)
        return SDValue();

      // Operand 2 of the extract is a VT node giving the width of the
      // element before extension.
      EVT ExtendTy = cast<VTSDNode>(Val: Op0Op0->getOperand(Num: 2))->getVT();
      unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits();

      // The shl/sra pair is redundant when the extract's own extension
      // already covers the bits it would re-extend (see pattern above).
      if (TotalBits == 32 ||
          (Op0Op0->getOpcode() == MipsISD::VEXTRACT_SEXT_ELT &&
           TotalBits <= 32)) {
        SDValue Ops[] = { Op0Op0->getOperand(Num: 0), Op0Op0->getOperand(Num: 1),
                          Op0Op0->getOperand(Num: 2) };
        return DAG.getNode(Opcode: MipsISD::VEXTRACT_SEXT_ELT, DL: SDLoc(Op0Op0),
                           VTList: Op0Op0->getVTList(),
                           Ops: ArrayRef(Ops, Op0Op0->getNumOperands()));
      }
    }
  }

  // Otherwise, try the DSP constant-splat shift fold: v2i16 on DSP,
  // v4i8 only on DSPr2.
  if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget.hasDSPR2()))
    return SDValue();

  return performDSPShiftCombine(Opc: MipsISD::SHRA_DSP, N, Ty, DAG, Subtarget);
}
1005
1006
1007static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
1008 TargetLowering::DAGCombinerInfo &DCI,
1009 const MipsSubtarget &Subtarget) {
1010 EVT Ty = N->getValueType(ResNo: 0);
1011
1012 if (((Ty != MVT::v2i16) || !Subtarget.hasDSPR2()) && (Ty != MVT::v4i8))
1013 return SDValue();
1014
1015 return performDSPShiftCombine(Opc: MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget);
1016}
1017
1018static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC) {
1019 bool IsV216 = (Ty == MVT::v2i16);
1020
1021 switch (CC) {
1022 case ISD::SETEQ:
1023 case ISD::SETNE: return true;
1024 case ISD::SETLT:
1025 case ISD::SETLE:
1026 case ISD::SETGT:
1027 case ISD::SETGE: return IsV216;
1028 case ISD::SETULT:
1029 case ISD::SETULE:
1030 case ISD::SETUGT:
1031 case ISD::SETUGE: return !IsV216;
1032 default: return false;
1033 }
1034}
1035
1036static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
1037 EVT Ty = N->getValueType(ResNo: 0);
1038
1039 if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
1040 return SDValue();
1041
1042 if (!isLegalDSPCondCode(Ty, CC: cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get()))
1043 return SDValue();
1044
1045 return DAG.getNode(Opcode: MipsISD::SETCC_DSP, DL: SDLoc(N), VT: Ty, N1: N->getOperand(Num: 0),
1046 N2: N->getOperand(Num: 1), N3: N->getOperand(Num: 2));
1047}
1048
1049static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) {
1050 EVT Ty = N->getValueType(ResNo: 0);
1051
1052 if (Ty == MVT::v2i16 || Ty == MVT::v4i8) {
1053 SDValue SetCC = N->getOperand(Num: 0);
1054
1055 if (SetCC.getOpcode() != MipsISD::SETCC_DSP)
1056 return SDValue();
1057
1058 return DAG.getNode(Opcode: MipsISD::SELECT_CC_DSP, DL: SDLoc(N), VT: Ty,
1059 N1: SetCC.getOperand(i: 0), N2: SetCC.getOperand(i: 1),
1060 N3: N->getOperand(Num: 1), N4: N->getOperand(Num: 2), N5: SetCC.getOperand(i: 2));
1061 }
1062
1063 return SDValue();
1064}
1065
1066static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
1067 const MipsSubtarget &Subtarget) {
1068 EVT Ty = N->getValueType(ResNo: 0);
1069
1070 if (Subtarget.hasMSA() && Ty.is128BitVector() && Ty.isInteger()) {
1071 // Try the following combines:
1072 // (xor (or $a, $b), (build_vector allones))
1073 // (xor (or $a, $b), (bitcast (build_vector allones)))
1074 SDValue Op0 = N->getOperand(Num: 0);
1075 SDValue Op1 = N->getOperand(Num: 1);
1076 SDValue NotOp;
1077
1078 if (ISD::isBuildVectorAllOnes(N: Op0.getNode()))
1079 NotOp = Op1;
1080 else if (ISD::isBuildVectorAllOnes(N: Op1.getNode()))
1081 NotOp = Op0;
1082 else
1083 return SDValue();
1084
1085 if (NotOp->getOpcode() == ISD::OR)
1086 return DAG.getNode(Opcode: MipsISD::VNOR, DL: SDLoc(N), VT: Ty, N1: NotOp->getOperand(Num: 0),
1087 N2: NotOp->getOperand(Num: 1));
1088 }
1089
1090 return SDValue();
1091}
1092
// Dispatch SE-specific DAG combines. Cases that return directly hand back
// their result (or SDValue(N, 0)) without the debug dump or the fallback to
// the generic Mips combines; the break cases fall through via Val.
SDValue
MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDValue Val;

  switch (N->getOpcode()) {
  case ISD::AND:
    Val = performANDCombine(N, DAG, DCI, Subtarget);
    break;
  case ISD::OR:
    Val = performORCombine(N, DAG, DCI, Subtarget);
    break;
  case ISD::MUL:
    return performMULCombine(N, DAG, DCI, TL: this, Subtarget);
  case ISD::SHL:
    Val = performSHLCombine(N, DAG, DCI, Subtarget);
    break;
  case ISD::SRA:
    return performSRACombine(N, DAG, DCI, Subtarget);
  case ISD::SRL:
    return performSRLCombine(N, DAG, DCI, Subtarget);
  case ISD::VSELECT:
    return performVSELECTCombine(N, DAG);
  case ISD::XOR:
    Val = performXORCombine(N, DAG, Subtarget);
    break;
  case ISD::SETCC:
    Val = performSETCCCombine(N, DAG);
    break;
  }

  if (Val.getNode()) {
    // A combine fired; dump before/after when -debug is enabled.
    LLVM_DEBUG(dbgs() << "\nMipsSE DAG Combine:\n";
               N->printrWithDepth(dbgs(), &DAG); dbgs() << "\n=> \n";
               Val.getNode()->printrWithDepth(dbgs(), &DAG); dbgs() << "\n");
    return Val;
  }

  // No SE-specific combine fired; defer to the common Mips combines.
  return MipsTargetLowering::PerformDAGCombine(N, DCI);
}
1133
// Expand the pseudo instructions that were marked usesCustomInserter into
// real instruction sequences; anything not listed is handled by the base
// class implementation.
MachineBasicBlock *
MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                  MachineBasicBlock *BB) const {
  switch (MI.getOpcode()) {
  default:
    return MipsTargetLowering::EmitInstrWithCustomInserter(MI, MBB: BB);
  case Mips::BPOSGE32_PSEUDO:
    return emitBPOSGE32(MI, BB);
  // MSA "set on not-zero" pseudos, expanded around the matching BNZ branch.
  case Mips::SNZ_B_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BNZ_B);
  case Mips::SNZ_H_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BNZ_H);
  case Mips::SNZ_W_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BNZ_W);
  case Mips::SNZ_D_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BNZ_D);
  case Mips::SNZ_V_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BNZ_V);
  // MSA "set on zero" pseudos, expanded around the matching BZ branch.
  case Mips::SZ_B_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BZ_B);
  case Mips::SZ_H_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BZ_H);
  case Mips::SZ_W_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BZ_W);
  case Mips::SZ_D_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BZ_D);
  case Mips::SZ_V_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BZ_V);
  // Copies of FP elements out of MSA vectors.
  case Mips::COPY_FW_PSEUDO:
    return emitCOPY_FW(MI, BB);
  case Mips::COPY_FD_PSEUDO:
    return emitCOPY_FD(MI, BB);
  // Insertions of FP elements into MSA vectors (constant index).
  case Mips::INSERT_FW_PSEUDO:
    return emitINSERT_FW(MI, BB);
  case Mips::INSERT_FD_PSEUDO:
    return emitINSERT_FD(MI, BB);
  // Insertions with a variable (register) index; the helper takes the
  // element size in bytes and whether the element is floating point.
  case Mips::INSERT_B_VIDX_PSEUDO:
  case Mips::INSERT_B_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, EltSizeInBytes: 1, IsFP: false);
  case Mips::INSERT_H_VIDX_PSEUDO:
  case Mips::INSERT_H_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, EltSizeInBytes: 2, IsFP: false);
  case Mips::INSERT_W_VIDX_PSEUDO:
  case Mips::INSERT_W_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, EltSizeInBytes: 4, IsFP: false);
  case Mips::INSERT_D_VIDX_PSEUDO:
  case Mips::INSERT_D_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, EltSizeInBytes: 8, IsFP: false);
  case Mips::INSERT_FW_VIDX_PSEUDO:
  case Mips::INSERT_FW_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, EltSizeInBytes: 4, IsFP: true);
  case Mips::INSERT_FD_VIDX_PSEUDO:
  case Mips::INSERT_FD_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, EltSizeInBytes: 8, IsFP: true);
  // Vector fills from FP registers.
  case Mips::FILL_FW_PSEUDO:
    return emitFILL_FW(MI, BB);
  case Mips::FILL_FD_PSEUDO:
    return emitFILL_FD(MI, BB);
  // Scalar exp2 pseudos implemented via MSA.
  case Mips::FEXP2_W_1_PSEUDO:
    return emitFEXP2_W_1(MI, BB);
  case Mips::FEXP2_D_1_PSEUDO:
    return emitFEXP2_D_1(MI, BB);
  // f16 load/store and f16<->f32/f64 conversion pseudos.
  case Mips::ST_F16:
    return emitST_F16_PSEUDO(MI, BB);
  case Mips::LD_F16:
    return emitLD_F16_PSEUDO(MI, BB);
  case Mips::MSA_FP_EXTEND_W_PSEUDO:
    return emitFPEXTEND_PSEUDO(MI, BB, IsFGR64: false);
  case Mips::MSA_FP_ROUND_W_PSEUDO:
    return emitFPROUND_PSEUDO(MI, BBi: BB, IsFGR64: false);
  case Mips::MSA_FP_EXTEND_D_PSEUDO:
    return emitFPEXTEND_PSEUDO(MI, BB, IsFGR64: true);
  case Mips::MSA_FP_ROUND_D_PSEUDO:
    return emitFPROUND_PSEUDO(MI, BBi: BB, IsFGR64: true);
  }
}
1210
1211bool MipsSETargetLowering::isEligibleForTailCallOptimization(
1212 const CCState &CCInfo, unsigned NextStackOffset,
1213 const MipsFunctionInfo &FI) const {
1214 // Exception has to be cleared with eret.
1215 if (FI.isISR())
1216 return false;
1217
1218 // Return false if either the callee or caller has a byval argument.
1219 if (CCInfo.getInRegsParamsCount() > 0 || FI.hasByvalArg())
1220 return false;
1221
1222 // Return true if the callee's argument area is no larger than the caller's.
1223 return NextStackOffset <= FI.getIncomingArgSize();
1224}
1225
// Build the operand list for a MIPS call node: the callee address is placed
// first, then the base implementation appends the remaining operands.
void MipsSETargetLowering::
getOpndList(SmallVectorImpl<SDValue> &Ops,
            std::deque<std::pair<unsigned, SDValue>> &RegsToPass,
            bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
            bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee,
            SDValue Chain) const {
  // The callee must be the first operand; the order matters to the base
  // class, which fills in argument registers, chain, etc. after it.
  Ops.push_back(Elt: Callee);
  MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal,
                                  InternalLinkage, IsCallReloc, CLI, Callee,
                                  Chain);
}
1237
SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode &Nd = *cast<LoadSDNode>(Val&: Op);

  // Only f64 loads are split here, and only when -mno-ldc1-sdc1 disables
  // the 64-bit FP load/store instructions (see NoDPLoadStore above).
  if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
    return MipsTargetLowering::lowerLOAD(Op, DAG);

  // Replace a double precision load with two i32 loads and a buildpair64.
  SDLoc DL(Op);
  SDValue Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
  EVT PtrVT = Ptr.getValueType();

  // i32 load from lower address.
  SDValue Lo = DAG.getLoad(VT: MVT::i32, dl: DL, Chain, Ptr, PtrInfo: MachinePointerInfo(),
                           Alignment: Nd.getAlign(), MMOFlags: Nd.getMemOperand()->getFlags());

  // i32 load from higher address, chained after the first load; its
  // alignment is the original alignment adjusted for the +4 offset.
  Ptr = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: Ptr, N2: DAG.getConstant(Val: 4, DL, VT: PtrVT));
  SDValue Hi = DAG.getLoad(
      VT: MVT::i32, dl: DL, Chain: Lo.getValue(R: 1), Ptr, PtrInfo: MachinePointerInfo(),
      Alignment: commonAlignment(A: Nd.getAlign(), Offset: 4), MMOFlags: Nd.getMemOperand()->getFlags());

  // On big-endian targets the high word lives at the lower address.
  if (!Subtarget.isLittle())
    std::swap(a&: Lo, b&: Hi);

  // Pair the halves into an f64 and return it with the output chain of the
  // second load.
  SDValue BP = DAG.getNode(Opcode: MipsISD::BuildPairF64, DL, VT: MVT::f64, N1: Lo, N2: Hi);
  SDValue Ops[2] = {BP, Hi.getValue(R: 1)};
  return DAG.getMergeValues(Ops, dl: DL);
}
1266
SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  StoreSDNode &Nd = *cast<StoreSDNode>(Val&: Op);

  // Only f64 stores are split here, and only when -mno-ldc1-sdc1 disables
  // the 64-bit FP load/store instructions (see NoDPLoadStore above).
  if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
    return MipsTargetLowering::lowerSTORE(Op, DAG);

  // Replace a double precision store with two extractelement64s and i32 stores.
  SDLoc DL(Op);
  SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
  EVT PtrVT = Ptr.getValueType();
  // Extract the low (index 0) and high (index 1) 32-bit halves of the f64.
  SDValue Lo = DAG.getNode(Opcode: MipsISD::ExtractElementF64, DL, VT: MVT::i32,
                           N1: Val, N2: DAG.getConstant(Val: 0, DL, VT: MVT::i32));
  SDValue Hi = DAG.getNode(Opcode: MipsISD::ExtractElementF64, DL, VT: MVT::i32,
                           N1: Val, N2: DAG.getConstant(Val: 1, DL, VT: MVT::i32));

  // On big-endian targets the high word goes to the lower address.
  if (!Subtarget.isLittle())
    std::swap(a&: Lo, b&: Hi);

  // i32 store to lower address.
  Chain = DAG.getStore(Chain, dl: DL, Val: Lo, Ptr, PtrInfo: MachinePointerInfo(), Alignment: Nd.getAlign(),
                       MMOFlags: Nd.getMemOperand()->getFlags(), AAInfo: Nd.getAAInfo());

  // i32 store to higher address, chained after the first store; alignment is
  // adjusted for the +4 offset.
  Ptr = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: Ptr, N2: DAG.getConstant(Val: 4, DL, VT: PtrVT));
  return DAG.getStore(Chain, dl: DL, Val: Hi, Ptr, PtrInfo: MachinePointerInfo(),
                      Alignment: commonAlignment(A: Nd.getAlign(), Offset: 4),
                      MMOFlags: Nd.getMemOperand()->getFlags(), AAInfo: Nd.getAAInfo());
}
1295
1296SDValue MipsSETargetLowering::lowerBITCAST(SDValue Op,
1297 SelectionDAG &DAG) const {
1298 SDLoc DL(Op);
1299 MVT Src = Op.getOperand(i: 0).getValueType().getSimpleVT();
1300 MVT Dest = Op.getValueType().getSimpleVT();
1301
1302 // Bitcast i64 to double.
1303 if (Src == MVT::i64 && Dest == MVT::f64) {
1304 SDValue Lo, Hi;
1305 std::tie(args&: Lo, args&: Hi) =
1306 DAG.SplitScalar(N: Op.getOperand(i: 0), DL, LoVT: MVT::i32, HiVT: MVT::i32);
1307 return DAG.getNode(Opcode: MipsISD::BuildPairF64, DL, VT: MVT::f64, N1: Lo, N2: Hi);
1308 }
1309
1310 // Bitcast double to i64.
1311 if (Src == MVT::f64 && Dest == MVT::i64) {
1312 // Skip lower bitcast when operand0 has converted float results to integer
1313 // which was done by function SoftenFloatResult.
1314 if (getTypeAction(Context&: *DAG.getContext(), VT: Op.getOperand(i: 0).getValueType()) ==
1315 TargetLowering::TypeSoftenFloat)
1316 return SDValue();
1317 SDValue Lo =
1318 DAG.getNode(Opcode: MipsISD::ExtractElementF64, DL, VT: MVT::i32, N1: Op.getOperand(i: 0),
1319 N2: DAG.getConstant(Val: 0, DL, VT: MVT::i32));
1320 SDValue Hi =
1321 DAG.getNode(Opcode: MipsISD::ExtractElementF64, DL, VT: MVT::i32, N1: Op.getOperand(i: 0),
1322 N2: DAG.getConstant(Val: 1, DL, VT: MVT::i32));
1323 return DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: MVT::i64, N1: Lo, N2: Hi);
1324 }
1325
1326 // Skip other cases of bitcast and use default lowering.
1327 return SDValue();
1328}
1329
1330SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
1331 bool HasLo, bool HasHi,
1332 SelectionDAG &DAG) const {
1333 // MIPS32r6/MIPS64r6 removed accumulator based multiplies.
1334 assert(!Subtarget.hasMips32r6());
1335
1336 EVT Ty = Op.getOperand(i: 0).getValueType();
1337 SDLoc DL(Op);
1338 SDValue Mult = DAG.getNode(Opcode: NewOpc, DL, VT: MVT::Untyped,
1339 N1: Op.getOperand(i: 0), N2: Op.getOperand(i: 1));
1340 SDValue Lo, Hi;
1341
1342 if (HasLo)
1343 Lo = DAG.getNode(Opcode: MipsISD::MFLO, DL, VT: Ty, Operand: Mult);
1344 if (HasHi)
1345 Hi = DAG.getNode(Opcode: MipsISD::MFHI, DL, VT: Ty, Operand: Mult);
1346
1347 if (!HasLo || !HasHi)
1348 return HasLo ? Lo : Hi;
1349
1350 SDValue Vals[] = { Lo, Hi };
1351 return DAG.getMergeValues(Ops: Vals, dl: DL);
1352}
1353
1354static SDValue initAccumulator(SDValue In, const SDLoc &DL, SelectionDAG &DAG) {
1355 SDValue InLo, InHi;
1356 std::tie(args&: InLo, args&: InHi) = DAG.SplitScalar(N: In, DL, LoVT: MVT::i32, HiVT: MVT::i32);
1357 return DAG.getNode(Opcode: MipsISD::MTLOHI, DL, VT: MVT::Untyped, N1: InLo, N2: InHi);
1358}
1359
1360static SDValue extractLOHI(SDValue Op, const SDLoc &DL, SelectionDAG &DAG) {
1361 SDValue Lo = DAG.getNode(Opcode: MipsISD::MFLO, DL, VT: MVT::i32, Operand: Op);
1362 SDValue Hi = DAG.getNode(Opcode: MipsISD::MFHI, DL, VT: MVT::i32, Operand: Op);
1363 return DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: MVT::i64, N1: Lo, N2: Hi);
1364}
1365
1366// This function expands mips intrinsic nodes which have 64-bit input operands
1367// or output values.
1368//
1369// out64 = intrinsic-node in64
1370// =>
1371// lo = copy (extract-element (in64, 0))
1372// hi = copy (extract-element (in64, 1))
1373// mips-specific-node
1374// v0 = copy lo
1375// v1 = copy hi
1376// out64 = merge-values (v0, v1)
1377//
static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
  SDLoc DL(Op);
  // Intrinsics with side effects carry a chain as operand 0.
  bool HasChainIn = Op->getOperand(Num: 0).getValueType() == MVT::Other;
  SmallVector<SDValue, 3> Ops;
  unsigned OpNo = 0;

  // See if Op has a chain input.
  if (HasChainIn)
    Ops.push_back(Elt: Op->getOperand(Num: OpNo++));

  // The next operand is the intrinsic opcode.
  assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant);

  // See if the next operand has type i64.
  SDValue Opnd = Op->getOperand(Num: ++OpNo), In64;

  // A 64-bit input is moved into the HI/LO accumulator pair (appended at the
  // end of the operand list below); other inputs pass through unchanged.
  if (Opnd.getValueType() == MVT::i64)
    In64 = initAccumulator(In: Opnd, DL, DAG);
  else
    Ops.push_back(Elt: Opnd);

  // Push the remaining operands.
  for (++OpNo ; OpNo < Op->getNumOperands(); ++OpNo)
    Ops.push_back(Elt: Op->getOperand(Num: OpNo));

  // Add In64 to the end of the list.
  if (In64.getNode())
    Ops.push_back(Elt: In64);

  // Scan output. A 64-bit result is produced in the accumulator, so its
  // type becomes Untyped here and is extracted back to i64 below.
  SmallVector<EVT, 2> ResTys;

  for (EVT Ty : Op->values())
    ResTys.push_back(Elt: (Ty == MVT::i64) ? MVT::Untyped : Ty);

  // Create node.
  SDValue Val = DAG.getNode(Opcode: Opc, DL, ResultTys: ResTys, Ops);
  SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Op: Val, DL, DAG) : Val;

  if (!HasChainIn)
    return Out;

  // Return the value merged with the new node's output chain.
  assert(Val->getValueType(1) == MVT::Other);
  SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) };
  return DAG.getMergeValues(Ops: Vals, dl: DL);
}
1424
1425// Lower an MSA copy intrinsic into the specified SelectionDAG node
1426static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
1427 SDLoc DL(Op);
1428 SDValue Vec = Op->getOperand(Num: 1);
1429 SDValue Idx = Op->getOperand(Num: 2);
1430 EVT ResTy = Op->getValueType(ResNo: 0);
1431 EVT EltTy = Vec->getValueType(ResNo: 0).getVectorElementType();
1432
1433 SDValue Result = DAG.getNode(Opcode: Opc, DL, VT: ResTy, N1: Vec, N2: Idx,
1434 N3: DAG.getValueType(EltTy));
1435
1436 return Result;
1437}
1438
// Splat operand OpNr of Op into every lane of the result vector type.
// v2i64 results are built through a v4i32 BUILD_VECTOR (see comments below).
static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) {
  EVT ResVecTy = Op->getValueType(ResNo: 0);
  EVT ViaVecTy = ResVecTy;
  bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian();
  SDLoc DL(Op);

  // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and
  // LaneB is the lower 32-bits. Otherwise LaneA and LaneB are alternating
  // lanes.
  SDValue LaneA = Op->getOperand(Num: OpNr);
  SDValue LaneB;

  if (ResVecTy == MVT::v2i64) {
    // In case of the index being passed as an immediate value, set the upper
    // lane to 0 so that the splati.d instruction can be matched.
    if (isa<ConstantSDNode>(Val: LaneA))
      LaneB = DAG.getConstant(Val: 0, DL, VT: MVT::i32);
    // Having the index passed in a register, set the upper lane to the same
    // value as the lower - this results in the BUILD_VECTOR node not being
    // expanded through stack. This way we are able to pattern match the set of
    // nodes created here to splat.d.
    else
      LaneB = LaneA;
    ViaVecTy = MVT::v4i32;
    if(BigEndian)
      std::swap(a&: LaneA, b&: LaneB);
  } else
    LaneB = LaneA;

  // Enough operands for the widest case (16 lanes); only the first
  // getVectorNumElements() entries are used below.
  SDValue Ops[16] = { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB,
                      LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB };

  SDValue Result = DAG.getBuildVector(
      VT: ViaVecTy, DL, Ops: ArrayRef(Ops, ViaVecTy.getVectorNumElements()));

  if (ViaVecTy != ResVecTy) {
    // NOTE(review): the v2i64-via-v4i32 result is ANDed with a splat of 1
    // before the bitcast, keeping only bit 0 of each 32-bit half - this
    // looks intended for boolean/mask splats; confirm against the callers.
    SDValue One = DAG.getConstant(Val: 1, DL, VT: ViaVecTy);
    Result = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ResVecTy,
                         Operand: DAG.getNode(Opcode: ISD::AND, DL, VT: ViaVecTy, N1: Result, N2: One));
  }

  return Result;
}
1482
1483static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG,
1484 bool IsSigned = false) {
1485 auto *CImm = cast<ConstantSDNode>(Val: Op->getOperand(Num: ImmOp));
1486 return DAG.getConstant(
1487 Val: APInt(Op->getValueType(ResNo: 0).getScalarType().getSizeInBits(),
1488 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
1489 DL: SDLoc(Op), VT: Op->getValueType(ResNo: 0));
1490}
1491
// Build a vector of type VecTy with every element equal to SplatValue.
// v2i64 splats are built via a v4i32 BUILD_VECTOR: the 64-bit value is
// split into its two i32 halves first.
static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue,
                                   bool BigEndian, SelectionDAG &DAG) {
  EVT ViaVecTy = VecTy;
  SDValue SplatValueA = SplatValue;
  SDValue SplatValueB = SplatValue;
  SDLoc DL(SplatValue);

  if (VecTy == MVT::v2i64) {
    // v2i64 BUILD_VECTOR must be performed via v4i32 so split into i32's.
    ViaVecTy = MVT::v4i32;

    // A = low 32 bits, B = high 32 bits of the 64-bit splat value.
    SplatValueA = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: SplatValue);
    SplatValueB = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, N1: SplatValue,
                              N2: DAG.getConstant(Val: 32, DL, VT: MVT::i32));
    SplatValueB = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: SplatValueB);
  }

  // We currently hold the parts in little endian order. Swap them if
  // necessary.
  if (BigEndian)
    std::swap(a&: SplatValueA, b&: SplatValueB);

  // Enough operands for the widest case (16 lanes); only the first
  // getVectorNumElements() entries are used below.
  SDValue Ops[16] = { SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB };

  SDValue Result = DAG.getBuildVector(
      VT: ViaVecTy, DL, Ops: ArrayRef(Ops, ViaVecTy.getVectorNumElements()));

  // Cast back when the splat was built via a different vector type.
  if (VecTy != ViaVecTy)
    Result = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: VecTy, Operand: Result);

  return Result;
}
1527
// Lower an MSA bit-immediate intrinsic: combine operand 1 with a splat of
// (1 << Imm) using the caller-supplied node opcode Opc.
static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG,
                                        unsigned Opc, SDValue Imm,
                                        bool BigEndian) {
  EVT VecTy = Op->getValueType(ResNo: 0);
  SDValue Exp2Imm;
  SDLoc DL(Op);

  // The DAG Combiner can't constant fold bitcasted vectors yet so we must do it
  // here for now.
  if (VecTy == MVT::v2i64) {
    if (ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(Val&: Imm)) {
      APInt BitImm = APInt(64, 1) << CImm->getAPIntValue();

      // Split the 64-bit constant into the two i32 halves needed for the
      // v4i32 BUILD_VECTOR below.
      SDValue BitImmHiOp = DAG.getConstant(Val: BitImm.lshr(shiftAmt: 32).trunc(width: 32), DL,
                                           VT: MVT::i32);
      SDValue BitImmLoOp = DAG.getConstant(Val: BitImm.trunc(width: 32), DL, VT: MVT::i32);

      // Big-endian targets hold the halves in the opposite order.
      if (BigEndian)
        std::swap(a&: BitImmLoOp, b&: BitImmHiOp);

      Exp2Imm = DAG.getNode(
          Opcode: ISD::BITCAST, DL, VT: MVT::v2i64,
          Operand: DAG.getBuildVector(VT: MVT::v4i32, DL,
                             Ops: {BitImmLoOp, BitImmHiOp, BitImmLoOp, BitImmHiOp}));
    }
  }

  if (!Exp2Imm.getNode()) {
    // We couldn't constant fold; do a vector shift (1 << Imm) instead.

    // Extend i32 to i64 if necessary. Sign or zero extend doesn't matter since
    // only values 0-63 are valid.
    if (VecTy == MVT::v2i64)
      Imm = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, Operand: Imm);

    Exp2Imm = getBuildVectorSplat(VecTy, SplatValue: Imm, BigEndian, DAG);

    Exp2Imm = DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: DAG.getConstant(Val: 1, DL, VT: VecTy),
                          N2: Exp2Imm);
  }

  return DAG.getNode(Opcode: Opc, DL, VT: VecTy, N1: Op->getOperand(Num: 1), N2: Exp2Imm);
}
1571
1572static SDValue truncateVecElts(SDValue Op, SelectionDAG &DAG) {
1573 SDLoc DL(Op);
1574 EVT ResTy = Op->getValueType(ResNo: 0);
1575 SDValue Vec = Op->getOperand(Num: 2);
1576 bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian();
1577 MVT ResEltTy = ResTy == MVT::v2i64 ? MVT::i64 : MVT::i32;
1578 SDValue ConstValue = DAG.getConstant(Val: Vec.getScalarValueSizeInBits() - 1,
1579 DL, VT: ResEltTy);
1580 SDValue SplatVec = getBuildVectorSplat(VecTy: ResTy, SplatValue: ConstValue, BigEndian, DAG);
1581
1582 return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Vec, N2: SplatVec);
1583}
1584
1585static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) {
1586 EVT ResTy = Op->getValueType(ResNo: 0);
1587 SDLoc DL(Op);
1588 SDValue One = DAG.getConstant(Val: 1, DL, VT: ResTy);
1589 SDValue Bit = DAG.getNode(Opcode: ISD::SHL, DL, VT: ResTy, N1: One, N2: truncateVecElts(Op, DAG));
1590
1591 return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Op->getOperand(Num: 1),
1592 N2: DAG.getNOT(DL, Val: Bit, VT: ResTy));
1593}
1594
1595static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) {
1596 SDLoc DL(Op);
1597 EVT ResTy = Op->getValueType(ResNo: 0);
1598 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1)
1599 << Op->getConstantOperandAPInt(Num: 2);
1600 SDValue BitMask = DAG.getConstant(Val: ~BitImm, DL, VT: ResTy);
1601
1602 return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Op->getOperand(Num: 1), N2: BitMask);
1603}
1604
1605SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
1606 SelectionDAG &DAG) const {
1607 SDLoc DL(Op);
1608 unsigned Intrinsic = Op->getConstantOperandVal(Num: 0);
1609 switch (Intrinsic) {
1610 default:
1611 return SDValue();
1612 case Intrinsic::mips_shilo:
1613 return lowerDSPIntr(Op, DAG, Opc: MipsISD::SHILO);
1614 case Intrinsic::mips_dpau_h_qbl:
1615 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPAU_H_QBL);
1616 case Intrinsic::mips_dpau_h_qbr:
1617 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPAU_H_QBR);
1618 case Intrinsic::mips_dpsu_h_qbl:
1619 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPSU_H_QBL);
1620 case Intrinsic::mips_dpsu_h_qbr:
1621 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPSU_H_QBR);
1622 case Intrinsic::mips_dpa_w_ph:
1623 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPA_W_PH);
1624 case Intrinsic::mips_dps_w_ph:
1625 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPS_W_PH);
1626 case Intrinsic::mips_dpax_w_ph:
1627 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPAX_W_PH);
1628 case Intrinsic::mips_dpsx_w_ph:
1629 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPSX_W_PH);
1630 case Intrinsic::mips_mulsa_w_ph:
1631 return lowerDSPIntr(Op, DAG, Opc: MipsISD::MULSA_W_PH);
1632 case Intrinsic::mips_mult:
1633 return lowerDSPIntr(Op, DAG, Opc: MipsISD::Mult);
1634 case Intrinsic::mips_multu:
1635 return lowerDSPIntr(Op, DAG, Opc: MipsISD::Multu);
1636 case Intrinsic::mips_madd:
1637 return lowerDSPIntr(Op, DAG, Opc: MipsISD::MAdd);
1638 case Intrinsic::mips_maddu:
1639 return lowerDSPIntr(Op, DAG, Opc: MipsISD::MAddu);
1640 case Intrinsic::mips_msub:
1641 return lowerDSPIntr(Op, DAG, Opc: MipsISD::MSub);
1642 case Intrinsic::mips_msubu:
1643 return lowerDSPIntr(Op, DAG, Opc: MipsISD::MSubu);
1644 case Intrinsic::mips_addv_b:
1645 case Intrinsic::mips_addv_h:
1646 case Intrinsic::mips_addv_w:
1647 case Intrinsic::mips_addv_d:
1648 return DAG.getNode(Opcode: ISD::ADD, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
1649 N2: Op->getOperand(Num: 2));
1650 case Intrinsic::mips_addvi_b:
1651 case Intrinsic::mips_addvi_h:
1652 case Intrinsic::mips_addvi_w:
1653 case Intrinsic::mips_addvi_d:
1654 return DAG.getNode(Opcode: ISD::ADD, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
1655 N2: lowerMSASplatImm(Op, ImmOp: 2, DAG));
1656 case Intrinsic::mips_and_v:
1657 return DAG.getNode(Opcode: ISD::AND, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
1658 N2: Op->getOperand(Num: 2));
1659 case Intrinsic::mips_andi_b:
1660 return DAG.getNode(Opcode: ISD::AND, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
1661 N2: lowerMSASplatImm(Op, ImmOp: 2, DAG));
1662 case Intrinsic::mips_bclr_b:
1663 case Intrinsic::mips_bclr_h:
1664 case Intrinsic::mips_bclr_w:
1665 case Intrinsic::mips_bclr_d:
1666 return lowerMSABitClear(Op, DAG);
1667 case Intrinsic::mips_bclri_b:
1668 case Intrinsic::mips_bclri_h:
1669 case Intrinsic::mips_bclri_w:
1670 case Intrinsic::mips_bclri_d:
1671 return lowerMSABitClearImm(Op, DAG);
1672 case Intrinsic::mips_binsli_b:
1673 case Intrinsic::mips_binsli_h:
1674 case Intrinsic::mips_binsli_w:
1675 case Intrinsic::mips_binsli_d: {
1676 // binsli_x(IfClear, IfSet, nbits) -> (vselect LBitsMask, IfSet, IfClear)
1677 EVT VecTy = Op->getValueType(ResNo: 0);
1678 EVT EltTy = VecTy.getVectorElementType();
1679 if (Op->getConstantOperandVal(Num: 3) >= EltTy.getSizeInBits())
1680 report_fatal_error(reason: "Immediate out of range");
1681 APInt Mask = APInt::getHighBitsSet(numBits: EltTy.getSizeInBits(),
1682 hiBitsSet: Op->getConstantOperandVal(Num: 3) + 1);
1683 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: VecTy,
1684 N1: DAG.getConstant(Val: Mask, DL, VT: VecTy, isTarget: true),
1685 N2: Op->getOperand(Num: 2), N3: Op->getOperand(Num: 1));
1686 }
1687 case Intrinsic::mips_binsri_b:
1688 case Intrinsic::mips_binsri_h:
1689 case Intrinsic::mips_binsri_w:
1690 case Intrinsic::mips_binsri_d: {
1691 // binsri_x(IfClear, IfSet, nbits) -> (vselect RBitsMask, IfSet, IfClear)
1692 EVT VecTy = Op->getValueType(ResNo: 0);
1693 EVT EltTy = VecTy.getVectorElementType();
1694 if (Op->getConstantOperandVal(Num: 3) >= EltTy.getSizeInBits())
1695 report_fatal_error(reason: "Immediate out of range");
1696 APInt Mask = APInt::getLowBitsSet(numBits: EltTy.getSizeInBits(),
1697 loBitsSet: Op->getConstantOperandVal(Num: 3) + 1);
1698 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: VecTy,
1699 N1: DAG.getConstant(Val: Mask, DL, VT: VecTy, isTarget: true),
1700 N2: Op->getOperand(Num: 2), N3: Op->getOperand(Num: 1));
1701 }
1702 case Intrinsic::mips_bmnz_v:
1703 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 3),
1704 N2: Op->getOperand(Num: 2), N3: Op->getOperand(Num: 1));
1705 case Intrinsic::mips_bmnzi_b:
1706 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: Op->getValueType(ResNo: 0),
1707 N1: lowerMSASplatImm(Op, ImmOp: 3, DAG), N2: Op->getOperand(Num: 2),
1708 N3: Op->getOperand(Num: 1));
1709 case Intrinsic::mips_bmz_v:
1710 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 3),
1711 N2: Op->getOperand(Num: 1), N3: Op->getOperand(Num: 2));
1712 case Intrinsic::mips_bmzi_b:
1713 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: Op->getValueType(ResNo: 0),
1714 N1: lowerMSASplatImm(Op, ImmOp: 3, DAG), N2: Op->getOperand(Num: 1),
1715 N3: Op->getOperand(Num: 2));
1716 case Intrinsic::mips_bneg_b:
1717 case Intrinsic::mips_bneg_h:
1718 case Intrinsic::mips_bneg_w:
1719 case Intrinsic::mips_bneg_d: {
1720 EVT VecTy = Op->getValueType(ResNo: 0);
1721 SDValue One = DAG.getConstant(Val: 1, DL, VT: VecTy);
1722
1723 return DAG.getNode(Opcode: ISD::XOR, DL, VT: VecTy, N1: Op->getOperand(Num: 1),
1724 N2: DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: One,
1725 N2: truncateVecElts(Op, DAG)));
1726 }
1727 case Intrinsic::mips_bnegi_b:
1728 case Intrinsic::mips_bnegi_h:
1729 case Intrinsic::mips_bnegi_w:
1730 case Intrinsic::mips_bnegi_d:
1731 return lowerMSABinaryBitImmIntr(Op, DAG, Opc: ISD::XOR, Imm: Op->getOperand(Num: 2),
1732 BigEndian: !Subtarget.isLittle());
1733 case Intrinsic::mips_bnz_b:
1734 case Intrinsic::mips_bnz_h:
1735 case Intrinsic::mips_bnz_w:
1736 case Intrinsic::mips_bnz_d:
1737 return DAG.getNode(Opcode: MipsISD::VALL_NONZERO, DL, VT: Op->getValueType(ResNo: 0),
1738 Operand: Op->getOperand(Num: 1));
1739 case Intrinsic::mips_bnz_v:
1740 return DAG.getNode(Opcode: MipsISD::VANY_NONZERO, DL, VT: Op->getValueType(ResNo: 0),
1741 Operand: Op->getOperand(Num: 1));
1742 case Intrinsic::mips_bsel_v:
1743 // bsel_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear)
1744 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: Op->getValueType(ResNo: 0),
1745 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 3),
1746 N3: Op->getOperand(Num: 2));
1747 case Intrinsic::mips_bseli_b:
1748 // bseli_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear)
1749 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: Op->getValueType(ResNo: 0),
1750 N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 3, DAG),
1751 N3: Op->getOperand(Num: 2));
1752 case Intrinsic::mips_bset_b:
1753 case Intrinsic::mips_bset_h:
1754 case Intrinsic::mips_bset_w:
1755 case Intrinsic::mips_bset_d: {
1756 EVT VecTy = Op->getValueType(ResNo: 0);
1757 SDValue One = DAG.getConstant(Val: 1, DL, VT: VecTy);
1758
1759 return DAG.getNode(Opcode: ISD::OR, DL, VT: VecTy, N1: Op->getOperand(Num: 1),
1760 N2: DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: One,
1761 N2: truncateVecElts(Op, DAG)));
1762 }
1763 case Intrinsic::mips_bseti_b:
1764 case Intrinsic::mips_bseti_h:
1765 case Intrinsic::mips_bseti_w:
1766 case Intrinsic::mips_bseti_d:
1767 return lowerMSABinaryBitImmIntr(Op, DAG, Opc: ISD::OR, Imm: Op->getOperand(Num: 2),
1768 BigEndian: !Subtarget.isLittle());
1769 case Intrinsic::mips_bz_b:
1770 case Intrinsic::mips_bz_h:
1771 case Intrinsic::mips_bz_w:
1772 case Intrinsic::mips_bz_d:
1773 return DAG.getNode(Opcode: MipsISD::VALL_ZERO, DL, VT: Op->getValueType(ResNo: 0),
1774 Operand: Op->getOperand(Num: 1));
1775 case Intrinsic::mips_bz_v:
1776 return DAG.getNode(Opcode: MipsISD::VANY_ZERO, DL, VT: Op->getValueType(ResNo: 0),
1777 Operand: Op->getOperand(Num: 1));
1778 case Intrinsic::mips_ceq_b:
1779 case Intrinsic::mips_ceq_h:
1780 case Intrinsic::mips_ceq_w:
1781 case Intrinsic::mips_ceq_d:
1782 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1783 RHS: Op->getOperand(Num: 2), Cond: ISD::SETEQ);
1784 case Intrinsic::mips_ceqi_b:
1785 case Intrinsic::mips_ceqi_h:
1786 case Intrinsic::mips_ceqi_w:
1787 case Intrinsic::mips_ceqi_d:
1788 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1789 RHS: lowerMSASplatImm(Op, ImmOp: 2, DAG, IsSigned: true), Cond: ISD::SETEQ);
1790 case Intrinsic::mips_cle_s_b:
1791 case Intrinsic::mips_cle_s_h:
1792 case Intrinsic::mips_cle_s_w:
1793 case Intrinsic::mips_cle_s_d:
1794 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1795 RHS: Op->getOperand(Num: 2), Cond: ISD::SETLE);
1796 case Intrinsic::mips_clei_s_b:
1797 case Intrinsic::mips_clei_s_h:
1798 case Intrinsic::mips_clei_s_w:
1799 case Intrinsic::mips_clei_s_d:
1800 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1801 RHS: lowerMSASplatImm(Op, ImmOp: 2, DAG, IsSigned: true), Cond: ISD::SETLE);
1802 case Intrinsic::mips_cle_u_b:
1803 case Intrinsic::mips_cle_u_h:
1804 case Intrinsic::mips_cle_u_w:
1805 case Intrinsic::mips_cle_u_d:
1806 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1807 RHS: Op->getOperand(Num: 2), Cond: ISD::SETULE);
1808 case Intrinsic::mips_clei_u_b:
1809 case Intrinsic::mips_clei_u_h:
1810 case Intrinsic::mips_clei_u_w:
1811 case Intrinsic::mips_clei_u_d:
1812 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1813 RHS: lowerMSASplatImm(Op, ImmOp: 2, DAG), Cond: ISD::SETULE);
1814 case Intrinsic::mips_clt_s_b:
1815 case Intrinsic::mips_clt_s_h:
1816 case Intrinsic::mips_clt_s_w:
1817 case Intrinsic::mips_clt_s_d:
1818 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1819 RHS: Op->getOperand(Num: 2), Cond: ISD::SETLT);
1820 case Intrinsic::mips_clti_s_b:
1821 case Intrinsic::mips_clti_s_h:
1822 case Intrinsic::mips_clti_s_w:
1823 case Intrinsic::mips_clti_s_d:
1824 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1825 RHS: lowerMSASplatImm(Op, ImmOp: 2, DAG, IsSigned: true), Cond: ISD::SETLT);
1826 case Intrinsic::mips_clt_u_b:
1827 case Intrinsic::mips_clt_u_h:
1828 case Intrinsic::mips_clt_u_w:
1829 case Intrinsic::mips_clt_u_d:
1830 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1831 RHS: Op->getOperand(Num: 2), Cond: ISD::SETULT);
1832 case Intrinsic::mips_clti_u_b:
1833 case Intrinsic::mips_clti_u_h:
1834 case Intrinsic::mips_clti_u_w:
1835 case Intrinsic::mips_clti_u_d:
1836 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1837 RHS: lowerMSASplatImm(Op, ImmOp: 2, DAG), Cond: ISD::SETULT);
1838 case Intrinsic::mips_copy_s_b:
1839 case Intrinsic::mips_copy_s_h:
1840 case Intrinsic::mips_copy_s_w:
1841 return lowerMSACopyIntr(Op, DAG, Opc: MipsISD::VEXTRACT_SEXT_ELT);
1842 case Intrinsic::mips_copy_s_d:
1843 if (Subtarget.hasMips64())
1844 // Lower directly into VEXTRACT_SEXT_ELT since i64 is legal on Mips64.
1845 return lowerMSACopyIntr(Op, DAG, Opc: MipsISD::VEXTRACT_SEXT_ELT);
1846 else {
1847 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type
1848 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
1849 return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: SDLoc(Op),
1850 VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
1851 N2: Op->getOperand(Num: 2));
1852 }
1853 case Intrinsic::mips_copy_u_b:
1854 case Intrinsic::mips_copy_u_h:
1855 case Intrinsic::mips_copy_u_w:
1856 return lowerMSACopyIntr(Op, DAG, Opc: MipsISD::VEXTRACT_ZEXT_ELT);
1857 case Intrinsic::mips_copy_u_d:
1858 if (Subtarget.hasMips64())
1859 // Lower directly into VEXTRACT_ZEXT_ELT since i64 is legal on Mips64.
1860 return lowerMSACopyIntr(Op, DAG, Opc: MipsISD::VEXTRACT_ZEXT_ELT);
1861 else {
1862 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type
1863 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
1864 // Note: When i64 is illegal, this results in copy_s.w instructions
1865 // instead of copy_u.w instructions. This makes no difference to the
1866 // behaviour since i64 is only illegal when the register file is 32-bit.
1867 return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: SDLoc(Op),
1868 VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
1869 N2: Op->getOperand(Num: 2));
1870 }
1871 case Intrinsic::mips_div_s_b:
1872 case Intrinsic::mips_div_s_h:
1873 case Intrinsic::mips_div_s_w:
1874 case Intrinsic::mips_div_s_d:
1875 return DAG.getNode(Opcode: ISD::SDIV, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
1876 N2: Op->getOperand(Num: 2));
1877 case Intrinsic::mips_div_u_b:
1878 case Intrinsic::mips_div_u_h:
1879 case Intrinsic::mips_div_u_w:
1880 case Intrinsic::mips_div_u_d:
1881 return DAG.getNode(Opcode: ISD::UDIV, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
1882 N2: Op->getOperand(Num: 2));
1883 case Intrinsic::mips_fadd_w:
1884 case Intrinsic::mips_fadd_d:
1885 // TODO: If intrinsics have fast-math-flags, propagate them.
1886 return DAG.getNode(Opcode: ISD::FADD, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
1887 N2: Op->getOperand(Num: 2));
1888 // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away
1889 case Intrinsic::mips_fceq_w:
1890 case Intrinsic::mips_fceq_d:
1891 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1892 RHS: Op->getOperand(Num: 2), Cond: ISD::SETOEQ);
1893 case Intrinsic::mips_fcle_w:
1894 case Intrinsic::mips_fcle_d:
1895 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1896 RHS: Op->getOperand(Num: 2), Cond: ISD::SETOLE);
1897 case Intrinsic::mips_fclt_w:
1898 case Intrinsic::mips_fclt_d:
1899 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1900 RHS: Op->getOperand(Num: 2), Cond: ISD::SETOLT);
1901 case Intrinsic::mips_fcne_w:
1902 case Intrinsic::mips_fcne_d:
1903 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1904 RHS: Op->getOperand(Num: 2), Cond: ISD::SETONE);
1905 case Intrinsic::mips_fcor_w:
1906 case Intrinsic::mips_fcor_d:
1907 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1908 RHS: Op->getOperand(Num: 2), Cond: ISD::SETO);
1909 case Intrinsic::mips_fcueq_w:
1910 case Intrinsic::mips_fcueq_d:
1911 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1912 RHS: Op->getOperand(Num: 2), Cond: ISD::SETUEQ);
1913 case Intrinsic::mips_fcule_w:
1914 case Intrinsic::mips_fcule_d:
1915 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1916 RHS: Op->getOperand(Num: 2), Cond: ISD::SETULE);
1917 case Intrinsic::mips_fcult_w:
1918 case Intrinsic::mips_fcult_d:
1919 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1920 RHS: Op->getOperand(Num: 2), Cond: ISD::SETULT);
1921 case Intrinsic::mips_fcun_w:
1922 case Intrinsic::mips_fcun_d:
1923 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1924 RHS: Op->getOperand(Num: 2), Cond: ISD::SETUO);
1925 case Intrinsic::mips_fcune_w:
1926 case Intrinsic::mips_fcune_d:
1927 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1928 RHS: Op->getOperand(Num: 2), Cond: ISD::SETUNE);
1929 case Intrinsic::mips_fdiv_w:
1930 case Intrinsic::mips_fdiv_d:
1931 // TODO: If intrinsics have fast-math-flags, propagate them.
1932 return DAG.getNode(Opcode: ISD::FDIV, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
1933 N2: Op->getOperand(Num: 2));
1934 case Intrinsic::mips_ffint_u_w:
1935 case Intrinsic::mips_ffint_u_d:
1936 return DAG.getNode(Opcode: ISD::UINT_TO_FP, DL, VT: Op->getValueType(ResNo: 0),
1937 Operand: Op->getOperand(Num: 1));
1938 case Intrinsic::mips_ffint_s_w:
1939 case Intrinsic::mips_ffint_s_d:
1940 return DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT: Op->getValueType(ResNo: 0),
1941 Operand: Op->getOperand(Num: 1));
1942 case Intrinsic::mips_fill_b:
1943 case Intrinsic::mips_fill_h:
1944 case Intrinsic::mips_fill_w:
1945 case Intrinsic::mips_fill_d: {
1946 EVT ResTy = Op->getValueType(ResNo: 0);
1947 SmallVector<SDValue, 16> Ops(ResTy.getVectorNumElements(),
1948 Op->getOperand(Num: 1));
1949
1950 // If ResTy is v2i64 then the type legalizer will break this node down into
1951 // an equivalent v4i32.
1952 return DAG.getBuildVector(VT: ResTy, DL, Ops);
1953 }
1954 case Intrinsic::mips_fexp2_w:
1955 case Intrinsic::mips_fexp2_d: {
1956 // TODO: If intrinsics have fast-math-flags, propagate them.
1957 EVT ResTy = Op->getValueType(ResNo: 0);
1958 return DAG.getNode(
1959 Opcode: ISD::FMUL, DL: SDLoc(Op), VT: ResTy, N1: Op->getOperand(Num: 1),
1960 N2: DAG.getNode(Opcode: ISD::FEXP2, DL: SDLoc(Op), VT: ResTy, Operand: Op->getOperand(Num: 2)));
1961 }
1962 case Intrinsic::mips_flog2_w:
1963 case Intrinsic::mips_flog2_d:
1964 return DAG.getNode(Opcode: ISD::FLOG2, DL, VT: Op->getValueType(ResNo: 0), Operand: Op->getOperand(Num: 1));
1965 case Intrinsic::mips_fmadd_w:
1966 case Intrinsic::mips_fmadd_d:
1967 return DAG.getNode(Opcode: ISD::FMA, DL: SDLoc(Op), VT: Op->getValueType(ResNo: 0),
1968 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2), N3: Op->getOperand(Num: 3));
1969 case Intrinsic::mips_fmul_w:
1970 case Intrinsic::mips_fmul_d:
1971 // TODO: If intrinsics have fast-math-flags, propagate them.
1972 return DAG.getNode(Opcode: ISD::FMUL, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
1973 N2: Op->getOperand(Num: 2));
1974 case Intrinsic::mips_fmsub_w:
1975 case Intrinsic::mips_fmsub_d: {
1976 // TODO: If intrinsics have fast-math-flags, propagate them.
1977 return DAG.getNode(Opcode: MipsISD::FMS, DL: SDLoc(Op), VT: Op->getValueType(ResNo: 0),
1978 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2), N3: Op->getOperand(Num: 3));
1979 }
1980 case Intrinsic::mips_frint_w:
1981 case Intrinsic::mips_frint_d:
1982 return DAG.getNode(Opcode: ISD::FRINT, DL, VT: Op->getValueType(ResNo: 0), Operand: Op->getOperand(Num: 1));
1983 case Intrinsic::mips_fsqrt_w:
1984 case Intrinsic::mips_fsqrt_d:
1985 return DAG.getNode(Opcode: ISD::FSQRT, DL, VT: Op->getValueType(ResNo: 0), Operand: Op->getOperand(Num: 1));
1986 case Intrinsic::mips_fsub_w:
1987 case Intrinsic::mips_fsub_d:
1988 // TODO: If intrinsics have fast-math-flags, propagate them.
1989 return DAG.getNode(Opcode: ISD::FSUB, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
1990 N2: Op->getOperand(Num: 2));
1991 case Intrinsic::mips_ftrunc_u_w:
1992 case Intrinsic::mips_ftrunc_u_d:
1993 return DAG.getNode(Opcode: ISD::FP_TO_UINT, DL, VT: Op->getValueType(ResNo: 0),
1994 Operand: Op->getOperand(Num: 1));
1995 case Intrinsic::mips_ftrunc_s_w:
1996 case Intrinsic::mips_ftrunc_s_d:
1997 return DAG.getNode(Opcode: ISD::FP_TO_SINT, DL, VT: Op->getValueType(ResNo: 0),
1998 Operand: Op->getOperand(Num: 1));
1999 case Intrinsic::mips_ilvev_b:
2000 case Intrinsic::mips_ilvev_h:
2001 case Intrinsic::mips_ilvev_w:
2002 case Intrinsic::mips_ilvev_d:
2003 return DAG.getNode(Opcode: MipsISD::ILVEV, DL, VT: Op->getValueType(ResNo: 0),
2004 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2));
2005 case Intrinsic::mips_ilvl_b:
2006 case Intrinsic::mips_ilvl_h:
2007 case Intrinsic::mips_ilvl_w:
2008 case Intrinsic::mips_ilvl_d:
2009 return DAG.getNode(Opcode: MipsISD::ILVL, DL, VT: Op->getValueType(ResNo: 0),
2010 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2));
2011 case Intrinsic::mips_ilvod_b:
2012 case Intrinsic::mips_ilvod_h:
2013 case Intrinsic::mips_ilvod_w:
2014 case Intrinsic::mips_ilvod_d:
2015 return DAG.getNode(Opcode: MipsISD::ILVOD, DL, VT: Op->getValueType(ResNo: 0),
2016 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2));
2017 case Intrinsic::mips_ilvr_b:
2018 case Intrinsic::mips_ilvr_h:
2019 case Intrinsic::mips_ilvr_w:
2020 case Intrinsic::mips_ilvr_d:
2021 return DAG.getNode(Opcode: MipsISD::ILVR, DL, VT: Op->getValueType(ResNo: 0),
2022 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2));
2023 case Intrinsic::mips_insert_b:
2024 case Intrinsic::mips_insert_h:
2025 case Intrinsic::mips_insert_w:
2026 case Intrinsic::mips_insert_d:
2027 return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(Op), VT: Op->getValueType(ResNo: 0),
2028 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 3), N3: Op->getOperand(Num: 2));
2029 case Intrinsic::mips_insve_b:
2030 case Intrinsic::mips_insve_h:
2031 case Intrinsic::mips_insve_w:
2032 case Intrinsic::mips_insve_d: {
2033 // Report an error for out of range values.
2034 int64_t Max;
2035 switch (Intrinsic) {
2036 case Intrinsic::mips_insve_b: Max = 15; break;
2037 case Intrinsic::mips_insve_h: Max = 7; break;
2038 case Intrinsic::mips_insve_w: Max = 3; break;
2039 case Intrinsic::mips_insve_d: Max = 1; break;
2040 default: llvm_unreachable("Unmatched intrinsic");
2041 }
2042 int64_t Value = cast<ConstantSDNode>(Val: Op->getOperand(Num: 2))->getSExtValue();
2043 if (Value < 0 || Value > Max)
2044 report_fatal_error(reason: "Immediate out of range");
2045 return DAG.getNode(Opcode: MipsISD::INSVE, DL, VT: Op->getValueType(ResNo: 0),
2046 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2), N3: Op->getOperand(Num: 3),
2047 N4: DAG.getConstant(Val: 0, DL, VT: MVT::i32));
2048 }
2049 case Intrinsic::mips_ldi_b:
2050 case Intrinsic::mips_ldi_h:
2051 case Intrinsic::mips_ldi_w:
2052 case Intrinsic::mips_ldi_d:
2053 return lowerMSASplatImm(Op, ImmOp: 1, DAG, IsSigned: true);
2054 case Intrinsic::mips_lsa:
2055 case Intrinsic::mips_dlsa: {
2056 EVT ResTy = Op->getValueType(ResNo: 0);
2057 return DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(Op), VT: ResTy, N1: Op->getOperand(Num: 1),
2058 N2: DAG.getNode(Opcode: ISD::SHL, DL: SDLoc(Op), VT: ResTy,
2059 N1: Op->getOperand(Num: 2), N2: Op->getOperand(Num: 3)));
2060 }
2061 case Intrinsic::mips_maddv_b:
2062 case Intrinsic::mips_maddv_h:
2063 case Intrinsic::mips_maddv_w:
2064 case Intrinsic::mips_maddv_d: {
2065 EVT ResTy = Op->getValueType(ResNo: 0);
2066 return DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(Op), VT: ResTy, N1: Op->getOperand(Num: 1),
2067 N2: DAG.getNode(Opcode: ISD::MUL, DL: SDLoc(Op), VT: ResTy,
2068 N1: Op->getOperand(Num: 2), N2: Op->getOperand(Num: 3)));
2069 }
2070 case Intrinsic::mips_max_s_b:
2071 case Intrinsic::mips_max_s_h:
2072 case Intrinsic::mips_max_s_w:
2073 case Intrinsic::mips_max_s_d:
2074 return DAG.getNode(Opcode: ISD::SMAX, DL, VT: Op->getValueType(ResNo: 0),
2075 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2));
2076 case Intrinsic::mips_max_u_b:
2077 case Intrinsic::mips_max_u_h:
2078 case Intrinsic::mips_max_u_w:
2079 case Intrinsic::mips_max_u_d:
2080 return DAG.getNode(Opcode: ISD::UMAX, DL, VT: Op->getValueType(ResNo: 0),
2081 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2));
2082 case Intrinsic::mips_maxi_s_b:
2083 case Intrinsic::mips_maxi_s_h:
2084 case Intrinsic::mips_maxi_s_w:
2085 case Intrinsic::mips_maxi_s_d:
2086 return DAG.getNode(Opcode: ISD::SMAX, DL, VT: Op->getValueType(ResNo: 0),
2087 N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG, IsSigned: true));
2088 case Intrinsic::mips_maxi_u_b:
2089 case Intrinsic::mips_maxi_u_h:
2090 case Intrinsic::mips_maxi_u_w:
2091 case Intrinsic::mips_maxi_u_d:
2092 return DAG.getNode(Opcode: ISD::UMAX, DL, VT: Op->getValueType(ResNo: 0),
2093 N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG));
2094 case Intrinsic::mips_min_s_b:
2095 case Intrinsic::mips_min_s_h:
2096 case Intrinsic::mips_min_s_w:
2097 case Intrinsic::mips_min_s_d:
2098 return DAG.getNode(Opcode: ISD::SMIN, DL, VT: Op->getValueType(ResNo: 0),
2099 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2));
2100 case Intrinsic::mips_min_u_b:
2101 case Intrinsic::mips_min_u_h:
2102 case Intrinsic::mips_min_u_w:
2103 case Intrinsic::mips_min_u_d:
2104 return DAG.getNode(Opcode: ISD::UMIN, DL, VT: Op->getValueType(ResNo: 0),
2105 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2));
2106 case Intrinsic::mips_mini_s_b:
2107 case Intrinsic::mips_mini_s_h:
2108 case Intrinsic::mips_mini_s_w:
2109 case Intrinsic::mips_mini_s_d:
2110 return DAG.getNode(Opcode: ISD::SMIN, DL, VT: Op->getValueType(ResNo: 0),
2111 N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG, IsSigned: true));
2112 case Intrinsic::mips_mini_u_b:
2113 case Intrinsic::mips_mini_u_h:
2114 case Intrinsic::mips_mini_u_w:
2115 case Intrinsic::mips_mini_u_d:
2116 return DAG.getNode(Opcode: ISD::UMIN, DL, VT: Op->getValueType(ResNo: 0),
2117 N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG));
2118 case Intrinsic::mips_mod_s_b:
2119 case Intrinsic::mips_mod_s_h:
2120 case Intrinsic::mips_mod_s_w:
2121 case Intrinsic::mips_mod_s_d:
2122 return DAG.getNode(Opcode: ISD::SREM, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
2123 N2: Op->getOperand(Num: 2));
2124 case Intrinsic::mips_mod_u_b:
2125 case Intrinsic::mips_mod_u_h:
2126 case Intrinsic::mips_mod_u_w:
2127 case Intrinsic::mips_mod_u_d:
2128 return DAG.getNode(Opcode: ISD::UREM, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
2129 N2: Op->getOperand(Num: 2));
2130 case Intrinsic::mips_mulv_b:
2131 case Intrinsic::mips_mulv_h:
2132 case Intrinsic::mips_mulv_w:
2133 case Intrinsic::mips_mulv_d:
2134 return DAG.getNode(Opcode: ISD::MUL, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
2135 N2: Op->getOperand(Num: 2));
2136 case Intrinsic::mips_msubv_b:
2137 case Intrinsic::mips_msubv_h:
2138 case Intrinsic::mips_msubv_w:
2139 case Intrinsic::mips_msubv_d: {
2140 EVT ResTy = Op->getValueType(ResNo: 0);
2141 return DAG.getNode(Opcode: ISD::SUB, DL: SDLoc(Op), VT: ResTy, N1: Op->getOperand(Num: 1),
2142 N2: DAG.getNode(Opcode: ISD::MUL, DL: SDLoc(Op), VT: ResTy,
2143 N1: Op->getOperand(Num: 2), N2: Op->getOperand(Num: 3)));
2144 }
2145 case Intrinsic::mips_nlzc_b:
2146 case Intrinsic::mips_nlzc_h:
2147 case Intrinsic::mips_nlzc_w:
2148 case Intrinsic::mips_nlzc_d:
2149 return DAG.getNode(Opcode: ISD::CTLZ, DL, VT: Op->getValueType(ResNo: 0), Operand: Op->getOperand(Num: 1));
2150 case Intrinsic::mips_nor_v: {
2151 SDValue Res = DAG.getNode(Opcode: ISD::OR, DL, VT: Op->getValueType(ResNo: 0),
2152 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2));
2153 return DAG.getNOT(DL, Val: Res, VT: Res->getValueType(ResNo: 0));
2154 }
2155 case Intrinsic::mips_nori_b: {
2156 SDValue Res = DAG.getNode(Opcode: ISD::OR, DL, VT: Op->getValueType(ResNo: 0),
2157 N1: Op->getOperand(Num: 1),
2158 N2: lowerMSASplatImm(Op, ImmOp: 2, DAG));
2159 return DAG.getNOT(DL, Val: Res, VT: Res->getValueType(ResNo: 0));
2160 }
2161 case Intrinsic::mips_or_v:
2162 return DAG.getNode(Opcode: ISD::OR, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
2163 N2: Op->getOperand(Num: 2));
2164 case Intrinsic::mips_ori_b:
2165 return DAG.getNode(Opcode: ISD::OR, DL, VT: Op->getValueType(ResNo: 0),
2166 N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG));
2167 case Intrinsic::mips_pckev_b:
2168 case Intrinsic::mips_pckev_h:
2169 case Intrinsic::mips_pckev_w:
2170 case Intrinsic::mips_pckev_d:
2171 return DAG.getNode(Opcode: MipsISD::PCKEV, DL, VT: Op->getValueType(ResNo: 0),
2172 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2));
2173 case Intrinsic::mips_pckod_b:
2174 case Intrinsic::mips_pckod_h:
2175 case Intrinsic::mips_pckod_w:
2176 case Intrinsic::mips_pckod_d:
2177 return DAG.getNode(Opcode: MipsISD::PCKOD, DL, VT: Op->getValueType(ResNo: 0),
2178 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2));
2179 case Intrinsic::mips_pcnt_b:
2180 case Intrinsic::mips_pcnt_h:
2181 case Intrinsic::mips_pcnt_w:
2182 case Intrinsic::mips_pcnt_d:
2183 return DAG.getNode(Opcode: ISD::CTPOP, DL, VT: Op->getValueType(ResNo: 0), Operand: Op->getOperand(Num: 1));
2184 case Intrinsic::mips_sat_s_b:
2185 case Intrinsic::mips_sat_s_h:
2186 case Intrinsic::mips_sat_s_w:
2187 case Intrinsic::mips_sat_s_d:
2188 case Intrinsic::mips_sat_u_b:
2189 case Intrinsic::mips_sat_u_h:
2190 case Intrinsic::mips_sat_u_w:
2191 case Intrinsic::mips_sat_u_d: {
2192 // Report an error for out of range values.
2193 int64_t Max;
2194 switch (Intrinsic) {
2195 case Intrinsic::mips_sat_s_b:
2196 case Intrinsic::mips_sat_u_b: Max = 7; break;
2197 case Intrinsic::mips_sat_s_h:
2198 case Intrinsic::mips_sat_u_h: Max = 15; break;
2199 case Intrinsic::mips_sat_s_w:
2200 case Intrinsic::mips_sat_u_w: Max = 31; break;
2201 case Intrinsic::mips_sat_s_d:
2202 case Intrinsic::mips_sat_u_d: Max = 63; break;
2203 default: llvm_unreachable("Unmatched intrinsic");
2204 }
2205 int64_t Value = cast<ConstantSDNode>(Val: Op->getOperand(Num: 2))->getSExtValue();
2206 if (Value < 0 || Value > Max)
2207 report_fatal_error(reason: "Immediate out of range");
2208 return SDValue();
2209 }
2210 case Intrinsic::mips_shf_b:
2211 case Intrinsic::mips_shf_h:
2212 case Intrinsic::mips_shf_w: {
2213 int64_t Value = cast<ConstantSDNode>(Val: Op->getOperand(Num: 2))->getSExtValue();
2214 if (Value < 0 || Value > 255)
2215 report_fatal_error(reason: "Immediate out of range");
2216 return DAG.getNode(Opcode: MipsISD::SHF, DL, VT: Op->getValueType(ResNo: 0),
2217 N1: Op->getOperand(Num: 2), N2: Op->getOperand(Num: 1));
2218 }
2219 case Intrinsic::mips_sldi_b:
2220 case Intrinsic::mips_sldi_h:
2221 case Intrinsic::mips_sldi_w:
2222 case Intrinsic::mips_sldi_d: {
2223 // Report an error for out of range values.
2224 int64_t Max;
2225 switch (Intrinsic) {
2226 case Intrinsic::mips_sldi_b: Max = 15; break;
2227 case Intrinsic::mips_sldi_h: Max = 7; break;
2228 case Intrinsic::mips_sldi_w: Max = 3; break;
2229 case Intrinsic::mips_sldi_d: Max = 1; break;
2230 default: llvm_unreachable("Unmatched intrinsic");
2231 }
2232 int64_t Value = cast<ConstantSDNode>(Val: Op->getOperand(Num: 3))->getSExtValue();
2233 if (Value < 0 || Value > Max)
2234 report_fatal_error(reason: "Immediate out of range");
2235 return SDValue();
2236 }
2237 case Intrinsic::mips_sll_b:
2238 case Intrinsic::mips_sll_h:
2239 case Intrinsic::mips_sll_w:
2240 case Intrinsic::mips_sll_d:
2241 return DAG.getNode(Opcode: ISD::SHL, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
2242 N2: truncateVecElts(Op, DAG));
2243 case Intrinsic::mips_slli_b:
2244 case Intrinsic::mips_slli_h:
2245 case Intrinsic::mips_slli_w:
2246 case Intrinsic::mips_slli_d:
2247 return DAG.getNode(Opcode: ISD::SHL, DL, VT: Op->getValueType(ResNo: 0),
2248 N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG));
2249 case Intrinsic::mips_splat_b:
2250 case Intrinsic::mips_splat_h:
2251 case Intrinsic::mips_splat_w:
2252 case Intrinsic::mips_splat_d:
2253 // We can't lower via VECTOR_SHUFFLE because it requires constant shuffle
2254 // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because
2255 // EXTRACT_VECTOR_ELT can't extract i64's on MIPS32.
2256 // Instead we lower to MipsISD::VSHF and match from there.
2257 return DAG.getNode(Opcode: MipsISD::VSHF, DL, VT: Op->getValueType(ResNo: 0),
2258 N1: lowerMSASplatZExt(Op, OpNr: 2, DAG), N2: Op->getOperand(Num: 1),
2259 N3: Op->getOperand(Num: 1));
2260 case Intrinsic::mips_splati_b:
2261 case Intrinsic::mips_splati_h:
2262 case Intrinsic::mips_splati_w:
2263 case Intrinsic::mips_splati_d:
2264 return DAG.getNode(Opcode: MipsISD::VSHF, DL, VT: Op->getValueType(ResNo: 0),
2265 N1: lowerMSASplatImm(Op, ImmOp: 2, DAG), N2: Op->getOperand(Num: 1),
2266 N3: Op->getOperand(Num: 1));
2267 case Intrinsic::mips_sra_b:
2268 case Intrinsic::mips_sra_h:
2269 case Intrinsic::mips_sra_w:
2270 case Intrinsic::mips_sra_d:
2271 return DAG.getNode(Opcode: ISD::SRA, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
2272 N2: truncateVecElts(Op, DAG));
2273 case Intrinsic::mips_srai_b:
2274 case Intrinsic::mips_srai_h:
2275 case Intrinsic::mips_srai_w:
2276 case Intrinsic::mips_srai_d:
2277 return DAG.getNode(Opcode: ISD::SRA, DL, VT: Op->getValueType(ResNo: 0),
2278 N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG));
2279 case Intrinsic::mips_srari_b:
2280 case Intrinsic::mips_srari_h:
2281 case Intrinsic::mips_srari_w:
2282 case Intrinsic::mips_srari_d: {
2283 // Report an error for out of range values.
2284 int64_t Max;
2285 switch (Intrinsic) {
2286 case Intrinsic::mips_srari_b: Max = 7; break;
2287 case Intrinsic::mips_srari_h: Max = 15; break;
2288 case Intrinsic::mips_srari_w: Max = 31; break;
2289 case Intrinsic::mips_srari_d: Max = 63; break;
2290 default: llvm_unreachable("Unmatched intrinsic");
2291 }
2292 int64_t Value = cast<ConstantSDNode>(Val: Op->getOperand(Num: 2))->getSExtValue();
2293 if (Value < 0 || Value > Max)
2294 report_fatal_error(reason: "Immediate out of range");
2295 return SDValue();
2296 }
2297 case Intrinsic::mips_srl_b:
2298 case Intrinsic::mips_srl_h:
2299 case Intrinsic::mips_srl_w:
2300 case Intrinsic::mips_srl_d:
2301 return DAG.getNode(Opcode: ISD::SRL, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
2302 N2: truncateVecElts(Op, DAG));
2303 case Intrinsic::mips_srli_b:
2304 case Intrinsic::mips_srli_h:
2305 case Intrinsic::mips_srli_w:
2306 case Intrinsic::mips_srli_d:
2307 return DAG.getNode(Opcode: ISD::SRL, DL, VT: Op->getValueType(ResNo: 0),
2308 N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG));
2309 case Intrinsic::mips_srlri_b:
2310 case Intrinsic::mips_srlri_h:
2311 case Intrinsic::mips_srlri_w:
2312 case Intrinsic::mips_srlri_d: {
2313 // Report an error for out of range values.
2314 int64_t Max;
2315 switch (Intrinsic) {
2316 case Intrinsic::mips_srlri_b: Max = 7; break;
2317 case Intrinsic::mips_srlri_h: Max = 15; break;
2318 case Intrinsic::mips_srlri_w: Max = 31; break;
2319 case Intrinsic::mips_srlri_d: Max = 63; break;
2320 default: llvm_unreachable("Unmatched intrinsic");
2321 }
2322 int64_t Value = cast<ConstantSDNode>(Val: Op->getOperand(Num: 2))->getSExtValue();
2323 if (Value < 0 || Value > Max)
2324 report_fatal_error(reason: "Immediate out of range");
2325 return SDValue();
2326 }
2327 case Intrinsic::mips_subv_b:
2328 case Intrinsic::mips_subv_h:
2329 case Intrinsic::mips_subv_w:
2330 case Intrinsic::mips_subv_d:
2331 return DAG.getNode(Opcode: ISD::SUB, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
2332 N2: Op->getOperand(Num: 2));
2333 case Intrinsic::mips_subvi_b:
2334 case Intrinsic::mips_subvi_h:
2335 case Intrinsic::mips_subvi_w:
2336 case Intrinsic::mips_subvi_d:
2337 return DAG.getNode(Opcode: ISD::SUB, DL, VT: Op->getValueType(ResNo: 0),
2338 N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG));
2339 case Intrinsic::mips_vshf_b:
2340 case Intrinsic::mips_vshf_h:
2341 case Intrinsic::mips_vshf_w:
2342 case Intrinsic::mips_vshf_d:
2343 return DAG.getNode(Opcode: MipsISD::VSHF, DL, VT: Op->getValueType(ResNo: 0),
2344 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2), N3: Op->getOperand(Num: 3));
2345 case Intrinsic::mips_xor_v:
2346 return DAG.getNode(Opcode: ISD::XOR, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
2347 N2: Op->getOperand(Num: 2));
2348 case Intrinsic::mips_xori_b:
2349 return DAG.getNode(Opcode: ISD::XOR, DL, VT: Op->getValueType(ResNo: 0),
2350 N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG));
2351 case Intrinsic::thread_pointer: {
2352 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
2353 return DAG.getNode(Opcode: MipsISD::ThreadPointer, DL, VT: PtrVT);
2354 }
2355 }
2356}
2357
2358static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr,
2359 const MipsSubtarget &Subtarget) {
2360 SDLoc DL(Op);
2361 SDValue ChainIn = Op->getOperand(Num: 0);
2362 SDValue Address = Op->getOperand(Num: 2);
2363 SDValue Offset = Op->getOperand(Num: 3);
2364 EVT ResTy = Op->getValueType(ResNo: 0);
2365 EVT PtrTy = Address->getValueType(ResNo: 0);
2366
2367 // For N64 addresses have the underlying type MVT::i64. This intrinsic
2368 // however takes an i32 signed constant offset. The actual type of the
2369 // intrinsic is a scaled signed i10.
2370 if (Subtarget.isABI_N64())
2371 Offset = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: PtrTy, Operand: Offset);
2372
2373 Address = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrTy, N1: Address, N2: Offset);
2374 return DAG.getLoad(VT: ResTy, dl: DL, Chain: ChainIn, Ptr: Address, PtrInfo: MachinePointerInfo(),
2375 Alignment: Align(16));
2376}
2377
2378SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
2379 SelectionDAG &DAG) const {
2380 unsigned Intr = Op->getConstantOperandVal(Num: 1);
2381 switch (Intr) {
2382 default:
2383 return SDValue();
2384 case Intrinsic::mips_extp:
2385 return lowerDSPIntr(Op, DAG, Opc: MipsISD::EXTP);
2386 case Intrinsic::mips_extpdp:
2387 return lowerDSPIntr(Op, DAG, Opc: MipsISD::EXTPDP);
2388 case Intrinsic::mips_extr_w:
2389 return lowerDSPIntr(Op, DAG, Opc: MipsISD::EXTR_W);
2390 case Intrinsic::mips_extr_r_w:
2391 return lowerDSPIntr(Op, DAG, Opc: MipsISD::EXTR_R_W);
2392 case Intrinsic::mips_extr_rs_w:
2393 return lowerDSPIntr(Op, DAG, Opc: MipsISD::EXTR_RS_W);
2394 case Intrinsic::mips_extr_s_h:
2395 return lowerDSPIntr(Op, DAG, Opc: MipsISD::EXTR_S_H);
2396 case Intrinsic::mips_mthlip:
2397 return lowerDSPIntr(Op, DAG, Opc: MipsISD::MTHLIP);
2398 case Intrinsic::mips_mulsaq_s_w_ph:
2399 return lowerDSPIntr(Op, DAG, Opc: MipsISD::MULSAQ_S_W_PH);
2400 case Intrinsic::mips_maq_s_w_phl:
2401 return lowerDSPIntr(Op, DAG, Opc: MipsISD::MAQ_S_W_PHL);
2402 case Intrinsic::mips_maq_s_w_phr:
2403 return lowerDSPIntr(Op, DAG, Opc: MipsISD::MAQ_S_W_PHR);
2404 case Intrinsic::mips_maq_sa_w_phl:
2405 return lowerDSPIntr(Op, DAG, Opc: MipsISD::MAQ_SA_W_PHL);
2406 case Intrinsic::mips_maq_sa_w_phr:
2407 return lowerDSPIntr(Op, DAG, Opc: MipsISD::MAQ_SA_W_PHR);
2408 case Intrinsic::mips_dpaq_s_w_ph:
2409 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPAQ_S_W_PH);
2410 case Intrinsic::mips_dpsq_s_w_ph:
2411 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPSQ_S_W_PH);
2412 case Intrinsic::mips_dpaq_sa_l_w:
2413 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPAQ_SA_L_W);
2414 case Intrinsic::mips_dpsq_sa_l_w:
2415 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPSQ_SA_L_W);
2416 case Intrinsic::mips_dpaqx_s_w_ph:
2417 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPAQX_S_W_PH);
2418 case Intrinsic::mips_dpaqx_sa_w_ph:
2419 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPAQX_SA_W_PH);
2420 case Intrinsic::mips_dpsqx_s_w_ph:
2421 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPSQX_S_W_PH);
2422 case Intrinsic::mips_dpsqx_sa_w_ph:
2423 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPSQX_SA_W_PH);
2424 case Intrinsic::mips_ld_b:
2425 case Intrinsic::mips_ld_h:
2426 case Intrinsic::mips_ld_w:
2427 case Intrinsic::mips_ld_d:
2428 return lowerMSALoadIntr(Op, DAG, Intr, Subtarget);
2429 }
2430}
2431
2432static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr,
2433 const MipsSubtarget &Subtarget) {
2434 SDLoc DL(Op);
2435 SDValue ChainIn = Op->getOperand(Num: 0);
2436 SDValue Value = Op->getOperand(Num: 2);
2437 SDValue Address = Op->getOperand(Num: 3);
2438 SDValue Offset = Op->getOperand(Num: 4);
2439 EVT PtrTy = Address->getValueType(ResNo: 0);
2440
2441 // For N64 addresses have the underlying type MVT::i64. This intrinsic
2442 // however takes an i32 signed constant offset. The actual type of the
2443 // intrinsic is a scaled signed i10.
2444 if (Subtarget.isABI_N64())
2445 Offset = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: PtrTy, Operand: Offset);
2446
2447 Address = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrTy, N1: Address, N2: Offset);
2448
2449 return DAG.getStore(Chain: ChainIn, dl: DL, Val: Value, Ptr: Address, PtrInfo: MachinePointerInfo(),
2450 Alignment: Align(16));
2451}
2452
2453SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op,
2454 SelectionDAG &DAG) const {
2455 unsigned Intr = Op->getConstantOperandVal(Num: 1);
2456 switch (Intr) {
2457 default:
2458 return SDValue();
2459 case Intrinsic::mips_st_b:
2460 case Intrinsic::mips_st_h:
2461 case Intrinsic::mips_st_w:
2462 case Intrinsic::mips_st_d:
2463 return lowerMSAStoreIntr(Op, DAG, Intr, Subtarget);
2464 }
2465}
2466
2467// Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT.
2468//
2469// The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We
2470// choose to sign-extend but we could have equally chosen zero-extend. The
2471// DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT
2472// result into this node later (possibly changing it to a zero-extend in the
2473// process).
2474SDValue MipsSETargetLowering::
2475lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
2476 SDLoc DL(Op);
2477 EVT ResTy = Op->getValueType(ResNo: 0);
2478 SDValue Op0 = Op->getOperand(Num: 0);
2479 EVT VecTy = Op0->getValueType(ResNo: 0);
2480
2481 if (!VecTy.is128BitVector())
2482 return SDValue();
2483
2484 if (ResTy.isInteger()) {
2485 SDValue Op1 = Op->getOperand(Num: 1);
2486 EVT EltTy = VecTy.getVectorElementType();
2487 return DAG.getNode(Opcode: MipsISD::VEXTRACT_SEXT_ELT, DL, VT: ResTy, N1: Op0, N2: Op1,
2488 N3: DAG.getValueType(EltTy));
2489 }
2490
2491 return Op;
2492}
2493
2494static bool isConstantOrUndef(const SDValue Op) {
2495 if (Op->isUndef())
2496 return true;
2497 if (isa<ConstantSDNode>(Val: Op))
2498 return true;
2499 if (isa<ConstantFPSDNode>(Val: Op))
2500 return true;
2501 return false;
2502}
2503
2504static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
2505 for (unsigned i = 0; i < Op->getNumOperands(); ++i)
2506 if (isConstantOrUndef(Op: Op->getOperand(Num: i)))
2507 return true;
2508 return false;
2509}
2510
2511// Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the
2512// backend.
2513//
2514// Lowers according to the following rules:
2515// - Constant splats are legal as-is as long as the SplatBitSize is a power of
2516// 2 less than or equal to 64 and the value fits into a signed 10-bit
2517// immediate
2518// - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize
2519// is a power of 2 less than or equal to 64 and the value does not fit into a
2520// signed 10-bit immediate
2521// - Non-constant splats are legal as-is.
2522// - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT.
2523// - All others are illegal and must be expanded.
SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                                SelectionDAG &DAG) const {
  BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Val&: Op);
  EVT ResTy = Op->getValueType(ResNo: 0);
  SDLoc DL(Op);
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  // Custom lowering only applies to MSA's 128-bit vectors.
  if (!Subtarget.hasMSA() || !ResTy.is128BitVector())
    return SDValue();

  // Try to recognise a constant splat, allowing elements as small as 8 bits;
  // endianness affects how the splat bits are assembled.
  if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
                            HasAnyUndefs, MinSplatBits: 8,
                            isBigEndian: !Subtarget.isLittle()) && SplatBitSize <= 64) {
    // We can only cope with 8, 16, 32, or 64-bit elements
    if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
        SplatBitSize != 64)
      return SDValue();

    // If the value isn't an integer type we will have to bitcast
    // from an integer type first. Also, if there are any undefs, we must
    // lower them to defined values first.
    if (ResTy.isInteger() && !HasAnyUndefs)
      return Op;

    EVT ViaVecTy;

    // Pick the integer vector type whose element width matches the splat.
    switch (SplatBitSize) {
    default:
      return SDValue();
    case 8:
      ViaVecTy = MVT::v16i8;
      break;
    case 16:
      ViaVecTy = MVT::v8i16;
      break;
    case 32:
      ViaVecTy = MVT::v4i32;
      break;
    case 64:
      // There's no fill.d to fall back on for 64-bit values
      return SDValue();
    }

    // SelectionDAG::getConstant will promote SplatValue appropriately.
    SDValue Result = DAG.getConstant(Val: SplatValue, DL, VT: ViaVecTy);

    // Bitcast to the type we originally wanted
    if (ViaVecTy != ResTy)
      Result = DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(Node), VT: ResTy, Operand: Result);

    return Result;
  } else if (DAG.isSplatValue(V: Op, /* AllowUndefs */ false))
    // Non-constant splats are legal as-is.
    return Op;
  else if (!isConstantOrUndefBUILD_VECTOR(Op: Node)) {
    // Use INSERT_VECTOR_ELT operations rather than expand to stores.
    // The resulting code is the same length as the expansion, but it doesn't
    // use memory operations
    EVT ResTy = Node->getValueType(ResNo: 0);

    assert(ResTy.isVector());

    unsigned NumElts = ResTy.getVectorNumElements();
    SDValue Vector = DAG.getUNDEF(VT: ResTy);
    // Insert each operand in turn, starting from an UNDEF vector.
    for (unsigned i = 0; i < NumElts; ++i) {
      Vector = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: ResTy, N1: Vector,
                           N2: Node->getOperand(Num: i),
                           N3: DAG.getConstant(Val: i, DL, VT: MVT::i32));
    }
    return Vector;
  }

  // Constant, non-splat BUILD_VECTORs fall through to the default expansion.
  return SDValue();
}
2599
2600// Lower VECTOR_SHUFFLE into SHF (if possible).
2601//
2602// SHF splits the vector into blocks of four elements, then shuffles these
2603// elements according to a <4 x i2> constant (encoded as an integer immediate).
2604//
2605// It is therefore possible to lower into SHF when the mask takes the form:
2606// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
2607// When undef's appear they are treated as if they were whatever value is
2608// necessary in order to fit the above forms.
2609//
2610// For example:
2611// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
2612// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
2613// i32 7, i32 6, i32 5, i32 4>
2614// is lowered to:
2615// (SHF_H $w0, $w1, 27)
2616// where the 27 comes from:
2617// 3 + (2 << 2) + (1 << 4) + (0 << 6)
2618static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy,
2619 SmallVector<int, 16> Indices,
2620 SelectionDAG &DAG) {
2621 int SHFIndices[4] = { -1, -1, -1, -1 };
2622
2623 if (Indices.size() < 4)
2624 return SDValue();
2625
2626 for (unsigned i = 0; i < 4; ++i) {
2627 for (unsigned j = i; j < Indices.size(); j += 4) {
2628 int Idx = Indices[j];
2629
2630 // Convert from vector index to 4-element subvector index
2631 // If an index refers to an element outside of the subvector then give up
2632 if (Idx != -1) {
2633 Idx -= 4 * (j / 4);
2634 if (Idx < 0 || Idx >= 4)
2635 return SDValue();
2636 }
2637
2638 // If the mask has an undef, replace it with the current index.
2639 // Note that it might still be undef if the current index is also undef
2640 if (SHFIndices[i] == -1)
2641 SHFIndices[i] = Idx;
2642
2643 // Check that non-undef values are the same as in the mask. If they
2644 // aren't then give up
2645 if (!(Idx == -1 || Idx == SHFIndices[i]))
2646 return SDValue();
2647 }
2648 }
2649
2650 // Calculate the immediate. Replace any remaining undefs with zero
2651 APInt Imm(32, 0);
2652 for (int i = 3; i >= 0; --i) {
2653 int Idx = SHFIndices[i];
2654
2655 if (Idx == -1)
2656 Idx = 0;
2657
2658 Imm <<= 2;
2659 Imm |= Idx & 0x3;
2660 }
2661
2662 SDLoc DL(Op);
2663 return DAG.getNode(Opcode: MipsISD::SHF, DL, VT: ResTy,
2664 N1: DAG.getTargetConstant(Val: Imm, DL, VT: MVT::i32),
2665 N2: Op->getOperand(Num: 0));
2666}
2667
2668/// Determine whether a range fits a regular pattern of values.
2669/// This function accounts for the possibility of jumping over the End iterator.
2670template <typename ValType>
2671static bool
2672fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
2673 unsigned CheckStride,
2674 typename SmallVectorImpl<ValType>::const_iterator End,
2675 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
2676 auto &I = Begin;
2677
2678 while (I != End) {
2679 if (*I != -1 && *I != ExpectedIndex)
2680 return false;
2681 ExpectedIndex += ExpectedIndexStride;
2682
2683 // Incrementing past End is undefined behaviour so we must increment one
2684 // step at a time and check for End at each step.
2685 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
2686 ; // Empty loop body.
2687 }
2688 return true;
2689}
2690
2691// Determine whether VECTOR_SHUFFLE is a SPLATI.
2692//
2693// It is a SPLATI when the mask is:
2694// <x, x, x, ...>
2695// where x is any valid index.
2696//
2697// When undef's appear in the mask they are treated as if they were whatever
2698// value is necessary in order to fit the above form.
2699static bool isVECTOR_SHUFFLE_SPLATI(SDValue Op, EVT ResTy,
2700 SmallVector<int, 16> Indices,
2701 SelectionDAG &DAG) {
2702 assert((Indices.size() % 2) == 0);
2703
2704 int SplatIndex = -1;
2705 for (const auto &V : Indices) {
2706 if (V != -1) {
2707 SplatIndex = V;
2708 break;
2709 }
2710 }
2711
2712 return fitsRegularPattern<int>(Begin: Indices.begin(), CheckStride: 1, End: Indices.end(), ExpectedIndex: SplatIndex,
2713 ExpectedIndexStride: 0);
2714}
2715
2716// Lower VECTOR_SHUFFLE into ILVEV (if possible).
2717//
2718// ILVEV interleaves the even elements from each vector.
2719//
2720// It is possible to lower into ILVEV when the mask consists of two of the
2721// following forms interleaved:
2722// <0, 2, 4, ...>
2723// <n, n+2, n+4, ...>
2724// where n is the number of elements in the vector.
2725// For example:
2726// <0, 0, 2, 2, 4, 4, ...>
2727// <0, n, 2, n+2, 4, n+4, ...>
2728//
2729// When undef's appear in the mask they are treated as if they were whatever
2730// value is necessary in order to fit the above forms.
2731static SDValue lowerVECTOR_SHUFFLE_ILVEV(SDValue Op, EVT ResTy,
2732 SmallVector<int, 16> Indices,
2733 SelectionDAG &DAG) {
2734 assert((Indices.size() % 2) == 0);
2735
2736 SDValue Wt;
2737 SDValue Ws;
2738 const auto &Begin = Indices.begin();
2739 const auto &End = Indices.end();
2740
2741 // Check even elements are taken from the even elements of one half or the
2742 // other and pick an operand accordingly.
2743 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 2))
2744 Wt = Op->getOperand(Num: 0);
2745 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Indices.size(), ExpectedIndexStride: 2))
2746 Wt = Op->getOperand(Num: 1);
2747 else
2748 return SDValue();
2749
2750 // Check odd elements are taken from the even elements of one half or the
2751 // other and pick an operand accordingly.
2752 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 2))
2753 Ws = Op->getOperand(Num: 0);
2754 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Indices.size(), ExpectedIndexStride: 2))
2755 Ws = Op->getOperand(Num: 1);
2756 else
2757 return SDValue();
2758
2759 return DAG.getNode(Opcode: MipsISD::ILVEV, DL: SDLoc(Op), VT: ResTy, N1: Ws, N2: Wt);
2760}
2761
2762// Lower VECTOR_SHUFFLE into ILVOD (if possible).
2763//
2764// ILVOD interleaves the odd elements from each vector.
2765//
2766// It is possible to lower into ILVOD when the mask consists of two of the
2767// following forms interleaved:
2768// <1, 3, 5, ...>
2769// <n+1, n+3, n+5, ...>
2770// where n is the number of elements in the vector.
2771// For example:
2772// <1, 1, 3, 3, 5, 5, ...>
2773// <1, n+1, 3, n+3, 5, n+5, ...>
2774//
2775// When undef's appear in the mask they are treated as if they were whatever
2776// value is necessary in order to fit the above forms.
2777static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy,
2778 SmallVector<int, 16> Indices,
2779 SelectionDAG &DAG) {
2780 assert((Indices.size() % 2) == 0);
2781
2782 SDValue Wt;
2783 SDValue Ws;
2784 const auto &Begin = Indices.begin();
2785 const auto &End = Indices.end();
2786
2787 // Check even elements are taken from the odd elements of one half or the
2788 // other and pick an operand accordingly.
2789 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 1, ExpectedIndexStride: 2))
2790 Wt = Op->getOperand(Num: 0);
2791 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Indices.size() + 1, ExpectedIndexStride: 2))
2792 Wt = Op->getOperand(Num: 1);
2793 else
2794 return SDValue();
2795
2796 // Check odd elements are taken from the odd elements of one half or the
2797 // other and pick an operand accordingly.
2798 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 1, ExpectedIndexStride: 2))
2799 Ws = Op->getOperand(Num: 0);
2800 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Indices.size() + 1, ExpectedIndexStride: 2))
2801 Ws = Op->getOperand(Num: 1);
2802 else
2803 return SDValue();
2804
2805 return DAG.getNode(Opcode: MipsISD::ILVOD, DL: SDLoc(Op), VT: ResTy, N1: Ws, N2: Wt);
2806}
2807
2808// Lower VECTOR_SHUFFLE into ILVR (if possible).
2809//
2810// ILVR interleaves consecutive elements from the right (lowest-indexed) half of
2811// each vector.
2812//
2813// It is possible to lower into ILVR when the mask consists of two of the
2814// following forms interleaved:
2815// <0, 1, 2, ...>
2816// <n, n+1, n+2, ...>
2817// where n is the number of elements in the vector.
2818// For example:
2819// <0, 0, 1, 1, 2, 2, ...>
2820// <0, n, 1, n+1, 2, n+2, ...>
2821//
2822// When undef's appear in the mask they are treated as if they were whatever
2823// value is necessary in order to fit the above forms.
2824static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy,
2825 SmallVector<int, 16> Indices,
2826 SelectionDAG &DAG) {
2827 assert((Indices.size() % 2) == 0);
2828
2829 SDValue Wt;
2830 SDValue Ws;
2831 const auto &Begin = Indices.begin();
2832 const auto &End = Indices.end();
2833
2834 // Check even elements are taken from the right (lowest-indexed) elements of
2835 // one half or the other and pick an operand accordingly.
2836 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 1))
2837 Wt = Op->getOperand(Num: 0);
2838 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Indices.size(), ExpectedIndexStride: 1))
2839 Wt = Op->getOperand(Num: 1);
2840 else
2841 return SDValue();
2842
2843 // Check odd elements are taken from the right (lowest-indexed) elements of
2844 // one half or the other and pick an operand accordingly.
2845 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 1))
2846 Ws = Op->getOperand(Num: 0);
2847 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Indices.size(), ExpectedIndexStride: 1))
2848 Ws = Op->getOperand(Num: 1);
2849 else
2850 return SDValue();
2851
2852 return DAG.getNode(Opcode: MipsISD::ILVR, DL: SDLoc(Op), VT: ResTy, N1: Ws, N2: Wt);
2853}
2854
2855// Lower VECTOR_SHUFFLE into ILVL (if possible).
2856//
2857// ILVL interleaves consecutive elements from the left (highest-indexed) half
2858// of each vector.
2859//
2860// It is possible to lower into ILVL when the mask consists of two of the
2861// following forms interleaved:
2862// <x, x+1, x+2, ...>
2863// <n+x, n+x+1, n+x+2, ...>
2864// where n is the number of elements in the vector and x is half n.
2865// For example:
2866// <x, x, x+1, x+1, x+2, x+2, ...>
2867// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
2868//
2869// When undef's appear in the mask they are treated as if they were whatever
2870// value is necessary in order to fit the above forms.
2871static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy,
2872 SmallVector<int, 16> Indices,
2873 SelectionDAG &DAG) {
2874 assert((Indices.size() % 2) == 0);
2875
2876 unsigned HalfSize = Indices.size() / 2;
2877 SDValue Wt;
2878 SDValue Ws;
2879 const auto &Begin = Indices.begin();
2880 const auto &End = Indices.end();
2881
2882 // Check even elements are taken from the left (highest-indexed) elements of
2883 // one half or the other and pick an operand accordingly.
2884 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1))
2885 Wt = Op->getOperand(Num: 0);
2886 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Indices.size() + HalfSize, ExpectedIndexStride: 1))
2887 Wt = Op->getOperand(Num: 1);
2888 else
2889 return SDValue();
2890
2891 // Check odd elements are taken from the left (highest-indexed) elements of
2892 // one half or the other and pick an operand accordingly.
2893 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1))
2894 Ws = Op->getOperand(Num: 0);
2895 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Indices.size() + HalfSize,
2896 ExpectedIndexStride: 1))
2897 Ws = Op->getOperand(Num: 1);
2898 else
2899 return SDValue();
2900
2901 return DAG.getNode(Opcode: MipsISD::ILVL, DL: SDLoc(Op), VT: ResTy, N1: Ws, N2: Wt);
2902}
2903
2904// Lower VECTOR_SHUFFLE into PCKEV (if possible).
2905//
2906// PCKEV copies the even elements of each vector into the result vector.
2907//
2908// It is possible to lower into PCKEV when the mask consists of two of the
2909// following forms concatenated:
2910// <0, 2, 4, ...>
2911// <n, n+2, n+4, ...>
2912// where n is the number of elements in the vector.
2913// For example:
2914// <0, 2, 4, ..., 0, 2, 4, ...>
2915// <0, 2, 4, ..., n, n+2, n+4, ...>
2916//
2917// When undef's appear in the mask they are treated as if they were whatever
2918// value is necessary in order to fit the above forms.
2919static SDValue lowerVECTOR_SHUFFLE_PCKEV(SDValue Op, EVT ResTy,
2920 SmallVector<int, 16> Indices,
2921 SelectionDAG &DAG) {
2922 assert((Indices.size() % 2) == 0);
2923
2924 SDValue Wt;
2925 SDValue Ws;
2926 const auto &Begin = Indices.begin();
2927 const auto &Mid = Indices.begin() + Indices.size() / 2;
2928 const auto &End = Indices.end();
2929
2930 if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: 0, ExpectedIndexStride: 2))
2931 Wt = Op->getOperand(Num: 0);
2932 else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: Indices.size(), ExpectedIndexStride: 2))
2933 Wt = Op->getOperand(Num: 1);
2934 else
2935 return SDValue();
2936
2937 if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: 0, ExpectedIndexStride: 2))
2938 Ws = Op->getOperand(Num: 0);
2939 else if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: Indices.size(), ExpectedIndexStride: 2))
2940 Ws = Op->getOperand(Num: 1);
2941 else
2942 return SDValue();
2943
2944 return DAG.getNode(Opcode: MipsISD::PCKEV, DL: SDLoc(Op), VT: ResTy, N1: Ws, N2: Wt);
2945}
2946
2947// Lower VECTOR_SHUFFLE into PCKOD (if possible).
2948//
2949// PCKOD copies the odd elements of each vector into the result vector.
2950//
2951// It is possible to lower into PCKOD when the mask consists of two of the
2952// following forms concatenated:
2953// <1, 3, 5, ...>
2954// <n+1, n+3, n+5, ...>
2955// where n is the number of elements in the vector.
2956// For example:
2957// <1, 3, 5, ..., 1, 3, 5, ...>
2958// <1, 3, 5, ..., n+1, n+3, n+5, ...>
2959//
2960// When undef's appear in the mask they are treated as if they were whatever
2961// value is necessary in order to fit the above forms.
2962static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy,
2963 SmallVector<int, 16> Indices,
2964 SelectionDAG &DAG) {
2965 assert((Indices.size() % 2) == 0);
2966
2967 SDValue Wt;
2968 SDValue Ws;
2969 const auto &Begin = Indices.begin();
2970 const auto &Mid = Indices.begin() + Indices.size() / 2;
2971 const auto &End = Indices.end();
2972
2973 if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: 1, ExpectedIndexStride: 2))
2974 Wt = Op->getOperand(Num: 0);
2975 else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: Indices.size() + 1, ExpectedIndexStride: 2))
2976 Wt = Op->getOperand(Num: 1);
2977 else
2978 return SDValue();
2979
2980 if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: 1, ExpectedIndexStride: 2))
2981 Ws = Op->getOperand(Num: 0);
2982 else if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: Indices.size() + 1, ExpectedIndexStride: 2))
2983 Ws = Op->getOperand(Num: 1);
2984 else
2985 return SDValue();
2986
2987 return DAG.getNode(Opcode: MipsISD::PCKOD, DL: SDLoc(Op), VT: ResTy, N1: Ws, N2: Wt);
2988}
2989
2990// Lower VECTOR_SHUFFLE into VSHF.
2991//
2992// This mostly consists of converting the shuffle indices in Indices into a
2993// BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is
2994// also code to eliminate unused operands of the VECTOR_SHUFFLE. For example,
2995// if the type is v8i16 and all the indices are less than 8 then the second
2996// operand is unused and can be replaced with anything. We choose to replace it
2997// with the used operand since this reduces the number of instructions overall.
2998//
2999// NOTE: SPLATI shuffle masks may contain UNDEFs, since isSPLATI() treats
3000// UNDEFs as same as SPLATI index.
3001// For other instances we use the last valid index if UNDEF is
3002// encountered.
static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy,
                                        const SmallVector<int, 16> &Indices,
                                        const bool isSPLATI,
                                        SelectionDAG &DAG) {
  SmallVector<SDValue, 16> Ops;
  SDValue Op0;
  SDValue Op1;
  // The mask vector is the integer vector type corresponding to the result.
  EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger();
  EVT MaskEltTy = MaskVecTy.getVectorElementType();
  bool Using1stVec = false;
  bool Using2ndVec = false;
  SDLoc DL(Op);
  int ResTyNumElts = ResTy.getVectorNumElements();

  assert(Indices[0] >= 0 &&
         "shuffle mask starts with an UNDEF, which is not expected");

  // Work out which of the two input vectors the mask actually references.
  // Indices in [0, N) come from operand 0; [N, 2N) come from operand 1.
  for (int i = 0; i < ResTyNumElts; ++i) {
    // Idx == -1 means UNDEF
    int Idx = Indices[i];

    if (0 <= Idx && Idx < ResTyNumElts)
      Using1stVec = true;
    if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2)
      Using2ndVec = true;
  }
  // Materialize the mask as a constant vector, patching UNDEF entries as
  // described in the comment above the function.
  int LastValidIndex = 0;
  for (size_t i = 0; i < Indices.size(); i++) {
    int Idx = Indices[i];
    if (Idx < 0) {
      // Continue using splati index or use the last valid index.
      Idx = isSPLATI ? Indices[0] : LastValidIndex;
    } else {
      LastValidIndex = Idx;
    }
    Ops.push_back(Elt: DAG.getTargetConstant(Val: Idx, DL, VT: MaskEltTy));
  }

  SDValue MaskVec = DAG.getBuildVector(VT: MaskVecTy, DL, Ops);

  // If only one input is referenced, feed it to both VSHF operands so the
  // unused register is freed.
  if (Using1stVec && Using2ndVec) {
    Op0 = Op->getOperand(Num: 0);
    Op1 = Op->getOperand(Num: 1);
  } else if (Using1stVec)
    Op0 = Op1 = Op->getOperand(Num: 0);
  else if (Using2ndVec)
    Op0 = Op1 = Op->getOperand(Num: 1);
  else
    llvm_unreachable("shuffle vector mask references neither vector operand?");

  // VECTOR_SHUFFLE concatenates the vectors in an vectorwise fashion.
  // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
  // VSHF concatenates the vectors in a bitwise fashion:
  // <0b00, 0b01> + <0b10, 0b11> ->
  // 0b0100 + 0b1110 -> 0b01001110
  // <0b10, 0b11, 0b00, 0b01>
  // We must therefore swap the operands to get the correct result.
  return DAG.getNode(Opcode: MipsISD::VSHF, DL, VT: ResTy, N1: MaskVec, N2: Op1, N3: Op0);
}
3062
3063// Lower VECTOR_SHUFFLE into one of a number of instructions depending on the
3064// indices in the shuffle.
3065SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
3066                                                  SelectionDAG &DAG) const {
3067  ShuffleVectorSDNode *Node = cast<ShuffleVectorSDNode>(Val&: Op);
3068  EVT ResTy = Op->getValueType(ResNo: 0);
3069
3070  // Only 128-bit vectors map onto MSA shuffle instructions.
3071  if (!ResTy.is128BitVector())
3072    return SDValue();
3073
3074  // Collect the shuffle mask into a mutable vector.
3075  SmallVector<int, 16> Indices;
3076  for (int Elt = 0, NumElts = ResTy.getVectorNumElements(); Elt < NumElts;
3077       ++Elt)
3078    Indices.push_back(Elt: Node->getMaskElt(Idx: Elt));
3079
3080  // splati.[bhwd] is preferable to the others but is matched from
3081  // MipsISD::VSHF.
3082  if (isVECTOR_SHUFFLE_SPLATI(Op, ResTy, Indices, DAG))
3083    return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, isSPLATI: true, DAG);
3084
3085  // Try each dedicated shuffle form in turn; fall back to the generic VSHF.
3086  if (SDValue V = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG))
3087    return V;
3088  if (SDValue V = lowerVECTOR_SHUFFLE_ILVOD(Op, ResTy, Indices, DAG))
3089    return V;
3090  if (SDValue V = lowerVECTOR_SHUFFLE_ILVL(Op, ResTy, Indices, DAG))
3091    return V;
3092  if (SDValue V = lowerVECTOR_SHUFFLE_ILVR(Op, ResTy, Indices, DAG))
3093    return V;
3094  if (SDValue V = lowerVECTOR_SHUFFLE_PCKEV(Op, ResTy, Indices, DAG))
3095    return V;
3096  if (SDValue V = lowerVECTOR_SHUFFLE_PCKOD(Op, ResTy, Indices, DAG))
3097    return V;
3098  if (SDValue V = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG))
3099    return V;
3100  return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, isSPLATI: false, DAG);
3101}
3100
3101MachineBasicBlock *
3102MipsSETargetLowering::emitBPOSGE32(MachineInstr &MI,
3103 MachineBasicBlock *BB) const {
3104 // $bb:
3105 // bposge32_pseudo $vr0
3106 // =>
3107 // $bb:
3108 // bposge32 $tbb
3109 // $fbb:
3110 // li $vr2, 0
3111 // b $sink
3112 // $tbb:
3113 // li $vr1, 1
3114 // $sink:
3115 // $vr0 = phi($vr2, $fbb, $vr1, $tbb)
3116
3117 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3118 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3119 const TargetRegisterClass *RC = &Mips::GPR32RegClass;
3120 DebugLoc DL = MI.getDebugLoc();
3121 const BasicBlock *LLVM_BB = BB->getBasicBlock();
3122 MachineFunction::iterator It = std::next(x: MachineFunction::iterator(BB));
3123 MachineFunction *F = BB->getParent();
3124 MachineBasicBlock *FBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
3125 MachineBasicBlock *TBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
3126 MachineBasicBlock *Sink = F->CreateMachineBasicBlock(BB: LLVM_BB);
3127 F->insert(MBBI: It, MBB: FBB);
3128 F->insert(MBBI: It, MBB: TBB);
3129 F->insert(MBBI: It, MBB: Sink);
3130
3131 // Transfer the remainder of BB and its successor edges to Sink.
3132 Sink->splice(Where: Sink->begin(), Other: BB, From: std::next(x: MachineBasicBlock::iterator(MI)),
3133 To: BB->end());
3134 Sink->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
3135
3136 // Add successors.
3137 BB->addSuccessor(Succ: FBB);
3138 BB->addSuccessor(Succ: TBB);
3139 FBB->addSuccessor(Succ: Sink);
3140 TBB->addSuccessor(Succ: Sink);
3141
3142 // Insert the real bposge32 instruction to $BB.
3143 BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: Mips::BPOSGE32)).addMBB(MBB: TBB);
3144 // Insert the real bposge32c instruction to $BB.
3145 BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: Mips::BPOSGE32C_MMR3)).addMBB(MBB: TBB);
3146
3147 // Fill $FBB.
3148 Register VR2 = RegInfo.createVirtualRegister(RegClass: RC);
3149 BuildMI(BB&: *FBB, I: FBB->end(), MIMD: DL, MCID: TII->get(Opcode: Mips::ADDiu), DestReg: VR2)
3150 .addReg(RegNo: Mips::ZERO).addImm(Val: 0);
3151 BuildMI(BB&: *FBB, I: FBB->end(), MIMD: DL, MCID: TII->get(Opcode: Mips::B)).addMBB(MBB: Sink);
3152
3153 // Fill $TBB.
3154 Register VR1 = RegInfo.createVirtualRegister(RegClass: RC);
3155 BuildMI(BB&: *TBB, I: TBB->end(), MIMD: DL, MCID: TII->get(Opcode: Mips::ADDiu), DestReg: VR1)
3156 .addReg(RegNo: Mips::ZERO).addImm(Val: 1);
3157
3158 // Insert phi function to $Sink.
3159 BuildMI(BB&: *Sink, I: Sink->begin(), MIMD: DL, MCID: TII->get(Opcode: Mips::PHI),
3160 DestReg: MI.getOperand(i: 0).getReg())
3161 .addReg(RegNo: VR2)
3162 .addMBB(MBB: FBB)
3163 .addReg(RegNo: VR1)
3164 .addMBB(MBB: TBB);
3165
3166 MI.eraseFromParent(); // The pseudo instruction is gone now.
3167 return Sink;
3168}
3169
3170MachineBasicBlock *MipsSETargetLowering::emitMSACBranchPseudo(
3171 MachineInstr &MI, MachineBasicBlock *BB, unsigned BranchOp) const {
3172 // $bb:
3173 // vany_nonzero $rd, $ws
3174 // =>
3175 // $bb:
3176 // bnz.b $ws, $tbb
3177 // b $fbb
3178 // $fbb:
3179 // li $rd1, 0
3180 // b $sink
3181 // $tbb:
3182 // li $rd2, 1
3183 // $sink:
3184 // $rd = phi($rd1, $fbb, $rd2, $tbb)
3185
3186 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3187 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3188 const TargetRegisterClass *RC = &Mips::GPR32RegClass;
3189 DebugLoc DL = MI.getDebugLoc();
3190 const BasicBlock *LLVM_BB = BB->getBasicBlock();
3191 MachineFunction::iterator It = std::next(x: MachineFunction::iterator(BB));
3192 MachineFunction *F = BB->getParent();
3193 MachineBasicBlock *FBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
3194 MachineBasicBlock *TBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
3195 MachineBasicBlock *Sink = F->CreateMachineBasicBlock(BB: LLVM_BB);
3196 F->insert(MBBI: It, MBB: FBB);
3197 F->insert(MBBI: It, MBB: TBB);
3198 F->insert(MBBI: It, MBB: Sink);
3199
3200 // Transfer the remainder of BB and its successor edges to Sink.
3201 Sink->splice(Where: Sink->begin(), Other: BB, From: std::next(x: MachineBasicBlock::iterator(MI)),
3202 To: BB->end());
3203 Sink->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
3204
3205 // Add successors.
3206 BB->addSuccessor(Succ: FBB);
3207 BB->addSuccessor(Succ: TBB);
3208 FBB->addSuccessor(Succ: Sink);
3209 TBB->addSuccessor(Succ: Sink);
3210
3211 // Insert the real bnz.b instruction to $BB.
3212 BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: BranchOp))
3213 .addReg(RegNo: MI.getOperand(i: 1).getReg())
3214 .addMBB(MBB: TBB);
3215
3216 // Fill $FBB.
3217 Register RD1 = RegInfo.createVirtualRegister(RegClass: RC);
3218 BuildMI(BB&: *FBB, I: FBB->end(), MIMD: DL, MCID: TII->get(Opcode: Mips::ADDiu), DestReg: RD1)
3219 .addReg(RegNo: Mips::ZERO).addImm(Val: 0);
3220 BuildMI(BB&: *FBB, I: FBB->end(), MIMD: DL, MCID: TII->get(Opcode: Mips::B)).addMBB(MBB: Sink);
3221
3222 // Fill $TBB.
3223 Register RD2 = RegInfo.createVirtualRegister(RegClass: RC);
3224 BuildMI(BB&: *TBB, I: TBB->end(), MIMD: DL, MCID: TII->get(Opcode: Mips::ADDiu), DestReg: RD2)
3225 .addReg(RegNo: Mips::ZERO).addImm(Val: 1);
3226
3227 // Insert phi function to $Sink.
3228 BuildMI(BB&: *Sink, I: Sink->begin(), MIMD: DL, MCID: TII->get(Opcode: Mips::PHI),
3229 DestReg: MI.getOperand(i: 0).getReg())
3230 .addReg(RegNo: RD1)
3231 .addMBB(MBB: FBB)
3232 .addReg(RegNo: RD2)
3233 .addMBB(MBB: TBB);
3234
3235 MI.eraseFromParent(); // The pseudo instruction is gone now.
3236 return Sink;
3237}
3238
3239// Emit the COPY_FW pseudo instruction.
3240//
3241// copy_fw_pseudo $fd, $ws, n
3242// =>
3243// copy_u_w $rt, $ws, $n
3244// mtc1 $rt, $fd
3245//
3246// When n is zero, the equivalent operation can be performed with (potentially)
3247// zero instructions due to register overlaps. This optimization is never valid
3248// for lane 1 because it would require FR=0 mode which isn't supported by MSA.
3249MachineBasicBlock *
3250MipsSETargetLowering::emitCOPY_FW(MachineInstr &MI,
3251 MachineBasicBlock *BB) const {
3252 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3253 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3254 DebugLoc DL = MI.getDebugLoc();
3255 Register Fd = MI.getOperand(i: 0).getReg();
3256 Register Ws = MI.getOperand(i: 1).getReg();
3257 unsigned Lane = MI.getOperand(i: 2).getImm();
3258
3259 if (Lane == 0) {
3260 unsigned Wt = Ws;
3261 if (!Subtarget.useOddSPReg()) {
3262 // We must copy to an even-numbered MSA register so that the
3263 // single-precision sub-register is also guaranteed to be even-numbered.
3264 Wt = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128WEvensRegClass);
3265
3266 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::COPY), DestReg: Wt).addReg(RegNo: Ws);
3267 }
3268
3269 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::COPY), DestReg: Fd).addReg(RegNo: Wt, Flags: {}, SubReg: Mips::sub_lo);
3270 } else {
3271 Register Wt = RegInfo.createVirtualRegister(
3272 RegClass: Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
3273 : &Mips::MSA128WEvensRegClass);
3274
3275 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SPLATI_W), DestReg: Wt).addReg(RegNo: Ws).addImm(Val: Lane);
3276 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::COPY), DestReg: Fd).addReg(RegNo: Wt, Flags: {}, SubReg: Mips::sub_lo);
3277 }
3278
3279 MI.eraseFromParent(); // The pseudo instruction is gone now.
3280 return BB;
3281}
3282
3283// Emit the COPY_FD pseudo instruction.
3284//
3285// copy_fd_pseudo $fd, $ws, n
3286// =>
3287// splati.d $wt, $ws, $n
3288// copy $fd, $wt:sub_64
3289//
3290// When n is zero, the equivalent operation can be performed with (potentially)
3291// zero instructions due to register overlaps. This optimization is always
3292// valid because FR=1 mode which is the only supported mode in MSA.
3293MachineBasicBlock *
3294MipsSETargetLowering::emitCOPY_FD(MachineInstr &MI,
3295 MachineBasicBlock *BB) const {
3296 assert(Subtarget.isFP64bit());
3297
3298 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3299 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3300 Register Fd = MI.getOperand(i: 0).getReg();
3301 Register Ws = MI.getOperand(i: 1).getReg();
3302 unsigned Lane = MI.getOperand(i: 2).getImm() * 2;
3303 DebugLoc DL = MI.getDebugLoc();
3304
3305 if (Lane == 0)
3306 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::COPY), DestReg: Fd).addReg(RegNo: Ws, Flags: {}, SubReg: Mips::sub_64);
3307 else {
3308 Register Wt = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128DRegClass);
3309
3310 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SPLATI_D), DestReg: Wt).addReg(RegNo: Ws).addImm(Val: 1);
3311 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::COPY), DestReg: Fd).addReg(RegNo: Wt, Flags: {}, SubReg: Mips::sub_64);
3312 }
3313
3314 MI.eraseFromParent(); // The pseudo instruction is gone now.
3315 return BB;
3316}
3317
3318// Emit the INSERT_FW pseudo instruction.
3319//
3320// insert_fw_pseudo $wd, $wd_in, $n, $fs
3321// =>
3322// subreg_to_reg $wt:sub_lo, $fs
3323// insve_w $wd[$n], $wd_in, $wt[0]
3324MachineBasicBlock *
3325MipsSETargetLowering::emitINSERT_FW(MachineInstr &MI,
3326 MachineBasicBlock *BB) const {
3327 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3328 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3329 DebugLoc DL = MI.getDebugLoc();
3330 Register Wd = MI.getOperand(i: 0).getReg();
3331 Register Wd_in = MI.getOperand(i: 1).getReg();
3332 unsigned Lane = MI.getOperand(i: 2).getImm();
3333 Register Fs = MI.getOperand(i: 3).getReg();
3334 Register Wt = RegInfo.createVirtualRegister(
3335 RegClass: Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
3336 : &Mips::MSA128WEvensRegClass);
3337
3338 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SUBREG_TO_REG), DestReg: Wt)
3339 .addImm(Val: 0)
3340 .addReg(RegNo: Fs)
3341 .addImm(Val: Mips::sub_lo);
3342 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::INSVE_W), DestReg: Wd)
3343 .addReg(RegNo: Wd_in)
3344 .addImm(Val: Lane)
3345 .addReg(RegNo: Wt)
3346 .addImm(Val: 0);
3347
3348 MI.eraseFromParent(); // The pseudo instruction is gone now.
3349 return BB;
3350}
3351
3352// Emit the INSERT_FD pseudo instruction.
3353//
3354// insert_fd_pseudo $wd, $fs, n
3355// =>
3356// subreg_to_reg $wt:sub_64, $fs
3357// insve_d $wd[$n], $wd_in, $wt[0]
3358MachineBasicBlock *
3359MipsSETargetLowering::emitINSERT_FD(MachineInstr &MI,
3360 MachineBasicBlock *BB) const {
3361 assert(Subtarget.isFP64bit());
3362
3363 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3364 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3365 DebugLoc DL = MI.getDebugLoc();
3366 Register Wd = MI.getOperand(i: 0).getReg();
3367 Register Wd_in = MI.getOperand(i: 1).getReg();
3368 unsigned Lane = MI.getOperand(i: 2).getImm();
3369 Register Fs = MI.getOperand(i: 3).getReg();
3370 Register Wt = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128DRegClass);
3371
3372 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SUBREG_TO_REG), DestReg: Wt)
3373 .addImm(Val: 0)
3374 .addReg(RegNo: Fs)
3375 .addImm(Val: Mips::sub_64);
3376 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::INSVE_D), DestReg: Wd)
3377 .addReg(RegNo: Wd_in)
3378 .addImm(Val: Lane)
3379 .addReg(RegNo: Wt)
3380 .addImm(Val: 0);
3381
3382 MI.eraseFromParent(); // The pseudo instruction is gone now.
3383 return BB;
3384}
3385
3386// Emit the INSERT_([BHWD]|F[WD])_VIDX pseudo instruction.
3387//
3388// For integer:
3389// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $rs)
3390// =>
3391// (SLL $lanetmp1, $lane, <log2size>)
3392// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
3393// (INSERT_[BHWD], $wdtmp2, $wdtmp1, 0, $rs)
3394// (NEG $lanetmp2, $lanetmp1)
3395// (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
3396//
3397// For floating point:
3398// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $fs)
3399// =>
3400// (SUBREG_TO_REG $wt, $fs, <subreg>)
3401// (SLL $lanetmp1, $lane, <log2size>)
3402// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
3403// (INSVE_[WD], $wdtmp2, 0, $wdtmp1, 0)
3404// (NEG $lanetmp2, $lanetmp1)
3405// (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX(
    MachineInstr &MI, MachineBasicBlock *BB, unsigned EltSizeInBytes,
    bool IsFP) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Wd = MI.getOperand(i: 0).getReg();        // Destination vector.
  Register SrcVecReg = MI.getOperand(i: 1).getReg(); // Vector being updated.
  Register LaneReg = MI.getOperand(i: 2).getReg();   // Variable lane index (GPR).
  Register SrcValReg = MI.getOperand(i: 3).getReg(); // Scalar value to insert.

  const TargetRegisterClass *VecRC = nullptr;
  // FIXME: This should be true for N32 too.
  const TargetRegisterClass *GPRRC =
      Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
  // On N64 the lane index lives in a GPR64; SLD_B reads it through sub_32.
  unsigned SubRegIdx = Subtarget.isABI_N64() ? Mips::sub_32 : 0;
  unsigned ShiftOp = Subtarget.isABI_N64() ? Mips::DSLL : Mips::SLL;
  unsigned EltLog2Size;
  unsigned InsertOp = 0;
  unsigned InsveOp = 0;
  // Pick per-element-size opcodes and the matching vector register class.
  switch (EltSizeInBytes) {
  default:
    llvm_unreachable("Unexpected size");
  case 1:
    EltLog2Size = 0;
    InsertOp = Mips::INSERT_B;
    InsveOp = Mips::INSVE_B;
    VecRC = &Mips::MSA128BRegClass;
    break;
  case 2:
    EltLog2Size = 1;
    InsertOp = Mips::INSERT_H;
    InsveOp = Mips::INSVE_H;
    VecRC = &Mips::MSA128HRegClass;
    break;
  case 4:
    EltLog2Size = 2;
    InsertOp = Mips::INSERT_W;
    InsveOp = Mips::INSVE_W;
    VecRC = &Mips::MSA128WRegClass;
    break;
  case 8:
    EltLog2Size = 3;
    InsertOp = Mips::INSERT_D;
    InsveOp = Mips::INSVE_D;
    VecRC = &Mips::MSA128DRegClass;
    break;
  }

  if (IsFP) {
    // Wrap the FP scalar in a vector register so INSVE can read element 0.
    Register Wt = RegInfo.createVirtualRegister(RegClass: VecRC);
    BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SUBREG_TO_REG), DestReg: Wt)
        .addImm(Val: 0)
        .addReg(RegNo: SrcValReg)
        .addImm(Val: EltSizeInBytes == 8 ? Mips::sub_64 : Mips::sub_lo);
    SrcValReg = Wt;
  }

  // Convert the lane index into a byte index
  if (EltSizeInBytes != 1) {
    Register LaneTmp1 = RegInfo.createVirtualRegister(RegClass: GPRRC);
    BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: ShiftOp), DestReg: LaneTmp1)
        .addReg(RegNo: LaneReg)
        .addImm(Val: EltLog2Size)
    LaneReg = LaneTmp1;
  }

  // Rotate bytes around so that the desired lane is element zero
  Register WdTmp1 = RegInfo.createVirtualRegister(RegClass: VecRC);
  BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SLD_B), DestReg: WdTmp1)
      .addReg(RegNo: SrcVecReg)
      .addReg(RegNo: SrcVecReg)
      .addReg(RegNo: LaneReg, Flags: {}, SubReg: SubRegIdx);

  Register WdTmp2 = RegInfo.createVirtualRegister(RegClass: VecRC);
  if (IsFP) {
    // Use insve.df to insert to element zero
    BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: InsveOp), DestReg: WdTmp2)
        .addReg(RegNo: WdTmp1)
        .addImm(Val: 0)
        .addReg(RegNo: SrcValReg)
        .addImm(Val: 0);
  } else {
    // Use insert.df to insert to element zero
    BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: InsertOp), DestReg: WdTmp2)
        .addReg(RegNo: WdTmp1)
        .addReg(RegNo: SrcValReg)
        .addImm(Val: 0);
  }

  // Rotate elements the rest of the way for a full rotation.
  // sld.df interprets $rt modulo the number of columns so we only need to
  // negate the lane index to do this.
  Register LaneTmp2 = RegInfo.createVirtualRegister(RegClass: GPRRC);
  BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Subtarget.isABI_N64() ? Mips::DSUB : Mips::SUB),
          DestReg: LaneTmp2)
      .addReg(RegNo: Subtarget.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO)
      .addReg(RegNo: LaneReg);
  BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SLD_B), DestReg: Wd)
      .addReg(RegNo: WdTmp2)
      .addReg(RegNo: WdTmp2)
      .addReg(RegNo: LaneTmp2, Flags: {}, SubReg: SubRegIdx);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
3512
3513// Emit the FILL_FW pseudo instruction.
3514//
3515// fill_fw_pseudo $wd, $fs
3516// =>
3517// implicit_def $wt1
3518// insert_subreg $wt2:subreg_lo, $wt1, $fs
3519// splati.w $wd, $wt2[0]
3520MachineBasicBlock *
3521MipsSETargetLowering::emitFILL_FW(MachineInstr &MI,
3522 MachineBasicBlock *BB) const {
3523 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3524 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3525 DebugLoc DL = MI.getDebugLoc();
3526 Register Wd = MI.getOperand(i: 0).getReg();
3527 Register Fs = MI.getOperand(i: 1).getReg();
3528 Register Wt1 = RegInfo.createVirtualRegister(
3529 RegClass: Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
3530 : &Mips::MSA128WEvensRegClass);
3531 Register Wt2 = RegInfo.createVirtualRegister(
3532 RegClass: Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
3533 : &Mips::MSA128WEvensRegClass);
3534
3535 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::IMPLICIT_DEF), DestReg: Wt1);
3536 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::INSERT_SUBREG), DestReg: Wt2)
3537 .addReg(RegNo: Wt1)
3538 .addReg(RegNo: Fs)
3539 .addImm(Val: Mips::sub_lo);
3540 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SPLATI_W), DestReg: Wd).addReg(RegNo: Wt2).addImm(Val: 0);
3541
3542 MI.eraseFromParent(); // The pseudo instruction is gone now.
3543 return BB;
3544}
3545
3546// Emit the FILL_FD pseudo instruction.
3547//
3548// fill_fd_pseudo $wd, $fs
3549// =>
3550// implicit_def $wt1
3551// insert_subreg $wt2:subreg_64, $wt1, $fs
3552// splati.d $wd, $wt2[0]
3553MachineBasicBlock *
3554MipsSETargetLowering::emitFILL_FD(MachineInstr &MI,
3555 MachineBasicBlock *BB) const {
3556 assert(Subtarget.isFP64bit());
3557
3558 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3559 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3560 DebugLoc DL = MI.getDebugLoc();
3561 Register Wd = MI.getOperand(i: 0).getReg();
3562 Register Fs = MI.getOperand(i: 1).getReg();
3563 Register Wt1 = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128DRegClass);
3564 Register Wt2 = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128DRegClass);
3565
3566 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::IMPLICIT_DEF), DestReg: Wt1);
3567 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::INSERT_SUBREG), DestReg: Wt2)
3568 .addReg(RegNo: Wt1)
3569 .addReg(RegNo: Fs)
3570 .addImm(Val: Mips::sub_64);
3571 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SPLATI_D), DestReg: Wd).addReg(RegNo: Wt2).addImm(Val: 0);
3572
3573 MI.eraseFromParent(); // The pseudo instruction is gone now.
3574 return BB;
3575}
3576
3577// Emit the ST_F16_PSEUDO instruction to store an f16 value from an MSA
3578// register.
3579//
3580// STF16 MSA128F16:$wd, mem_simm10:$addr
3581// =>
3582// copy_u.h $rtemp,$wd[0]
3583// sh $rtemp, $addr
3584//
3585// Safety: We can't use st.h & co as they would overwrite the memory after
3586// the destination. It would require half floats to be allocated 16 bytes(!)
3587// of space.
3588MachineBasicBlock *
3589MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI,
3590 MachineBasicBlock *BB) const {
3591
3592 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3593 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3594 DebugLoc DL = MI.getDebugLoc();
3595 Register Ws = MI.getOperand(i: 0).getReg();
3596 Register Rt = MI.getOperand(i: 1).getReg();
3597 const MachineMemOperand &MMO = **MI.memoperands_begin();
3598 unsigned Imm = MMO.getOffset();
3599
3600 // Caution: A load via the GOT can expand to a GPR32 operand, a load via
3601 // spill and reload can expand as a GPR64 operand. Examine the
3602 // operand in detail and default to ABI.
3603 const TargetRegisterClass *RC =
3604 MI.getOperand(i: 1).isReg() ? RegInfo.getRegClass(Reg: MI.getOperand(i: 1).getReg())
3605 : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass
3606 : &Mips::GPR64RegClass);
3607 const bool UsingMips32 = RC == &Mips::GPR32RegClass;
3608 Register Rs = RegInfo.createVirtualRegister(RegClass: &Mips::GPR32RegClass);
3609
3610 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::COPY_U_H), DestReg: Rs).addReg(RegNo: Ws).addImm(Val: 0);
3611 if(!UsingMips32) {
3612 Register Tmp = RegInfo.createVirtualRegister(RegClass: &Mips::GPR64RegClass);
3613 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SUBREG_TO_REG), DestReg: Tmp)
3614 .addImm(Val: 0)
3615 .addReg(RegNo: Rs)
3616 .addImm(Val: Mips::sub_32);
3617 Rs = Tmp;
3618 }
3619 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: UsingMips32 ? Mips::SH : Mips::SH64))
3620 .addReg(RegNo: Rs)
3621 .addReg(RegNo: Rt)
3622 .addImm(Val: Imm)
3623 .addMemOperand(MMO: BB->getParent()->getMachineMemOperand(
3624 MMO: &MMO, Offset: MMO.getOffset(), Size: MMO.getSize()));
3625
3626 MI.eraseFromParent();
3627 return BB;
3628}
3629
3630// Emit the LD_F16_PSEUDO instruction to load an f16 value into an MSA register.
3631//
3632// LD_F16 MSA128F16:$wd, mem_simm10:$addr
3633// =>
3634// lh $rtemp, $addr
3635// fill.h $wd, $rtemp
3636//
3637// Safety: We can't use ld.h & co as they over-read from the source.
3638// Additionally, if the address is not modulo 16, 2 cases can occur:
3639// a) Segmentation fault as the load instruction reads from a memory page
3640// memory it's not supposed to.
3641// b) The load crosses an implementation specific boundary, requiring OS
3642// intervention.
3643MachineBasicBlock *
3644MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI,
3645 MachineBasicBlock *BB) const {
3646
3647 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3648 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3649 DebugLoc DL = MI.getDebugLoc();
3650 Register Wd = MI.getOperand(i: 0).getReg();
3651
3652 // Caution: A load via the GOT can expand to a GPR32 operand, a load via
3653 // spill and reload can expand as a GPR64 operand. Examine the
3654 // operand in detail and default to ABI.
3655 const TargetRegisterClass *RC =
3656 MI.getOperand(i: 1).isReg() ? RegInfo.getRegClass(Reg: MI.getOperand(i: 1).getReg())
3657 : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass
3658 : &Mips::GPR64RegClass);
3659
3660 const bool UsingMips32 = RC == &Mips::GPR32RegClass;
3661 Register Rt = RegInfo.createVirtualRegister(RegClass: RC);
3662
3663 MachineInstrBuilder MIB =
3664 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: UsingMips32 ? Mips::LH : Mips::LH64), DestReg: Rt);
3665 for (const MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI.operands()))
3666 MIB.add(MO);
3667
3668 if(!UsingMips32) {
3669 Register Tmp = RegInfo.createVirtualRegister(RegClass: &Mips::GPR32RegClass);
3670 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::COPY), DestReg: Tmp)
3671 .addReg(RegNo: Rt, Flags: {}, SubReg: Mips::sub_32);
3672 Rt = Tmp;
3673 }
3674
3675 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FILL_H), DestReg: Wd).addReg(RegNo: Rt);
3676
3677 MI.eraseFromParent();
3678 return BB;
3679}
3680
3681// Emit the FPROUND_PSEUDO instruction.
3682//
3683// Round an FGR64Opnd, FGR32Opnd to an f16.
3684//
3685// Safety: Cycle the operand through the GPRs so the result always ends up
3686// the correct MSA register.
3687//
3688// FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fs
3689// / FGR64Opnd:$Fs and MSA128F16:$Wd to the same physical register
3690// (which they can be, as the MSA registers are defined to alias the
3691// FPU's 64 bit and 32 bit registers) the result can be accessed using
3692// the correct register class. That requires operands be tie-able across
3693// register classes which have a sub/super register class relationship.
3694//
3695// For FPG32Opnd:
3696//
3697// FPROUND MSA128F16:$wd, FGR32Opnd:$fs
3698// =>
3699// mfc1 $rtemp, $fs
3700// fill.w $rtemp, $wtemp
3701// fexdo.w $wd, $wtemp, $wtemp
3702//
3703// For FPG64Opnd on mips32r2+:
3704//
3705// FPROUND MSA128F16:$wd, FGR64Opnd:$fs
3706// =>
3707// mfc1 $rtemp, $fs
3708// fill.w $rtemp, $wtemp
3709// mfhc1 $rtemp2, $fs
3710// insert.w $wtemp[1], $rtemp2
3711// insert.w $wtemp[3], $rtemp2
3712// fexdo.w $wtemp2, $wtemp, $wtemp
3713// fexdo.h $wd, $temp2, $temp2
3714//
3715// For FGR64Opnd on mips64r2+:
3716//
3717// FPROUND MSA128F16:$wd, FGR64Opnd:$fs
3718// =>
3719// dmfc1 $rtemp, $fs
3720// fill.d $rtemp, $wtemp
3721// fexdo.w $wtemp2, $wtemp, $wtemp
3722// fexdo.h $wd, $wtemp2, $wtemp2
3723//
3724// Safety note: As $wtemp is UNDEF, we may provoke a spurious exception if the
3725//              undef bits are "just right" and the exception enable bits are
3726//              set. By using fill.w to replicate $fs into all elements over
3727//              insert.w for one element, we avoid that potential case. If
3728//              fexdo.[hw] causes an exception, the exception is valid and it
3729//              occurs for all elements.
MachineBasicBlock *
MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI,
                                         MachineBasicBlock *BB,
                                         bool IsFGR64) const {

  // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous
  // here. It's technically doable to support MIPS32 here, but the ISA forbids
  // it.
  assert(Subtarget.hasMSA() && Subtarget.hasMips32r2());

  // Distinguish the three expansion shapes documented above: f64 source on a
  // 64-bit core, f64 source on a 32-bit core, and f32 source.
  bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64;
  bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64;

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Wd = MI.getOperand(i: 0).getReg(); // Destination MSA register.
  Register Fs = MI.getOperand(i: 1).getReg(); // Source FPU register.

  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  Register Wtemp = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128WRegClass);
  // Pick the GPR class and the FPU->GPR / GPR->vector opcodes for this mode.
  const TargetRegisterClass *GPRRC =
      IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
  unsigned MFC1Opc = IsFGR64onMips64
                         ? Mips::DMFC1
                         : (IsFGR64onMips32 ? Mips::MFC1_D64 : Mips::MFC1);
  unsigned FILLOpc = IsFGR64onMips64 ? Mips::FILL_D : Mips::FILL_W;

  // Perform the register class copy as mentioned above.
  Register Rtemp = RegInfo.createVirtualRegister(RegClass: GPRRC);
  BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: MFC1Opc), DestReg: Rtemp).addReg(RegNo: Fs);
  BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: FILLOpc), DestReg: Wtemp).addReg(RegNo: Rtemp);
  // WPHI tracks the most recent intermediate vector value.
  unsigned WPHI = Wtemp;

  if (IsFGR64onMips32) {
    // On 32-bit cores the upper half of the f64 arrives via mfhc1; insert it
    // into the word lanes adjacent to the low halves filled above.
    Register Rtemp2 = RegInfo.createVirtualRegister(RegClass: GPRRC);
    BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::MFHC1_D64), DestReg: Rtemp2).addReg(RegNo: Fs);
    Register Wtemp2 = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128WRegClass);
    Register Wtemp3 = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128WRegClass);
    BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::INSERT_W), DestReg: Wtemp2)
        .addReg(RegNo: Wtemp)
        .addReg(RegNo: Rtemp2)
        .addImm(Val: 1);
    BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::INSERT_W), DestReg: Wtemp3)
        .addReg(RegNo: Wtemp2)
        .addReg(RegNo: Rtemp2)
        .addImm(Val: 3);
    WPHI = Wtemp3;
  }

  if (IsFGR64) {
    // First rounding step for f64 sources: f64 -> f32 via fexdo.w.
    Register Wtemp2 = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128WRegClass);
    BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FEXDO_W), DestReg: Wtemp2)
        .addReg(RegNo: WPHI)
        .addReg(RegNo: WPHI);
    WPHI = Wtemp2;
  }

  // Final rounding step: f32 -> f16 via fexdo.h.
  BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FEXDO_H), DestReg: Wd).addReg(RegNo: WPHI).addReg(RegNo: WPHI);

  MI.eraseFromParent();
  return BB;
}
3792
3793// Emit the FPEXTEND_PSEUDO instruction.
3794//
3795// Expand an f16 to either a FGR32Opnd or FGR64Opnd.
3796//
3797// Safety: Cycle the result through the GPRs so the result always ends up
3798// the correct floating point register.
3799//
3800// FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fd
3801// / FGR64Opnd:$Fd and MSA128F16:$Ws to the same physical register
3802// (which they can be, as the MSA registers are defined to alias the
3803// FPU's 64 bit and 32 bit registers) the result can be accessed using
3804// the correct register class. That requires operands be tie-able across
3805// register classes which have a sub/super register class relationship. I
3806// haven't checked.
3807//
3808// For FGR32Opnd:
3809//
3810// FPEXTEND FGR32Opnd:$fd, MSA128F16:$ws
3811// =>
3812// fexupr.w $wtemp, $ws
3813// copy_s.w $rtemp, $ws[0]
3814// mtc1 $rtemp, $fd
3815//
3816// For FGR64Opnd on Mips64:
3817//
3818// FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
3819// =>
3820// fexupr.w $wtemp, $ws
3821// fexupr.d $wtemp2, $wtemp
3822// copy_s.d $rtemp, $wtemp2s[0]
3823// dmtc1 $rtemp, $fd
3824//
3825// For FGR64Opnd on Mips32:
3826//
3827// FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
3828// =>
3829// fexupr.w $wtemp, $ws
3830// fexupr.d $wtemp2, $wtemp
3831// copy_s.w $rtemp, $wtemp2[0]
3832// mtc1 $rtemp, $ftemp
3833// copy_s.w $rtemp2, $wtemp2[1]
3834// $fd = mthc1 $rtemp2, $ftemp
3835MachineBasicBlock *
3836MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI,
3837 MachineBasicBlock *BB,
3838 bool IsFGR64) const {
3839
3840 // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous
3841 // here. It's technically doable to support MIPS32 here, but the ISA forbids
3842 // it.
3843 assert(Subtarget.hasMSA() && Subtarget.hasMips32r2());
3844
3845 bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64;
3846 bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64;
3847
3848 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3849 DebugLoc DL = MI.getDebugLoc();
3850 Register Fd = MI.getOperand(i: 0).getReg();
3851 Register Ws = MI.getOperand(i: 1).getReg();
3852
3853 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3854 const TargetRegisterClass *GPRRC =
3855 IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
3856 unsigned MTC1Opc = IsFGR64onMips64
3857 ? Mips::DMTC1
3858 : (IsFGR64onMips32 ? Mips::MTC1_D64 : Mips::MTC1);
3859 Register COPYOpc = IsFGR64onMips64 ? Mips::COPY_S_D : Mips::COPY_S_W;
3860
3861 Register Wtemp = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128WRegClass);
3862 Register WPHI = Wtemp;
3863
3864 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FEXUPR_W), DestReg: Wtemp).addReg(RegNo: Ws);
3865 if (IsFGR64) {
3866 WPHI = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128DRegClass);
3867 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FEXUPR_D), DestReg: WPHI).addReg(RegNo: Wtemp);
3868 }
3869
3870 // Perform the safety regclass copy mentioned above.
3871 Register Rtemp = RegInfo.createVirtualRegister(RegClass: GPRRC);
3872 Register FPRPHI = IsFGR64onMips32
3873 ? RegInfo.createVirtualRegister(RegClass: &Mips::FGR64RegClass)
3874 : Fd;
3875 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: COPYOpc), DestReg: Rtemp).addReg(RegNo: WPHI).addImm(Val: 0);
3876 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: MTC1Opc), DestReg: FPRPHI).addReg(RegNo: Rtemp);
3877
3878 if (IsFGR64onMips32) {
3879 Register Rtemp2 = RegInfo.createVirtualRegister(RegClass: GPRRC);
3880 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::COPY_S_W), DestReg: Rtemp2)
3881 .addReg(RegNo: WPHI)
3882 .addImm(Val: 1);
3883 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::MTHC1_D64), DestReg: Fd)
3884 .addReg(RegNo: FPRPHI)
3885 .addReg(RegNo: Rtemp2);
3886 }
3887
3888 MI.eraseFromParent();
3889 return BB;
3890}
3891
3892// Emit the FEXP2_W_1 pseudo instructions.
3893//
3894// fexp2_w_1_pseudo $wd, $wt
3895// =>
3896// ldi.w $ws, 1
3897// fexp2.w $wd, $ws, $wt
3898MachineBasicBlock *
3899MipsSETargetLowering::emitFEXP2_W_1(MachineInstr &MI,
3900 MachineBasicBlock *BB) const {
3901 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3902 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3903 const TargetRegisterClass *RC = &Mips::MSA128WRegClass;
3904 Register Ws1 = RegInfo.createVirtualRegister(RegClass: RC);
3905 Register Ws2 = RegInfo.createVirtualRegister(RegClass: RC);
3906 DebugLoc DL = MI.getDebugLoc();
3907
3908 // Splat 1.0 into a vector
3909 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::LDI_W), DestReg: Ws1).addImm(Val: 1);
3910 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FFINT_U_W), DestReg: Ws2).addReg(RegNo: Ws1);
3911
3912 // Emit 1.0 * fexp2(Wt)
3913 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FEXP2_W), DestReg: MI.getOperand(i: 0).getReg())
3914 .addReg(RegNo: Ws2)
3915 .addReg(RegNo: MI.getOperand(i: 1).getReg());
3916
3917 MI.eraseFromParent(); // The pseudo instruction is gone now.
3918 return BB;
3919}
3920
3921// Emit the FEXP2_D_1 pseudo instructions.
3922//
3923// fexp2_d_1_pseudo $wd, $wt
3924// =>
3925// ldi.d $ws, 1
3926// fexp2.d $wd, $ws, $wt
3927MachineBasicBlock *
3928MipsSETargetLowering::emitFEXP2_D_1(MachineInstr &MI,
3929 MachineBasicBlock *BB) const {
3930 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3931 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3932 const TargetRegisterClass *RC = &Mips::MSA128DRegClass;
3933 Register Ws1 = RegInfo.createVirtualRegister(RegClass: RC);
3934 Register Ws2 = RegInfo.createVirtualRegister(RegClass: RC);
3935 DebugLoc DL = MI.getDebugLoc();
3936
3937 // Splat 1.0 into a vector
3938 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::LDI_D), DestReg: Ws1).addImm(Val: 1);
3939 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FFINT_U_D), DestReg: Ws2).addReg(RegNo: Ws1);
3940
3941 // Emit 1.0 * fexp2(Wt)
3942 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FEXP2_D), DestReg: MI.getOperand(i: 0).getReg())
3943 .addReg(RegNo: Ws2)
3944 .addReg(RegNo: MI.getOperand(i: 1).getReg());
3945
3946 MI.eraseFromParent(); // The pseudo instruction is gone now.
3947 return BB;
3948}
3949