1//===- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Subclass of MipsTargetLowering specialized for mips32/64.
10//
11//===----------------------------------------------------------------------===//
12
13#include "MipsSEISelLowering.h"
14#include "MipsMachineFunction.h"
15#include "MipsRegisterInfo.h"
16#include "MipsSubtarget.h"
17#include "llvm/ADT/APInt.h"
18#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/SmallVector.h"
20#include "llvm/CodeGen/CallingConvLower.h"
21#include "llvm/CodeGen/ISDOpcodes.h"
22#include "llvm/CodeGen/MachineBasicBlock.h"
23#include "llvm/CodeGen/MachineFunction.h"
24#include "llvm/CodeGen/MachineInstr.h"
25#include "llvm/CodeGen/MachineInstrBuilder.h"
26#include "llvm/CodeGen/MachineMemOperand.h"
27#include "llvm/CodeGen/MachineRegisterInfo.h"
28#include "llvm/CodeGen/SelectionDAG.h"
29#include "llvm/CodeGen/SelectionDAGNodes.h"
30#include "llvm/CodeGen/TargetInstrInfo.h"
31#include "llvm/CodeGen/TargetLowering.h"
32#include "llvm/CodeGen/TargetSubtargetInfo.h"
33#include "llvm/CodeGen/ValueTypes.h"
34#include "llvm/CodeGenTypes/MachineValueType.h"
35#include "llvm/IR/DebugLoc.h"
36#include "llvm/IR/Intrinsics.h"
37#include "llvm/IR/IntrinsicsMips.h"
38#include "llvm/Support/Casting.h"
39#include "llvm/Support/CommandLine.h"
40#include "llvm/Support/Debug.h"
41#include "llvm/Support/ErrorHandling.h"
42#include "llvm/Support/raw_ostream.h"
43#include "llvm/TargetParser/Triple.h"
44#include <algorithm>
45#include <cassert>
46#include <cstddef>
47#include <cstdint>
48#include <iterator>
49#include <utility>
50
51using namespace llvm;
52
53#define DEBUG_TYPE "mips-isel"
54
// Command-line override (-mno-ldc1-sdc1): when set, f64 loads/stores are
// custom-lowered (see the ctor and lowerLOAD/lowerSTORE) instead of using
// the 64-bit ldc1/sdc1 FPU instructions. Defaults to off.
static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(Val: false),
                                   cl::desc("Expand double precision loads and "
                                            "stores to their single precision "
                                            "counterparts"));
59
60// Widen the v2 vectors to the register width, i.e. v2i16 -> v8i16,
61// v2i32 -> v4i32, etc, to ensure the correct rail size is used, i.e.
62// INST.h for v16, INST.w for v32, INST.d for v64.
63TargetLoweringBase::LegalizeTypeAction
64MipsSETargetLowering::getPreferredVectorAction(MVT VT) const {
65 if (this->Subtarget.hasMSA()) {
66 switch (VT.SimpleTy) {
67 // Leave v2i1 vectors to be promoted to larger ones.
68 // Other i1 types will be promoted by default.
69 case MVT::v2i1:
70 return TypePromoteInteger;
71 break;
72 // 16-bit vector types (v2 and longer)
73 case MVT::v2i8:
74 // 32-bit vector types (v2 and longer)
75 case MVT::v2i16:
76 case MVT::v4i8:
77 // 64-bit vector types (v2 and longer)
78 case MVT::v2i32:
79 case MVT::v4i16:
80 case MVT::v8i8:
81 return TypeWidenVector;
82 break;
83 // Only word (.w) and doubleword (.d) are available for floating point
84 // vectors. That means floating point vectors should be either v2f64
85 // or v4f32.
86 // Here we only explicitly widen the f32 types - f16 will be promoted
87 // by default.
88 case MVT::v2f32:
89 case MVT::v3f32:
90 return TypeWidenVector;
91 // v2i64 is already 128-bit wide.
92 default:
93 break;
94 }
95 }
96 return TargetLoweringBase::getPreferredVectorAction(VT);
97}
98
// Configure register classes and legalization actions for the MipsSE
// (mips32/mips64) lowering, covering the GPR, DSP, MSA and FPU features of
// the given subtarget. NOTE: order matters here — later setOperationAction
// calls intentionally override earlier ones (e.g. the r6 sections below
// re-legalize i32 mul/div that were marked Custom earlier).
MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
                                           const MipsSubtarget &STI)
    : MipsTargetLowering(TM, STI) {
  // Set up the register classes
  addRegisterClass(VT: MVT::i32, RC: &Mips::GPR32RegClass);

  if (Subtarget.isGP64bit())
    addRegisterClass(VT: MVT::i64, RC: &Mips::GPR64RegClass);

  if (Subtarget.hasDSP() || Subtarget.hasMSA()) {
    // Expand all truncating stores and extending loads.
    for (MVT VT0 : MVT::fixedlen_vector_valuetypes()) {
      for (MVT VT1 : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(ValVT: VT0, MemVT: VT1, Action: Expand);
        setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: VT0, MemVT: VT1, Action: Expand);
        setLoadExtAction(ExtType: ISD::ZEXTLOAD, ValVT: VT0, MemVT: VT1, Action: Expand);
        setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: VT0, MemVT: VT1, Action: Expand);
      }
    }
  }

  // DSP ASE: narrow fixed-width vectors live in DSP registers; only a small
  // set of operations is directly supported, everything else is expanded.
  if (Subtarget.hasDSP()) {
    MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8};

    for (const auto &VecTy : VecTys) {
      addRegisterClass(VT: VecTy, RC: &Mips::DSPRRegClass);

      // Expand all builtin opcodes.
      for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
        setOperationAction(Op: Opc, VT: VecTy, Action: Expand);

      setOperationAction(Op: ISD::ADD, VT: VecTy, Action: Legal);
      setOperationAction(Op: ISD::SUB, VT: VecTy, Action: Legal);
      setOperationAction(Op: ISD::LOAD, VT: VecTy, Action: Legal);
      setOperationAction(Op: ISD::STORE, VT: VecTy, Action: Legal);
      setOperationAction(Op: ISD::BITCAST, VT: VecTy, Action: Legal);
    }

    setTargetDAGCombine(
        {ISD::SHL, ISD::SRA, ISD::SRL, ISD::SETCC, ISD::VSELECT});

    if (Subtarget.hasMips32r2()) {
      setOperationAction(Op: ISD::ADDC, VT: MVT::i32, Action: Legal);
      setOperationAction(Op: ISD::ADDE, VT: MVT::i32, Action: Legal);
    }
  }

  if (Subtarget.hasDSPR2())
    setOperationAction(Op: ISD::MUL, VT: MVT::v2i16, Action: Legal);

  // MSA: 128-bit vectors. addMSAIntType/addMSAFloatType set the per-type
  // actions; f16 scalar handling is configured here as well.
  if (Subtarget.hasMSA()) {
    addMSAIntType(Ty: MVT::v16i8, RC: &Mips::MSA128BRegClass);
    addMSAIntType(Ty: MVT::v8i16, RC: &Mips::MSA128HRegClass);
    addMSAIntType(Ty: MVT::v4i32, RC: &Mips::MSA128WRegClass);
    addMSAIntType(Ty: MVT::v2i64, RC: &Mips::MSA128DRegClass);
    addMSAFloatType(Ty: MVT::v8f16, RC: &Mips::MSA128HRegClass);
    addMSAFloatType(Ty: MVT::v4f32, RC: &Mips::MSA128WRegClass);
    addMSAFloatType(Ty: MVT::v2f64, RC: &Mips::MSA128DRegClass);

    // f16 is a storage-only type, always promote it to f32.
    addRegisterClass(VT: MVT::f16, RC: &Mips::MSA128HRegClass);
    setOperationAction(Op: ISD::SETCC, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::BR_CC, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::SELECT, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FADD, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FSUB, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FMUL, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FDIV, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FREM, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FMA, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FNEG, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FABS, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FCEIL, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FCOPYSIGN, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FCOS, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FP_EXTEND, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FFLOOR, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FNEARBYINT, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FPOW, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FPOWI, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FRINT, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FSIN, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FSINCOS, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FSQRT, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FEXP, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FEXP2, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FLOG, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FLOG2, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FLOG10, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FROUND, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FTRUNC, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FMINNUM, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FMAXNUM, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FMINIMUM, VT: MVT::f16, Action: Promote);
    setOperationAction(Op: ISD::FMAXIMUM, VT: MVT::f16, Action: Promote);

    setTargetDAGCombine({ISD::AND, ISD::OR, ISD::SRA, ISD::VSELECT, ISD::XOR});
  }

  // Scalar FP register classes and strict FP operations.
  if (!Subtarget.useSoftFloat()) {
    addRegisterClass(VT: MVT::f32, RC: &Mips::FGR32RegClass);

    // When dealing with single precision only, use libcalls
    if (!Subtarget.isSingleFloat()) {
      if (Subtarget.isFP64bit())
        addRegisterClass(VT: MVT::f64, RC: &Mips::FGR64RegClass);
      else
        addRegisterClass(VT: MVT::f64, RC: &Mips::AFGR64RegClass);
    }

    for (auto Op : {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
                    ISD::STRICT_FDIV, ISD::STRICT_FSQRT}) {
      setOperationAction(Op, VT: MVT::f32, Action: Legal);
      setOperationAction(Op, VT: MVT::f64, Action: Legal);
    }
  }

  // Targets with 64bits integer registers, but no 64bit floating point register
  // do not support conversion between them
  if (Subtarget.isGP64bit() && Subtarget.isSingleFloat() &&
      !Subtarget.useSoftFloat()) {
    setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::i64, Action: Expand);
    setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i64, Action: Expand);
    setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i64, Action: Expand);
    setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i64, Action: Expand);
  }

  // Accumulator-based (HI/LO) multiplies are custom-lowered; pre-r6 parts
  // use MipsISD::Mult/Multu nodes (see lowerMulDiv).
  setOperationAction(Op: ISD::SMUL_LOHI, VT: MVT::i32, Action: Custom);
  setOperationAction(Op: ISD::UMUL_LOHI, VT: MVT::i32, Action: Custom);
  setOperationAction(Op: ISD::MULHS, VT: MVT::i32, Action: Custom);
  setOperationAction(Op: ISD::MULHU, VT: MVT::i32, Action: Custom);

  if (Subtarget.hasCnMips())
    setOperationAction(Op: ISD::MUL, VT: MVT::i64, Action: Legal);
  else if (Subtarget.isR5900()) {
    // R5900 doesn't have DMULT/DMULTU/DDIV/DDIVU - expand to 32-bit ops
    setOperationAction(Op: ISD::MUL, VT: MVT::i64, Action: Expand);
    setOperationAction(Op: ISD::SMUL_LOHI, VT: MVT::i64, Action: Expand);
    setOperationAction(Op: ISD::UMUL_LOHI, VT: MVT::i64, Action: Expand);
    setOperationAction(Op: ISD::MULHS, VT: MVT::i64, Action: Expand);
    setOperationAction(Op: ISD::MULHU, VT: MVT::i64, Action: Expand);
    setOperationAction(Op: ISD::SDIVREM, VT: MVT::i64, Action: Expand);
    setOperationAction(Op: ISD::UDIVREM, VT: MVT::i64, Action: Expand);
  } else if (Subtarget.isGP64bit())
    setOperationAction(Op: ISD::MUL, VT: MVT::i64, Action: Custom);

  if (Subtarget.isGP64bit() && !Subtarget.isR5900()) {
    setOperationAction(Op: ISD::SMUL_LOHI, VT: MVT::i64, Action: Custom);
    setOperationAction(Op: ISD::UMUL_LOHI, VT: MVT::i64, Action: Custom);
    setOperationAction(Op: ISD::MULHS, VT: MVT::i64, Action: Custom);
    setOperationAction(Op: ISD::MULHU, VT: MVT::i64, Action: Custom);
    setOperationAction(Op: ISD::SDIVREM, VT: MVT::i64, Action: Custom);
    setOperationAction(Op: ISD::UDIVREM, VT: MVT::i64, Action: Custom);
  }

  setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::i64, Action: Custom);
  setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::i64, Action: Custom);

  setOperationAction(Op: ISD::SDIVREM, VT: MVT::i32, Action: Custom);
  setOperationAction(Op: ISD::UDIVREM, VT: MVT::i32, Action: Custom);
  setOperationAction(Op: ISD::ATOMIC_FENCE, VT: MVT::Other, Action: Custom);
  // Pre-r6 i32 load/store is custom to handle unaligned accesses via
  // lwl/lwr-style lowering; r6 hardware handles unaligned access itself.
  if (Subtarget.hasMips32r6()) {
    setOperationAction(Op: ISD::LOAD, VT: MVT::i32, Action: Legal);
    setOperationAction(Op: ISD::STORE, VT: MVT::i32, Action: Legal);
  } else {
    setOperationAction(Op: ISD::LOAD, VT: MVT::i32, Action: Custom);
    setOperationAction(Op: ISD::STORE, VT: MVT::i32, Action: Custom);
  }

  setTargetDAGCombine(ISD::MUL);

  setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::Other, Action: Custom);
  setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::Other, Action: Custom);
  setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::Other, Action: Custom);

  if (Subtarget.hasMips32r2() && !Subtarget.useSoftFloat() &&
      !Subtarget.hasMips64()) {
    setOperationAction(Op: ISD::BITCAST, VT: MVT::i64, Action: Custom);
  }

  if (NoDPLoadStore) {
    setOperationAction(Op: ISD::LOAD, VT: MVT::f64, Action: Custom);
    setOperationAction(Op: ISD::STORE, VT: MVT::f64, Action: Custom);
  }

  if (Subtarget.hasMips32r6()) {
    // MIPS32r6 replaces the accumulator-based multiplies with a three register
    // instruction
    setOperationAction(Op: ISD::SMUL_LOHI, VT: MVT::i32, Action: Expand);
    setOperationAction(Op: ISD::UMUL_LOHI, VT: MVT::i32, Action: Expand);
    setOperationAction(Op: ISD::MUL, VT: MVT::i32, Action: Legal);
    setOperationAction(Op: ISD::MULHS, VT: MVT::i32, Action: Legal);
    setOperationAction(Op: ISD::MULHU, VT: MVT::i32, Action: Legal);

    // MIPS32r6 replaces the accumulator-based division/remainder with separate
    // three register division and remainder instructions.
    setOperationAction(Op: ISD::SDIVREM, VT: MVT::i32, Action: Expand);
    setOperationAction(Op: ISD::UDIVREM, VT: MVT::i32, Action: Expand);
    setOperationAction(Op: ISD::SDIV, VT: MVT::i32, Action: Legal);
    setOperationAction(Op: ISD::UDIV, VT: MVT::i32, Action: Legal);
    setOperationAction(Op: ISD::SREM, VT: MVT::i32, Action: Legal);
    setOperationAction(Op: ISD::UREM, VT: MVT::i32, Action: Legal);

    // MIPS32r6 replaces conditional moves with an equivalent that removes the
    // need for three GPR read ports.
    setOperationAction(Op: ISD::SETCC, VT: MVT::i32, Action: Legal);
    setOperationAction(Op: ISD::SELECT, VT: MVT::i32, Action: Legal);
    setOperationAction(Op: ISD::SELECT_CC, VT: MVT::i32, Action: Expand);

    setOperationAction(Op: ISD::SETCC, VT: MVT::f32, Action: Legal);
    setOperationAction(Op: ISD::SELECT, VT: MVT::f32, Action: Legal);
    setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f32, Action: Expand);

    assert(Subtarget.isFP64bit() && "FR=1 is required for MIPS32r6");
    setOperationAction(Op: ISD::SETCC, VT: MVT::f64, Action: Legal);
    setOperationAction(Op: ISD::SELECT, VT: MVT::f64, Action: Legal);
    setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f64, Action: Expand);

    setOperationAction(Op: ISD::BRCOND, VT: MVT::Other, Action: Legal);

    // Floating point > and >= are supported via < and <=
    setCondCodeAction(CCs: ISD::SETOGE, VT: MVT::f32, Action: Expand);
    setCondCodeAction(CCs: ISD::SETOGT, VT: MVT::f32, Action: Expand);
    setCondCodeAction(CCs: ISD::SETUGE, VT: MVT::f32, Action: Expand);
    setCondCodeAction(CCs: ISD::SETUGT, VT: MVT::f32, Action: Expand);
    setCondCodeAction(CCs: ISD::SETONE, VT: MVT::f32, Action: Expand);
    setCondCodeAction(CCs: ISD::SETO, VT: MVT::f32, Action: Expand);
    setCondCodeAction(CCs: ISD::SETUNE, VT: MVT::f32, Action: Expand);
    setCondCodeAction(CCs: ISD::SETNE, VT: MVT::f32, Action: Expand);

    setCondCodeAction(CCs: ISD::SETOGE, VT: MVT::f64, Action: Expand);
    setCondCodeAction(CCs: ISD::SETOGT, VT: MVT::f64, Action: Expand);
    setCondCodeAction(CCs: ISD::SETUGE, VT: MVT::f64, Action: Expand);
    setCondCodeAction(CCs: ISD::SETUGT, VT: MVT::f64, Action: Expand);
    setCondCodeAction(CCs: ISD::SETONE, VT: MVT::f64, Action: Expand);
    setCondCodeAction(CCs: ISD::SETO, VT: MVT::f64, Action: Expand);
    setCondCodeAction(CCs: ISD::SETUNE, VT: MVT::f64, Action: Expand);
    setCondCodeAction(CCs: ISD::SETNE, VT: MVT::f64, Action: Expand);
  }

  if (Subtarget.hasMips64r6()) {
    // MIPS64r6 replaces the accumulator-based multiplies with a three register
    // instruction
    setOperationAction(Op: ISD::SMUL_LOHI, VT: MVT::i64, Action: Expand);
    setOperationAction(Op: ISD::UMUL_LOHI, VT: MVT::i64, Action: Expand);
    setOperationAction(Op: ISD::MUL, VT: MVT::i64, Action: Legal);
    setOperationAction(Op: ISD::MULHS, VT: MVT::i64, Action: Legal);
    setOperationAction(Op: ISD::MULHU, VT: MVT::i64, Action: Legal);

    // MIPS32r6 replaces the accumulator-based division/remainder with separate
    // three register division and remainder instructions.
    setOperationAction(Op: ISD::SDIVREM, VT: MVT::i64, Action: Expand);
    setOperationAction(Op: ISD::UDIVREM, VT: MVT::i64, Action: Expand);
    setOperationAction(Op: ISD::SDIV, VT: MVT::i64, Action: Legal);
    setOperationAction(Op: ISD::UDIV, VT: MVT::i64, Action: Legal);
    setOperationAction(Op: ISD::SREM, VT: MVT::i64, Action: Legal);
    setOperationAction(Op: ISD::UREM, VT: MVT::i64, Action: Legal);

    // MIPS64r6 replaces conditional moves with an equivalent that removes the
    // need for three GPR read ports.
    setOperationAction(Op: ISD::SETCC, VT: MVT::i64, Action: Legal);
    setOperationAction(Op: ISD::SELECT, VT: MVT::i64, Action: Legal);
    setOperationAction(Op: ISD::SELECT_CC, VT: MVT::i64, Action: Expand);
  }

  if (Subtarget.isR5900()) {
    // R5900 FPU only supports 4 compare conditions: C.F, C.EQ, C.OLT, C.OLE
    // (and their inversions via bc1t/bc1f). Expand all conditions that would
    // require C.UN, C.UEQ, C.ULT, or C.ULE instructions (not available on
    // R5900). The legalizer resolves these via operand swapping, condition
    // inversion, and decomposition into supported conditions.
    setCondCodeAction(CCs: ISD::SETOGT, VT: MVT::f32, Action: Expand);
    setCondCodeAction(CCs: ISD::SETOGE, VT: MVT::f32, Action: Expand);
    setCondCodeAction(CCs: ISD::SETGT, VT: MVT::f32, Action: Expand);
    setCondCodeAction(CCs: ISD::SETGE, VT: MVT::f32, Action: Expand);
    setCondCodeAction(CCs: ISD::SETULT, VT: MVT::f32, Action: Expand);
    setCondCodeAction(CCs: ISD::SETULE, VT: MVT::f32, Action: Expand);
    setCondCodeAction(CCs: ISD::SETUO, VT: MVT::f32, Action: Expand);
    setCondCodeAction(CCs: ISD::SETO, VT: MVT::f32, Action: Expand);
    setCondCodeAction(CCs: ISD::SETONE, VT: MVT::f32, Action: Expand);
    setCondCodeAction(CCs: ISD::SETUEQ, VT: MVT::f32, Action: Expand);
    setCondCodeAction(CCs: ISD::SETNE, VT: MVT::f32, Action: Expand);

    // R5900 FPU does not support IEEE 754 special values (NaN, infinity). Use
    // custom lowering to decide per-instruction: hardware when nnan+ninf flags
    // guarantee no NaN or infinity, software libcall otherwise.
    setOperationAction(Op: ISD::FADD, VT: MVT::f32, Action: Custom);
    setOperationAction(Op: ISD::FSUB, VT: MVT::f32, Action: Custom);
    setOperationAction(Op: ISD::FMUL, VT: MVT::f32, Action: Custom);
    setOperationAction(Op: ISD::FDIV, VT: MVT::f32, Action: Custom);
    setOperationAction(Op: ISD::FSQRT, VT: MVT::f32, Action: Custom);
  }

  // Must run after all actions are set; derives register properties used by
  // the selector.
  computeRegisterProperties(TRI: Subtarget.getRegisterInfo());
}
395
396const MipsTargetLowering *
397llvm::createMipsSETargetLowering(const MipsTargetMachine &TM,
398 const MipsSubtarget &STI) {
399 return new MipsSETargetLowering(TM, STI);
400}
401
402const TargetRegisterClass *
403MipsSETargetLowering::getRepRegClassFor(MVT VT) const {
404 if (VT == MVT::Untyped)
405 return Subtarget.hasDSP() ? &Mips::ACC64DSPRegClass : &Mips::ACC64RegClass;
406
407 return TargetLowering::getRepRegClassFor(VT);
408}
409
410// Enable MSA support for the given integer type and Register class.
411void MipsSETargetLowering::
412addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
413 addRegisterClass(VT: Ty, RC);
414
415 // Expand all builtin opcodes.
416 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
417 setOperationAction(Op: Opc, VT: Ty, Action: Expand);
418
419 setOperationAction(Op: ISD::BITCAST, VT: Ty, Action: Legal);
420 setOperationAction(Op: ISD::LOAD, VT: Ty, Action: Legal);
421 setOperationAction(Op: ISD::STORE, VT: Ty, Action: Legal);
422 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: Ty, Action: Custom);
423 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: Ty, Action: Legal);
424 setOperationAction(Op: ISD::BUILD_VECTOR, VT: Ty, Action: Custom);
425 setOperationAction(Op: ISD::UNDEF, VT: Ty, Action: Legal);
426
427 setOperationAction(Op: ISD::ADD, VT: Ty, Action: Legal);
428 setOperationAction(Op: ISD::AND, VT: Ty, Action: Legal);
429 setOperationAction(Op: ISD::CTLZ, VT: Ty, Action: Legal);
430 setOperationAction(Op: ISD::CTPOP, VT: Ty, Action: Legal);
431 setOperationAction(Op: ISD::MUL, VT: Ty, Action: Legal);
432 setOperationAction(Op: ISD::OR, VT: Ty, Action: Legal);
433 setOperationAction(Op: ISD::SDIV, VT: Ty, Action: Legal);
434 setOperationAction(Op: ISD::SREM, VT: Ty, Action: Legal);
435 setOperationAction(Op: ISD::SHL, VT: Ty, Action: Legal);
436 setOperationAction(Op: ISD::SRA, VT: Ty, Action: Legal);
437 setOperationAction(Op: ISD::SRL, VT: Ty, Action: Legal);
438 setOperationAction(Op: ISD::SUB, VT: Ty, Action: Legal);
439 setOperationAction(Op: ISD::SMAX, VT: Ty, Action: Legal);
440 setOperationAction(Op: ISD::SMIN, VT: Ty, Action: Legal);
441 setOperationAction(Op: ISD::UDIV, VT: Ty, Action: Legal);
442 setOperationAction(Op: ISD::UREM, VT: Ty, Action: Legal);
443 setOperationAction(Op: ISD::UMAX, VT: Ty, Action: Legal);
444 setOperationAction(Op: ISD::UMIN, VT: Ty, Action: Legal);
445 setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: Ty, Action: Custom);
446 setOperationAction(Op: ISD::VSELECT, VT: Ty, Action: Legal);
447 setOperationAction(Op: ISD::XOR, VT: Ty, Action: Legal);
448
449 if (Ty == MVT::v4i32 || Ty == MVT::v2i64) {
450 setOperationAction(Op: ISD::FP_TO_SINT, VT: Ty, Action: Legal);
451 setOperationAction(Op: ISD::FP_TO_UINT, VT: Ty, Action: Legal);
452 setOperationAction(Op: ISD::SINT_TO_FP, VT: Ty, Action: Legal);
453 setOperationAction(Op: ISD::UINT_TO_FP, VT: Ty, Action: Legal);
454 }
455
456 setOperationAction(Op: ISD::SETCC, VT: Ty, Action: Legal);
457 setCondCodeAction(CCs: ISD::SETNE, VT: Ty, Action: Expand);
458 setCondCodeAction(CCs: ISD::SETGE, VT: Ty, Action: Expand);
459 setCondCodeAction(CCs: ISD::SETGT, VT: Ty, Action: Expand);
460 setCondCodeAction(CCs: ISD::SETUGE, VT: Ty, Action: Expand);
461 setCondCodeAction(CCs: ISD::SETUGT, VT: Ty, Action: Expand);
462}
463
464// Enable MSA support for the given floating-point type and Register class.
465void MipsSETargetLowering::
466addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
467 addRegisterClass(VT: Ty, RC);
468
469 // Expand all builtin opcodes.
470 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
471 setOperationAction(Op: Opc, VT: Ty, Action: Expand);
472
473 setOperationAction(Op: ISD::LOAD, VT: Ty, Action: Legal);
474 setOperationAction(Op: ISD::STORE, VT: Ty, Action: Legal);
475 setOperationAction(Op: ISD::BITCAST, VT: Ty, Action: Legal);
476 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: Ty, Action: Legal);
477 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: Ty, Action: Legal);
478 setOperationAction(Op: ISD::BUILD_VECTOR, VT: Ty, Action: Custom);
479
480 if (Ty != MVT::v8f16) {
481 setOperationAction(Op: ISD::FABS, VT: Ty, Action: Legal);
482 setOperationAction(Op: ISD::FADD, VT: Ty, Action: Legal);
483 setOperationAction(Op: ISD::FDIV, VT: Ty, Action: Legal);
484 setOperationAction(Op: ISD::FEXP2, VT: Ty, Action: Legal);
485 setOperationAction(Op: ISD::FLOG2, VT: Ty, Action: Legal);
486 setOperationAction(Op: ISD::FMA, VT: Ty, Action: Legal);
487 setOperationAction(Op: ISD::FMUL, VT: Ty, Action: Legal);
488 setOperationAction(Op: ISD::FRINT, VT: Ty, Action: Legal);
489 setOperationAction(Op: ISD::FSQRT, VT: Ty, Action: Legal);
490 setOperationAction(Op: ISD::FSUB, VT: Ty, Action: Legal);
491 setOperationAction(Op: ISD::VSELECT, VT: Ty, Action: Legal);
492
493 setOperationAction(Op: ISD::SETCC, VT: Ty, Action: Legal);
494 setCondCodeAction(CCs: ISD::SETOGE, VT: Ty, Action: Expand);
495 setCondCodeAction(CCs: ISD::SETOGT, VT: Ty, Action: Expand);
496 setCondCodeAction(CCs: ISD::SETUGE, VT: Ty, Action: Expand);
497 setCondCodeAction(CCs: ISD::SETUGT, VT: Ty, Action: Expand);
498 setCondCodeAction(CCs: ISD::SETGE, VT: Ty, Action: Expand);
499 setCondCodeAction(CCs: ISD::SETGT, VT: Ty, Action: Expand);
500 }
501}
502
503SDValue MipsSETargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
504 if(!Subtarget.hasMips32r6())
505 return MipsTargetLowering::LowerOperation(Op, DAG);
506
507 EVT ResTy = Op->getValueType(ResNo: 0);
508 SDLoc DL(Op);
509
510 // Although MTC1_D64 takes an i32 and writes an f64, the upper 32 bits of the
511 // floating point register are undefined. Not really an issue as sel.d, which
512 // is produced from an FSELECT node, only looks at bit 0.
513 SDValue Tmp = DAG.getNode(Opcode: MipsISD::MTC1_D64, DL, VT: MVT::f64, Operand: Op->getOperand(Num: 0));
514 return DAG.getNode(Opcode: MipsISD::FSELECT, DL, VT: ResTy, N1: Tmp, N2: Op->getOperand(Num: 1),
515 N3: Op->getOperand(Num: 2));
516}
517
518bool MipsSETargetLowering::allowsMisalignedMemoryAccesses(
519 EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
520 MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;
521
522 if (Subtarget.systemSupportsUnalignedAccess()) {
523 // MIPS32r6/MIPS64r6 is required to support unaligned access. It's
524 // implementation defined whether this is handled by hardware, software, or
525 // a hybrid of the two but it's expected that most implementations will
526 // handle the majority of cases in hardware.
527 if (Fast)
528 *Fast = 1;
529 return true;
530 } else if (Subtarget.hasMips32r6()) {
531 return false;
532 }
533
534 switch (SVT) {
535 case MVT::i64:
536 case MVT::i32:
537 if (Fast)
538 *Fast = 1;
539 return true;
540 default:
541 return false;
542 }
543}
544
// Dispatch custom-lowered operations (marked Custom in the ctor) to their
// handlers; anything not matched falls through to the base Mips lowering.
// The FADD..FSQRT cases are only reached on R5900, where scalar f32
// arithmetic is marked Custom (see the ctor).
SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
                                             SelectionDAG &DAG) const {
  switch(Op.getOpcode()) {
  case ISD::LOAD: return lowerLOAD(Op, DAG);
  case ISD::STORE: return lowerSTORE(Op, DAG);
  // Accumulator-based multiply/divide: HasLo/HasHi select which halves of
  // the HI/LO pair the node produces.
  case ISD::SMUL_LOHI: return lowerMulDiv(Op, NewOpc: MipsISD::Mult, HasLo: true, HasHi: true, DAG);
  case ISD::UMUL_LOHI: return lowerMulDiv(Op, NewOpc: MipsISD::Multu, HasLo: true, HasHi: true, DAG);
  case ISD::MULHS: return lowerMulDiv(Op, NewOpc: MipsISD::Mult, HasLo: false, HasHi: true, DAG);
  case ISD::MULHU: return lowerMulDiv(Op, NewOpc: MipsISD::Multu, HasLo: false, HasHi: true, DAG);
  case ISD::MUL: return lowerMulDiv(Op, NewOpc: MipsISD::Mult, HasLo: true, HasHi: false, DAG);
  case ISD::SDIVREM: return lowerMulDiv(Op, NewOpc: MipsISD::DivRem, HasLo: true, HasHi: true, DAG);
  case ISD::UDIVREM: return lowerMulDiv(Op, NewOpc: MipsISD::DivRemU, HasLo: true, HasHi: true,
                                        DAG);
  case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID: return lowerINTRINSIC_VOID(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::SELECT: return lowerSELECT(Op, DAG);
  case ISD::BITCAST: return lowerBITCAST(Op, DAG);
  // R5900-only: scalar f32 arithmetic, hardware or libcall per-instruction.
  case ISD::FADD:
    return lowerR5900FPOp(Op, DAG, LC: RTLIB::ADD_F32);
  case ISD::FSUB:
    return lowerR5900FPOp(Op, DAG, LC: RTLIB::SUB_F32);
  case ISD::FMUL:
    return lowerR5900FPOp(Op, DAG, LC: RTLIB::MUL_F32);
  case ISD::FDIV:
    return lowerR5900FPOp(Op, DAG, LC: RTLIB::DIV_F32);
  case ISD::FSQRT:
    return lowerR5900FPOp(Op, DAG, LC: RTLIB::SQRT_F32);
  }

  return MipsTargetLowering::LowerOperation(Op, DAG);
}
580
// R5900 scalar f32 arithmetic: keep the node (hardware FPU instruction) when
// the fast-math flags guarantee no NaN/infinity are involved; otherwise emit
// the given runtime-library call for IEEE-correct results, since the R5900
// FPU does not implement IEEE 754 special values.
SDValue MipsSETargetLowering::lowerR5900FPOp(SDValue Op, SelectionDAG &DAG,
                                             RTLIB::Libcall LC) const {
  assert(Subtarget.isR5900());
  SDNodeFlags Flags = Op->getFlags();

  if (Flags.hasNoNaNs() && Flags.hasNoInfs()) {
    // Use the hardware FPU instruction if the operation is guaranteed to have
    // no NaN or infinity inputs/outputs (nnan+ninf flags).
    // Returning Op unchanged tells the legalizer the node is fine as-is.
    return Op;
  }

  // Fall back to a software libcall for IEEE correctness.
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  // Forward all of the node's operands as the call arguments.
  SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
  TargetLowering::MakeLibCallOptions CallOptions;
  // Chain is unused: these are non-strict FP nodes with no chain operand.
  auto [Result, Chain] = makeLibCall(DAG, LC, RetVT: VT, Ops, CallOptions, dl: DL);
  return Result;
}
600
// Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT
//
// Performs the following transformations:
// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its
//   sign/zero-extension is completely overwritten by the new one performed by
//   the ISD::AND.
// - Removes redundant zero extensions performed by an ISD::AND.
static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  // VEXTRACT_*_ELT nodes only exist when MSA is available.
  if (!Subtarget.hasMSA())
    return SDValue();

  SDValue Op0 = N->getOperand(Num: 0);
  SDValue Op1 = N->getOperand(Num: 1);
  unsigned Op0Opcode = Op0->getOpcode();

  // (and (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d)
  // where $d + 1 == 2^n and n == 32
  // or    $d + 1 == 2^n and n <= 32 and ZExt
  // -> (MipsVExtractZExt $a, $b, $c)
  if (Op0Opcode == MipsISD::VEXTRACT_SEXT_ELT ||
      Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT) {
    // The AND mask must be a constant for the fold to apply.
    ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Val&: Op1);

    if (!Mask)
      return SDValue();

    // If Mask == 2^n - 1, the AND keeps exactly the low n bits.
    int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2();

    if (Log2IfPositive <= 0)
      return SDValue(); // Mask+1 is not a power of 2

    // Operand 2 is a VTSDNode carrying the element's extension type.
    SDValue Op0Op2 = Op0->getOperand(Num: 2);
    EVT ExtendTy = cast<VTSDNode>(Val&: Op0Op2)->getVT();
    unsigned ExtendTySize = ExtendTy.getSizeInBits();
    unsigned Log2 = Log2IfPositive;

    // Safe to fold when the mask keeps at least the extracted element's bits
    // (ZExt case) or exactly its bits (SExt case, where the AND discards the
    // sign-extended upper bits and so turns the node into a zero-extension).
    if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && Log2 >= ExtendTySize) ||
        Log2 == ExtendTySize) {
      SDValue Ops[] = { Op0->getOperand(Num: 0), Op0->getOperand(Num: 1), Op0Op2 };
      return DAG.getNode(Opcode: MipsISD::VEXTRACT_ZEXT_ELT, DL: SDLoc(Op0),
                         VTList: Op0->getVTList(),
                         Ops: ArrayRef(Ops, Op0->getNumOperands()));
    }
  }

  return SDValue();
}
650
651// Determine if the specified node is a constant vector splat.
652//
653// Returns true and sets Imm if:
654// * N is a ISD::BUILD_VECTOR representing a constant splat
655//
656// This function is quite similar to MipsSEDAGToDAGISel::selectVSplat. The
657// differences are that it assumes the MSA has already been checked and the
658// arbitrary requirement for a maximum of 32-bit integers isn't applied (and
659// must not be in order for binsri.d to be selectable).
660static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) {
661 BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(Val: N.getNode());
662
663 if (!Node)
664 return false;
665
666 APInt SplatValue, SplatUndef;
667 unsigned SplatBitSize;
668 bool HasAnyUndefs;
669
670 if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
671 MinSplatBits: 8, isBigEndian: !IsLittleEndian))
672 return false;
673
674 Imm = SplatValue;
675
676 return true;
677}
678
679// Test whether the given node is an all-ones build_vector.
680static bool isVectorAllOnes(SDValue N) {
681 // Look through bitcasts. Endianness doesn't matter because we are looking
682 // for an all-ones value.
683 if (N->getOpcode() == ISD::BITCAST)
684 N = N->getOperand(Num: 0);
685
686 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Val&: N);
687
688 if (!BVN)
689 return false;
690
691 APInt SplatValue, SplatUndef;
692 unsigned SplatBitSize;
693 bool HasAnyUndefs;
694
695 // Endianness doesn't matter in this context because we are looking for
696 // an all-ones value.
697 if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs))
698 return SplatValue.isAllOnes();
699
700 return false;
701}
702
703// Test whether N is the bitwise inverse of OfNode.
704static bool isBitwiseInverse(SDValue N, SDValue OfNode) {
705 if (N->getOpcode() != ISD::XOR)
706 return false;
707
708 if (isVectorAllOnes(N: N->getOperand(Num: 0)))
709 return N->getOperand(Num: 1) == OfNode;
710
711 if (isVectorAllOnes(N: N->getOperand(Num: 1)))
712 return N->getOperand(Num: 0) == OfNode;
713
714 return false;
715}
716
// Perform combines where ISD::OR is the root node.
//
// Performs the following transformations:
// - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b)
//   where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit
//   vector type.
static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
                                TargetLowering::DAGCombinerInfo &DCI,
                                const MipsSubtarget &Subtarget) {
  // The result is a VSELECT, which only MSA provides a good lowering for.
  if (!Subtarget.hasMSA())
    return SDValue();

  EVT Ty = N->getValueType(ResNo: 0);

  if (!Ty.is128BitVector())
    return SDValue();

  SDValue Op0 = N->getOperand(Num: 0);
  SDValue Op1 = N->getOperand(Num: 1);

  // Both sides of the OR must be ANDs for the mask/inverse-mask pattern.
  if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) {
    SDValue Op0Op0 = Op0->getOperand(Num: 0);
    SDValue Op0Op1 = Op0->getOperand(Num: 1);
    SDValue Op1Op0 = Op1->getOperand(Num: 0);
    SDValue Op1Op1 = Op1->getOperand(Num: 1);
    // NOTE(review): this stores the *inverse* of Subtarget.isLittle(), and
    // isVSplat negates it once more when computing isBigEndian, so the splat
    // query effectively receives isBigEndian == Subtarget.isLittle(). This
    // double negation looks inverted — confirm the intended endianness
    // handling before relying on it.
    bool IsLittleEndian = !Subtarget.isLittle();

    SDValue IfSet, IfClr, Cond;
    bool IsConstantMask = false;
    APInt Mask, InvMask;

    // If Op0Op0 is an appropriate mask, try to find it's inverse in either
    // Op1Op0, or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes, while
    // looking.
    // IfClr will be set if we find a valid match.
    if (isVSplat(N: Op0Op0, Imm&: Mask, IsLittleEndian)) {
      Cond = Op0Op0;
      IfSet = Op0Op1;

      if (isVSplat(N: Op1Op0, Imm&: InvMask, IsLittleEndian) &&
          Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op1;
      else if (isVSplat(N: Op1Op1, Imm&: InvMask, IsLittleEndian) &&
               Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op0;

      IsConstantMask = true;
    }

    // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the same
    // thing again using this mask.
    // IfClr will be set if we find a valid match.
    if (!IfClr.getNode() && isVSplat(N: Op0Op1, Imm&: Mask, IsLittleEndian)) {
      Cond = Op0Op1;
      IfSet = Op0Op0;

      if (isVSplat(N: Op1Op0, Imm&: InvMask, IsLittleEndian) &&
          Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op1;
      else if (isVSplat(N: Op1Op1, Imm&: InvMask, IsLittleEndian) &&
               Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op0;

      IsConstantMask = true;
    }

    // If IfClr is not yet set, try looking for a non-constant match.
    // IfClr will be set if we find a valid match amongst the eight
    // possibilities.
    if (!IfClr.getNode()) {
      if (isBitwiseInverse(N: Op0Op0, OfNode: Op1Op0)) {
        Cond = Op1Op0;
        IfSet = Op1Op1;
        IfClr = Op0Op1;
      } else if (isBitwiseInverse(N: Op0Op1, OfNode: Op1Op0)) {
        Cond = Op1Op0;
        IfSet = Op1Op1;
        IfClr = Op0Op0;
      } else if (isBitwiseInverse(N: Op0Op0, OfNode: Op1Op1)) {
        Cond = Op1Op1;
        IfSet = Op1Op0;
        IfClr = Op0Op1;
      } else if (isBitwiseInverse(N: Op0Op1, OfNode: Op1Op1)) {
        Cond = Op1Op1;
        IfSet = Op1Op0;
        IfClr = Op0Op0;
      } else if (isBitwiseInverse(N: Op1Op0, OfNode: Op0Op0)) {
        Cond = Op0Op0;
        IfSet = Op0Op1;
        IfClr = Op1Op1;
      } else if (isBitwiseInverse(N: Op1Op1, OfNode: Op0Op0)) {
        Cond = Op0Op0;
        IfSet = Op0Op1;
        IfClr = Op1Op0;
      } else if (isBitwiseInverse(N: Op1Op0, OfNode: Op0Op1)) {
        Cond = Op0Op1;
        IfSet = Op0Op0;
        IfClr = Op1Op1;
      } else if (isBitwiseInverse(N: Op1Op1, OfNode: Op0Op1)) {
        Cond = Op0Op1;
        IfSet = Op0Op0;
        IfClr = Op1Op0;
      }
    }

    // At this point, IfClr will be set if we have a valid match.
    if (!IfClr.getNode())
      return SDValue();

    assert(Cond.getNode() && IfSet.getNode());

    // Fold degenerate cases.
    if (IsConstantMask) {
      if (Mask.isAllOnes())
        return IfSet;
      else if (Mask == 0)
        return IfClr;
    }

    // Transform the DAG into an equivalent VSELECT.
    return DAG.getNode(Opcode: ISD::VSELECT, DL: SDLoc(N), VT: Ty, N1: Cond, N2: IfSet, N3: IfClr);
  }

  return SDValue();
}
842
// Heuristic: decide whether expanding a multiply by constant C into
// shift/add/sub sequences (see genConstMult) is likely profitable on this
// subtarget. Returns false when the estimated expansion is too long.
static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT,
                                               SelectionDAG &DAG,
                                               const MipsSubtarget &Subtarget) {
  // Estimate the number of operations the below transform will turn a
  // constant multiply into. The number is approximately equal to the minimal
  // number of powers of two that constant can be broken down to by adding
  // or subtracting them.
  //
  // If we have taken more than 12[1] / 8[2] steps to attempt the
  // optimization for a native sized value, it is more than likely that this
  // optimization will make things worse.
  //
  // [1] MIPS64 requires 6 instructions at most to materialize any constant,
  //     multiplication requires at least 4 cycles, but another cycle (or two)
  //     to retrieve the result from the HI/LO registers.
  //
  // [2] For MIPS32, more than 8 steps is expensive as the constant could be
  //     materialized in 2 instructions, multiplication requires at least 4
  //     cycles, but another cycle (or two) to retrieve the result from the
  //     HI/LO registers.
  //
  // TODO:
  // - MaxSteps needs to consider the `VT` of the constant for the current
  //   target.
  // - Consider to perform this optimization after type legalization.
  //   That allows to remove a workaround for types not supported natively.
  // - Take in account `-Os, -Oz` flags because this optimization
  //   increases code size.
  unsigned MaxSteps = Subtarget.isABI_O32() ? 8 : 12;

  // Work-list simulation of genConstMult's recursion: each entry is a value
  // still to be decomposed into powers of two.
  SmallVector<APInt, 16> WorkStack(1, C);
  unsigned Steps = 0;
  unsigned BitWidth = C.getBitWidth();

  while (!WorkStack.empty()) {
    APInt Val = WorkStack.pop_back_val();

    // 0 and 1 are free (constant / identity), no step consumed.
    if (Val == 0 || Val == 1)
      continue;

    if (Steps >= MaxSteps)
      return false;

    // A power of two is a single shift.
    if (Val.isPowerOf2()) {
      ++Steps;
      continue;
    }

    // Bracket Val between the nearest powers of two below and above it.
    APInt Floor = APInt(BitWidth, 1) << Val.logBase2();
    // NOTE(review): the ceiling here is computed from C (the original
    // constant) via C.ceilLogBase2(), whereas genConstMult computes it from
    // the current value being decomposed. For intermediate work-list values
    // this estimate can therefore diverge from the actual expansion —
    // confirm whether this is intentional or should be Val.ceilLogBase2().
    APInt Ceil = Val.isNegative() ? APInt(BitWidth, 0)
                                  : APInt(BitWidth, 1) << C.ceilLogBase2();
    // Recurse (iteratively) on whichever neighbour leaves the smaller
    // remainder, mirroring the add/sub choice in genConstMult.
    if ((Val - Floor).ule(RHS: Ceil - Val)) {
      WorkStack.push_back(Elt: Floor);
      WorkStack.push_back(Elt: Val - Floor);
    } else {
      WorkStack.push_back(Elt: Ceil);
      WorkStack.push_back(Elt: Ceil - Val);
    }

    ++Steps;
  }

  // If the value being multiplied is not supported natively, we have to pay
  // an additional legalization cost, conservatively assume an increase in the
  // cost of 3 instructions per step. This values for this heuristic were
  // determined experimentally.
  unsigned RegisterSize = DAG.getTargetLoweringInfo()
                              .getRegisterType(Context&: *DAG.getContext(), VT)
                              .getSizeInBits();
  // NOTE(review): '*=' zeroes Steps when VT is natively sized (the factor is
  // 0), so the check below only ever rejects non-native types (where Steps is
  // tripled). That reads oddly against the "increase in cost" wording above —
  // verify the intent.
  Steps *= (VT.getSizeInBits() != RegisterSize) * 3;
  if (Steps > 27)
    return false;

  return true;
}
918
919static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT,
920 EVT ShiftTy, SelectionDAG &DAG) {
921 // Return 0.
922 if (C == 0)
923 return DAG.getConstant(Val: 0, DL, VT);
924
925 // Return x.
926 if (C == 1)
927 return X;
928
929 // If c is power of 2, return (shl x, log2(c)).
930 if (C.isPowerOf2())
931 return DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X,
932 N2: DAG.getConstant(Val: C.logBase2(), DL, VT: ShiftTy));
933
934 unsigned BitWidth = C.getBitWidth();
935 APInt Floor = APInt(BitWidth, 1) << C.logBase2();
936 APInt Ceil = C.isNegative() ? APInt(BitWidth, 0) :
937 APInt(BitWidth, 1) << C.ceilLogBase2();
938
939 // If |c - floor_c| <= |c - ceil_c|,
940 // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))),
941 // return (add constMult(x, floor_c), constMult(x, c - floor_c)).
942 if ((C - Floor).ule(RHS: Ceil - C)) {
943 SDValue Op0 = genConstMult(X, C: Floor, DL, VT, ShiftTy, DAG);
944 SDValue Op1 = genConstMult(X, C: C - Floor, DL, VT, ShiftTy, DAG);
945 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op0, N2: Op1);
946 }
947
948 // If |c - floor_c| > |c - ceil_c|,
949 // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)).
950 SDValue Op0 = genConstMult(X, C: Ceil, DL, VT, ShiftTy, DAG);
951 SDValue Op1 = genConstMult(X, C: Ceil - C, DL, VT, ShiftTy, DAG);
952 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op0, N2: Op1);
953}
954
955static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
956 const TargetLowering::DAGCombinerInfo &DCI,
957 const MipsSETargetLowering *TL,
958 const MipsSubtarget &Subtarget) {
959 EVT VT = N->getValueType(ResNo: 0);
960
961 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1)))
962 if (!VT.isVector() && shouldTransformMulToShiftsAddsSubs(
963 C: C->getAPIntValue(), VT, DAG, Subtarget))
964 return genConstMult(X: N->getOperand(Num: 0), C: C->getAPIntValue(), DL: SDLoc(N), VT,
965 ShiftTy: TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT),
966 DAG);
967
968 return SDValue(N, 0);
969}
970
971static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty,
972 SelectionDAG &DAG,
973 const MipsSubtarget &Subtarget) {
974 // See if this is a vector splat immediate node.
975 APInt SplatValue, SplatUndef;
976 unsigned SplatBitSize;
977 bool HasAnyUndefs;
978 unsigned EltSize = Ty.getScalarSizeInBits();
979 BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Val: N->getOperand(Num: 1));
980
981 if (!Subtarget.hasDSP())
982 return SDValue();
983
984 if (!BV ||
985 !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
986 MinSplatBits: EltSize, isBigEndian: !Subtarget.isLittle()) ||
987 (SplatBitSize != EltSize) ||
988 (SplatValue.getZExtValue() >= EltSize))
989 return SDValue();
990
991 SDLoc DL(N);
992 return DAG.getNode(Opcode: Opc, DL, VT: Ty, N1: N->getOperand(Num: 0),
993 N2: DAG.getConstant(Val: SplatValue.getZExtValue(), DL, VT: MVT::i32));
994}
995
996static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG,
997 TargetLowering::DAGCombinerInfo &DCI,
998 const MipsSubtarget &Subtarget) {
999 EVT Ty = N->getValueType(ResNo: 0);
1000
1001 if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
1002 return SDValue();
1003
1004 return performDSPShiftCombine(Opc: MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget);
1005}
1006
// Fold sign-extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold
// constant splats into MipsISD::SHRA_DSP for DSPr2.
//
// Performs the following transformations:
// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its
//   sign/zero-extension is completely overwritten by the new one performed by
//   the ISD::SRA and ISD::SHL nodes.
// - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL
//   sequence.
//
// See performDSPShiftCombine for more information about the transformation
// used for DSPr2.
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(ResNo: 0);

  if (Subtarget.hasMSA()) {
    SDValue Op0 = N->getOperand(Num: 0);
    SDValue Op1 = N->getOperand(Num: 1);

    // (sra (shl (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d)
    // where $d + sizeof($c) == 32
    // or    $d + sizeof($c) <= 32 and SExt
    // -> (MipsVExtractSExt $a, $b, $c)
    if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(Num: 1)) {
      SDValue Op0Op0 = Op0->getOperand(Num: 0);
      ConstantSDNode *ShAmount = dyn_cast<ConstantSDNode>(Val&: Op1);

      // Once the (shl ..., $d) shape matched, a non-constant shift amount
      // bails out entirely (no fallthrough to the DSP path below).
      if (!ShAmount)
        return SDValue();

      if (Op0Op0->getOpcode() != MipsISD::VEXTRACT_SEXT_ELT &&
          Op0Op0->getOpcode() != MipsISD::VEXTRACT_ZEXT_ELT)
        return SDValue();

      // TotalBits = shift amount + the width of the element's extension.
      EVT ExtendTy = cast<VTSDNode>(Val: Op0Op0->getOperand(Num: 2))->getVT();
      unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits();

      // The shl/sra pair fully re-sign-extends the extracted element, so the
      // extract's own extension kind can be replaced with a sign-extension.
      if (TotalBits == 32 ||
          (Op0Op0->getOpcode() == MipsISD::VEXTRACT_SEXT_ELT &&
           TotalBits <= 32)) {
        SDValue Ops[] = { Op0Op0->getOperand(Num: 0), Op0Op0->getOperand(Num: 1),
                          Op0Op0->getOperand(Num: 2) };
        return DAG.getNode(Opcode: MipsISD::VEXTRACT_SEXT_ELT, DL: SDLoc(Op0Op0),
                           VTList: Op0Op0->getVTList(),
                           Ops: ArrayRef(Ops, Op0Op0->getNumOperands()));
      }
    }
  }

  // DSP path: v2i16 always qualifies, v4i8 additionally requires DSPr2.
  if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget.hasDSPR2()))
    return SDValue();

  return performDSPShiftCombine(Opc: MipsISD::SHRA_DSP, N, Ty, DAG, Subtarget);
}
1063
1064
1065static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
1066 TargetLowering::DAGCombinerInfo &DCI,
1067 const MipsSubtarget &Subtarget) {
1068 EVT Ty = N->getValueType(ResNo: 0);
1069
1070 if (((Ty != MVT::v2i16) || !Subtarget.hasDSPR2()) && (Ty != MVT::v4i8))
1071 return SDValue();
1072
1073 return performDSPShiftCombine(Opc: MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget);
1074}
1075
1076static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC) {
1077 bool IsV216 = (Ty == MVT::v2i16);
1078
1079 switch (CC) {
1080 case ISD::SETEQ:
1081 case ISD::SETNE: return true;
1082 case ISD::SETLT:
1083 case ISD::SETLE:
1084 case ISD::SETGT:
1085 case ISD::SETGE: return IsV216;
1086 case ISD::SETULT:
1087 case ISD::SETULE:
1088 case ISD::SETUGT:
1089 case ISD::SETUGE: return !IsV216;
1090 default: return false;
1091 }
1092}
1093
1094static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
1095 EVT Ty = N->getValueType(ResNo: 0);
1096
1097 if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
1098 return SDValue();
1099
1100 if (!isLegalDSPCondCode(Ty, CC: cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get()))
1101 return SDValue();
1102
1103 return DAG.getNode(Opcode: MipsISD::SETCC_DSP, DL: SDLoc(N), VT: Ty, N1: N->getOperand(Num: 0),
1104 N2: N->getOperand(Num: 1), N3: N->getOperand(Num: 2));
1105}
1106
1107static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) {
1108 EVT Ty = N->getValueType(ResNo: 0);
1109
1110 if (Ty == MVT::v2i16 || Ty == MVT::v4i8) {
1111 SDValue SetCC = N->getOperand(Num: 0);
1112
1113 if (SetCC.getOpcode() != MipsISD::SETCC_DSP)
1114 return SDValue();
1115
1116 return DAG.getNode(Opcode: MipsISD::SELECT_CC_DSP, DL: SDLoc(N), VT: Ty,
1117 N1: SetCC.getOperand(i: 0), N2: SetCC.getOperand(i: 1),
1118 N3: N->getOperand(Num: 1), N4: N->getOperand(Num: 2), N5: SetCC.getOperand(i: 2));
1119 }
1120
1121 return SDValue();
1122}
1123
1124static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
1125 const MipsSubtarget &Subtarget) {
1126 EVT Ty = N->getValueType(ResNo: 0);
1127
1128 if (Subtarget.hasMSA() && Ty.is128BitVector() && Ty.isInteger()) {
1129 // Try the following combines:
1130 // (xor (or $a, $b), (build_vector allones))
1131 // (xor (or $a, $b), (bitcast (build_vector allones)))
1132 SDValue Op0 = N->getOperand(Num: 0);
1133 SDValue Op1 = N->getOperand(Num: 1);
1134 SDValue NotOp;
1135
1136 if (ISD::isBuildVectorAllOnes(N: Op0.getNode()))
1137 NotOp = Op1;
1138 else if (ISD::isBuildVectorAllOnes(N: Op1.getNode()))
1139 NotOp = Op0;
1140 else
1141 return SDValue();
1142
1143 if (NotOp->getOpcode() == ISD::OR)
1144 return DAG.getNode(Opcode: MipsISD::VNOR, DL: SDLoc(N), VT: Ty, N1: NotOp->getOperand(Num: 0),
1145 N2: NotOp->getOperand(Num: 1));
1146 }
1147
1148 return SDValue();
1149}
1150
// SE-specific DAG combine dispatcher.
//
// Opcodes that 'break' fall through to the generic Mips combine at the
// bottom when the SE-specific combine produced nothing; opcodes that
// 'return' bypass the generic combine entirely (including performMULCombine,
// which returns SDValue(N, 0) to mean "keep the node unchanged").
SDValue
MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDValue Val;

  switch (N->getOpcode()) {
  case ISD::AND:
    Val = performANDCombine(N, DAG, DCI, Subtarget);
    break;
  case ISD::OR:
    Val = performORCombine(N, DAG, DCI, Subtarget);
    break;
  case ISD::MUL:
    return performMULCombine(N, DAG, DCI, TL: this, Subtarget);
  case ISD::SHL:
    Val = performSHLCombine(N, DAG, DCI, Subtarget);
    break;
  case ISD::SRA:
    return performSRACombine(N, DAG, DCI, Subtarget);
  case ISD::SRL:
    return performSRLCombine(N, DAG, DCI, Subtarget);
  case ISD::VSELECT:
    return performVSELECTCombine(N, DAG);
  case ISD::XOR:
    Val = performXORCombine(N, DAG, Subtarget);
    break;
  case ISD::SETCC:
    Val = performSETCCCombine(N, DAG);
    break;
  }

  // A non-null Val means one of the 'break' combines fired; trace it and
  // hand it back to the combiner.
  if (Val.getNode()) {
    LLVM_DEBUG(dbgs() << "\nMipsSE DAG Combine:\n";
               N->printrWithDepth(dbgs(), &DAG); dbgs() << "\n=> \n";
               Val.getNode()->printrWithDepth(dbgs(), &DAG); dbgs() << "\n");
    return Val;
  }

  return MipsTargetLowering::PerformDAGCombine(N, DCI);
}
1191
// Expand SE pseudo instructions that need MachineBasicBlock-level custom
// lowering: MSA branch pseudos, FP element copy/insert/fill pseudos, f16
// load/store and FP round/extend pseudos. Unhandled opcodes are delegated to
// the MipsTargetLowering base implementation.
MachineBasicBlock *
MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                  MachineBasicBlock *BB) const {
  switch (MI.getOpcode()) {
  default:
    return MipsTargetLowering::EmitInstrWithCustomInserter(MI, MBB: BB);
  case Mips::BPOSGE32_PSEUDO:
    return emitBPOSGE32(MI, BB);
  // MSA "set on (non-)zero" pseudos, lowered around the matching branch.
  case Mips::SNZ_B_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BNZ_B);
  case Mips::SNZ_H_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BNZ_H);
  case Mips::SNZ_W_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BNZ_W);
  case Mips::SNZ_D_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BNZ_D);
  case Mips::SNZ_V_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BNZ_V);
  case Mips::SZ_B_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BZ_B);
  case Mips::SZ_H_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BZ_H);
  case Mips::SZ_W_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BZ_W);
  case Mips::SZ_D_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BZ_D);
  case Mips::SZ_V_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, BranchOp: Mips::BZ_V);
  // FP element copy/insert pseudos.
  case Mips::COPY_FW_PSEUDO:
    return emitCOPY_FW(MI, BB);
  case Mips::COPY_FD_PSEUDO:
    return emitCOPY_FD(MI, BB);
  case Mips::INSERT_FW_PSEUDO:
    return emitINSERT_FW(MI, BB);
  case Mips::INSERT_FD_PSEUDO:
    return emitINSERT_FD(MI, BB);
  // Variable-index insert pseudos, parameterized by element size in bytes
  // and whether the element is floating point.
  case Mips::INSERT_B_VIDX_PSEUDO:
  case Mips::INSERT_B_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, EltSizeInBytes: 1, IsFP: false);
  case Mips::INSERT_H_VIDX_PSEUDO:
  case Mips::INSERT_H_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, EltSizeInBytes: 2, IsFP: false);
  case Mips::INSERT_W_VIDX_PSEUDO:
  case Mips::INSERT_W_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, EltSizeInBytes: 4, IsFP: false);
  case Mips::INSERT_D_VIDX_PSEUDO:
  case Mips::INSERT_D_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, EltSizeInBytes: 8, IsFP: false);
  case Mips::INSERT_FW_VIDX_PSEUDO:
  case Mips::INSERT_FW_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, EltSizeInBytes: 4, IsFP: true);
  case Mips::INSERT_FD_VIDX_PSEUDO:
  case Mips::INSERT_FD_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, EltSizeInBytes: 8, IsFP: true);
  case Mips::FILL_FW_PSEUDO:
    return emitFILL_FW(MI, BB);
  case Mips::FILL_FD_PSEUDO:
    return emitFILL_FD(MI, BB);
  case Mips::FEXP2_W_1_PSEUDO:
    return emitFEXP2_W_1(MI, BB);
  case Mips::FEXP2_D_1_PSEUDO:
    return emitFEXP2_D_1(MI, BB);
  // f16 memory access and conversion pseudos.
  case Mips::ST_F16:
    return emitST_F16_PSEUDO(MI, BB);
  case Mips::LD_F16:
    return emitLD_F16_PSEUDO(MI, BB);
  case Mips::MSA_FP_EXTEND_W_PSEUDO:
    return emitFPEXTEND_PSEUDO(MI, BB, IsFGR64: false);
  case Mips::MSA_FP_ROUND_W_PSEUDO:
    return emitFPROUND_PSEUDO(MI, BBi: BB, IsFGR64: false);
  case Mips::MSA_FP_EXTEND_D_PSEUDO:
    return emitFPEXTEND_PSEUDO(MI, BB, IsFGR64: true);
  case Mips::MSA_FP_ROUND_D_PSEUDO:
    return emitFPROUND_PSEUDO(MI, BBi: BB, IsFGR64: true);
  }
}
1268
1269bool MipsSETargetLowering::isEligibleForTailCallOptimization(
1270 const CCState &CCInfo, unsigned NextStackOffset,
1271 const MipsFunctionInfo &FI) const {
1272 // Exception has to be cleared with eret.
1273 if (FI.isISR())
1274 return false;
1275
1276 // Return false if either the callee or caller has a byval argument.
1277 if (CCInfo.getInRegsParamsCount() > 0 || FI.hasByvalArg())
1278 return false;
1279
1280 // Return true if the callee's argument area is no larger than the caller's.
1281 return NextStackOffset <= FI.getIncomingArgSize();
1282}
1283
// Build the operand list for an outgoing call node: place the callee address
// first, then delegate to the base class to append the remaining call
// operands.
void MipsSETargetLowering::
getOpndList(SmallVectorImpl<SDValue> &Ops,
            std::deque<std::pair<unsigned, SDValue>> &RegsToPass,
            bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
            bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee,
            SDValue Chain) const {
  Ops.push_back(Elt: Callee);
  MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal,
                                  InternalLinkage, IsCallReloc, CLI, Callee,
                                  Chain);
}
1295
// Lower an f64 load. Under -mno-ldc1-sdc1 an f64 load is split into two i32
// loads plus a BuildPairF64; anything else is delegated to the base class.
SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode &Nd = *cast<LoadSDNode>(Val&: Op);

  if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
    return MipsTargetLowering::lowerLOAD(Op, DAG);

  // Replace a double precision load with two i32 loads and a buildpair64.
  SDLoc DL(Op);
  SDValue Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
  EVT PtrVT = Ptr.getValueType();

  // i32 load from lower address.
  SDValue Lo = DAG.getLoad(VT: MVT::i32, dl: DL, Chain, Ptr, PtrInfo: MachinePointerInfo(),
                           Alignment: Nd.getAlign(), MMOFlags: Nd.getMemOperand()->getFlags());

  // i32 load from higher address; chained through Lo so ordering is kept.
  Ptr = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: Ptr, N2: DAG.getConstant(Val: 4, DL, VT: PtrVT));
  SDValue Hi = DAG.getLoad(
      VT: MVT::i32, dl: DL, Chain: Lo.getValue(R: 1), Ptr, PtrInfo: MachinePointerInfo(),
      Alignment: commonAlignment(A: Nd.getAlign(), Offset: 4), MMOFlags: Nd.getMemOperand()->getFlags());

  // On big-endian targets the high word lives at the lower address.
  if (!Subtarget.isLittle())
    std::swap(a&: Lo, b&: Hi);

  // Merge the f64 value and the output chain of the second load.
  SDValue BP = DAG.getNode(Opcode: MipsISD::BuildPairF64, DL, VT: MVT::f64, N1: Lo, N2: Hi);
  SDValue Ops[2] = {BP, Hi.getValue(R: 1)};
  return DAG.getMergeValues(Ops, dl: DL);
}
1324
// Lower an f64 store. Under -mno-ldc1-sdc1 an f64 store is split into two
// ExtractElementF64s feeding two i32 stores; anything else is delegated to
// the base class.
SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  StoreSDNode &Nd = *cast<StoreSDNode>(Val&: Op);

  if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
    return MipsTargetLowering::lowerSTORE(Op, DAG);

  // Replace a double precision store with two extractelement64s and i32 stores.
  SDLoc DL(Op);
  SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
  EVT PtrVT = Ptr.getValueType();
  SDValue Lo = DAG.getNode(Opcode: MipsISD::ExtractElementF64, DL, VT: MVT::i32,
                           N1: Val, N2: DAG.getConstant(Val: 0, DL, VT: MVT::i32));
  SDValue Hi = DAG.getNode(Opcode: MipsISD::ExtractElementF64, DL, VT: MVT::i32,
                           N1: Val, N2: DAG.getConstant(Val: 1, DL, VT: MVT::i32));

  // On big-endian targets the high word is stored at the lower address.
  if (!Subtarget.isLittle())
    std::swap(a&: Lo, b&: Hi);

  // i32 store to lower address.
  Chain = DAG.getStore(Chain, dl: DL, Val: Lo, Ptr, PtrInfo: MachinePointerInfo(), Alignment: Nd.getAlign(),
                       MMOFlags: Nd.getMemOperand()->getFlags(), AAInfo: Nd.getAAInfo());

  // i32 store to higher address; chained after the first store.
  Ptr = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: Ptr, N2: DAG.getConstant(Val: 4, DL, VT: PtrVT));
  return DAG.getStore(Chain, dl: DL, Val: Hi, Ptr, PtrInfo: MachinePointerInfo(),
                      Alignment: commonAlignment(A: Nd.getAlign(), Offset: 4),
                      MMOFlags: Nd.getMemOperand()->getFlags(), AAInfo: Nd.getAAInfo());
}
1353
// Lower i64<->f64 bitcasts via the 32-bit halves (BuildPairF64 /
// ExtractElementF64); all other bitcasts fall back to default lowering.
SDValue MipsSETargetLowering::lowerBITCAST(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT Src = Op.getOperand(i: 0).getValueType().getSimpleVT();
  MVT Dest = Op.getValueType().getSimpleVT();

  // Bitcast i64 to double.
  if (Src == MVT::i64 && Dest == MVT::f64) {
    SDValue Lo, Hi;
    std::tie(args&: Lo, args&: Hi) =
        DAG.SplitScalar(N: Op.getOperand(i: 0), DL, LoVT: MVT::i32, HiVT: MVT::i32);
    return DAG.getNode(Opcode: MipsISD::BuildPairF64, DL, VT: MVT::f64, N1: Lo, N2: Hi);
  }

  // Bitcast double to i64.
  if (Src == MVT::f64 && Dest == MVT::i64) {
    // Skip lower bitcast when operand0 has converted float results to integer
    // which was done by function SoftenFloatResult.
    if (getTypeAction(Context&: *DAG.getContext(), VT: Op.getOperand(i: 0).getValueType()) ==
        TargetLowering::TypeSoftenFloat)
      return SDValue();
    SDValue Lo =
        DAG.getNode(Opcode: MipsISD::ExtractElementF64, DL, VT: MVT::i32, N1: Op.getOperand(i: 0),
                    N2: DAG.getConstant(Val: 0, DL, VT: MVT::i32));
    SDValue Hi =
        DAG.getNode(Opcode: MipsISD::ExtractElementF64, DL, VT: MVT::i32, N1: Op.getOperand(i: 0),
                    N2: DAG.getConstant(Val: 1, DL, VT: MVT::i32));
    return DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: MVT::i64, N1: Lo, N2: Hi);
  }

  // Skip other cases of bitcast and use default lowering.
  return SDValue();
}
1387
// Lower a multiply/divide-style node into the accumulator-based node NewOpc,
// reading the requested halves back from HI/LO via MFHI/MFLO. HasLo/HasHi
// select which results the caller wants; with both set, the two values are
// merged.
SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
                                          bool HasLo, bool HasHi,
                                          SelectionDAG &DAG) const {
  // MIPS32r6/MIPS64r6 removed accumulator based multiplies.
  assert(!Subtarget.hasMips32r6());

  EVT Ty = Op.getOperand(i: 0).getValueType();
  SDLoc DL(Op);
  // The operation itself produces an untyped HI/LO accumulator value.
  SDValue Mult = DAG.getNode(Opcode: NewOpc, DL, VT: MVT::Untyped,
                             N1: Op.getOperand(i: 0), N2: Op.getOperand(i: 1));
  SDValue Lo, Hi;

  if (HasLo)
    Lo = DAG.getNode(Opcode: MipsISD::MFLO, DL, VT: Ty, Operand: Mult);
  if (HasHi)
    Hi = DAG.getNode(Opcode: MipsISD::MFHI, DL, VT: Ty, Operand: Mult);

  // A single requested half is returned directly.
  if (!HasLo || !HasHi)
    return HasLo ? Lo : Hi;

  SDValue Vals[] = { Lo, Hi };
  return DAG.getMergeValues(Ops: Vals, dl: DL);
}
1411
1412static SDValue initAccumulator(SDValue In, const SDLoc &DL, SelectionDAG &DAG) {
1413 SDValue InLo, InHi;
1414 std::tie(args&: InLo, args&: InHi) = DAG.SplitScalar(N: In, DL, LoVT: MVT::i32, HiVT: MVT::i32);
1415 return DAG.getNode(Opcode: MipsISD::MTLOHI, DL, VT: MVT::Untyped, N1: InLo, N2: InHi);
1416}
1417
1418static SDValue extractLOHI(SDValue Op, const SDLoc &DL, SelectionDAG &DAG) {
1419 SDValue Lo = DAG.getNode(Opcode: MipsISD::MFLO, DL, VT: MVT::i32, Operand: Op);
1420 SDValue Hi = DAG.getNode(Opcode: MipsISD::MFHI, DL, VT: MVT::i32, Operand: Op);
1421 return DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: MVT::i64, N1: Lo, N2: Hi);
1422}
1423
// This function expands mips intrinsic nodes which have 64-bit input operands
// or output values.
//
// out64 = intrinsic-node in64
// =>
// lo = copy (extract-element (in64, 0))
// hi = copy (extract-element (in64, 1))
// mips-specific-node
// v0 = copy lo
// v1 = copy hi
// out64 = merge-values (v0, v1)
//
static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
  SDLoc DL(Op);
  bool HasChainIn = Op->getOperand(Num: 0).getValueType() == MVT::Other;
  SmallVector<SDValue, 3> Ops;
  unsigned OpNo = 0;

  // See if Op has a chain input.
  if (HasChainIn)
    Ops.push_back(Elt: Op->getOperand(Num: OpNo++));

  // The next operand is the intrinsic opcode.
  assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant);

  // See if the next operand has type i64. Note that the intrinsic-opcode
  // operand itself is deliberately dropped from Ops.
  SDValue Opnd = Op->getOperand(Num: ++OpNo), In64;

  // An i64 accumulator input is moved into HI/LO and appended last (below),
  // instead of being passed in operand position.
  if (Opnd.getValueType() == MVT::i64)
    In64 = initAccumulator(In: Opnd, DL, DAG);
  else
    Ops.push_back(Elt: Opnd);

  // Push the remaining operands.
  for (++OpNo ; OpNo < Op->getNumOperands(); ++OpNo)
    Ops.push_back(Elt: Op->getOperand(Num: OpNo));

  // Add In64 to the end of the list.
  if (In64.getNode())
    Ops.push_back(Elt: In64);

  // Scan output. i64 results become Untyped so the HI/LO pair can carry them.
  SmallVector<EVT, 2> ResTys;

  for (EVT Ty : Op->values())
    ResTys.push_back(Elt: (Ty == MVT::i64) ? MVT::Untyped : Ty);

  // Create node.
  SDValue Val = DAG.getNode(Opcode: Opc, DL, ResultTys: ResTys, Ops);
  // An Untyped first result is read back out of HI/LO as an i64.
  SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Op: Val, DL, DAG) : Val;

  if (!HasChainIn)
    return Out;

  // Re-attach the output chain alongside the (possibly re-assembled) value.
  assert(Val->getValueType(1) == MVT::Other);
  SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) };
  return DAG.getMergeValues(Ops: Vals, dl: DL);
}
1482
1483// Lower an MSA copy intrinsic into the specified SelectionDAG node
1484static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
1485 SDLoc DL(Op);
1486 SDValue Vec = Op->getOperand(Num: 1);
1487 SDValue Idx = Op->getOperand(Num: 2);
1488 EVT ResTy = Op->getValueType(ResNo: 0);
1489 EVT EltTy = Vec->getValueType(ResNo: 0).getVectorElementType();
1490
1491 SDValue Result = DAG.getNode(Opcode: Opc, DL, VT: ResTy, N1: Vec, N2: Idx,
1492 N3: DAG.getValueType(EltTy));
1493
1494 return Result;
1495}
1496
// Splat operand OpNr of Op across the result vector type. v2i64 results are
// built via v4i32 (MSA v2i64 BUILD_VECTORs are built from i32 halves) and
// then masked and bitcast back.
static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) {
  EVT ResVecTy = Op->getValueType(ResNo: 0);
  EVT ViaVecTy = ResVecTy;
  bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian();
  SDLoc DL(Op);

  // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and
  // LaneB is the lower 32-bits. Otherwise LaneA and LaneB are alternating
  // lanes.
  SDValue LaneA = Op->getOperand(Num: OpNr);
  SDValue LaneB;

  if (ResVecTy == MVT::v2i64) {
    // In case of the index being passed as an immediate value, set the upper
    // lane to 0 so that the splati.d instruction can be matched.
    if (isa<ConstantSDNode>(Val: LaneA))
      LaneB = DAG.getConstant(Val: 0, DL, VT: MVT::i32);
    // Having the index passed in a register, set the upper lane to the same
    // value as the lower - this results in the BUILD_VECTOR node not being
    // expanded through stack. This way we are able to pattern match the set of
    // nodes created here to splat.d.
    else
      LaneB = LaneA;
    ViaVecTy = MVT::v4i32;
    if(BigEndian)
      std::swap(a&: LaneA, b&: LaneB);
  } else
    LaneB = LaneA;

  // Enough operands for the widest case (16 lanes); only the first
  // getVectorNumElements() entries are actually used below.
  SDValue Ops[16] = { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB,
                      LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB };

  SDValue Result = DAG.getBuildVector(
      VT: ViaVecTy, DL, Ops: ArrayRef(Ops, ViaVecTy.getVectorNumElements()));

  if (ViaVecTy != ResVecTy) {
    // Mask each 32-bit lane down to bit 0 before bitcasting back to the
    // result type — presumably this zero-extends a boolean-style lane value;
    // confirm against the intrinsic's documented semantics.
    SDValue One = DAG.getConstant(Val: 1, DL, VT: ViaVecTy);
    Result = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ResVecTy,
                         Operand: DAG.getNode(Opcode: ISD::AND, DL, VT: ViaVecTy, N1: Result, N2: One));
  }

  return Result;
}
1540
1541static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG,
1542 bool IsSigned = false) {
1543 auto *CImm = cast<ConstantSDNode>(Val: Op->getOperand(Num: ImmOp));
1544 return DAG.getConstant(
1545 Val: APInt(Op->getValueType(ResNo: 0).getScalarType().getSizeInBits(),
1546 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
1547 DL: SDLoc(Op), VT: Op->getValueType(ResNo: 0));
1548}
1549
// Build a BUILD_VECTOR that splats SplatValue across VecTy. v2i64 vectors
// are assembled from the value's two i32 halves via v4i32 and then bitcast
// back, since a v2i64 BUILD_VECTOR cannot be matched directly.
static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue,
                                   bool BigEndian, SelectionDAG &DAG) {
  EVT ViaVecTy = VecTy;
  SDValue SplatValueA = SplatValue;
  SDValue SplatValueB = SplatValue;
  SDLoc DL(SplatValue);

  if (VecTy == MVT::v2i64) {
    // v2i64 BUILD_VECTOR must be performed via v4i32 so split into i32's.
    ViaVecTy = MVT::v4i32;

    // A = low 32 bits, B = high 32 bits of the 64-bit splat value.
    SplatValueA = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: SplatValue);
    SplatValueB = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, N1: SplatValue,
                              N2: DAG.getConstant(Val: 32, DL, VT: MVT::i32));
    SplatValueB = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: SplatValueB);
  }

  // We currently hold the parts in little endian order. Swap them if
  // necessary.
  if (BigEndian)
    std::swap(a&: SplatValueA, b&: SplatValueB);

  // Enough operands for the widest case (16 lanes); only the first
  // getVectorNumElements() entries are consumed below.
  SDValue Ops[16] = { SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB };

  SDValue Result = DAG.getBuildVector(
      VT: ViaVecTy, DL, Ops: ArrayRef(Ops, ViaVecTy.getVectorNumElements()));

  if (VecTy != ViaVecTy)
    Result = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: VecTy, Operand: Result);

  return Result;
}
1585
1586static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG,
1587 unsigned Opc, SDValue Imm,
1588 bool BigEndian) {
1589 EVT VecTy = Op->getValueType(ResNo: 0);
1590 SDValue Exp2Imm;
1591 SDLoc DL(Op);
1592
1593 // The DAG Combiner can't constant fold bitcasted vectors yet so we must do it
1594 // here for now.
1595 if (VecTy == MVT::v2i64) {
1596 if (ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(Val&: Imm)) {
1597 APInt BitImm = APInt(64, 1) << CImm->getAPIntValue();
1598
1599 SDValue BitImmHiOp = DAG.getConstant(Val: BitImm.lshr(shiftAmt: 32).trunc(width: 32), DL,
1600 VT: MVT::i32);
1601 SDValue BitImmLoOp = DAG.getConstant(Val: BitImm.trunc(width: 32), DL, VT: MVT::i32);
1602
1603 if (BigEndian)
1604 std::swap(a&: BitImmLoOp, b&: BitImmHiOp);
1605
1606 Exp2Imm = DAG.getNode(
1607 Opcode: ISD::BITCAST, DL, VT: MVT::v2i64,
1608 Operand: DAG.getBuildVector(VT: MVT::v4i32, DL,
1609 Ops: {BitImmLoOp, BitImmHiOp, BitImmLoOp, BitImmHiOp}));
1610 }
1611 }
1612
1613 if (!Exp2Imm.getNode()) {
1614 // We couldnt constant fold, do a vector shift instead
1615
1616 // Extend i32 to i64 if necessary. Sign or zero extend doesn't matter since
1617 // only values 0-63 are valid.
1618 if (VecTy == MVT::v2i64)
1619 Imm = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, Operand: Imm);
1620
1621 Exp2Imm = getBuildVectorSplat(VecTy, SplatValue: Imm, BigEndian, DAG);
1622
1623 Exp2Imm = DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: DAG.getConstant(Val: 1, DL, VT: VecTy),
1624 N2: Exp2Imm);
1625 }
1626
1627 return DAG.getNode(Opcode: Opc, DL, VT: VecTy, N1: Op->getOperand(Num: 1), N2: Exp2Imm);
1628}
1629
1630static SDValue truncateVecElts(SDValue Op, SelectionDAG &DAG) {
1631 SDLoc DL(Op);
1632 EVT ResTy = Op->getValueType(ResNo: 0);
1633 SDValue Vec = Op->getOperand(Num: 2);
1634 bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian();
1635 MVT ResEltTy = ResTy == MVT::v2i64 ? MVT::i64 : MVT::i32;
1636 SDValue ConstValue = DAG.getConstant(Val: Vec.getScalarValueSizeInBits() - 1,
1637 DL, VT: ResEltTy);
1638 SDValue SplatVec = getBuildVectorSplat(VecTy: ResTy, SplatValue: ConstValue, BigEndian, DAG);
1639
1640 return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Vec, N2: SplatVec);
1641}
1642
1643static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) {
1644 EVT ResTy = Op->getValueType(ResNo: 0);
1645 SDLoc DL(Op);
1646 SDValue One = DAG.getConstant(Val: 1, DL, VT: ResTy);
1647 SDValue Bit = DAG.getNode(Opcode: ISD::SHL, DL, VT: ResTy, N1: One, N2: truncateVecElts(Op, DAG));
1648
1649 return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Op->getOperand(Num: 1),
1650 N2: DAG.getNOT(DL, Val: Bit, VT: ResTy));
1651}
1652
1653static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) {
1654 SDLoc DL(Op);
1655 EVT ResTy = Op->getValueType(ResNo: 0);
1656 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1)
1657 << Op->getConstantOperandAPInt(Num: 2);
1658 SDValue BitMask = DAG.getConstant(Val: ~BitImm, DL, VT: ResTy);
1659
1660 return DAG.getNode(Opcode: ISD::AND, DL, VT: ResTy, N1: Op->getOperand(Num: 1), N2: BitMask);
1661}
1662
1663SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
1664 SelectionDAG &DAG) const {
1665 SDLoc DL(Op);
1666 unsigned Intrinsic = Op->getConstantOperandVal(Num: 0);
1667 switch (Intrinsic) {
1668 default:
1669 return SDValue();
1670 case Intrinsic::mips_shilo:
1671 return lowerDSPIntr(Op, DAG, Opc: MipsISD::SHILO);
1672 case Intrinsic::mips_dpau_h_qbl:
1673 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPAU_H_QBL);
1674 case Intrinsic::mips_dpau_h_qbr:
1675 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPAU_H_QBR);
1676 case Intrinsic::mips_dpsu_h_qbl:
1677 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPSU_H_QBL);
1678 case Intrinsic::mips_dpsu_h_qbr:
1679 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPSU_H_QBR);
1680 case Intrinsic::mips_dpa_w_ph:
1681 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPA_W_PH);
1682 case Intrinsic::mips_dps_w_ph:
1683 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPS_W_PH);
1684 case Intrinsic::mips_dpax_w_ph:
1685 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPAX_W_PH);
1686 case Intrinsic::mips_dpsx_w_ph:
1687 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPSX_W_PH);
1688 case Intrinsic::mips_mulsa_w_ph:
1689 return lowerDSPIntr(Op, DAG, Opc: MipsISD::MULSA_W_PH);
1690 case Intrinsic::mips_mult:
1691 return lowerDSPIntr(Op, DAG, Opc: MipsISD::Mult);
1692 case Intrinsic::mips_multu:
1693 return lowerDSPIntr(Op, DAG, Opc: MipsISD::Multu);
1694 case Intrinsic::mips_madd:
1695 return lowerDSPIntr(Op, DAG, Opc: MipsISD::MAdd);
1696 case Intrinsic::mips_maddu:
1697 return lowerDSPIntr(Op, DAG, Opc: MipsISD::MAddu);
1698 case Intrinsic::mips_msub:
1699 return lowerDSPIntr(Op, DAG, Opc: MipsISD::MSub);
1700 case Intrinsic::mips_msubu:
1701 return lowerDSPIntr(Op, DAG, Opc: MipsISD::MSubu);
1702 case Intrinsic::mips_addv_b:
1703 case Intrinsic::mips_addv_h:
1704 case Intrinsic::mips_addv_w:
1705 case Intrinsic::mips_addv_d:
1706 return DAG.getNode(Opcode: ISD::ADD, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
1707 N2: Op->getOperand(Num: 2));
1708 case Intrinsic::mips_addvi_b:
1709 case Intrinsic::mips_addvi_h:
1710 case Intrinsic::mips_addvi_w:
1711 case Intrinsic::mips_addvi_d:
1712 return DAG.getNode(Opcode: ISD::ADD, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
1713 N2: lowerMSASplatImm(Op, ImmOp: 2, DAG));
1714 case Intrinsic::mips_and_v:
1715 return DAG.getNode(Opcode: ISD::AND, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
1716 N2: Op->getOperand(Num: 2));
1717 case Intrinsic::mips_andi_b:
1718 return DAG.getNode(Opcode: ISD::AND, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
1719 N2: lowerMSASplatImm(Op, ImmOp: 2, DAG));
1720 case Intrinsic::mips_bclr_b:
1721 case Intrinsic::mips_bclr_h:
1722 case Intrinsic::mips_bclr_w:
1723 case Intrinsic::mips_bclr_d:
1724 return lowerMSABitClear(Op, DAG);
1725 case Intrinsic::mips_bclri_b:
1726 case Intrinsic::mips_bclri_h:
1727 case Intrinsic::mips_bclri_w:
1728 case Intrinsic::mips_bclri_d:
1729 return lowerMSABitClearImm(Op, DAG);
1730 case Intrinsic::mips_binsli_b:
1731 case Intrinsic::mips_binsli_h:
1732 case Intrinsic::mips_binsli_w:
1733 case Intrinsic::mips_binsli_d: {
1734 // binsli_x(IfClear, IfSet, nbits) -> (vselect LBitsMask, IfSet, IfClear)
1735 EVT VecTy = Op->getValueType(ResNo: 0);
1736 EVT EltTy = VecTy.getVectorElementType();
1737 if (Op->getConstantOperandVal(Num: 3) >= EltTy.getSizeInBits())
1738 report_fatal_error(reason: "Immediate out of range");
1739 APInt Mask = APInt::getHighBitsSet(numBits: EltTy.getSizeInBits(),
1740 hiBitsSet: Op->getConstantOperandVal(Num: 3) + 1);
1741 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: VecTy,
1742 N1: DAG.getConstant(Val: Mask, DL, VT: VecTy, isTarget: true),
1743 N2: Op->getOperand(Num: 2), N3: Op->getOperand(Num: 1));
1744 }
1745 case Intrinsic::mips_binsri_b:
1746 case Intrinsic::mips_binsri_h:
1747 case Intrinsic::mips_binsri_w:
1748 case Intrinsic::mips_binsri_d: {
1749 // binsri_x(IfClear, IfSet, nbits) -> (vselect RBitsMask, IfSet, IfClear)
1750 EVT VecTy = Op->getValueType(ResNo: 0);
1751 EVT EltTy = VecTy.getVectorElementType();
1752 if (Op->getConstantOperandVal(Num: 3) >= EltTy.getSizeInBits())
1753 report_fatal_error(reason: "Immediate out of range");
1754 APInt Mask = APInt::getLowBitsSet(numBits: EltTy.getSizeInBits(),
1755 loBitsSet: Op->getConstantOperandVal(Num: 3) + 1);
1756 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: VecTy,
1757 N1: DAG.getConstant(Val: Mask, DL, VT: VecTy, isTarget: true),
1758 N2: Op->getOperand(Num: 2), N3: Op->getOperand(Num: 1));
1759 }
1760 case Intrinsic::mips_bmnz_v:
1761 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 3),
1762 N2: Op->getOperand(Num: 2), N3: Op->getOperand(Num: 1));
1763 case Intrinsic::mips_bmnzi_b:
1764 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: Op->getValueType(ResNo: 0),
1765 N1: lowerMSASplatImm(Op, ImmOp: 3, DAG), N2: Op->getOperand(Num: 2),
1766 N3: Op->getOperand(Num: 1));
1767 case Intrinsic::mips_bmz_v:
1768 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 3),
1769 N2: Op->getOperand(Num: 1), N3: Op->getOperand(Num: 2));
1770 case Intrinsic::mips_bmzi_b:
1771 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: Op->getValueType(ResNo: 0),
1772 N1: lowerMSASplatImm(Op, ImmOp: 3, DAG), N2: Op->getOperand(Num: 1),
1773 N3: Op->getOperand(Num: 2));
1774 case Intrinsic::mips_bneg_b:
1775 case Intrinsic::mips_bneg_h:
1776 case Intrinsic::mips_bneg_w:
1777 case Intrinsic::mips_bneg_d: {
1778 EVT VecTy = Op->getValueType(ResNo: 0);
1779 SDValue One = DAG.getConstant(Val: 1, DL, VT: VecTy);
1780
1781 return DAG.getNode(Opcode: ISD::XOR, DL, VT: VecTy, N1: Op->getOperand(Num: 1),
1782 N2: DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: One,
1783 N2: truncateVecElts(Op, DAG)));
1784 }
1785 case Intrinsic::mips_bnegi_b:
1786 case Intrinsic::mips_bnegi_h:
1787 case Intrinsic::mips_bnegi_w:
1788 case Intrinsic::mips_bnegi_d:
1789 return lowerMSABinaryBitImmIntr(Op, DAG, Opc: ISD::XOR, Imm: Op->getOperand(Num: 2),
1790 BigEndian: !Subtarget.isLittle());
1791 case Intrinsic::mips_bnz_b:
1792 case Intrinsic::mips_bnz_h:
1793 case Intrinsic::mips_bnz_w:
1794 case Intrinsic::mips_bnz_d:
1795 return DAG.getNode(Opcode: MipsISD::VALL_NONZERO, DL, VT: Op->getValueType(ResNo: 0),
1796 Operand: Op->getOperand(Num: 1));
1797 case Intrinsic::mips_bnz_v:
1798 return DAG.getNode(Opcode: MipsISD::VANY_NONZERO, DL, VT: Op->getValueType(ResNo: 0),
1799 Operand: Op->getOperand(Num: 1));
1800 case Intrinsic::mips_bsel_v:
1801 // bsel_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear)
1802 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: Op->getValueType(ResNo: 0),
1803 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 3),
1804 N3: Op->getOperand(Num: 2));
1805 case Intrinsic::mips_bseli_b:
1806 // bseli_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear)
1807 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: Op->getValueType(ResNo: 0),
1808 N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 3, DAG),
1809 N3: Op->getOperand(Num: 2));
1810 case Intrinsic::mips_bset_b:
1811 case Intrinsic::mips_bset_h:
1812 case Intrinsic::mips_bset_w:
1813 case Intrinsic::mips_bset_d: {
1814 EVT VecTy = Op->getValueType(ResNo: 0);
1815 SDValue One = DAG.getConstant(Val: 1, DL, VT: VecTy);
1816
1817 return DAG.getNode(Opcode: ISD::OR, DL, VT: VecTy, N1: Op->getOperand(Num: 1),
1818 N2: DAG.getNode(Opcode: ISD::SHL, DL, VT: VecTy, N1: One,
1819 N2: truncateVecElts(Op, DAG)));
1820 }
1821 case Intrinsic::mips_bseti_b:
1822 case Intrinsic::mips_bseti_h:
1823 case Intrinsic::mips_bseti_w:
1824 case Intrinsic::mips_bseti_d:
1825 return lowerMSABinaryBitImmIntr(Op, DAG, Opc: ISD::OR, Imm: Op->getOperand(Num: 2),
1826 BigEndian: !Subtarget.isLittle());
1827 case Intrinsic::mips_bz_b:
1828 case Intrinsic::mips_bz_h:
1829 case Intrinsic::mips_bz_w:
1830 case Intrinsic::mips_bz_d:
1831 return DAG.getNode(Opcode: MipsISD::VALL_ZERO, DL, VT: Op->getValueType(ResNo: 0),
1832 Operand: Op->getOperand(Num: 1));
1833 case Intrinsic::mips_bz_v:
1834 return DAG.getNode(Opcode: MipsISD::VANY_ZERO, DL, VT: Op->getValueType(ResNo: 0),
1835 Operand: Op->getOperand(Num: 1));
1836 case Intrinsic::mips_ceq_b:
1837 case Intrinsic::mips_ceq_h:
1838 case Intrinsic::mips_ceq_w:
1839 case Intrinsic::mips_ceq_d:
1840 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1841 RHS: Op->getOperand(Num: 2), Cond: ISD::SETEQ);
1842 case Intrinsic::mips_ceqi_b:
1843 case Intrinsic::mips_ceqi_h:
1844 case Intrinsic::mips_ceqi_w:
1845 case Intrinsic::mips_ceqi_d:
1846 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1847 RHS: lowerMSASplatImm(Op, ImmOp: 2, DAG, IsSigned: true), Cond: ISD::SETEQ);
1848 case Intrinsic::mips_cle_s_b:
1849 case Intrinsic::mips_cle_s_h:
1850 case Intrinsic::mips_cle_s_w:
1851 case Intrinsic::mips_cle_s_d:
1852 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1853 RHS: Op->getOperand(Num: 2), Cond: ISD::SETLE);
1854 case Intrinsic::mips_clei_s_b:
1855 case Intrinsic::mips_clei_s_h:
1856 case Intrinsic::mips_clei_s_w:
1857 case Intrinsic::mips_clei_s_d:
1858 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1859 RHS: lowerMSASplatImm(Op, ImmOp: 2, DAG, IsSigned: true), Cond: ISD::SETLE);
1860 case Intrinsic::mips_cle_u_b:
1861 case Intrinsic::mips_cle_u_h:
1862 case Intrinsic::mips_cle_u_w:
1863 case Intrinsic::mips_cle_u_d:
1864 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1865 RHS: Op->getOperand(Num: 2), Cond: ISD::SETULE);
1866 case Intrinsic::mips_clei_u_b:
1867 case Intrinsic::mips_clei_u_h:
1868 case Intrinsic::mips_clei_u_w:
1869 case Intrinsic::mips_clei_u_d:
1870 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1871 RHS: lowerMSASplatImm(Op, ImmOp: 2, DAG), Cond: ISD::SETULE);
1872 case Intrinsic::mips_clt_s_b:
1873 case Intrinsic::mips_clt_s_h:
1874 case Intrinsic::mips_clt_s_w:
1875 case Intrinsic::mips_clt_s_d:
1876 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1877 RHS: Op->getOperand(Num: 2), Cond: ISD::SETLT);
1878 case Intrinsic::mips_clti_s_b:
1879 case Intrinsic::mips_clti_s_h:
1880 case Intrinsic::mips_clti_s_w:
1881 case Intrinsic::mips_clti_s_d:
1882 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1883 RHS: lowerMSASplatImm(Op, ImmOp: 2, DAG, IsSigned: true), Cond: ISD::SETLT);
1884 case Intrinsic::mips_clt_u_b:
1885 case Intrinsic::mips_clt_u_h:
1886 case Intrinsic::mips_clt_u_w:
1887 case Intrinsic::mips_clt_u_d:
1888 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1889 RHS: Op->getOperand(Num: 2), Cond: ISD::SETULT);
1890 case Intrinsic::mips_clti_u_b:
1891 case Intrinsic::mips_clti_u_h:
1892 case Intrinsic::mips_clti_u_w:
1893 case Intrinsic::mips_clti_u_d:
1894 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1895 RHS: lowerMSASplatImm(Op, ImmOp: 2, DAG), Cond: ISD::SETULT);
1896 case Intrinsic::mips_copy_s_b:
1897 case Intrinsic::mips_copy_s_h:
1898 case Intrinsic::mips_copy_s_w:
1899 return lowerMSACopyIntr(Op, DAG, Opc: MipsISD::VEXTRACT_SEXT_ELT);
1900 case Intrinsic::mips_copy_s_d:
1901 if (Subtarget.hasMips64())
1902 // Lower directly into VEXTRACT_SEXT_ELT since i64 is legal on Mips64.
1903 return lowerMSACopyIntr(Op, DAG, Opc: MipsISD::VEXTRACT_SEXT_ELT);
1904 else {
1905 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type
1906 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
1907 return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: SDLoc(Op),
1908 VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
1909 N2: Op->getOperand(Num: 2));
1910 }
1911 case Intrinsic::mips_copy_u_b:
1912 case Intrinsic::mips_copy_u_h:
1913 case Intrinsic::mips_copy_u_w:
1914 return lowerMSACopyIntr(Op, DAG, Opc: MipsISD::VEXTRACT_ZEXT_ELT);
1915 case Intrinsic::mips_copy_u_d:
1916 if (Subtarget.hasMips64())
1917 // Lower directly into VEXTRACT_ZEXT_ELT since i64 is legal on Mips64.
1918 return lowerMSACopyIntr(Op, DAG, Opc: MipsISD::VEXTRACT_ZEXT_ELT);
1919 else {
1920 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type
1921 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
1922 // Note: When i64 is illegal, this results in copy_s.w instructions
1923 // instead of copy_u.w instructions. This makes no difference to the
1924 // behaviour since i64 is only illegal when the register file is 32-bit.
1925 return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: SDLoc(Op),
1926 VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
1927 N2: Op->getOperand(Num: 2));
1928 }
1929 case Intrinsic::mips_div_s_b:
1930 case Intrinsic::mips_div_s_h:
1931 case Intrinsic::mips_div_s_w:
1932 case Intrinsic::mips_div_s_d:
1933 return DAG.getNode(Opcode: ISD::SDIV, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
1934 N2: Op->getOperand(Num: 2));
1935 case Intrinsic::mips_div_u_b:
1936 case Intrinsic::mips_div_u_h:
1937 case Intrinsic::mips_div_u_w:
1938 case Intrinsic::mips_div_u_d:
1939 return DAG.getNode(Opcode: ISD::UDIV, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
1940 N2: Op->getOperand(Num: 2));
1941 case Intrinsic::mips_fadd_w:
1942 case Intrinsic::mips_fadd_d:
1943 // TODO: If intrinsics have fast-math-flags, propagate them.
1944 return DAG.getNode(Opcode: ISD::FADD, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
1945 N2: Op->getOperand(Num: 2));
1946 // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away
1947 case Intrinsic::mips_fceq_w:
1948 case Intrinsic::mips_fceq_d:
1949 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1950 RHS: Op->getOperand(Num: 2), Cond: ISD::SETOEQ);
1951 case Intrinsic::mips_fcle_w:
1952 case Intrinsic::mips_fcle_d:
1953 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1954 RHS: Op->getOperand(Num: 2), Cond: ISD::SETOLE);
1955 case Intrinsic::mips_fclt_w:
1956 case Intrinsic::mips_fclt_d:
1957 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1958 RHS: Op->getOperand(Num: 2), Cond: ISD::SETOLT);
1959 case Intrinsic::mips_fcne_w:
1960 case Intrinsic::mips_fcne_d:
1961 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1962 RHS: Op->getOperand(Num: 2), Cond: ISD::SETONE);
1963 case Intrinsic::mips_fcor_w:
1964 case Intrinsic::mips_fcor_d:
1965 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1966 RHS: Op->getOperand(Num: 2), Cond: ISD::SETO);
1967 case Intrinsic::mips_fcueq_w:
1968 case Intrinsic::mips_fcueq_d:
1969 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1970 RHS: Op->getOperand(Num: 2), Cond: ISD::SETUEQ);
1971 case Intrinsic::mips_fcule_w:
1972 case Intrinsic::mips_fcule_d:
1973 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1974 RHS: Op->getOperand(Num: 2), Cond: ISD::SETULE);
1975 case Intrinsic::mips_fcult_w:
1976 case Intrinsic::mips_fcult_d:
1977 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1978 RHS: Op->getOperand(Num: 2), Cond: ISD::SETULT);
1979 case Intrinsic::mips_fcun_w:
1980 case Intrinsic::mips_fcun_d:
1981 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1982 RHS: Op->getOperand(Num: 2), Cond: ISD::SETUO);
1983 case Intrinsic::mips_fcune_w:
1984 case Intrinsic::mips_fcune_d:
1985 return DAG.getSetCC(DL, VT: Op->getValueType(ResNo: 0), LHS: Op->getOperand(Num: 1),
1986 RHS: Op->getOperand(Num: 2), Cond: ISD::SETUNE);
1987 case Intrinsic::mips_fdiv_w:
1988 case Intrinsic::mips_fdiv_d:
1989 // TODO: If intrinsics have fast-math-flags, propagate them.
1990 return DAG.getNode(Opcode: ISD::FDIV, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
1991 N2: Op->getOperand(Num: 2));
1992 case Intrinsic::mips_ffint_u_w:
1993 case Intrinsic::mips_ffint_u_d:
1994 return DAG.getNode(Opcode: ISD::UINT_TO_FP, DL, VT: Op->getValueType(ResNo: 0),
1995 Operand: Op->getOperand(Num: 1));
1996 case Intrinsic::mips_ffint_s_w:
1997 case Intrinsic::mips_ffint_s_d:
1998 return DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT: Op->getValueType(ResNo: 0),
1999 Operand: Op->getOperand(Num: 1));
2000 case Intrinsic::mips_fill_b:
2001 case Intrinsic::mips_fill_h:
2002 case Intrinsic::mips_fill_w:
2003 case Intrinsic::mips_fill_d: {
2004 EVT ResTy = Op->getValueType(ResNo: 0);
2005 SmallVector<SDValue, 16> Ops(ResTy.getVectorNumElements(),
2006 Op->getOperand(Num: 1));
2007
2008 // If ResTy is v2i64 then the type legalizer will break this node down into
2009 // an equivalent v4i32.
2010 return DAG.getBuildVector(VT: ResTy, DL, Ops);
2011 }
2012 case Intrinsic::mips_fexp2_w:
2013 case Intrinsic::mips_fexp2_d: {
2014 // TODO: If intrinsics have fast-math-flags, propagate them.
2015 EVT ResTy = Op->getValueType(ResNo: 0);
2016 return DAG.getNode(
2017 Opcode: ISD::FMUL, DL: SDLoc(Op), VT: ResTy, N1: Op->getOperand(Num: 1),
2018 N2: DAG.getNode(Opcode: ISD::FEXP2, DL: SDLoc(Op), VT: ResTy, Operand: Op->getOperand(Num: 2)));
2019 }
2020 case Intrinsic::mips_flog2_w:
2021 case Intrinsic::mips_flog2_d:
2022 return DAG.getNode(Opcode: ISD::FLOG2, DL, VT: Op->getValueType(ResNo: 0), Operand: Op->getOperand(Num: 1));
2023 case Intrinsic::mips_fmadd_w:
2024 case Intrinsic::mips_fmadd_d:
2025 return DAG.getNode(Opcode: ISD::FMA, DL: SDLoc(Op), VT: Op->getValueType(ResNo: 0),
2026 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2), N3: Op->getOperand(Num: 3));
2027 case Intrinsic::mips_fmul_w:
2028 case Intrinsic::mips_fmul_d:
2029 // TODO: If intrinsics have fast-math-flags, propagate them.
2030 return DAG.getNode(Opcode: ISD::FMUL, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
2031 N2: Op->getOperand(Num: 2));
2032 case Intrinsic::mips_fmsub_w:
2033 case Intrinsic::mips_fmsub_d: {
2034 // TODO: If intrinsics have fast-math-flags, propagate them.
2035 return DAG.getNode(Opcode: MipsISD::FMS, DL: SDLoc(Op), VT: Op->getValueType(ResNo: 0),
2036 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2), N3: Op->getOperand(Num: 3));
2037 }
2038 case Intrinsic::mips_frint_w:
2039 case Intrinsic::mips_frint_d:
2040 return DAG.getNode(Opcode: ISD::FRINT, DL, VT: Op->getValueType(ResNo: 0), Operand: Op->getOperand(Num: 1));
2041 case Intrinsic::mips_fsqrt_w:
2042 case Intrinsic::mips_fsqrt_d:
2043 return DAG.getNode(Opcode: ISD::FSQRT, DL, VT: Op->getValueType(ResNo: 0), Operand: Op->getOperand(Num: 1));
2044 case Intrinsic::mips_fsub_w:
2045 case Intrinsic::mips_fsub_d:
2046 // TODO: If intrinsics have fast-math-flags, propagate them.
2047 return DAG.getNode(Opcode: ISD::FSUB, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
2048 N2: Op->getOperand(Num: 2));
2049 case Intrinsic::mips_ftrunc_u_w:
2050 case Intrinsic::mips_ftrunc_u_d:
2051 return DAG.getNode(Opcode: ISD::FP_TO_UINT, DL, VT: Op->getValueType(ResNo: 0),
2052 Operand: Op->getOperand(Num: 1));
2053 case Intrinsic::mips_ftrunc_s_w:
2054 case Intrinsic::mips_ftrunc_s_d:
2055 return DAG.getNode(Opcode: ISD::FP_TO_SINT, DL, VT: Op->getValueType(ResNo: 0),
2056 Operand: Op->getOperand(Num: 1));
2057 case Intrinsic::mips_ilvev_b:
2058 case Intrinsic::mips_ilvev_h:
2059 case Intrinsic::mips_ilvev_w:
2060 case Intrinsic::mips_ilvev_d:
2061 return DAG.getNode(Opcode: MipsISD::ILVEV, DL, VT: Op->getValueType(ResNo: 0),
2062 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2));
2063 case Intrinsic::mips_ilvl_b:
2064 case Intrinsic::mips_ilvl_h:
2065 case Intrinsic::mips_ilvl_w:
2066 case Intrinsic::mips_ilvl_d:
2067 return DAG.getNode(Opcode: MipsISD::ILVL, DL, VT: Op->getValueType(ResNo: 0),
2068 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2));
2069 case Intrinsic::mips_ilvod_b:
2070 case Intrinsic::mips_ilvod_h:
2071 case Intrinsic::mips_ilvod_w:
2072 case Intrinsic::mips_ilvod_d:
2073 return DAG.getNode(Opcode: MipsISD::ILVOD, DL, VT: Op->getValueType(ResNo: 0),
2074 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2));
2075 case Intrinsic::mips_ilvr_b:
2076 case Intrinsic::mips_ilvr_h:
2077 case Intrinsic::mips_ilvr_w:
2078 case Intrinsic::mips_ilvr_d:
2079 return DAG.getNode(Opcode: MipsISD::ILVR, DL, VT: Op->getValueType(ResNo: 0),
2080 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2));
2081 case Intrinsic::mips_insert_b:
2082 case Intrinsic::mips_insert_h:
2083 case Intrinsic::mips_insert_w:
2084 case Intrinsic::mips_insert_d:
2085 return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(Op), VT: Op->getValueType(ResNo: 0),
2086 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 3), N3: Op->getOperand(Num: 2));
2087 case Intrinsic::mips_insve_b:
2088 case Intrinsic::mips_insve_h:
2089 case Intrinsic::mips_insve_w:
2090 case Intrinsic::mips_insve_d: {
2091 // Report an error for out of range values.
2092 int64_t Max;
2093 switch (Intrinsic) {
2094 case Intrinsic::mips_insve_b: Max = 15; break;
2095 case Intrinsic::mips_insve_h: Max = 7; break;
2096 case Intrinsic::mips_insve_w: Max = 3; break;
2097 case Intrinsic::mips_insve_d: Max = 1; break;
2098 default: llvm_unreachable("Unmatched intrinsic");
2099 }
2100 int64_t Value = cast<ConstantSDNode>(Val: Op->getOperand(Num: 2))->getSExtValue();
2101 if (Value < 0 || Value > Max)
2102 report_fatal_error(reason: "Immediate out of range");
2103 return DAG.getNode(Opcode: MipsISD::INSVE, DL, VT: Op->getValueType(ResNo: 0),
2104 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2), N3: Op->getOperand(Num: 3),
2105 N4: DAG.getConstant(Val: 0, DL, VT: MVT::i32));
2106 }
2107 case Intrinsic::mips_ldi_b:
2108 case Intrinsic::mips_ldi_h:
2109 case Intrinsic::mips_ldi_w:
2110 case Intrinsic::mips_ldi_d:
2111 return lowerMSASplatImm(Op, ImmOp: 1, DAG, IsSigned: true);
2112 case Intrinsic::mips_lsa:
2113 case Intrinsic::mips_dlsa: {
2114 EVT ResTy = Op->getValueType(ResNo: 0);
2115 return DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(Op), VT: ResTy, N1: Op->getOperand(Num: 1),
2116 N2: DAG.getNode(Opcode: ISD::SHL, DL: SDLoc(Op), VT: ResTy,
2117 N1: Op->getOperand(Num: 2), N2: Op->getOperand(Num: 3)));
2118 }
2119 case Intrinsic::mips_maddv_b:
2120 case Intrinsic::mips_maddv_h:
2121 case Intrinsic::mips_maddv_w:
2122 case Intrinsic::mips_maddv_d: {
2123 EVT ResTy = Op->getValueType(ResNo: 0);
2124 return DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(Op), VT: ResTy, N1: Op->getOperand(Num: 1),
2125 N2: DAG.getNode(Opcode: ISD::MUL, DL: SDLoc(Op), VT: ResTy,
2126 N1: Op->getOperand(Num: 2), N2: Op->getOperand(Num: 3)));
2127 }
2128 case Intrinsic::mips_max_s_b:
2129 case Intrinsic::mips_max_s_h:
2130 case Intrinsic::mips_max_s_w:
2131 case Intrinsic::mips_max_s_d:
2132 return DAG.getNode(Opcode: ISD::SMAX, DL, VT: Op->getValueType(ResNo: 0),
2133 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2));
2134 case Intrinsic::mips_max_u_b:
2135 case Intrinsic::mips_max_u_h:
2136 case Intrinsic::mips_max_u_w:
2137 case Intrinsic::mips_max_u_d:
2138 return DAG.getNode(Opcode: ISD::UMAX, DL, VT: Op->getValueType(ResNo: 0),
2139 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2));
2140 case Intrinsic::mips_maxi_s_b:
2141 case Intrinsic::mips_maxi_s_h:
2142 case Intrinsic::mips_maxi_s_w:
2143 case Intrinsic::mips_maxi_s_d:
2144 return DAG.getNode(Opcode: ISD::SMAX, DL, VT: Op->getValueType(ResNo: 0),
2145 N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG, IsSigned: true));
2146 case Intrinsic::mips_maxi_u_b:
2147 case Intrinsic::mips_maxi_u_h:
2148 case Intrinsic::mips_maxi_u_w:
2149 case Intrinsic::mips_maxi_u_d:
2150 return DAG.getNode(Opcode: ISD::UMAX, DL, VT: Op->getValueType(ResNo: 0),
2151 N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG));
2152 case Intrinsic::mips_min_s_b:
2153 case Intrinsic::mips_min_s_h:
2154 case Intrinsic::mips_min_s_w:
2155 case Intrinsic::mips_min_s_d:
2156 return DAG.getNode(Opcode: ISD::SMIN, DL, VT: Op->getValueType(ResNo: 0),
2157 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2));
2158 case Intrinsic::mips_min_u_b:
2159 case Intrinsic::mips_min_u_h:
2160 case Intrinsic::mips_min_u_w:
2161 case Intrinsic::mips_min_u_d:
2162 return DAG.getNode(Opcode: ISD::UMIN, DL, VT: Op->getValueType(ResNo: 0),
2163 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2));
2164 case Intrinsic::mips_mini_s_b:
2165 case Intrinsic::mips_mini_s_h:
2166 case Intrinsic::mips_mini_s_w:
2167 case Intrinsic::mips_mini_s_d:
2168 return DAG.getNode(Opcode: ISD::SMIN, DL, VT: Op->getValueType(ResNo: 0),
2169 N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG, IsSigned: true));
2170 case Intrinsic::mips_mini_u_b:
2171 case Intrinsic::mips_mini_u_h:
2172 case Intrinsic::mips_mini_u_w:
2173 case Intrinsic::mips_mini_u_d:
2174 return DAG.getNode(Opcode: ISD::UMIN, DL, VT: Op->getValueType(ResNo: 0),
2175 N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG));
2176 case Intrinsic::mips_mod_s_b:
2177 case Intrinsic::mips_mod_s_h:
2178 case Intrinsic::mips_mod_s_w:
2179 case Intrinsic::mips_mod_s_d:
2180 return DAG.getNode(Opcode: ISD::SREM, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
2181 N2: Op->getOperand(Num: 2));
2182 case Intrinsic::mips_mod_u_b:
2183 case Intrinsic::mips_mod_u_h:
2184 case Intrinsic::mips_mod_u_w:
2185 case Intrinsic::mips_mod_u_d:
2186 return DAG.getNode(Opcode: ISD::UREM, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
2187 N2: Op->getOperand(Num: 2));
2188 case Intrinsic::mips_mulv_b:
2189 case Intrinsic::mips_mulv_h:
2190 case Intrinsic::mips_mulv_w:
2191 case Intrinsic::mips_mulv_d:
2192 return DAG.getNode(Opcode: ISD::MUL, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
2193 N2: Op->getOperand(Num: 2));
2194 case Intrinsic::mips_msubv_b:
2195 case Intrinsic::mips_msubv_h:
2196 case Intrinsic::mips_msubv_w:
2197 case Intrinsic::mips_msubv_d: {
2198 EVT ResTy = Op->getValueType(ResNo: 0);
2199 return DAG.getNode(Opcode: ISD::SUB, DL: SDLoc(Op), VT: ResTy, N1: Op->getOperand(Num: 1),
2200 N2: DAG.getNode(Opcode: ISD::MUL, DL: SDLoc(Op), VT: ResTy,
2201 N1: Op->getOperand(Num: 2), N2: Op->getOperand(Num: 3)));
2202 }
2203 case Intrinsic::mips_nlzc_b:
2204 case Intrinsic::mips_nlzc_h:
2205 case Intrinsic::mips_nlzc_w:
2206 case Intrinsic::mips_nlzc_d:
2207 return DAG.getNode(Opcode: ISD::CTLZ, DL, VT: Op->getValueType(ResNo: 0), Operand: Op->getOperand(Num: 1));
2208 case Intrinsic::mips_nor_v: {
2209 SDValue Res = DAG.getNode(Opcode: ISD::OR, DL, VT: Op->getValueType(ResNo: 0),
2210 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2));
2211 return DAG.getNOT(DL, Val: Res, VT: Res->getValueType(ResNo: 0));
2212 }
2213 case Intrinsic::mips_nori_b: {
2214 SDValue Res = DAG.getNode(Opcode: ISD::OR, DL, VT: Op->getValueType(ResNo: 0),
2215 N1: Op->getOperand(Num: 1),
2216 N2: lowerMSASplatImm(Op, ImmOp: 2, DAG));
2217 return DAG.getNOT(DL, Val: Res, VT: Res->getValueType(ResNo: 0));
2218 }
2219 case Intrinsic::mips_or_v:
2220 return DAG.getNode(Opcode: ISD::OR, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
2221 N2: Op->getOperand(Num: 2));
2222 case Intrinsic::mips_ori_b:
2223 return DAG.getNode(Opcode: ISD::OR, DL, VT: Op->getValueType(ResNo: 0),
2224 N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG));
2225 case Intrinsic::mips_pckev_b:
2226 case Intrinsic::mips_pckev_h:
2227 case Intrinsic::mips_pckev_w:
2228 case Intrinsic::mips_pckev_d:
2229 return DAG.getNode(Opcode: MipsISD::PCKEV, DL, VT: Op->getValueType(ResNo: 0),
2230 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2));
2231 case Intrinsic::mips_pckod_b:
2232 case Intrinsic::mips_pckod_h:
2233 case Intrinsic::mips_pckod_w:
2234 case Intrinsic::mips_pckod_d:
2235 return DAG.getNode(Opcode: MipsISD::PCKOD, DL, VT: Op->getValueType(ResNo: 0),
2236 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2));
2237 case Intrinsic::mips_pcnt_b:
2238 case Intrinsic::mips_pcnt_h:
2239 case Intrinsic::mips_pcnt_w:
2240 case Intrinsic::mips_pcnt_d:
2241 return DAG.getNode(Opcode: ISD::CTPOP, DL, VT: Op->getValueType(ResNo: 0), Operand: Op->getOperand(Num: 1));
2242 case Intrinsic::mips_sat_s_b:
2243 case Intrinsic::mips_sat_s_h:
2244 case Intrinsic::mips_sat_s_w:
2245 case Intrinsic::mips_sat_s_d:
2246 case Intrinsic::mips_sat_u_b:
2247 case Intrinsic::mips_sat_u_h:
2248 case Intrinsic::mips_sat_u_w:
2249 case Intrinsic::mips_sat_u_d: {
2250 // Report an error for out of range values.
2251 int64_t Max;
2252 switch (Intrinsic) {
2253 case Intrinsic::mips_sat_s_b:
2254 case Intrinsic::mips_sat_u_b: Max = 7; break;
2255 case Intrinsic::mips_sat_s_h:
2256 case Intrinsic::mips_sat_u_h: Max = 15; break;
2257 case Intrinsic::mips_sat_s_w:
2258 case Intrinsic::mips_sat_u_w: Max = 31; break;
2259 case Intrinsic::mips_sat_s_d:
2260 case Intrinsic::mips_sat_u_d: Max = 63; break;
2261 default: llvm_unreachable("Unmatched intrinsic");
2262 }
2263 int64_t Value = cast<ConstantSDNode>(Val: Op->getOperand(Num: 2))->getSExtValue();
2264 if (Value < 0 || Value > Max)
2265 report_fatal_error(reason: "Immediate out of range");
2266 return SDValue();
2267 }
2268 case Intrinsic::mips_shf_b:
2269 case Intrinsic::mips_shf_h:
2270 case Intrinsic::mips_shf_w: {
2271 int64_t Value = cast<ConstantSDNode>(Val: Op->getOperand(Num: 2))->getSExtValue();
2272 if (Value < 0 || Value > 255)
2273 report_fatal_error(reason: "Immediate out of range");
2274 return DAG.getNode(Opcode: MipsISD::SHF, DL, VT: Op->getValueType(ResNo: 0),
2275 N1: Op->getOperand(Num: 2), N2: Op->getOperand(Num: 1));
2276 }
2277 case Intrinsic::mips_sldi_b:
2278 case Intrinsic::mips_sldi_h:
2279 case Intrinsic::mips_sldi_w:
2280 case Intrinsic::mips_sldi_d: {
2281 // Report an error for out of range values.
2282 int64_t Max;
2283 switch (Intrinsic) {
2284 case Intrinsic::mips_sldi_b: Max = 15; break;
2285 case Intrinsic::mips_sldi_h: Max = 7; break;
2286 case Intrinsic::mips_sldi_w: Max = 3; break;
2287 case Intrinsic::mips_sldi_d: Max = 1; break;
2288 default: llvm_unreachable("Unmatched intrinsic");
2289 }
2290 int64_t Value = cast<ConstantSDNode>(Val: Op->getOperand(Num: 3))->getSExtValue();
2291 if (Value < 0 || Value > Max)
2292 report_fatal_error(reason: "Immediate out of range");
2293 return SDValue();
2294 }
2295 case Intrinsic::mips_sll_b:
2296 case Intrinsic::mips_sll_h:
2297 case Intrinsic::mips_sll_w:
2298 case Intrinsic::mips_sll_d:
2299 return DAG.getNode(Opcode: ISD::SHL, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
2300 N2: truncateVecElts(Op, DAG));
2301 case Intrinsic::mips_slli_b:
2302 case Intrinsic::mips_slli_h:
2303 case Intrinsic::mips_slli_w:
2304 case Intrinsic::mips_slli_d:
2305 return DAG.getNode(Opcode: ISD::SHL, DL, VT: Op->getValueType(ResNo: 0),
2306 N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG));
2307 case Intrinsic::mips_splat_b:
2308 case Intrinsic::mips_splat_h:
2309 case Intrinsic::mips_splat_w:
2310 case Intrinsic::mips_splat_d:
2311 // We can't lower via VECTOR_SHUFFLE because it requires constant shuffle
2312 // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because
2313 // EXTRACT_VECTOR_ELT can't extract i64's on MIPS32.
2314 // Instead we lower to MipsISD::VSHF and match from there.
2315 return DAG.getNode(Opcode: MipsISD::VSHF, DL, VT: Op->getValueType(ResNo: 0),
2316 N1: lowerMSASplatZExt(Op, OpNr: 2, DAG), N2: Op->getOperand(Num: 1),
2317 N3: Op->getOperand(Num: 1));
2318 case Intrinsic::mips_splati_b:
2319 case Intrinsic::mips_splati_h:
2320 case Intrinsic::mips_splati_w:
2321 case Intrinsic::mips_splati_d:
2322 return DAG.getNode(Opcode: MipsISD::VSHF, DL, VT: Op->getValueType(ResNo: 0),
2323 N1: lowerMSASplatImm(Op, ImmOp: 2, DAG), N2: Op->getOperand(Num: 1),
2324 N3: Op->getOperand(Num: 1));
2325 case Intrinsic::mips_sra_b:
2326 case Intrinsic::mips_sra_h:
2327 case Intrinsic::mips_sra_w:
2328 case Intrinsic::mips_sra_d:
2329 return DAG.getNode(Opcode: ISD::SRA, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
2330 N2: truncateVecElts(Op, DAG));
2331 case Intrinsic::mips_srai_b:
2332 case Intrinsic::mips_srai_h:
2333 case Intrinsic::mips_srai_w:
2334 case Intrinsic::mips_srai_d:
2335 return DAG.getNode(Opcode: ISD::SRA, DL, VT: Op->getValueType(ResNo: 0),
2336 N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG));
2337 case Intrinsic::mips_srari_b:
2338 case Intrinsic::mips_srari_h:
2339 case Intrinsic::mips_srari_w:
2340 case Intrinsic::mips_srari_d: {
2341 // Report an error for out of range values.
2342 int64_t Max;
2343 switch (Intrinsic) {
2344 case Intrinsic::mips_srari_b: Max = 7; break;
2345 case Intrinsic::mips_srari_h: Max = 15; break;
2346 case Intrinsic::mips_srari_w: Max = 31; break;
2347 case Intrinsic::mips_srari_d: Max = 63; break;
2348 default: llvm_unreachable("Unmatched intrinsic");
2349 }
2350 int64_t Value = cast<ConstantSDNode>(Val: Op->getOperand(Num: 2))->getSExtValue();
2351 if (Value < 0 || Value > Max)
2352 report_fatal_error(reason: "Immediate out of range");
2353 return SDValue();
2354 }
2355 case Intrinsic::mips_srl_b:
2356 case Intrinsic::mips_srl_h:
2357 case Intrinsic::mips_srl_w:
2358 case Intrinsic::mips_srl_d:
2359 return DAG.getNode(Opcode: ISD::SRL, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
2360 N2: truncateVecElts(Op, DAG));
2361 case Intrinsic::mips_srli_b:
2362 case Intrinsic::mips_srli_h:
2363 case Intrinsic::mips_srli_w:
2364 case Intrinsic::mips_srli_d:
2365 return DAG.getNode(Opcode: ISD::SRL, DL, VT: Op->getValueType(ResNo: 0),
2366 N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG));
2367 case Intrinsic::mips_srlri_b:
2368 case Intrinsic::mips_srlri_h:
2369 case Intrinsic::mips_srlri_w:
2370 case Intrinsic::mips_srlri_d: {
2371 // Report an error for out of range values.
2372 int64_t Max;
2373 switch (Intrinsic) {
2374 case Intrinsic::mips_srlri_b: Max = 7; break;
2375 case Intrinsic::mips_srlri_h: Max = 15; break;
2376 case Intrinsic::mips_srlri_w: Max = 31; break;
2377 case Intrinsic::mips_srlri_d: Max = 63; break;
2378 default: llvm_unreachable("Unmatched intrinsic");
2379 }
2380 int64_t Value = cast<ConstantSDNode>(Val: Op->getOperand(Num: 2))->getSExtValue();
2381 if (Value < 0 || Value > Max)
2382 report_fatal_error(reason: "Immediate out of range");
2383 return SDValue();
2384 }
2385 case Intrinsic::mips_subv_b:
2386 case Intrinsic::mips_subv_h:
2387 case Intrinsic::mips_subv_w:
2388 case Intrinsic::mips_subv_d:
2389 return DAG.getNode(Opcode: ISD::SUB, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
2390 N2: Op->getOperand(Num: 2));
2391 case Intrinsic::mips_subvi_b:
2392 case Intrinsic::mips_subvi_h:
2393 case Intrinsic::mips_subvi_w:
2394 case Intrinsic::mips_subvi_d:
2395 return DAG.getNode(Opcode: ISD::SUB, DL, VT: Op->getValueType(ResNo: 0),
2396 N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG));
2397 case Intrinsic::mips_vshf_b:
2398 case Intrinsic::mips_vshf_h:
2399 case Intrinsic::mips_vshf_w:
2400 case Intrinsic::mips_vshf_d:
2401 return DAG.getNode(Opcode: MipsISD::VSHF, DL, VT: Op->getValueType(ResNo: 0),
2402 N1: Op->getOperand(Num: 1), N2: Op->getOperand(Num: 2), N3: Op->getOperand(Num: 3));
2403 case Intrinsic::mips_xor_v:
2404 return DAG.getNode(Opcode: ISD::XOR, DL, VT: Op->getValueType(ResNo: 0), N1: Op->getOperand(Num: 1),
2405 N2: Op->getOperand(Num: 2));
2406 case Intrinsic::mips_xori_b:
2407 return DAG.getNode(Opcode: ISD::XOR, DL, VT: Op->getValueType(ResNo: 0),
2408 N1: Op->getOperand(Num: 1), N2: lowerMSASplatImm(Op, ImmOp: 2, DAG));
2409 case Intrinsic::thread_pointer: {
2410 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
2411 return DAG.getNode(Opcode: MipsISD::ThreadPointer, DL, VT: PtrVT);
2412 }
2413 }
2414}
2415
2416static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr,
2417 const MipsSubtarget &Subtarget) {
2418 SDLoc DL(Op);
2419 SDValue ChainIn = Op->getOperand(Num: 0);
2420 SDValue Address = Op->getOperand(Num: 2);
2421 SDValue Offset = Op->getOperand(Num: 3);
2422 EVT ResTy = Op->getValueType(ResNo: 0);
2423 EVT PtrTy = Address->getValueType(ResNo: 0);
2424
2425 // For N64 addresses have the underlying type MVT::i64. This intrinsic
2426 // however takes an i32 signed constant offset. The actual type of the
2427 // intrinsic is a scaled signed i10.
2428 if (Subtarget.isABI_N64())
2429 Offset = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: PtrTy, Operand: Offset);
2430
2431 Address = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrTy, N1: Address, N2: Offset);
2432 return DAG.getLoad(VT: ResTy, dl: DL, Chain: ChainIn, Ptr: Address, PtrInfo: MachinePointerInfo(),
2433 Alignment: Align(16));
2434}
2435
2436SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
2437 SelectionDAG &DAG) const {
2438 unsigned Intr = Op->getConstantOperandVal(Num: 1);
2439 switch (Intr) {
2440 default:
2441 return SDValue();
2442 case Intrinsic::mips_extp:
2443 return lowerDSPIntr(Op, DAG, Opc: MipsISD::EXTP);
2444 case Intrinsic::mips_extpdp:
2445 return lowerDSPIntr(Op, DAG, Opc: MipsISD::EXTPDP);
2446 case Intrinsic::mips_extr_w:
2447 return lowerDSPIntr(Op, DAG, Opc: MipsISD::EXTR_W);
2448 case Intrinsic::mips_extr_r_w:
2449 return lowerDSPIntr(Op, DAG, Opc: MipsISD::EXTR_R_W);
2450 case Intrinsic::mips_extr_rs_w:
2451 return lowerDSPIntr(Op, DAG, Opc: MipsISD::EXTR_RS_W);
2452 case Intrinsic::mips_extr_s_h:
2453 return lowerDSPIntr(Op, DAG, Opc: MipsISD::EXTR_S_H);
2454 case Intrinsic::mips_mthlip:
2455 return lowerDSPIntr(Op, DAG, Opc: MipsISD::MTHLIP);
2456 case Intrinsic::mips_mulsaq_s_w_ph:
2457 return lowerDSPIntr(Op, DAG, Opc: MipsISD::MULSAQ_S_W_PH);
2458 case Intrinsic::mips_maq_s_w_phl:
2459 return lowerDSPIntr(Op, DAG, Opc: MipsISD::MAQ_S_W_PHL);
2460 case Intrinsic::mips_maq_s_w_phr:
2461 return lowerDSPIntr(Op, DAG, Opc: MipsISD::MAQ_S_W_PHR);
2462 case Intrinsic::mips_maq_sa_w_phl:
2463 return lowerDSPIntr(Op, DAG, Opc: MipsISD::MAQ_SA_W_PHL);
2464 case Intrinsic::mips_maq_sa_w_phr:
2465 return lowerDSPIntr(Op, DAG, Opc: MipsISD::MAQ_SA_W_PHR);
2466 case Intrinsic::mips_dpaq_s_w_ph:
2467 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPAQ_S_W_PH);
2468 case Intrinsic::mips_dpsq_s_w_ph:
2469 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPSQ_S_W_PH);
2470 case Intrinsic::mips_dpaq_sa_l_w:
2471 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPAQ_SA_L_W);
2472 case Intrinsic::mips_dpsq_sa_l_w:
2473 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPSQ_SA_L_W);
2474 case Intrinsic::mips_dpaqx_s_w_ph:
2475 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPAQX_S_W_PH);
2476 case Intrinsic::mips_dpaqx_sa_w_ph:
2477 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPAQX_SA_W_PH);
2478 case Intrinsic::mips_dpsqx_s_w_ph:
2479 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPSQX_S_W_PH);
2480 case Intrinsic::mips_dpsqx_sa_w_ph:
2481 return lowerDSPIntr(Op, DAG, Opc: MipsISD::DPSQX_SA_W_PH);
2482 case Intrinsic::mips_ld_b:
2483 case Intrinsic::mips_ld_h:
2484 case Intrinsic::mips_ld_w:
2485 case Intrinsic::mips_ld_d:
2486 return lowerMSALoadIntr(Op, DAG, Intr, Subtarget);
2487 }
2488}
2489
2490static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr,
2491 const MipsSubtarget &Subtarget) {
2492 SDLoc DL(Op);
2493 SDValue ChainIn = Op->getOperand(Num: 0);
2494 SDValue Value = Op->getOperand(Num: 2);
2495 SDValue Address = Op->getOperand(Num: 3);
2496 SDValue Offset = Op->getOperand(Num: 4);
2497 EVT PtrTy = Address->getValueType(ResNo: 0);
2498
2499 // For N64 addresses have the underlying type MVT::i64. This intrinsic
2500 // however takes an i32 signed constant offset. The actual type of the
2501 // intrinsic is a scaled signed i10.
2502 if (Subtarget.isABI_N64())
2503 Offset = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: PtrTy, Operand: Offset);
2504
2505 Address = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrTy, N1: Address, N2: Offset);
2506
2507 return DAG.getStore(Chain: ChainIn, dl: DL, Val: Value, Ptr: Address, PtrInfo: MachinePointerInfo(),
2508 Alignment: Align(16));
2509}
2510
2511SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op,
2512 SelectionDAG &DAG) const {
2513 unsigned Intr = Op->getConstantOperandVal(Num: 1);
2514 switch (Intr) {
2515 default:
2516 return SDValue();
2517 case Intrinsic::mips_st_b:
2518 case Intrinsic::mips_st_h:
2519 case Intrinsic::mips_st_w:
2520 case Intrinsic::mips_st_d:
2521 return lowerMSAStoreIntr(Op, DAG, Intr, Subtarget);
2522 }
2523}
2524
2525// Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT.
2526//
2527// The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We
2528// choose to sign-extend but we could have equally chosen zero-extend. The
2529// DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT
2530// result into this node later (possibly changing it to a zero-extend in the
2531// process).
2532SDValue MipsSETargetLowering::
2533lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
2534 SDLoc DL(Op);
2535 EVT ResTy = Op->getValueType(ResNo: 0);
2536 SDValue Op0 = Op->getOperand(Num: 0);
2537 EVT VecTy = Op0->getValueType(ResNo: 0);
2538
2539 if (!VecTy.is128BitVector())
2540 return SDValue();
2541
2542 if (ResTy.isInteger()) {
2543 SDValue Op1 = Op->getOperand(Num: 1);
2544 EVT EltTy = VecTy.getVectorElementType();
2545 return DAG.getNode(Opcode: MipsISD::VEXTRACT_SEXT_ELT, DL, VT: ResTy, N1: Op0, N2: Op1,
2546 N3: DAG.getValueType(EltTy));
2547 }
2548
2549 return Op;
2550}
2551
2552static bool isConstantOrUndef(const SDValue Op) {
2553 if (Op->isUndef())
2554 return true;
2555 if (isa<ConstantSDNode>(Val: Op))
2556 return true;
2557 if (isa<ConstantFPSDNode>(Val: Op))
2558 return true;
2559 return false;
2560}
2561
2562static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
2563 for (unsigned i = 0; i < Op->getNumOperands(); ++i)
2564 if (isConstantOrUndef(Op: Op->getOperand(Num: i)))
2565 return true;
2566 return false;
2567}
2568
2569// Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the
2570// backend.
2571//
2572// Lowers according to the following rules:
2573// - Constant splats are legal as-is as long as the SplatBitSize is a power of
2574// 2 less than or equal to 64 and the value fits into a signed 10-bit
2575// immediate
2576// - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize
2577// is a power of 2 less than or equal to 64 and the value does not fit into a
2578// signed 10-bit immediate
2579// - Non-constant splats are legal as-is.
2580// - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT.
2581// - All others are illegal and must be expanded.
2582SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
2583 SelectionDAG &DAG) const {
2584 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Val&: Op);
2585 EVT ResTy = Op->getValueType(ResNo: 0);
2586 SDLoc DL(Op);
2587 APInt SplatValue, SplatUndef;
2588 unsigned SplatBitSize;
2589 bool HasAnyUndefs;
2590
2591 if (!Subtarget.hasMSA() || !ResTy.is128BitVector())
2592 return SDValue();
2593
2594 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
2595 HasAnyUndefs, MinSplatBits: 8,
2596 isBigEndian: !Subtarget.isLittle()) && SplatBitSize <= 64) {
2597 // We can only cope with 8, 16, 32, or 64-bit elements
2598 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
2599 SplatBitSize != 64)
2600 return SDValue();
2601
2602 // If the value isn't an integer type we will have to bitcast
2603 // from an integer type first. Also, if there are any undefs, we must
2604 // lower them to defined values first.
2605 if (ResTy.isInteger() && !HasAnyUndefs)
2606 return Op;
2607
2608 EVT ViaVecTy;
2609
2610 switch (SplatBitSize) {
2611 default:
2612 return SDValue();
2613 case 8:
2614 ViaVecTy = MVT::v16i8;
2615 break;
2616 case 16:
2617 ViaVecTy = MVT::v8i16;
2618 break;
2619 case 32:
2620 ViaVecTy = MVT::v4i32;
2621 break;
2622 case 64:
2623 // There's no fill.d to fall back on for 64-bit values
2624 return SDValue();
2625 }
2626
2627 // SelectionDAG::getConstant will promote SplatValue appropriately.
2628 SDValue Result = DAG.getConstant(Val: SplatValue, DL, VT: ViaVecTy);
2629
2630 // Bitcast to the type we originally wanted
2631 if (ViaVecTy != ResTy)
2632 Result = DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(Node), VT: ResTy, Operand: Result);
2633
2634 return Result;
2635 } else if (DAG.isSplatValue(V: Op, /* AllowUndefs */ false))
2636 return Op;
2637 else if (!isConstantOrUndefBUILD_VECTOR(Op: Node)) {
2638 // Use INSERT_VECTOR_ELT operations rather than expand to stores.
2639 // The resulting code is the same length as the expansion, but it doesn't
2640 // use memory operations
2641 EVT ResTy = Node->getValueType(ResNo: 0);
2642
2643 assert(ResTy.isVector());
2644
2645 unsigned NumElts = ResTy.getVectorNumElements();
2646 SDValue Vector = DAG.getUNDEF(VT: ResTy);
2647 for (unsigned i = 0; i < NumElts; ++i) {
2648 Vector = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: ResTy, N1: Vector,
2649 N2: Node->getOperand(Num: i),
2650 N3: DAG.getConstant(Val: i, DL, VT: MVT::i32));
2651 }
2652 return Vector;
2653 }
2654
2655 return SDValue();
2656}
2657
2658// Lower VECTOR_SHUFFLE into SHF (if possible).
2659//
2660// SHF splits the vector into blocks of four elements, then shuffles these
2661// elements according to a <4 x i2> constant (encoded as an integer immediate).
2662//
2663// It is therefore possible to lower into SHF when the mask takes the form:
2664// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
2665// When undef's appear they are treated as if they were whatever value is
2666// necessary in order to fit the above forms.
2667//
2668// For example:
2669// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
2670// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
2671// i32 7, i32 6, i32 5, i32 4>
2672// is lowered to:
2673// (SHF_H $w0, $w1, 27)
2674// where the 27 comes from:
2675// 3 + (2 << 2) + (1 << 4) + (0 << 6)
2676static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy,
2677 SmallVector<int, 16> Indices,
2678 SelectionDAG &DAG) {
2679 int SHFIndices[4] = { -1, -1, -1, -1 };
2680
2681 if (Indices.size() < 4)
2682 return SDValue();
2683
2684 for (unsigned i = 0; i < 4; ++i) {
2685 for (unsigned j = i; j < Indices.size(); j += 4) {
2686 int Idx = Indices[j];
2687
2688 // Convert from vector index to 4-element subvector index
2689 // If an index refers to an element outside of the subvector then give up
2690 if (Idx != -1) {
2691 Idx -= 4 * (j / 4);
2692 if (Idx < 0 || Idx >= 4)
2693 return SDValue();
2694 }
2695
2696 // If the mask has an undef, replace it with the current index.
2697 // Note that it might still be undef if the current index is also undef
2698 if (SHFIndices[i] == -1)
2699 SHFIndices[i] = Idx;
2700
2701 // Check that non-undef values are the same as in the mask. If they
2702 // aren't then give up
2703 if (!(Idx == -1 || Idx == SHFIndices[i]))
2704 return SDValue();
2705 }
2706 }
2707
2708 // Calculate the immediate. Replace any remaining undefs with zero
2709 APInt Imm(32, 0);
2710 for (int i = 3; i >= 0; --i) {
2711 int Idx = SHFIndices[i];
2712
2713 if (Idx == -1)
2714 Idx = 0;
2715
2716 Imm <<= 2;
2717 Imm |= Idx & 0x3;
2718 }
2719
2720 SDLoc DL(Op);
2721 return DAG.getNode(Opcode: MipsISD::SHF, DL, VT: ResTy,
2722 N1: DAG.getTargetConstant(Val: Imm, DL, VT: MVT::i32),
2723 N2: Op->getOperand(Num: 0));
2724}
2725
2726/// Determine whether a range fits a regular pattern of values.
2727/// This function accounts for the possibility of jumping over the End iterator.
2728template <typename ValType>
2729static bool
2730fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
2731 unsigned CheckStride,
2732 typename SmallVectorImpl<ValType>::const_iterator End,
2733 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
2734 auto &I = Begin;
2735
2736 while (I != End) {
2737 if (*I != -1 && *I != ExpectedIndex)
2738 return false;
2739 ExpectedIndex += ExpectedIndexStride;
2740
2741 // Incrementing past End is undefined behaviour so we must increment one
2742 // step at a time and check for End at each step.
2743 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
2744 ; // Empty loop body.
2745 }
2746 return true;
2747}
2748
2749// Determine whether VECTOR_SHUFFLE is a SPLATI.
2750//
2751// It is a SPLATI when the mask is:
2752// <x, x, x, ...>
2753// where x is any valid index.
2754//
2755// When undef's appear in the mask they are treated as if they were whatever
2756// value is necessary in order to fit the above form.
2757static bool isVECTOR_SHUFFLE_SPLATI(SDValue Op, EVT ResTy,
2758 SmallVector<int, 16> Indices,
2759 SelectionDAG &DAG) {
2760 assert((Indices.size() % 2) == 0);
2761
2762 int SplatIndex = -1;
2763 for (const auto &V : Indices) {
2764 if (V != -1) {
2765 SplatIndex = V;
2766 break;
2767 }
2768 }
2769
2770 return fitsRegularPattern<int>(Begin: Indices.begin(), CheckStride: 1, End: Indices.end(), ExpectedIndex: SplatIndex,
2771 ExpectedIndexStride: 0);
2772}
2773
2774// Lower VECTOR_SHUFFLE into ILVEV (if possible).
2775//
2776// ILVEV interleaves the even elements from each vector.
2777//
2778// It is possible to lower into ILVEV when the mask consists of two of the
2779// following forms interleaved:
2780// <0, 2, 4, ...>
2781// <n, n+2, n+4, ...>
2782// where n is the number of elements in the vector.
2783// For example:
2784// <0, 0, 2, 2, 4, 4, ...>
2785// <0, n, 2, n+2, 4, n+4, ...>
2786//
2787// When undef's appear in the mask they are treated as if they were whatever
2788// value is necessary in order to fit the above forms.
2789static SDValue lowerVECTOR_SHUFFLE_ILVEV(SDValue Op, EVT ResTy,
2790 SmallVector<int, 16> Indices,
2791 SelectionDAG &DAG) {
2792 assert((Indices.size() % 2) == 0);
2793
2794 SDValue Wt;
2795 SDValue Ws;
2796 const auto &Begin = Indices.begin();
2797 const auto &End = Indices.end();
2798
2799 // Check even elements are taken from the even elements of one half or the
2800 // other and pick an operand accordingly.
2801 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 2))
2802 Wt = Op->getOperand(Num: 0);
2803 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Indices.size(), ExpectedIndexStride: 2))
2804 Wt = Op->getOperand(Num: 1);
2805 else
2806 return SDValue();
2807
2808 // Check odd elements are taken from the even elements of one half or the
2809 // other and pick an operand accordingly.
2810 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 2))
2811 Ws = Op->getOperand(Num: 0);
2812 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Indices.size(), ExpectedIndexStride: 2))
2813 Ws = Op->getOperand(Num: 1);
2814 else
2815 return SDValue();
2816
2817 return DAG.getNode(Opcode: MipsISD::ILVEV, DL: SDLoc(Op), VT: ResTy, N1: Ws, N2: Wt);
2818}
2819
2820// Lower VECTOR_SHUFFLE into ILVOD (if possible).
2821//
2822// ILVOD interleaves the odd elements from each vector.
2823//
2824// It is possible to lower into ILVOD when the mask consists of two of the
2825// following forms interleaved:
2826// <1, 3, 5, ...>
2827// <n+1, n+3, n+5, ...>
2828// where n is the number of elements in the vector.
2829// For example:
2830// <1, 1, 3, 3, 5, 5, ...>
2831// <1, n+1, 3, n+3, 5, n+5, ...>
2832//
2833// When undef's appear in the mask they are treated as if they were whatever
2834// value is necessary in order to fit the above forms.
2835static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy,
2836 SmallVector<int, 16> Indices,
2837 SelectionDAG &DAG) {
2838 assert((Indices.size() % 2) == 0);
2839
2840 SDValue Wt;
2841 SDValue Ws;
2842 const auto &Begin = Indices.begin();
2843 const auto &End = Indices.end();
2844
2845 // Check even elements are taken from the odd elements of one half or the
2846 // other and pick an operand accordingly.
2847 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 1, ExpectedIndexStride: 2))
2848 Wt = Op->getOperand(Num: 0);
2849 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Indices.size() + 1, ExpectedIndexStride: 2))
2850 Wt = Op->getOperand(Num: 1);
2851 else
2852 return SDValue();
2853
2854 // Check odd elements are taken from the odd elements of one half or the
2855 // other and pick an operand accordingly.
2856 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 1, ExpectedIndexStride: 2))
2857 Ws = Op->getOperand(Num: 0);
2858 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Indices.size() + 1, ExpectedIndexStride: 2))
2859 Ws = Op->getOperand(Num: 1);
2860 else
2861 return SDValue();
2862
2863 return DAG.getNode(Opcode: MipsISD::ILVOD, DL: SDLoc(Op), VT: ResTy, N1: Ws, N2: Wt);
2864}
2865
2866// Lower VECTOR_SHUFFLE into ILVR (if possible).
2867//
2868// ILVR interleaves consecutive elements from the right (lowest-indexed) half of
2869// each vector.
2870//
2871// It is possible to lower into ILVR when the mask consists of two of the
2872// following forms interleaved:
2873// <0, 1, 2, ...>
2874// <n, n+1, n+2, ...>
2875// where n is the number of elements in the vector.
2876// For example:
2877// <0, 0, 1, 1, 2, 2, ...>
2878// <0, n, 1, n+1, 2, n+2, ...>
2879//
2880// When undef's appear in the mask they are treated as if they were whatever
2881// value is necessary in order to fit the above forms.
2882static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy,
2883 SmallVector<int, 16> Indices,
2884 SelectionDAG &DAG) {
2885 assert((Indices.size() % 2) == 0);
2886
2887 SDValue Wt;
2888 SDValue Ws;
2889 const auto &Begin = Indices.begin();
2890 const auto &End = Indices.end();
2891
2892 // Check even elements are taken from the right (lowest-indexed) elements of
2893 // one half or the other and pick an operand accordingly.
2894 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 1))
2895 Wt = Op->getOperand(Num: 0);
2896 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Indices.size(), ExpectedIndexStride: 1))
2897 Wt = Op->getOperand(Num: 1);
2898 else
2899 return SDValue();
2900
2901 // Check odd elements are taken from the right (lowest-indexed) elements of
2902 // one half or the other and pick an operand accordingly.
2903 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: 0, ExpectedIndexStride: 1))
2904 Ws = Op->getOperand(Num: 0);
2905 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Indices.size(), ExpectedIndexStride: 1))
2906 Ws = Op->getOperand(Num: 1);
2907 else
2908 return SDValue();
2909
2910 return DAG.getNode(Opcode: MipsISD::ILVR, DL: SDLoc(Op), VT: ResTy, N1: Ws, N2: Wt);
2911}
2912
2913// Lower VECTOR_SHUFFLE into ILVL (if possible).
2914//
2915// ILVL interleaves consecutive elements from the left (highest-indexed) half
2916// of each vector.
2917//
2918// It is possible to lower into ILVL when the mask consists of two of the
2919// following forms interleaved:
2920// <x, x+1, x+2, ...>
2921// <n+x, n+x+1, n+x+2, ...>
2922// where n is the number of elements in the vector and x is half n.
2923// For example:
2924// <x, x, x+1, x+1, x+2, x+2, ...>
2925// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
2926//
2927// When undef's appear in the mask they are treated as if they were whatever
2928// value is necessary in order to fit the above forms.
2929static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy,
2930 SmallVector<int, 16> Indices,
2931 SelectionDAG &DAG) {
2932 assert((Indices.size() % 2) == 0);
2933
2934 unsigned HalfSize = Indices.size() / 2;
2935 SDValue Wt;
2936 SDValue Ws;
2937 const auto &Begin = Indices.begin();
2938 const auto &End = Indices.end();
2939
2940 // Check even elements are taken from the left (highest-indexed) elements of
2941 // one half or the other and pick an operand accordingly.
2942 if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1))
2943 Wt = Op->getOperand(Num: 0);
2944 else if (fitsRegularPattern<int>(Begin, CheckStride: 2, End, ExpectedIndex: Indices.size() + HalfSize, ExpectedIndexStride: 1))
2945 Wt = Op->getOperand(Num: 1);
2946 else
2947 return SDValue();
2948
2949 // Check odd elements are taken from the left (highest-indexed) elements of
2950 // one half or the other and pick an operand accordingly.
2951 if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: HalfSize, ExpectedIndexStride: 1))
2952 Ws = Op->getOperand(Num: 0);
2953 else if (fitsRegularPattern<int>(Begin: Begin + 1, CheckStride: 2, End, ExpectedIndex: Indices.size() + HalfSize,
2954 ExpectedIndexStride: 1))
2955 Ws = Op->getOperand(Num: 1);
2956 else
2957 return SDValue();
2958
2959 return DAG.getNode(Opcode: MipsISD::ILVL, DL: SDLoc(Op), VT: ResTy, N1: Ws, N2: Wt);
2960}
2961
2962// Lower VECTOR_SHUFFLE into PCKEV (if possible).
2963//
2964// PCKEV copies the even elements of each vector into the result vector.
2965//
2966// It is possible to lower into PCKEV when the mask consists of two of the
2967// following forms concatenated:
2968// <0, 2, 4, ...>
2969// <n, n+2, n+4, ...>
2970// where n is the number of elements in the vector.
2971// For example:
2972// <0, 2, 4, ..., 0, 2, 4, ...>
2973// <0, 2, 4, ..., n, n+2, n+4, ...>
2974//
2975// When undef's appear in the mask they are treated as if they were whatever
2976// value is necessary in order to fit the above forms.
2977static SDValue lowerVECTOR_SHUFFLE_PCKEV(SDValue Op, EVT ResTy,
2978 SmallVector<int, 16> Indices,
2979 SelectionDAG &DAG) {
2980 assert((Indices.size() % 2) == 0);
2981
2982 SDValue Wt;
2983 SDValue Ws;
2984 const auto &Begin = Indices.begin();
2985 const auto &Mid = Indices.begin() + Indices.size() / 2;
2986 const auto &End = Indices.end();
2987
2988 if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: 0, ExpectedIndexStride: 2))
2989 Wt = Op->getOperand(Num: 0);
2990 else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: Indices.size(), ExpectedIndexStride: 2))
2991 Wt = Op->getOperand(Num: 1);
2992 else
2993 return SDValue();
2994
2995 if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: 0, ExpectedIndexStride: 2))
2996 Ws = Op->getOperand(Num: 0);
2997 else if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: Indices.size(), ExpectedIndexStride: 2))
2998 Ws = Op->getOperand(Num: 1);
2999 else
3000 return SDValue();
3001
3002 return DAG.getNode(Opcode: MipsISD::PCKEV, DL: SDLoc(Op), VT: ResTy, N1: Ws, N2: Wt);
3003}
3004
3005// Lower VECTOR_SHUFFLE into PCKOD (if possible).
3006//
3007// PCKOD copies the odd elements of each vector into the result vector.
3008//
3009// It is possible to lower into PCKOD when the mask consists of two of the
3010// following forms concatenated:
3011// <1, 3, 5, ...>
3012// <n+1, n+3, n+5, ...>
3013// where n is the number of elements in the vector.
3014// For example:
3015// <1, 3, 5, ..., 1, 3, 5, ...>
3016// <1, 3, 5, ..., n+1, n+3, n+5, ...>
3017//
3018// When undef's appear in the mask they are treated as if they were whatever
3019// value is necessary in order to fit the above forms.
3020static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy,
3021 SmallVector<int, 16> Indices,
3022 SelectionDAG &DAG) {
3023 assert((Indices.size() % 2) == 0);
3024
3025 SDValue Wt;
3026 SDValue Ws;
3027 const auto &Begin = Indices.begin();
3028 const auto &Mid = Indices.begin() + Indices.size() / 2;
3029 const auto &End = Indices.end();
3030
3031 if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: 1, ExpectedIndexStride: 2))
3032 Wt = Op->getOperand(Num: 0);
3033 else if (fitsRegularPattern<int>(Begin, CheckStride: 1, End: Mid, ExpectedIndex: Indices.size() + 1, ExpectedIndexStride: 2))
3034 Wt = Op->getOperand(Num: 1);
3035 else
3036 return SDValue();
3037
3038 if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: 1, ExpectedIndexStride: 2))
3039 Ws = Op->getOperand(Num: 0);
3040 else if (fitsRegularPattern<int>(Begin: Mid, CheckStride: 1, End, ExpectedIndex: Indices.size() + 1, ExpectedIndexStride: 2))
3041 Ws = Op->getOperand(Num: 1);
3042 else
3043 return SDValue();
3044
3045 return DAG.getNode(Opcode: MipsISD::PCKOD, DL: SDLoc(Op), VT: ResTy, N1: Ws, N2: Wt);
3046}
3047
3048// Lower VECTOR_SHUFFLE into VSHF.
3049//
3050// This mostly consists of converting the shuffle indices in Indices into a
3051// BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is
3052// also code to eliminate unused operands of the VECTOR_SHUFFLE. For example,
3053// if the type is v8i16 and all the indices are less than 8 then the second
3054// operand is unused and can be replaced with anything. We choose to replace it
3055// with the used operand since this reduces the number of instructions overall.
3056//
3057// NOTE: SPLATI shuffle masks may contain UNDEFs, since isSPLATI() treats
3058// UNDEFs as same as SPLATI index.
3059// For other instances we use the last valid index if UNDEF is
3060// encountered.
3061static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy,
3062 const SmallVector<int, 16> &Indices,
3063 const bool isSPLATI,
3064 SelectionDAG &DAG) {
3065 SmallVector<SDValue, 16> Ops;
3066 SDValue Op0;
3067 SDValue Op1;
3068 EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger();
3069 EVT MaskEltTy = MaskVecTy.getVectorElementType();
3070 bool Using1stVec = false;
3071 bool Using2ndVec = false;
3072 SDLoc DL(Op);
3073 int ResTyNumElts = ResTy.getVectorNumElements();
3074
3075 assert(Indices[0] >= 0 &&
3076 "shuffle mask starts with an UNDEF, which is not expected");
3077
3078 for (int i = 0; i < ResTyNumElts; ++i) {
3079 // Idx == -1 means UNDEF
3080 int Idx = Indices[i];
3081
3082 if (0 <= Idx && Idx < ResTyNumElts)
3083 Using1stVec = true;
3084 if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2)
3085 Using2ndVec = true;
3086 }
3087 int LastValidIndex = 0;
3088 for (size_t i = 0; i < Indices.size(); i++) {
3089 int Idx = Indices[i];
3090 if (Idx < 0) {
3091 // Continue using splati index or use the last valid index.
3092 Idx = isSPLATI ? Indices[0] : LastValidIndex;
3093 } else {
3094 LastValidIndex = Idx;
3095 }
3096 Ops.push_back(Elt: DAG.getTargetConstant(Val: Idx, DL, VT: MaskEltTy));
3097 }
3098
3099 SDValue MaskVec = DAG.getBuildVector(VT: MaskVecTy, DL, Ops);
3100
3101 if (Using1stVec && Using2ndVec) {
3102 Op0 = Op->getOperand(Num: 0);
3103 Op1 = Op->getOperand(Num: 1);
3104 } else if (Using1stVec)
3105 Op0 = Op1 = Op->getOperand(Num: 0);
3106 else if (Using2ndVec)
3107 Op0 = Op1 = Op->getOperand(Num: 1);
3108 else
3109 llvm_unreachable("shuffle vector mask references neither vector operand?");
3110
3111 // VECTOR_SHUFFLE concatenates the vectors in an vectorwise fashion.
3112 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
3113 // VSHF concatenates the vectors in a bitwise fashion:
3114 // <0b00, 0b01> + <0b10, 0b11> ->
3115 // 0b0100 + 0b1110 -> 0b01001110
3116 // <0b10, 0b11, 0b00, 0b01>
3117 // We must therefore swap the operands to get the correct result.
3118 return DAG.getNode(Opcode: MipsISD::VSHF, DL, VT: ResTy, N1: MaskVec, N2: Op1, N3: Op0);
3119}
3120
3121// Lower VECTOR_SHUFFLE into one of a number of instructions depending on the
3122// indices in the shuffle.
3123SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
3124 SelectionDAG &DAG) const {
3125 ShuffleVectorSDNode *Node = cast<ShuffleVectorSDNode>(Val&: Op);
3126 EVT ResTy = Op->getValueType(ResNo: 0);
3127
3128 if (!ResTy.is128BitVector())
3129 return SDValue();
3130
3131 int ResTyNumElts = ResTy.getVectorNumElements();
3132 SmallVector<int, 16> Indices;
3133
3134 for (int i = 0; i < ResTyNumElts; ++i)
3135 Indices.push_back(Elt: Node->getMaskElt(Idx: i));
3136
3137 // splati.[bhwd] is preferable to the others but is matched from
3138 // MipsISD::VSHF.
3139 if (isVECTOR_SHUFFLE_SPLATI(Op, ResTy, Indices, DAG))
3140 return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, isSPLATI: true, DAG);
3141 SDValue Result;
3142 if ((Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG)))
3143 return Result;
3144 if ((Result = lowerVECTOR_SHUFFLE_ILVOD(Op, ResTy, Indices, DAG)))
3145 return Result;
3146 if ((Result = lowerVECTOR_SHUFFLE_ILVL(Op, ResTy, Indices, DAG)))
3147 return Result;
3148 if ((Result = lowerVECTOR_SHUFFLE_ILVR(Op, ResTy, Indices, DAG)))
3149 return Result;
3150 if ((Result = lowerVECTOR_SHUFFLE_PCKEV(Op, ResTy, Indices, DAG)))
3151 return Result;
3152 if ((Result = lowerVECTOR_SHUFFLE_PCKOD(Op, ResTy, Indices, DAG)))
3153 return Result;
3154 if ((Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG)))
3155 return Result;
3156 return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, isSPLATI: false, DAG);
3157}
3158
3159MachineBasicBlock *
3160MipsSETargetLowering::emitBPOSGE32(MachineInstr &MI,
3161 MachineBasicBlock *BB) const {
3162 // $bb:
3163 // bposge32_pseudo $vr0
3164 // =>
3165 // $bb:
3166 // bposge32 $tbb
3167 // $fbb:
3168 // li $vr2, 0
3169 // b $sink
3170 // $tbb:
3171 // li $vr1, 1
3172 // $sink:
3173 // $vr0 = phi($vr2, $fbb, $vr1, $tbb)
3174
3175 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3176 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3177 const TargetRegisterClass *RC = &Mips::GPR32RegClass;
3178 DebugLoc DL = MI.getDebugLoc();
3179 const BasicBlock *LLVM_BB = BB->getBasicBlock();
3180 MachineFunction::iterator It = std::next(x: MachineFunction::iterator(BB));
3181 MachineFunction *F = BB->getParent();
3182 MachineBasicBlock *FBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
3183 MachineBasicBlock *TBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
3184 MachineBasicBlock *Sink = F->CreateMachineBasicBlock(BB: LLVM_BB);
3185 F->insert(MBBI: It, MBB: FBB);
3186 F->insert(MBBI: It, MBB: TBB);
3187 F->insert(MBBI: It, MBB: Sink);
3188
3189 // Transfer the remainder of BB and its successor edges to Sink.
3190 Sink->splice(Where: Sink->begin(), Other: BB, From: std::next(x: MachineBasicBlock::iterator(MI)),
3191 To: BB->end());
3192 Sink->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
3193
3194 // Add successors.
3195 BB->addSuccessor(Succ: FBB);
3196 BB->addSuccessor(Succ: TBB);
3197 FBB->addSuccessor(Succ: Sink);
3198 TBB->addSuccessor(Succ: Sink);
3199
3200 // Insert the real bposge32 instruction to $BB.
3201 BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: Mips::BPOSGE32)).addMBB(MBB: TBB);
3202 // Insert the real bposge32c instruction to $BB.
3203 BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: Mips::BPOSGE32C_MMR3)).addMBB(MBB: TBB);
3204
3205 // Fill $FBB.
3206 Register VR2 = RegInfo.createVirtualRegister(RegClass: RC);
3207 BuildMI(BB&: *FBB, I: FBB->end(), MIMD: DL, MCID: TII->get(Opcode: Mips::ADDiu), DestReg: VR2)
3208 .addReg(RegNo: Mips::ZERO).addImm(Val: 0);
3209 BuildMI(BB&: *FBB, I: FBB->end(), MIMD: DL, MCID: TII->get(Opcode: Mips::B)).addMBB(MBB: Sink);
3210
3211 // Fill $TBB.
3212 Register VR1 = RegInfo.createVirtualRegister(RegClass: RC);
3213 BuildMI(BB&: *TBB, I: TBB->end(), MIMD: DL, MCID: TII->get(Opcode: Mips::ADDiu), DestReg: VR1)
3214 .addReg(RegNo: Mips::ZERO).addImm(Val: 1);
3215
3216 // Insert phi function to $Sink.
3217 BuildMI(BB&: *Sink, I: Sink->begin(), MIMD: DL, MCID: TII->get(Opcode: Mips::PHI),
3218 DestReg: MI.getOperand(i: 0).getReg())
3219 .addReg(RegNo: VR2)
3220 .addMBB(MBB: FBB)
3221 .addReg(RegNo: VR1)
3222 .addMBB(MBB: TBB);
3223
3224 MI.eraseFromParent(); // The pseudo instruction is gone now.
3225 return Sink;
3226}
3227
// Expand an MSA compare-and-branch pseudo (e.g. vany_nonzero) into a branch
// diamond that materializes the i1 result in a GPR32.
//
// \param MI       the pseudo being expanded; operand 0 is the GPR result,
//                 operand 1 is the MSA register being tested.
// \param BB       the block currently containing MI.
// \param BranchOp the real MSA branch opcode (e.g. bnz.b) to emit.
// \returns the sink block that ends the expanded sequence.
MachineBasicBlock *MipsSETargetLowering::emitMSACBranchPseudo(
    MachineInstr &MI, MachineBasicBlock *BB, unsigned BranchOp) const {
  // $bb:
  //  vany_nonzero $rd, $ws
  //  =>
  // $bb:
  //  bnz.b $ws, $tbb
  //  b $fbb
  // $fbb:
  //  li $rd1, 0
  //  b $sink
  // $tbb:
  //  li $rd2, 1
  // $sink:
  //  $rd = phi($rd1, $fbb, $rd2, $tbb)

  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const TargetRegisterClass *RC = &Mips::GPR32RegClass;
  DebugLoc DL = MI.getDebugLoc();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  // Insert the three new blocks immediately after BB.
  MachineFunction::iterator It = std::next(x: MachineFunction::iterator(BB));
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *FBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
  MachineBasicBlock *TBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
  MachineBasicBlock *Sink = F->CreateMachineBasicBlock(BB: LLVM_BB);
  F->insert(MBBI: It, MBB: FBB);
  F->insert(MBBI: It, MBB: TBB);
  F->insert(MBBI: It, MBB: Sink);

  // Transfer the remainder of BB and its successor edges to Sink.
  Sink->splice(Where: Sink->begin(), Other: BB, From: std::next(x: MachineBasicBlock::iterator(MI)),
               To: BB->end());
  Sink->transferSuccessorsAndUpdatePHIs(FromMBB: BB);

  // Add successors.
  BB->addSuccessor(Succ: FBB);
  BB->addSuccessor(Succ: TBB);
  FBB->addSuccessor(Succ: Sink);
  TBB->addSuccessor(Succ: Sink);

  // Insert the real bnz.b instruction to $BB.
  BuildMI(BB, MIMD: DL, MCID: TII->get(Opcode: BranchOp))
      .addReg(RegNo: MI.getOperand(i: 1).getReg())
      .addMBB(MBB: TBB);

  // Fill $FBB: result = 0, then jump to the sink.
  Register RD1 = RegInfo.createVirtualRegister(RegClass: RC);
  BuildMI(BB&: *FBB, I: FBB->end(), MIMD: DL, MCID: TII->get(Opcode: Mips::ADDiu), DestReg: RD1)
      .addReg(RegNo: Mips::ZERO).addImm(Val: 0);
  BuildMI(BB&: *FBB, I: FBB->end(), MIMD: DL, MCID: TII->get(Opcode: Mips::B)).addMBB(MBB: Sink);

  // Fill $TBB: result = 1; falls through to the sink.
  Register RD2 = RegInfo.createVirtualRegister(RegClass: RC);
  BuildMI(BB&: *TBB, I: TBB->end(), MIMD: DL, MCID: TII->get(Opcode: Mips::ADDiu), DestReg: RD2)
      .addReg(RegNo: Mips::ZERO).addImm(Val: 1);

  // Insert phi function to $Sink: select whichever value the executed
  // predecessor produced.
  BuildMI(BB&: *Sink, I: Sink->begin(), MIMD: DL, MCID: TII->get(Opcode: Mips::PHI),
          DestReg: MI.getOperand(i: 0).getReg())
      .addReg(RegNo: RD1)
      .addMBB(MBB: FBB)
      .addReg(RegNo: RD2)
      .addMBB(MBB: TBB);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return Sink;
}
3296
3297// Emit the COPY_FW pseudo instruction.
3298//
3299// copy_fw_pseudo $fd, $ws, n
3300// =>
3301// copy_u_w $rt, $ws, $n
3302// mtc1 $rt, $fd
3303//
3304// When n is zero, the equivalent operation can be performed with (potentially)
3305// zero instructions due to register overlaps. This optimization is never valid
3306// for lane 1 because it would require FR=0 mode which isn't supported by MSA.
3307MachineBasicBlock *
3308MipsSETargetLowering::emitCOPY_FW(MachineInstr &MI,
3309 MachineBasicBlock *BB) const {
3310 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3311 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3312 DebugLoc DL = MI.getDebugLoc();
3313 Register Fd = MI.getOperand(i: 0).getReg();
3314 Register Ws = MI.getOperand(i: 1).getReg();
3315 unsigned Lane = MI.getOperand(i: 2).getImm();
3316
3317 if (Lane == 0) {
3318 unsigned Wt = Ws;
3319 if (!Subtarget.useOddSPReg()) {
3320 // We must copy to an even-numbered MSA register so that the
3321 // single-precision sub-register is also guaranteed to be even-numbered.
3322 Wt = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128WEvensRegClass);
3323
3324 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::COPY), DestReg: Wt).addReg(RegNo: Ws);
3325 }
3326
3327 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::COPY), DestReg: Fd).addReg(RegNo: Wt, Flags: {}, SubReg: Mips::sub_lo);
3328 } else {
3329 Register Wt = RegInfo.createVirtualRegister(
3330 RegClass: Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
3331 : &Mips::MSA128WEvensRegClass);
3332
3333 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SPLATI_W), DestReg: Wt).addReg(RegNo: Ws).addImm(Val: Lane);
3334 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::COPY), DestReg: Fd).addReg(RegNo: Wt, Flags: {}, SubReg: Mips::sub_lo);
3335 }
3336
3337 MI.eraseFromParent(); // The pseudo instruction is gone now.
3338 return BB;
3339}
3340
3341// Emit the COPY_FD pseudo instruction.
3342//
3343// copy_fd_pseudo $fd, $ws, n
3344// =>
3345// splati.d $wt, $ws, $n
3346// copy $fd, $wt:sub_64
3347//
3348// When n is zero, the equivalent operation can be performed with (potentially)
3349// zero instructions due to register overlaps. This optimization is always
3350// valid because FR=1 mode which is the only supported mode in MSA.
3351MachineBasicBlock *
3352MipsSETargetLowering::emitCOPY_FD(MachineInstr &MI,
3353 MachineBasicBlock *BB) const {
3354 assert(Subtarget.isFP64bit());
3355
3356 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3357 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3358 Register Fd = MI.getOperand(i: 0).getReg();
3359 Register Ws = MI.getOperand(i: 1).getReg();
3360 unsigned Lane = MI.getOperand(i: 2).getImm() * 2;
3361 DebugLoc DL = MI.getDebugLoc();
3362
3363 if (Lane == 0)
3364 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::COPY), DestReg: Fd).addReg(RegNo: Ws, Flags: {}, SubReg: Mips::sub_64);
3365 else {
3366 Register Wt = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128DRegClass);
3367
3368 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SPLATI_D), DestReg: Wt).addReg(RegNo: Ws).addImm(Val: 1);
3369 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::COPY), DestReg: Fd).addReg(RegNo: Wt, Flags: {}, SubReg: Mips::sub_64);
3370 }
3371
3372 MI.eraseFromParent(); // The pseudo instruction is gone now.
3373 return BB;
3374}
3375
3376// Emit the INSERT_FW pseudo instruction.
3377//
3378// insert_fw_pseudo $wd, $wd_in, $n, $fs
3379// =>
3380// subreg_to_reg $wt:sub_lo, $fs
3381// insve_w $wd[$n], $wd_in, $wt[0]
3382MachineBasicBlock *
3383MipsSETargetLowering::emitINSERT_FW(MachineInstr &MI,
3384 MachineBasicBlock *BB) const {
3385 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3386 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3387 DebugLoc DL = MI.getDebugLoc();
3388 Register Wd = MI.getOperand(i: 0).getReg();
3389 Register Wd_in = MI.getOperand(i: 1).getReg();
3390 unsigned Lane = MI.getOperand(i: 2).getImm();
3391 Register Fs = MI.getOperand(i: 3).getReg();
3392 Register Wt = RegInfo.createVirtualRegister(
3393 RegClass: Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
3394 : &Mips::MSA128WEvensRegClass);
3395
3396 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SUBREG_TO_REG), DestReg: Wt)
3397 .addReg(RegNo: Fs)
3398 .addImm(Val: Mips::sub_lo);
3399 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::INSVE_W), DestReg: Wd)
3400 .addReg(RegNo: Wd_in)
3401 .addImm(Val: Lane)
3402 .addReg(RegNo: Wt)
3403 .addImm(Val: 0);
3404
3405 MI.eraseFromParent(); // The pseudo instruction is gone now.
3406 return BB;
3407}
3408
3409// Emit the INSERT_FD pseudo instruction.
3410//
3411// insert_fd_pseudo $wd, $fs, n
3412// =>
3413// subreg_to_reg $wt:sub_64, $fs
3414// insve_d $wd[$n], $wd_in, $wt[0]
3415MachineBasicBlock *
3416MipsSETargetLowering::emitINSERT_FD(MachineInstr &MI,
3417 MachineBasicBlock *BB) const {
3418 assert(Subtarget.isFP64bit());
3419
3420 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3421 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3422 DebugLoc DL = MI.getDebugLoc();
3423 Register Wd = MI.getOperand(i: 0).getReg();
3424 Register Wd_in = MI.getOperand(i: 1).getReg();
3425 unsigned Lane = MI.getOperand(i: 2).getImm();
3426 Register Fs = MI.getOperand(i: 3).getReg();
3427 Register Wt = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128DRegClass);
3428
3429 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SUBREG_TO_REG), DestReg: Wt)
3430 .addReg(RegNo: Fs)
3431 .addImm(Val: Mips::sub_64);
3432 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::INSVE_D), DestReg: Wd)
3433 .addReg(RegNo: Wd_in)
3434 .addImm(Val: Lane)
3435 .addReg(RegNo: Wt)
3436 .addImm(Val: 0);
3437
3438 MI.eraseFromParent(); // The pseudo instruction is gone now.
3439 return BB;
3440}
3441
// Emit the INSERT_([BHWD]|F[WD])_VIDX pseudo instruction.
//
// MSA's insert/insve only accept an immediate lane number, so a
// variable-index insert is done by rotating the desired lane to element
// zero, inserting there, and rotating back.
//
// For integer:
// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $rs)
// =>
// (SLL $lanetmp1, $lane, <log2size>)
// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
// (INSERT_[BHWD], $wdtmp2, $wdtmp1, 0, $rs)
// (NEG $lanetmp2, $lanetmp1)
// (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
//
// For floating point:
// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $fs)
// =>
// (SUBREG_TO_REG $wt, $fs, <subreg>)
// (SLL $lanetmp1, $lane, <log2size>)
// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
// (INSVE_[WD], $wdtmp2, 0, $wdtmp1, 0)
// (NEG $lanetmp2, $lanetmp1)
// (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX(
    MachineInstr &MI, MachineBasicBlock *BB, unsigned EltSizeInBytes,
    bool IsFP) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Wd = MI.getOperand(i: 0).getReg();
  Register SrcVecReg = MI.getOperand(i: 1).getReg();
  Register LaneReg = MI.getOperand(i: 2).getReg();
  Register SrcValReg = MI.getOperand(i: 3).getReg();

  const TargetRegisterClass *VecRC = nullptr;
  // FIXME: This should be true for N32 too.
  const TargetRegisterClass *GPRRC =
      Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
  // On N64 the lane index lives in a GPR64; SLD_B reads it through sub_32.
  unsigned SubRegIdx = Subtarget.isABI_N64() ? Mips::sub_32 : 0;
  unsigned ShiftOp = Subtarget.isABI_N64() ? Mips::DSLL : Mips::SLL;
  unsigned EltLog2Size;
  unsigned InsertOp = 0;
  unsigned InsveOp = 0;
  // Select the opcodes and vector register class matching the element size.
  switch (EltSizeInBytes) {
  default:
    llvm_unreachable("Unexpected size");
  case 1:
    EltLog2Size = 0;
    InsertOp = Mips::INSERT_B;
    InsveOp = Mips::INSVE_B;
    VecRC = &Mips::MSA128BRegClass;
    break;
  case 2:
    EltLog2Size = 1;
    InsertOp = Mips::INSERT_H;
    InsveOp = Mips::INSVE_H;
    VecRC = &Mips::MSA128HRegClass;
    break;
  case 4:
    EltLog2Size = 2;
    InsertOp = Mips::INSERT_W;
    InsveOp = Mips::INSVE_W;
    VecRC = &Mips::MSA128WRegClass;
    break;
  case 8:
    EltLog2Size = 3;
    InsertOp = Mips::INSERT_D;
    InsveOp = Mips::INSVE_D;
    VecRC = &Mips::MSA128DRegClass;
    break;
  }

  if (IsFP) {
    // Wrap the FPU source value in a vector register so insve.df can use it.
    Register Wt = RegInfo.createVirtualRegister(RegClass: VecRC);
    BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SUBREG_TO_REG), DestReg: Wt)
        .addReg(RegNo: SrcValReg)
        .addImm(Val: EltSizeInBytes == 8 ? Mips::sub_64 : Mips::sub_lo);
    SrcValReg = Wt;
  }

  // Convert the lane index into a byte index
  if (EltSizeInBytes != 1) {
    Register LaneTmp1 = RegInfo.createVirtualRegister(RegClass: GPRRC);
    BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: ShiftOp), DestReg: LaneTmp1)
        .addReg(RegNo: LaneReg)
        .addImm(Val: EltLog2Size);
    LaneReg = LaneTmp1;
  }

  // Rotate bytes around so that the desired lane is element zero
  Register WdTmp1 = RegInfo.createVirtualRegister(RegClass: VecRC);
  BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SLD_B), DestReg: WdTmp1)
      .addReg(RegNo: SrcVecReg)
      .addReg(RegNo: SrcVecReg)
      .addReg(RegNo: LaneReg, Flags: {}, SubReg: SubRegIdx);

  Register WdTmp2 = RegInfo.createVirtualRegister(RegClass: VecRC);
  if (IsFP) {
    // Use insve.df to insert to element zero
    BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: InsveOp), DestReg: WdTmp2)
        .addReg(RegNo: WdTmp1)
        .addImm(Val: 0)
        .addReg(RegNo: SrcValReg)
        .addImm(Val: 0);
  } else {
    // Use insert.df to insert to element zero
    BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: InsertOp), DestReg: WdTmp2)
        .addReg(RegNo: WdTmp1)
        .addReg(RegNo: SrcValReg)
        .addImm(Val: 0);
  }

  // Rotate elements the rest of the way for a full rotation.
  // sld.df interprets $rt modulo the number of columns so we only need to
  // negate the lane index to do this.
  Register LaneTmp2 = RegInfo.createVirtualRegister(RegClass: GPRRC);
  BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Subtarget.isABI_N64() ? Mips::DSUB : Mips::SUB),
          DestReg: LaneTmp2)
      .addReg(RegNo: Subtarget.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO)
      .addReg(RegNo: LaneReg);
  BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SLD_B), DestReg: Wd)
      .addReg(RegNo: WdTmp2)
      .addReg(RegNo: WdTmp2)
      .addReg(RegNo: LaneTmp2, Flags: {}, SubReg: SubRegIdx);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
3567
3568// Emit the FILL_FW pseudo instruction.
3569//
3570// fill_fw_pseudo $wd, $fs
3571// =>
3572// implicit_def $wt1
3573// insert_subreg $wt2:subreg_lo, $wt1, $fs
3574// splati.w $wd, $wt2[0]
3575MachineBasicBlock *
3576MipsSETargetLowering::emitFILL_FW(MachineInstr &MI,
3577 MachineBasicBlock *BB) const {
3578 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3579 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3580 DebugLoc DL = MI.getDebugLoc();
3581 Register Wd = MI.getOperand(i: 0).getReg();
3582 Register Fs = MI.getOperand(i: 1).getReg();
3583 Register Wt1 = RegInfo.createVirtualRegister(
3584 RegClass: Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
3585 : &Mips::MSA128WEvensRegClass);
3586 Register Wt2 = RegInfo.createVirtualRegister(
3587 RegClass: Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
3588 : &Mips::MSA128WEvensRegClass);
3589
3590 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::IMPLICIT_DEF), DestReg: Wt1);
3591 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::INSERT_SUBREG), DestReg: Wt2)
3592 .addReg(RegNo: Wt1)
3593 .addReg(RegNo: Fs)
3594 .addImm(Val: Mips::sub_lo);
3595 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SPLATI_W), DestReg: Wd).addReg(RegNo: Wt2).addImm(Val: 0);
3596
3597 MI.eraseFromParent(); // The pseudo instruction is gone now.
3598 return BB;
3599}
3600
3601// Emit the FILL_FD pseudo instruction.
3602//
3603// fill_fd_pseudo $wd, $fs
3604// =>
3605// implicit_def $wt1
3606// insert_subreg $wt2:subreg_64, $wt1, $fs
3607// splati.d $wd, $wt2[0]
3608MachineBasicBlock *
3609MipsSETargetLowering::emitFILL_FD(MachineInstr &MI,
3610 MachineBasicBlock *BB) const {
3611 assert(Subtarget.isFP64bit());
3612
3613 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3614 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3615 DebugLoc DL = MI.getDebugLoc();
3616 Register Wd = MI.getOperand(i: 0).getReg();
3617 Register Fs = MI.getOperand(i: 1).getReg();
3618 Register Wt1 = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128DRegClass);
3619 Register Wt2 = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128DRegClass);
3620
3621 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::IMPLICIT_DEF), DestReg: Wt1);
3622 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::INSERT_SUBREG), DestReg: Wt2)
3623 .addReg(RegNo: Wt1)
3624 .addReg(RegNo: Fs)
3625 .addImm(Val: Mips::sub_64);
3626 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SPLATI_D), DestReg: Wd).addReg(RegNo: Wt2).addImm(Val: 0);
3627
3628 MI.eraseFromParent(); // The pseudo instruction is gone now.
3629 return BB;
3630}
3631
3632// Emit the ST_F16_PSEDUO instruction to store a f16 value from an MSA
3633// register.
3634//
3635// STF16 MSA128F16:$wd, mem_simm10:$addr
3636// =>
3637// copy_u.h $rtemp,$wd[0]
3638// sh $rtemp, $addr
3639//
3640// Safety: We can't use st.h & co as they would over write the memory after
3641// the destination. It would require half floats be allocated 16 bytes(!) of
3642// space.
3643MachineBasicBlock *
3644MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI,
3645 MachineBasicBlock *BB) const {
3646
3647 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3648 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3649 DebugLoc DL = MI.getDebugLoc();
3650 Register Ws = MI.getOperand(i: 0).getReg();
3651 Register Rt = MI.getOperand(i: 1).getReg();
3652 const MachineMemOperand &MMO = **MI.memoperands_begin();
3653 unsigned Imm = MMO.getOffset();
3654
3655 // Caution: A load via the GOT can expand to a GPR32 operand, a load via
3656 // spill and reload can expand as a GPR64 operand. Examine the
3657 // operand in detail and default to ABI.
3658 const TargetRegisterClass *RC =
3659 MI.getOperand(i: 1).isReg() ? RegInfo.getRegClass(Reg: MI.getOperand(i: 1).getReg())
3660 : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass
3661 : &Mips::GPR64RegClass);
3662 const bool UsingMips32 = RC == &Mips::GPR32RegClass;
3663 Register Rs = RegInfo.createVirtualRegister(RegClass: &Mips::GPR32RegClass);
3664
3665 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::COPY_U_H), DestReg: Rs).addReg(RegNo: Ws).addImm(Val: 0);
3666 if(!UsingMips32) {
3667 Register Tmp = RegInfo.createVirtualRegister(RegClass: &Mips::GPR64RegClass);
3668 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::SUBREG_TO_REG), DestReg: Tmp)
3669 .addReg(RegNo: Rs)
3670 .addImm(Val: Mips::sub_32);
3671 Rs = Tmp;
3672 }
3673 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: UsingMips32 ? Mips::SH : Mips::SH64))
3674 .addReg(RegNo: Rs)
3675 .addReg(RegNo: Rt)
3676 .addImm(Val: Imm)
3677 .addMemOperand(MMO: BB->getParent()->getMachineMemOperand(
3678 MMO: &MMO, Offset: MMO.getOffset(), Size: MMO.getSize()));
3679
3680 MI.eraseFromParent();
3681 return BB;
3682}
3683
3684// Emit the LD_F16_PSEDUO instruction to load a f16 value into an MSA register.
3685//
3686// LD_F16 MSA128F16:$wd, mem_simm10:$addr
3687// =>
3688// lh $rtemp, $addr
3689// fill.h $wd, $rtemp
3690//
3691// Safety: We can't use ld.h & co as they over-read from the source.
3692// Additionally, if the address is not modulo 16, 2 cases can occur:
3693// a) Segmentation fault as the load instruction reads from a memory page
3694// memory it's not supposed to.
3695// b) The load crosses an implementation specific boundary, requiring OS
3696// intervention.
3697MachineBasicBlock *
3698MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI,
3699 MachineBasicBlock *BB) const {
3700
3701 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3702 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3703 DebugLoc DL = MI.getDebugLoc();
3704 Register Wd = MI.getOperand(i: 0).getReg();
3705
3706 // Caution: A load via the GOT can expand to a GPR32 operand, a load via
3707 // spill and reload can expand as a GPR64 operand. Examine the
3708 // operand in detail and default to ABI.
3709 const TargetRegisterClass *RC =
3710 MI.getOperand(i: 1).isReg() ? RegInfo.getRegClass(Reg: MI.getOperand(i: 1).getReg())
3711 : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass
3712 : &Mips::GPR64RegClass);
3713
3714 const bool UsingMips32 = RC == &Mips::GPR32RegClass;
3715 Register Rt = RegInfo.createVirtualRegister(RegClass: RC);
3716
3717 MachineInstrBuilder MIB =
3718 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: UsingMips32 ? Mips::LH : Mips::LH64), DestReg: Rt);
3719 for (const MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI.operands()))
3720 MIB.add(MO);
3721
3722 if(!UsingMips32) {
3723 Register Tmp = RegInfo.createVirtualRegister(RegClass: &Mips::GPR32RegClass);
3724 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::COPY), DestReg: Tmp)
3725 .addReg(RegNo: Rt, Flags: {}, SubReg: Mips::sub_32);
3726 Rt = Tmp;
3727 }
3728
3729 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FILL_H), DestReg: Wd).addReg(RegNo: Rt);
3730
3731 MI.eraseFromParent();
3732 return BB;
3733}
3734
// Emit the FPROUND_PSEUDO instruction.
//
// Round an FGR64Opnd, FGR32Opnd to an f16.
//
// Safety: Cycle the operand through the GPRs so the result always ends up
//         the correct MSA register.
//
// FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fs
//        / FGR64Opnd:$Fs and MSA128F16:$Wd to the same physical register
//        (which they can be, as the MSA registers are defined to alias the
//        FPU's 64 bit and 32 bit registers) the result can be accessed using
//        the correct register class. That requires operands be tie-able
//        across register classes which have a sub/super register class
//        relationship.
//
// For FGR32Opnd:
//
// FPROUND MSA128F16:$wd, FGR32Opnd:$fs
// =>
// mfc1 $rtemp, $fs
// fill.w $rtemp, $wtemp
// fexdo.w $wd, $wtemp, $wtemp
//
// For FGR64Opnd on mips32r2+:
//
// FPROUND MSA128F16:$wd, FGR64Opnd:$fs
// =>
// mfc1 $rtemp, $fs
// fill.w $rtemp, $wtemp
// mfhc1 $rtemp2, $fs
// insert.w $wtemp[1], $rtemp2
// insert.w $wtemp[3], $rtemp2
// fexdo.w $wtemp2, $wtemp, $wtemp
// fexdo.h $wd, $temp2, $temp2
//
// For FGR64Opnd on mips64r2+:
//
// FPROUND MSA128F16:$wd, FGR64Opnd:$fs
// =>
// dmfc1 $rtemp, $fs
// fill.d $rtemp, $wtemp
// fexdo.w $wtemp2, $wtemp, $wtemp
// fexdo.h $wd, $wtemp2, $wtemp2
//
// Safety note: As $wtemp is UNDEF, we may provoke a spurious exception if the
//              undef bits are "just right" and the exception enable bits are
//              set. By using fill.w to replicate $fs into all elements over
//              insert.w for one element, we avoid that potential case. If
//              fexdo.[hw] causes an exception, the exception is valid and it
//              occurs for all elements.
MachineBasicBlock *
MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI,
                                         MachineBasicBlock *BB,
                                         bool IsFGR64) const {

  // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous
  // here. It's technically doable to support MIPS32 here, but the ISA forbids
  // it.
  assert(Subtarget.hasMSA() && Subtarget.hasMips32r2());

  // Distinguish the three expansions documented above.
  bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64;
  bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64;

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Wd = MI.getOperand(i: 0).getReg();
  Register Fs = MI.getOperand(i: 1).getReg();

  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  Register Wtemp = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128WRegClass);
  const TargetRegisterClass *GPRRC =
      IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
  // dmfc1 for a 64-bit move on mips64; the D64 variant of mfc1 for FGR64 on
  // mips32; plain mfc1 otherwise.
  unsigned MFC1Opc = IsFGR64onMips64
                         ? Mips::DMFC1
                         : (IsFGR64onMips32 ? Mips::MFC1_D64 : Mips::MFC1);
  unsigned FILLOpc = IsFGR64onMips64 ? Mips::FILL_D : Mips::FILL_W;

  // Perform the register class copy as mentioned above.
  Register Rtemp = RegInfo.createVirtualRegister(RegClass: GPRRC);
  BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: MFC1Opc), DestReg: Rtemp).addReg(RegNo: Fs);
  BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: FILLOpc), DestReg: Wtemp).addReg(RegNo: Rtemp);
  unsigned WPHI = Wtemp;

  if (IsFGR64onMips32) {
    // Fetch the high word of $fs with mfhc1 and insert it into words 1 and 3
    // (see the mips32r2+ expansion in the function comment).
    Register Rtemp2 = RegInfo.createVirtualRegister(RegClass: GPRRC);
    BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::MFHC1_D64), DestReg: Rtemp2).addReg(RegNo: Fs);
    Register Wtemp2 = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128WRegClass);
    Register Wtemp3 = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128WRegClass);
    BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::INSERT_W), DestReg: Wtemp2)
        .addReg(RegNo: Wtemp)
        .addReg(RegNo: Rtemp2)
        .addImm(Val: 1);
    BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::INSERT_W), DestReg: Wtemp3)
        .addReg(RegNo: Wtemp2)
        .addReg(RegNo: Rtemp2)
        .addImm(Val: 3);
    WPHI = Wtemp3;
  }

  if (IsFGR64) {
    // Doubles take an extra narrowing step through fexdo.w before fexdo.h.
    Register Wtemp2 = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128WRegClass);
    BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FEXDO_W), DestReg: Wtemp2)
        .addReg(RegNo: WPHI)
        .addReg(RegNo: WPHI);
    WPHI = Wtemp2;
  }

  // Final narrowing step to f16.
  BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FEXDO_H), DestReg: Wd).addReg(RegNo: WPHI).addReg(RegNo: WPHI);

  MI.eraseFromParent();
  return BB;
}
3846
3847// Emit the FPEXTEND_PSEUDO instruction.
3848//
3849// Expand an f16 to either a FGR32Opnd or FGR64Opnd.
3850//
3851// Safety: Cycle the result through the GPRs so the result always ends up
3852// the correct floating point register.
3853//
3854// FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fd
3855// / FGR64Opnd:$Fd and MSA128F16:$Ws to the same physical register
3856// (which they can be, as the MSA registers are defined to alias the
3857// FPU's 64 bit and 32 bit registers) the result can be accessed using
3858// the correct register class. That requires operands be tie-able across
3859// register classes which have a sub/super register class relationship. I
3860// haven't checked.
3861//
3862// For FGR32Opnd:
3863//
3864// FPEXTEND FGR32Opnd:$fd, MSA128F16:$ws
3865// =>
3866// fexupr.w $wtemp, $ws
3867// copy_s.w $rtemp, $ws[0]
3868// mtc1 $rtemp, $fd
3869//
3870// For FGR64Opnd on Mips64:
3871//
3872// FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
3873// =>
3874// fexupr.w $wtemp, $ws
3875// fexupr.d $wtemp2, $wtemp
3876// copy_s.d $rtemp, $wtemp2s[0]
3877// dmtc1 $rtemp, $fd
3878//
3879// For FGR64Opnd on Mips32:
3880//
3881// FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
3882// =>
3883// fexupr.w $wtemp, $ws
3884// fexupr.d $wtemp2, $wtemp
3885// copy_s.w $rtemp, $wtemp2[0]
3886// mtc1 $rtemp, $ftemp
3887// copy_s.w $rtemp2, $wtemp2[1]
3888// $fd = mthc1 $rtemp2, $ftemp
3889MachineBasicBlock *
3890MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI,
3891 MachineBasicBlock *BB,
3892 bool IsFGR64) const {
3893
3894 // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous
3895 // here. It's technically doable to support MIPS32 here, but the ISA forbids
3896 // it.
3897 assert(Subtarget.hasMSA() && Subtarget.hasMips32r2());
3898
3899 bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64;
3900 bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64;
3901
3902 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3903 DebugLoc DL = MI.getDebugLoc();
3904 Register Fd = MI.getOperand(i: 0).getReg();
3905 Register Ws = MI.getOperand(i: 1).getReg();
3906
3907 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3908 const TargetRegisterClass *GPRRC =
3909 IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
3910 unsigned MTC1Opc = IsFGR64onMips64
3911 ? Mips::DMTC1
3912 : (IsFGR64onMips32 ? Mips::MTC1_D64 : Mips::MTC1);
3913 Register COPYOpc = IsFGR64onMips64 ? Mips::COPY_S_D : Mips::COPY_S_W;
3914
3915 Register Wtemp = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128WRegClass);
3916 Register WPHI = Wtemp;
3917
3918 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FEXUPR_W), DestReg: Wtemp).addReg(RegNo: Ws);
3919 if (IsFGR64) {
3920 WPHI = RegInfo.createVirtualRegister(RegClass: &Mips::MSA128DRegClass);
3921 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FEXUPR_D), DestReg: WPHI).addReg(RegNo: Wtemp);
3922 }
3923
3924 // Perform the safety regclass copy mentioned above.
3925 Register Rtemp = RegInfo.createVirtualRegister(RegClass: GPRRC);
3926 Register FPRPHI = IsFGR64onMips32
3927 ? RegInfo.createVirtualRegister(RegClass: &Mips::FGR64RegClass)
3928 : Fd;
3929 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: COPYOpc), DestReg: Rtemp).addReg(RegNo: WPHI).addImm(Val: 0);
3930 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: MTC1Opc), DestReg: FPRPHI).addReg(RegNo: Rtemp);
3931
3932 if (IsFGR64onMips32) {
3933 Register Rtemp2 = RegInfo.createVirtualRegister(RegClass: GPRRC);
3934 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::COPY_S_W), DestReg: Rtemp2)
3935 .addReg(RegNo: WPHI)
3936 .addImm(Val: 1);
3937 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::MTHC1_D64), DestReg: Fd)
3938 .addReg(RegNo: FPRPHI)
3939 .addReg(RegNo: Rtemp2);
3940 }
3941
3942 MI.eraseFromParent();
3943 return BB;
3944}
3945
3946// Emit the FEXP2_W_1 pseudo instructions.
3947//
3948// fexp2_w_1_pseudo $wd, $wt
3949// =>
3950// ldi.w $ws, 1
3951// fexp2.w $wd, $ws, $wt
3952MachineBasicBlock *
3953MipsSETargetLowering::emitFEXP2_W_1(MachineInstr &MI,
3954 MachineBasicBlock *BB) const {
3955 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3956 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3957 const TargetRegisterClass *RC = &Mips::MSA128WRegClass;
3958 Register Ws1 = RegInfo.createVirtualRegister(RegClass: RC);
3959 Register Ws2 = RegInfo.createVirtualRegister(RegClass: RC);
3960 DebugLoc DL = MI.getDebugLoc();
3961
3962 // Splat 1.0 into a vector
3963 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::LDI_W), DestReg: Ws1).addImm(Val: 1);
3964 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FFINT_U_W), DestReg: Ws2).addReg(RegNo: Ws1);
3965
3966 // Emit 1.0 * fexp2(Wt)
3967 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FEXP2_W), DestReg: MI.getOperand(i: 0).getReg())
3968 .addReg(RegNo: Ws2)
3969 .addReg(RegNo: MI.getOperand(i: 1).getReg());
3970
3971 MI.eraseFromParent(); // The pseudo instruction is gone now.
3972 return BB;
3973}
3974
3975// Emit the FEXP2_D_1 pseudo instructions.
3976//
3977// fexp2_d_1_pseudo $wd, $wt
3978// =>
3979// ldi.d $ws, 1
3980// fexp2.d $wd, $ws, $wt
3981MachineBasicBlock *
3982MipsSETargetLowering::emitFEXP2_D_1(MachineInstr &MI,
3983 MachineBasicBlock *BB) const {
3984 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3985 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3986 const TargetRegisterClass *RC = &Mips::MSA128DRegClass;
3987 Register Ws1 = RegInfo.createVirtualRegister(RegClass: RC);
3988 Register Ws2 = RegInfo.createVirtualRegister(RegClass: RC);
3989 DebugLoc DL = MI.getDebugLoc();
3990
3991 // Splat 1.0 into a vector
3992 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::LDI_D), DestReg: Ws1).addImm(Val: 1);
3993 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FFINT_U_D), DestReg: Ws2).addReg(RegNo: Ws1);
3994
3995 // Emit 1.0 * fexp2(Wt)
3996 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Mips::FEXP2_D), DestReg: MI.getOperand(i: 0).getReg())
3997 .addReg(RegNo: Ws2)
3998 .addReg(RegNo: MI.getOperand(i: 1).getReg());
3999
4000 MI.eraseFromParent(); // The pseudo instruction is gone now.
4001 return BB;
4002}
4003