1//===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SelectionDAG::LegalizeVectors method.
10//
11// The vector legalizer looks for vector operations which might need to be
12// scalarized and legalizes them. This is a separate step from Legalize because
13// scalarizing can introduce illegal types. For example, suppose we have an
14// ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition
15// on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
16// operation, which introduces nodes with the illegal type i64 which must be
17// expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
18// the operation must be unrolled, which introduces nodes with the illegal
19// type i8 which must be promoted.
20//
21// This does not legalize vector manipulations like ISD::BUILD_VECTOR,
22// or operations that happen to take a vector which are custom-lowered;
23// the legalization for such operations never produces nodes
24// with illegal types, so it's okay to put off legalizing them until
25// SelectionDAG::Legalize runs.
26//
27//===----------------------------------------------------------------------===//
28
29#include "llvm/ADT/DenseMap.h"
30#include "llvm/ADT/SmallVector.h"
31#include "llvm/Analysis/TargetLibraryInfo.h"
32#include "llvm/Analysis/VectorUtils.h"
33#include "llvm/CodeGen/ISDOpcodes.h"
34#include "llvm/CodeGen/SelectionDAG.h"
35#include "llvm/CodeGen/SelectionDAGNodes.h"
36#include "llvm/CodeGen/TargetLowering.h"
37#include "llvm/CodeGen/ValueTypes.h"
38#include "llvm/CodeGenTypes/MachineValueType.h"
39#include "llvm/IR/DataLayout.h"
40#include "llvm/Support/Casting.h"
41#include "llvm/Support/Compiler.h"
42#include "llvm/Support/Debug.h"
43#include "llvm/Support/ErrorHandling.h"
44#include <cassert>
45#include <cstdint>
46#include <iterator>
47#include <utility>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "legalizevectorops"
52
53namespace {
54
/// Replaces illegal vector operations in a SelectionDAG with legal
/// equivalents, by promoting them to wider types, expanding them into
/// simpler operations, or unrolling them into scalar operations.
class VectorLegalizer {
 56 SelectionDAG& DAG;
 57 const TargetLowering &TLI;
 58 bool Changed = false; // Keep track of whether anything changed
 59
 60 /// For nodes that are of legal width, and that have more than one use, this
 61 /// map indicates what regularized operand to use. This allows us to avoid
 62 /// legalizing the same thing more than once.
 63 SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes;
 64
 65 /// Adds a node to the translation cache.
 66 void AddLegalizedOperand(SDValue From, SDValue To) {
 67 LegalizedNodes.insert(KV: std::make_pair(x&: From, y&: To));
 68 // If someone requests legalization of the new node, return itself.
 69 if (From != To)
 70 LegalizedNodes.insert(KV: std::make_pair(x&: To, y&: To));
 71 }
 72
 73 /// Legalizes the given node.
 74 SDValue LegalizeOp(SDValue Op);
 75
 76 /// Assuming the node is legal, "legalize" the results.
 77 SDValue TranslateLegalizeResults(SDValue Op, SDNode *Result);
 78
 79 /// Make sure Results are legal and update the translation cache.
 80 SDValue RecursivelyLegalizeResults(SDValue Op,
 81 MutableArrayRef<SDValue> Results);
 82
 83 /// Wrapper to interface LowerOperation with a vector of Results.
 84 /// Returns false if the target wants to use default expansion. Otherwise
 85 /// returns true. If return is true and the Results are empty, then the
 86 /// target wants to keep the input node as is.
 87 bool LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results);
 88
 89 /// Implements unrolling a VSETCC.
 90 SDValue UnrollVSETCC(SDNode *Node);
 91
 92 /// Implement expand-based legalization of vector operations.
 93 ///
 94 /// This is just a high-level routine to dispatch to specific code paths for
 95 /// operations to legalize them.
 96 void Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 97
 98 /// Implements expansion for FP_TO_UINT; falls back to UnrollVectorOp if
 99 /// FP_TO_SINT isn't legal.
 100 void ExpandFP_TO_UINT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 101
 102 /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
 103 /// SINT_TO_FLOAT and SHR on vectors isn't legal.
 104 void ExpandUINT_TO_FLOAT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 105
 106 /// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
 107 SDValue ExpandSEXTINREG(SDNode *Node);
 108
 109 /// Implement expansion for ANY_EXTEND_VECTOR_INREG.
 110 ///
 111 /// Shuffles the low lanes of the operand into place and bitcasts to the proper
 112 /// type. The contents of the bits in the extended part of each element are
 113 /// undef.
 114 SDValue ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node);
 115
 116 /// Implement expansion for SIGN_EXTEND_VECTOR_INREG.
 117 ///
 118 /// Shuffles the low lanes of the operand into place, bitcasts to the proper
 119 /// type, then shifts left and arithmetic shifts right to introduce a sign
 120 /// extension.
 121 SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node);
 122
 123 /// Implement expansion for ZERO_EXTEND_VECTOR_INREG.
 124 ///
 125 /// Shuffles the low lanes of the operand into place and blends zeros into
 126 /// the remaining lanes, finally bitcasting to the proper type.
 127 SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node);
 128
 129 /// Expand bswap of vectors into a shuffle if legal.
 130 SDValue ExpandBSWAP(SDNode *Node);
 131
 132 /// Implement vselect in terms of XOR, AND, OR when blend is not
 133 /// supported by the target.
 134 SDValue ExpandVSELECT(SDNode *Node);
 135 SDValue ExpandVP_SELECT(SDNode *Node);
 136 SDValue ExpandVP_MERGE(SDNode *Node);
 137 SDValue ExpandVP_REM(SDNode *Node);
 138 SDValue ExpandVP_FNEG(SDNode *Node);
 139 SDValue ExpandVP_FABS(SDNode *Node);
 140 SDValue ExpandVP_FCOPYSIGN(SDNode *Node);
 141 SDValue ExpandSELECT(SDNode *Node);
 142 std::pair<SDValue, SDValue> ExpandLoad(SDNode *N);
 143 SDValue ExpandStore(SDNode *N);
 144 SDValue ExpandFNEG(SDNode *Node);
 145 SDValue ExpandFABS(SDNode *Node);
 146 SDValue ExpandFCOPYSIGN(SDNode *Node);
 147 void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 148 void ExpandSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 149 SDValue ExpandBITREVERSE(SDNode *Node);
 150 void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 151 void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 152 void ExpandMULO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 153 void ExpandFixedPointDiv(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 154 void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 155 void ExpandREM(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 156
 /// Try to expand a floating-point vector op as a call into a vector math
 /// library routine; returns false if no suitable libcall is available.
 157 bool tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC,
 158 SmallVectorImpl<SDValue> &Results);
 159 bool tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall Call_F32,
 160 RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
 161 RTLIB::Libcall Call_F128,
 162 RTLIB::Libcall Call_PPCF128,
 163 SmallVectorImpl<SDValue> &Results);
 164
 165 void UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 166
 167 /// Implements vector promotion.
 168 ///
 169 /// This is essentially just bitcasting the operands to a different type and
 170 /// bitcasting the result back to the original type.
 171 void Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 172
 173 /// Implements [SU]INT_TO_FP vector promotion.
 174 ///
 175 /// This is a [zs]ext of the input operand to a larger integer type.
 176 void PromoteINT_TO_FP(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 177
 178 /// Implements FP_TO_[SU]INT vector promotion of the result type.
 179 ///
 180 /// It is promoted to a larger integer type. The result is then
 181 /// truncated back to the original type.
 182 void PromoteFP_TO_INT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 183
 184 /// Implements vector setcc operation promotion.
 185 ///
 186 /// All vector operands are promoted to a vector type with larger element
 187 /// type.
 188 void PromoteSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 190 void PromoteSTRICT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 191
 192 /// Calculate the reduction using a type of higher precision and round the
 193 /// result to match the original type. Setting NonArithmetic signifies the
 194 /// rounding of the result does not affect its value.
 195 void PromoteFloatVECREDUCE(SDNode *Node, SmallVectorImpl<SDValue> &Results,
 196 bool NonArithmetic);
 197
 198public:
 199 VectorLegalizer(SelectionDAG& dag) :
 200 DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
 201
 202 /// Legalize the vector operations in the DAG; returns true if anything
 /// changed.
 203 bool Run();
 204};
205
206} // end anonymous namespace
207
208bool VectorLegalizer::Run() {
 209 // Before we start legalizing vector nodes, check if there are any vectors.
 210 bool HasVectors = false;
 // NOTE: std::next(std::prev(end)) is just end, so this visits every node in
 // the DAG; the bounds are written this way to match the legalization loop
 // below.
 211 for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
 212 E = std::prev(x: DAG.allnodes_end()); I != std::next(x: E); ++I) {
 213 // Check if the values of the nodes contain vectors. We don't need to check
 214 // the operands because we are going to check their values at some point.
 215 HasVectors = llvm::any_of(Range: I->values(), P: [](EVT T) { return T.isVector(); });
 216
 217 // If we found a vector node we can start the legalization.
 218 if (HasVectors)
 219 break;
 220 }
 221
 222 // If this basic block has no vectors then no need to legalize vectors.
 223 if (!HasVectors)
 224 return false;
 225
 226 // The legalize process is inherently a bottom-up recursive process (users
 227 // legalize their uses before themselves). Given infinite stack space, we
 228 // could just start legalizing on the root and traverse the whole graph. In
 229 // practice however, this causes us to run out of stack space on large basic
 230 // blocks. To avoid this problem, compute an ordering of the nodes where each
 231 // node is only legalized after all of its operands are legalized.
 232 DAG.AssignTopologicalOrder();
 233 for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
 234 E = std::prev(x: DAG.allnodes_end()); I != std::next(x: E); ++I)
 235 LegalizeOp(Op: SDValue(&*I, 0));
 236
 237 // Finally, it's possible the root changed. Get the new root.
 238 SDValue OldRoot = DAG.getRoot();
 239 assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
 240 DAG.setRoot(LegalizedNodes[OldRoot]);
 241
 242 LegalizedNodes.clear();
 243
 244 // Remove dead nodes now.
 245 DAG.RemoveDeadNodes();
 246
 247 return Changed;
 248}
249
250SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDNode *Result) {
 251 assert(Op->getNumValues() == Result->getNumValues() &&
 252 "Unexpected number of results");
 253 // Generic legalization: just pass the operand through. Record every result
 // value of Op as mapping to the corresponding result of Result so later
 // lookups hit the cache.
 254 for (unsigned i = 0, e = Op->getNumValues(); i != e; ++i)
 255 AddLegalizedOperand(From: Op.getValue(R: i), To: SDValue(Result, i));
 256 return SDValue(Result, Op.getResNo());
 257}
258
259SDValue
260VectorLegalizer::RecursivelyLegalizeResults(SDValue Op,
 261 MutableArrayRef<SDValue> Results) {
 262 assert(Results.size() == Op->getNumValues() &&
 263 "Unexpected number of results");
 264 // Make sure that the generated code is itself legal: legalize each new
 // result value and cache the mapping from the old result to the new one.
 265 for (unsigned i = 0, e = Results.size(); i != e; ++i) {
 266 Results[i] = LegalizeOp(Op: Results[i]);
 267 AddLegalizedOperand(From: Op.getValue(R: i), To: Results[i]);
 268 }
 269
 // Return the replacement for the specific result value that was requested.
 270 return Results[Op.getResNo()];
 271}
272
// Legalize a single node: legalize its operands first, decide which
// LegalizeAction the target wants for it, then promote/expand/custom-lower as
// requested. Results are cached in LegalizedNodes.
273SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
 274 // Note that LegalizeOp may be reentered even from single-use nodes, which
 275 // means that we always must cache transformed nodes.
 276 DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Val: Op);
 277 if (I != LegalizedNodes.end()) return I->second;
 278
 279 // Legalize the operands
 280 SmallVector<SDValue, 8> Ops;
 281 for (const SDValue &Oper : Op->op_values())
 282 Ops.push_back(Elt: LegalizeOp(Op: Oper));
 283
 284 SDNode *Node = DAG.UpdateNodeOperands(N: Op.getNode(), Ops);
 285
 // Nodes that neither produce nor consume a vector need no vector
 // legalization; pass them through unchanged.
 286 bool HasVectorValueOrOp =
 287 llvm::any_of(Range: Node->values(), P: [](EVT T) { return T.isVector(); }) ||
 288 llvm::any_of(Range: Node->op_values(),
 289 P: [](SDValue O) { return O.getValueType().isVector(); });
 290 if (!HasVectorValueOrOp)
 291 return TranslateLegalizeResults(Op, Result: Node);
 292
 // Query the target for the legalize action. Which type is queried depends on
 // the opcode: some ops are keyed on a result type, others on an operand type.
 293 TargetLowering::LegalizeAction Action = TargetLowering::Legal;
 294 EVT ValVT;
 295 switch (Op.getOpcode()) {
 296 default:
 297 return TranslateLegalizeResults(Op, Result: Node);
 298 case ISD::LOAD: {
 299 LoadSDNode *LD = cast<LoadSDNode>(Val: Node);
 300 ISD::LoadExtType ExtType = LD->getExtensionType();
 301 EVT LoadedVT = LD->getMemoryVT();
 302 if (LoadedVT.isVector() && ExtType != ISD::NON_EXTLOAD)
 303 Action = TLI.getLoadExtAction(ExtType, ValVT: LD->getValueType(ResNo: 0), MemVT: LoadedVT);
 304 break;
 305 }
 306 case ISD::STORE: {
 307 StoreSDNode *ST = cast<StoreSDNode>(Val: Node);
 308 EVT StVT = ST->getMemoryVT();
 309 MVT ValVT = ST->getValue().getSimpleValueType();
 310 if (StVT.isVector() && ST->isTruncatingStore())
 311 Action = TLI.getTruncStoreAction(ValVT, MemVT: StVT);
 312 break;
 313 }
 314 case ISD::MERGE_VALUES:
 315 Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: Node->getValueType(ResNo: 0));
 316 // This operation lies about being legal: when it claims to be legal,
 317 // it should actually be expanded.
 318 if (Action == TargetLowering::Legal)
 319 Action = TargetLowering::Expand;
 320 break;
 321#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
 322 case ISD::STRICT_##DAGN:
 323#include "llvm/IR/ConstrainedOps.def"
 324 ValVT = Node->getValueType(ResNo: 0);
 325 if (Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
 326 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
 327 ValVT = Node->getOperand(Num: 1).getValueType();
 328 if (Op.getOpcode() == ISD::STRICT_FSETCC ||
 329 Op.getOpcode() == ISD::STRICT_FSETCCS) {
 330 MVT OpVT = Node->getOperand(Num: 1).getSimpleValueType();
 331 ISD::CondCode CCCode = cast<CondCodeSDNode>(Val: Node->getOperand(Num: 3))->get();
 332 Action = TLI.getCondCodeAction(CC: CCCode, VT: OpVT);
 333 if (Action == TargetLowering::Legal)
 334 Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: OpVT);
 335 } else {
 336 Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: ValVT);
 337 }
 338 // If we're asked to expand a strict vector floating-point operation,
 339 // by default we're going to simply unroll it. That is usually the
 340 // best approach, except in the case where the resulting strict (scalar)
 341 // operations would themselves use the fallback mutation to non-strict.
 342 // In that specific case, just do the fallback on the vector op.
 343 if (Action == TargetLowering::Expand && !TLI.isStrictFPEnabled() &&
 344 TLI.getStrictFPOperationAction(Op: Node->getOpcode(), VT: ValVT) ==
 345 TargetLowering::Legal) {
 346 EVT EltVT = ValVT.getVectorElementType();
 347 if (TLI.getOperationAction(Op: Node->getOpcode(), VT: EltVT)
 348 == TargetLowering::Expand &&
 349 TLI.getStrictFPOperationAction(Op: Node->getOpcode(), VT: EltVT)
 350 == TargetLowering::Legal)
 351 Action = TargetLowering::Legal;
 352 }
 353 break;
 // The common case: ops whose action is keyed on result type 0.
 354 case ISD::ADD:
 355 case ISD::SUB:
 356 case ISD::MUL:
 357 case ISD::MULHS:
 358 case ISD::MULHU:
 359 case ISD::SDIV:
 360 case ISD::UDIV:
 361 case ISD::SREM:
 362 case ISD::UREM:
 363 case ISD::SDIVREM:
 364 case ISD::UDIVREM:
 365 case ISD::FADD:
 366 case ISD::FSUB:
 367 case ISD::FMUL:
 368 case ISD::FDIV:
 369 case ISD::FREM:
 370 case ISD::AND:
 371 case ISD::OR:
 372 case ISD::XOR:
 373 case ISD::SHL:
 374 case ISD::SRA:
 375 case ISD::SRL:
 376 case ISD::FSHL:
 377 case ISD::FSHR:
 378 case ISD::ROTL:
 379 case ISD::ROTR:
 380 case ISD::ABS:
 381 case ISD::ABDS:
 382 case ISD::ABDU:
 383 case ISD::AVGCEILS:
 384 case ISD::AVGCEILU:
 385 case ISD::AVGFLOORS:
 386 case ISD::AVGFLOORU:
 387 case ISD::BSWAP:
 388 case ISD::BITREVERSE:
 389 case ISD::CTLZ:
 390 case ISD::CTTZ:
 391 case ISD::CTLZ_ZERO_UNDEF:
 392 case ISD::CTTZ_ZERO_UNDEF:
 393 case ISD::CTPOP:
 394 case ISD::SELECT:
 395 case ISD::VSELECT:
 396 case ISD::SELECT_CC:
 397 case ISD::ZERO_EXTEND:
 398 case ISD::ANY_EXTEND:
 399 case ISD::TRUNCATE:
 400 case ISD::SIGN_EXTEND:
 401 case ISD::FP_TO_SINT:
 402 case ISD::FP_TO_UINT:
 403 case ISD::FNEG:
 404 case ISD::FABS:
 405 case ISD::FMINNUM:
 406 case ISD::FMAXNUM:
 407 case ISD::FMINNUM_IEEE:
 408 case ISD::FMAXNUM_IEEE:
 409 case ISD::FMINIMUM:
 410 case ISD::FMAXIMUM:
 411 case ISD::FMINIMUMNUM:
 412 case ISD::FMAXIMUMNUM:
 413 case ISD::FCOPYSIGN:
 414 case ISD::FSQRT:
 415 case ISD::FSIN:
 416 case ISD::FCOS:
 417 case ISD::FTAN:
 418 case ISD::FASIN:
 419 case ISD::FACOS:
 420 case ISD::FATAN:
 421 case ISD::FATAN2:
 422 case ISD::FSINH:
 423 case ISD::FCOSH:
 424 case ISD::FTANH:
 425 case ISD::FLDEXP:
 426 case ISD::FPOWI:
 427 case ISD::FPOW:
 428 case ISD::FLOG:
 429 case ISD::FLOG2:
 430 case ISD::FLOG10:
 431 case ISD::FEXP:
 432 case ISD::FEXP2:
 433 case ISD::FEXP10:
 434 case ISD::FCEIL:
 435 case ISD::FTRUNC:
 436 case ISD::FRINT:
 437 case ISD::FNEARBYINT:
 438 case ISD::FROUND:
 439 case ISD::FROUNDEVEN:
 440 case ISD::FFLOOR:
 441 case ISD::FP_ROUND:
 442 case ISD::FP_EXTEND:
 443 case ISD::FPTRUNC_ROUND:
 444 case ISD::FMA:
 445 case ISD::SIGN_EXTEND_INREG:
 446 case ISD::ANY_EXTEND_VECTOR_INREG:
 447 case ISD::SIGN_EXTEND_VECTOR_INREG:
 448 case ISD::ZERO_EXTEND_VECTOR_INREG:
 449 case ISD::SMIN:
 450 case ISD::SMAX:
 451 case ISD::UMIN:
 452 case ISD::UMAX:
 453 case ISD::SMUL_LOHI:
 454 case ISD::UMUL_LOHI:
 455 case ISD::SADDO:
 456 case ISD::UADDO:
 457 case ISD::SSUBO:
 458 case ISD::USUBO:
 459 case ISD::SMULO:
 460 case ISD::UMULO:
 461 case ISD::FCANONICALIZE:
 462 case ISD::FFREXP:
 463 case ISD::FMODF:
 464 case ISD::FSINCOS:
 465 case ISD::FSINCOSPI:
 466 case ISD::SADDSAT:
 467 case ISD::UADDSAT:
 468 case ISD::SSUBSAT:
 469 case ISD::USUBSAT:
 470 case ISD::SSHLSAT:
 471 case ISD::USHLSAT:
 472 case ISD::FP_TO_SINT_SAT:
 473 case ISD::FP_TO_UINT_SAT:
 474 case ISD::MGATHER:
 475 case ISD::VECTOR_COMPRESS:
 476 case ISD::SCMP:
 477 case ISD::UCMP:
 478 Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: Node->getValueType(ResNo: 0));
 479 break;
 // Fixed-point ops also need the scale operand to pick an action.
 480 case ISD::SMULFIX:
 481 case ISD::SMULFIXSAT:
 482 case ISD::UMULFIX:
 483 case ISD::UMULFIXSAT:
 484 case ISD::SDIVFIX:
 485 case ISD::SDIVFIXSAT:
 486 case ISD::UDIVFIX:
 487 case ISD::UDIVFIXSAT: {
 488 unsigned Scale = Node->getConstantOperandVal(Num: 2);
 489 Action = TLI.getFixedPointOperationAction(Op: Node->getOpcode(),
 490 VT: Node->getValueType(ResNo: 0), Scale);
 491 break;
 492 }
 // These ops are keyed on the type of operand 0 (the vector input), since
 // the result may be scalar.
 493 case ISD::LROUND:
 494 case ISD::LLROUND:
 495 case ISD::LRINT:
 496 case ISD::LLRINT:
 497 case ISD::SINT_TO_FP:
 498 case ISD::UINT_TO_FP:
 499 case ISD::VECREDUCE_ADD:
 500 case ISD::VECREDUCE_MUL:
 501 case ISD::VECREDUCE_AND:
 502 case ISD::VECREDUCE_OR:
 503 case ISD::VECREDUCE_XOR:
 504 case ISD::VECREDUCE_SMAX:
 505 case ISD::VECREDUCE_SMIN:
 506 case ISD::VECREDUCE_UMAX:
 507 case ISD::VECREDUCE_UMIN:
 508 case ISD::VECREDUCE_FADD:
 509 case ISD::VECREDUCE_FMAX:
 510 case ISD::VECREDUCE_FMAXIMUM:
 511 case ISD::VECREDUCE_FMIN:
 512 case ISD::VECREDUCE_FMINIMUM:
 513 case ISD::VECREDUCE_FMUL:
 514 case ISD::VECTOR_FIND_LAST_ACTIVE:
 515 Action = TLI.getOperationAction(Op: Node->getOpcode(),
 516 VT: Node->getOperand(Num: 0).getValueType());
 517 break;
 // Sequential reductions take the start value as operand 0; key on the
 // vector operand (operand 1) instead.
 518 case ISD::VECREDUCE_SEQ_FADD:
 519 case ISD::VECREDUCE_SEQ_FMUL:
 520 Action = TLI.getOperationAction(Op: Node->getOpcode(),
 521 VT: Node->getOperand(Num: 1).getValueType());
 522 break;
 523 case ISD::SETCC: {
 524 MVT OpVT = Node->getOperand(Num: 0).getSimpleValueType();
 525 ISD::CondCode CCCode = cast<CondCodeSDNode>(Val: Node->getOperand(Num: 2))->get();
 // The condition code must be legal for the operand type before we even
 // consider the operation action.
 526 Action = TLI.getCondCodeAction(CC: CCCode, VT: OpVT);
 527 if (Action == TargetLowering::Legal)
 528 Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: OpVT);
 529 break;
 530 }
 531 case ISD::PARTIAL_REDUCE_UMLA:
 532 case ISD::PARTIAL_REDUCE_SMLA:
 533 case ISD::PARTIAL_REDUCE_SUMLA:
 534 Action =
 535 TLI.getPartialReduceMLAAction(Opc: Op.getOpcode(), AccVT: Node->getValueType(ResNo: 0),
 536 InputVT: Node->getOperand(Num: 1).getValueType());
 537 break;
 538
 539#define BEGIN_REGISTER_VP_SDNODE(VPID, LEGALPOS, ...) \
 540 case ISD::VPID: { \
 541 EVT LegalizeVT = LEGALPOS < 0 ? Node->getValueType(-(1 + LEGALPOS)) \
 542 : Node->getOperand(LEGALPOS).getValueType(); \
 543 if (ISD::VPID == ISD::VP_SETCC) { \
 544 ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get(); \
 545 Action = TLI.getCondCodeAction(CCCode, LegalizeVT.getSimpleVT()); \
 546 if (Action != TargetLowering::Legal) \
 547 break; \
 548 } \
 549 /* Defer non-vector results to LegalizeDAG. */ \
 550 if (!Node->getValueType(0).isVector() && \
 551 Node->getValueType(0) != MVT::Other) { \
 552 Action = TargetLowering::Legal; \
 553 break; \
 554 } \
 555 Action = TLI.getOperationAction(Node->getOpcode(), LegalizeVT); \
 556 } break;
 557#include "llvm/IR/VPIntrinsics.def"
 558 }
 559
 560 LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
 561
 // Carry out the chosen action. Custom falls through to Expand if the target
 // declines to lower the node.
 562 SmallVector<SDValue, 8> ResultVals;
 563 switch (Action) {
 564 default: llvm_unreachable("This action is not supported yet!");
 565 case TargetLowering::Promote:
 566 assert((Op.getOpcode() != ISD::LOAD && Op.getOpcode() != ISD::STORE) &&
 567 "This action is not supported yet!");
 568 LLVM_DEBUG(dbgs() << "Promoting\n");
 569 Promote(Node, Results&: ResultVals);
 570 assert(!ResultVals.empty() && "No results for promotion?");
 571 break;
 572 case TargetLowering::Legal:
 573 LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
 574 break;
 575 case TargetLowering::Custom:
 576 LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
 577 if (LowerOperationWrapper(N: Node, Results&: ResultVals))
 578 break;
 579 LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
 580 [[fallthrough]];
 581 case TargetLowering::Expand:
 582 LLVM_DEBUG(dbgs() << "Expanding\n");
 583 Expand(Node, Results&: ResultVals);
 584 break;
 585 }
 586
 // Empty results mean the node is to be kept as-is (Legal, or Custom that
 // asked to keep the input node).
 587 if (ResultVals.empty())
 588 return TranslateLegalizeResults(Op, Result: Node);
 589
 590 Changed = true;
 591 return RecursivelyLegalizeResults(Op, Results: ResultVals);
 592}
593
594// FIXME: This is very similar to TargetLowering::LowerOperationWrapper. Can we
595// merge them somehow?
596bool VectorLegalizer::LowerOperationWrapper(SDNode *Node,
 597 SmallVectorImpl<SDValue> &Results) {
 598 SDValue Res = TLI.LowerOperation(Op: SDValue(Node, 0), DAG);
 599
 // A null result means the target wants the default expansion.
 600 if (!Res.getNode())
 601 return false;
 602
 // Returning the node itself means "keep the input node as is"; signal that
 // by returning true with Results left empty.
 603 if (Res == SDValue(Node, 0))
 604 return true;
 605
 606 // If the original node has one result, take the return value from
 607 // LowerOperation as is. It might not be result number 0.
 608 if (Node->getNumValues() == 1) {
 609 Results.push_back(Elt: Res);
 610 return true;
 611 }
 612
 613 // If the original node has multiple results, then the return node should
 614 // have the same number of results.
 615 assert((Node->getNumValues() == Res->getNumValues()) &&
 616 "Lowering returned the wrong number of results!");
 617
 618 // Place the new result values based on the result number of Node.
 619 for (unsigned I = 0, E = Node->getNumValues(); I != E; ++I)
 620 Results.push_back(Elt: Res.getValue(R: I));
 621
 622 return true;
 623}
624
// Promote a (VP_)SETCC by widening both compared operands to the target's
// preferred type (FP_EXTEND for FP vectors, ANY_EXTEND for integers — the
// comparison result is unaffected by the extra bits), leaving the condition
// code, mask and EVL operands untouched.
625void VectorLegalizer::PromoteSETCC(SDNode *Node,
 626 SmallVectorImpl<SDValue> &Results) {
 627 MVT VecVT = Node->getOperand(Num: 0).getSimpleValueType();
 628 MVT NewVecVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT: VecVT);
 629
 630 unsigned ExtOp = VecVT.isFloatingPoint() ? ISD::FP_EXTEND : ISD::ANY_EXTEND;
 631
 632 SDLoc DL(Node);
 633 SmallVector<SDValue, 5> Operands(Node->getNumOperands());
 634
 635 Operands[0] = DAG.getNode(Opcode: ExtOp, DL, VT: NewVecVT, Operand: Node->getOperand(Num: 0));
 636 Operands[1] = DAG.getNode(Opcode: ExtOp, DL, VT: NewVecVT, Operand: Node->getOperand(Num: 1));
 637 Operands[2] = Node->getOperand(Num: 2);
 638
 639 if (Node->getOpcode() == ISD::VP_SETCC) {
 640 Operands[3] = Node->getOperand(Num: 3); // mask
 641 Operands[4] = Node->getOperand(Num: 4); // evl
 642 }
 643
 // The result type is unchanged; only the compared operands were widened.
 644 SDValue Res = DAG.getNode(Opcode: Node->getOpcode(), DL, VT: Node->getSimpleValueType(ResNo: 0),
 645 Ops: Operands, Flags: Node->getFlags());
 646
 647 Results.push_back(Elt: Res);
 648}
649
// Promote a strict FP operation: STRICT_FP_EXTEND each vector operand to the
// promoted type, perform the operation there, then STRICT_FP_ROUND the result
// back. Chains are threaded through the extends via a TokenFactor so the
// strict ordering is preserved.
650void VectorLegalizer::PromoteSTRICT(SDNode *Node,
 651 SmallVectorImpl<SDValue> &Results) {
 652 MVT VecVT = Node->getOperand(Num: 1).getSimpleValueType();
 653 MVT NewVecVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT: VecVT);
 654
 655 assert(VecVT.isFloatingPoint());
 656
 657 SDLoc DL(Node);
 658 SmallVector<SDValue, 5> Operands(Node->getNumOperands());
 659 SmallVector<SDValue, 2> Chains;
 660
 // Operand 0 is the chain; extend every vector operand after it.
 661 for (unsigned j = 1; j != Node->getNumOperands(); ++j)
 662 if (Node->getOperand(Num: j).getValueType().isVector() &&
 663 !(ISD::isVPOpcode(Opcode: Node->getOpcode()) &&
 664 ISD::getVPMaskIdx(Opcode: Node->getOpcode()) == j)) // Skip mask operand.
 665 {
 666 // Promote the vector operand.
 667 SDValue Ext =
 668 DAG.getNode(Opcode: ISD::STRICT_FP_EXTEND, DL, ResultTys: {NewVecVT, MVT::Other},
 669 Ops: {Node->getOperand(Num: 0), Node->getOperand(Num: j)});
 670 Operands[j] = Ext.getValue(R: 0);
 671 Chains.push_back(Elt: Ext.getValue(R: 1));
 672 } else
 673 Operands[j] = Node->getOperand(Num: j); // Not a promotable vector operand.
 674
 675 SDVTList VTs = DAG.getVTList(VT1: NewVecVT, VT2: Node->getValueType(ResNo: 1));
 676
 // Merge the chains of all the extends into the new operation's input chain.
 677 Operands[0] = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: Chains);
 678
 679 SDValue Res =
 680 DAG.getNode(Opcode: Node->getOpcode(), DL, VTList: VTs, Ops: Operands, Flags: Node->getFlags());
 681
 // Round the promoted result back to the original vector type, keeping the
 // strict chain intact.
 682 SDValue Round =
 683 DAG.getNode(Opcode: ISD::STRICT_FP_ROUND, DL, ResultTys: {VecVT, MVT::Other},
 684 Ops: {Res.getValue(R: 1), Res.getValue(R: 0),
 685 DAG.getIntPtrConstant(Val: 0, DL, /*isTarget=*/true)});
 686
 687 Results.push_back(Elt: Round.getValue(R: 0));
 688 Results.push_back(Elt: Round.getValue(R: 1));
 689}
690
// Promote an FP vector reduction: extend the vector operand to the promoted
// type, reduce at that precision, then round the scalar result back to the
// original element type. NonArithmetic (passed as the FP_ROUND "trunc" flag)
// records that the rounding cannot change the value (e.g. min/max results).
691void VectorLegalizer::PromoteFloatVECREDUCE(SDNode *Node,
 692 SmallVectorImpl<SDValue> &Results,
 693 bool NonArithmetic) {
 694 MVT OpVT = Node->getOperand(Num: 0).getSimpleValueType();
 695 assert(OpVT.isFloatingPoint() && "Expected floating point reduction!");
 696 MVT NewOpVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT: OpVT);
 697
 698 SDLoc DL(Node);
 699 SDValue NewOp = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: NewOpVT, Operand: Node->getOperand(Num: 0));
 700 SDValue Rdx =
 701 DAG.getNode(Opcode: Node->getOpcode(), DL, VT: NewOpVT.getVectorElementType(), Operand: NewOp,
 702 Flags: Node->getFlags());
 703 SDValue Res =
 704 DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT: Node->getValueType(ResNo: 0), N1: Rdx,
 705 N2: DAG.getIntPtrConstant(Val: NonArithmetic, DL, /*isTarget=*/true));
 706 Results.push_back(Elt: Res);
 707}
708
// Dispatch vector promotion: a handful of opcodes have dedicated promotion
// strategies; everything else uses the generic bitcast/FP_EXTEND scheme below.
709void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
 710 // For a few operations there is a specific concept for promotion based on
 711 // the operand's type.
 712 switch (Node->getOpcode()) {
 713 case ISD::SINT_TO_FP:
 714 case ISD::UINT_TO_FP:
 715 case ISD::STRICT_SINT_TO_FP:
 716 case ISD::STRICT_UINT_TO_FP:
 717 // "Promote" the operation by extending the operand.
 718 PromoteINT_TO_FP(Node, Results);
 719 return;
 720 case ISD::FP_TO_UINT:
 721 case ISD::FP_TO_SINT:
 722 case ISD::STRICT_FP_TO_UINT:
 723 case ISD::STRICT_FP_TO_SINT:
 724 // Promote the operation by extending the operand.
 725 PromoteFP_TO_INT(Node, Results);
 726 return;
 727 case ISD::VP_SETCC:
 728 case ISD::SETCC:
 729 // Promote the operation by extending the operand.
 730 PromoteSETCC(Node, Results);
 731 return;
 732 case ISD::STRICT_FADD:
 733 case ISD::STRICT_FSUB:
 734 case ISD::STRICT_FMUL:
 735 case ISD::STRICT_FDIV:
 736 case ISD::STRICT_FSQRT:
 737 case ISD::STRICT_FMA:
 738 PromoteSTRICT(Node, Results);
 739 return;
 740 case ISD::VECREDUCE_FADD:
 741 PromoteFloatVECREDUCE(Node, Results, /*NonArithmetic=*/false);
 742 return;
 743 case ISD::VECREDUCE_FMAX:
 744 case ISD::VECREDUCE_FMAXIMUM:
 745 case ISD::VECREDUCE_FMIN:
 746 case ISD::VECREDUCE_FMINIMUM:
 747 PromoteFloatVECREDUCE(Node, Results, /*NonArithmetic=*/true);
 748 return;
 749 case ISD::FP_ROUND:
 750 case ISD::FP_EXTEND:
 751 // These operations are used to do promotion so they can't be promoted
 752 // themselves.
 753 llvm_unreachable("Don't know how to promote this operation!");
 754 case ISD::VP_FABS:
 755 case ISD::VP_FCOPYSIGN:
 756 case ISD::VP_FNEG:
 757 // Promoting fabs, fneg, and fcopysign changes their semantics.
 758 llvm_unreachable("These operations should not be promoted");
 759 }
 760
 761 // There are currently two cases of vector promotion:
 762 // 1) Bitcasting a vector of integers to a different type to a vector of the
 763 // same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64.
 764 // 2) Extending a vector of floats to a vector of the same number of larger
 765 // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32.
 766 assert(Node->getNumValues() == 1 &&
 767 "Can't promote a vector with multiple results!");
 768 MVT VT = Node->getSimpleValueType(ResNo: 0);
 769 MVT NVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT);
 770 SDLoc dl(Node);
 771 SmallVector<SDValue, 4> Operands(Node->getNumOperands());
 772
 // Convert each operand to the promoted type: FP vectors via (VP_)FP_EXTEND,
 // everything else (integer vectors) via BITCAST; scalars and VP masks pass
 // through unchanged.
 773 for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
 774 // Do not promote the mask operand of a VP OP.
 775 bool SkipPromote = ISD::isVPOpcode(Opcode: Node->getOpcode()) &&
 776 ISD::getVPMaskIdx(Opcode: Node->getOpcode()) == j;
 777 if (Node->getOperand(Num: j).getValueType().isVector() && !SkipPromote)
 778 if (Node->getOperand(Num: j)
 779 .getValueType()
 780 .getVectorElementType()
 781 .isFloatingPoint() &&
 782 NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())
 783 if (ISD::isVPOpcode(Opcode: Node->getOpcode())) {
 784 unsigned EVLIdx =
 785 *ISD::getVPExplicitVectorLengthIdx(Opcode: Node->getOpcode());
 786 unsigned MaskIdx = *ISD::getVPMaskIdx(Opcode: Node->getOpcode());
 787 Operands[j] =
 788 DAG.getNode(Opcode: ISD::VP_FP_EXTEND, DL: dl, VT: NVT, N1: Node->getOperand(Num: j),
 789 N2: Node->getOperand(Num: MaskIdx), N3: Node->getOperand(Num: EVLIdx));
 790 } else {
 791 Operands[j] =
 792 DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: NVT, Operand: Node->getOperand(Num: j));
 793 }
 794 else
 795 Operands[j] = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: NVT, Operand: Node->getOperand(Num: j));
 796 else
 797 Operands[j] = Node->getOperand(Num: j);
 798 }
 799
 800 SDValue Res =
 801 DAG.getNode(Opcode: Node->getOpcode(), DL: dl, VT: NVT, Ops: Operands, Flags: Node->getFlags());
 802
 // Convert the result back: FP promotions round back down, integer
 // promotions bitcast back to the original type.
 803 if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
 804 (VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
 805 NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
 806 if (ISD::isVPOpcode(Opcode: Node->getOpcode())) {
 807 unsigned EVLIdx = *ISD::getVPExplicitVectorLengthIdx(Opcode: Node->getOpcode());
 808 unsigned MaskIdx = *ISD::getVPMaskIdx(Opcode: Node->getOpcode());
 809 Res = DAG.getNode(Opcode: ISD::VP_FP_ROUND, DL: dl, VT, N1: Res,
 810 N2: Node->getOperand(Num: MaskIdx), N3: Node->getOperand(Num: EVLIdx));
 811 } else {
 812 Res = DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT, N1: Res,
 813 N2: DAG.getIntPtrConstant(Val: 0, DL: dl, /*isTarget=*/true));
 814 }
 815 else
 816 Res = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT, Operand: Res);
 817
 818 Results.push_back(Elt: Res);
 819}
820
821void VectorLegalizer::PromoteINT_TO_FP(SDNode *Node,
 822 SmallVectorImpl<SDValue> &Results) {
 823 // INT_TO_FP operations may require the input operand be promoted even
 824 // when the type is otherwise legal.
 825 bool IsStrict = Node->isStrictFPOpcode();
 826 MVT VT = Node->getOperand(Num: IsStrict ? 1 : 0).getSimpleValueType();
 827 MVT NVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT);
 828 assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
 829 "Vectors have different number of elements!");
 830
 831 SDLoc dl(Node);
 832 SmallVector<SDValue, 4> Operands(Node->getNumOperands());
 833
 // Match the extension to the conversion's signedness: zero-extend for
 // unsigned conversions, sign-extend for signed ones.
 834 unsigned Opc = (Node->getOpcode() == ISD::UINT_TO_FP ||
 835 Node->getOpcode() == ISD::STRICT_UINT_TO_FP)
 836 ? ISD::ZERO_EXTEND
 837 : ISD::SIGN_EXTEND;
 838 for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
 839 if (Node->getOperand(Num: j).getValueType().isVector())
 840 Operands[j] = DAG.getNode(Opcode: Opc, DL: dl, VT: NVT, Operand: Node->getOperand(Num: j));
 841 else
 842 Operands[j] = Node->getOperand(Num: j);
 843 }
 844
 // Strict conversions carry a chain result that must be returned too.
 845 if (IsStrict) {
 846 SDValue Res = DAG.getNode(Opcode: Node->getOpcode(), DL: dl,
 847 ResultTys: {Node->getValueType(ResNo: 0), MVT::Other}, Ops: Operands);
 848 Results.push_back(Elt: Res);
 849 Results.push_back(Elt: Res.getValue(R: 1));
 850 return;
 851 }
 852
 853 SDValue Res =
 854 DAG.getNode(Opcode: Node->getOpcode(), DL: dl, VT: Node->getValueType(ResNo: 0), Ops: Operands);
 855 Results.push_back(Elt: Res);
 856}
857
// For FP_TO_INT we promote the result type to a vector type with wider
// elements and then truncate the result. This is different from the default
// PromoteVector which uses bitcast to promote thus assuming that the
// promoted vector type has the same overall size.
void VectorLegalizer::PromoteFP_TO_INT(SDNode *Node,
                                       SmallVectorImpl<SDValue> &Results) {
  MVT VT = Node->getSimpleValueType(ResNo: 0);
  MVT NVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT);
  bool IsStrict = Node->isStrictFPOpcode();
  assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
         "Vectors have different number of elements!");

  unsigned NewOpc = Node->getOpcode();
  // Change FP_TO_UINT to FP_TO_SINT if possible.
  // TODO: Should we only do this if FP_TO_UINT itself isn't legal?
  if (NewOpc == ISD::FP_TO_UINT &&
      TLI.isOperationLegalOrCustom(Op: ISD::FP_TO_SINT, VT: NVT))
    NewOpc = ISD::FP_TO_SINT;

  if (NewOpc == ISD::STRICT_FP_TO_UINT &&
      TLI.isOperationLegalOrCustom(Op: ISD::STRICT_FP_TO_SINT, VT: NVT))
    NewOpc = ISD::STRICT_FP_TO_SINT;

  SDLoc dl(Node);
  SDValue Promoted, Chain;
  // Strict nodes carry a chain operand (operand 0) and produce an extra
  // chain result which must be returned to the caller.
  if (IsStrict) {
    Promoted = DAG.getNode(Opcode: NewOpc, DL: dl, ResultTys: {NVT, MVT::Other},
                           Ops: {Node->getOperand(Num: 0), Node->getOperand(Num: 1)});
    Chain = Promoted.getValue(R: 1);
  } else
    Promoted = DAG.getNode(Opcode: NewOpc, DL: dl, VT: NVT, Operand: Node->getOperand(Num: 0));

  // Assert that the converted value fits in the original type. If it doesn't
  // (eg: because the value being converted is too big), then the result of the
  // original operation was undefined anyway, so the assert is still correct.
  if (Node->getOpcode() == ISD::FP_TO_UINT ||
      Node->getOpcode() == ISD::STRICT_FP_TO_UINT)
    NewOpc = ISD::AssertZext;
  else
    NewOpc = ISD::AssertSext;

  Promoted = DAG.getNode(Opcode: NewOpc, DL: dl, VT: NVT, N1: Promoted,
                         N2: DAG.getValueType(VT.getScalarType()));
  // Truncate back down to the requested (narrower) element type.
  Promoted = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Promoted);
  Results.push_back(Elt: Promoted);
  if (IsStrict)
    Results.push_back(Elt: Chain);
}
906
907std::pair<SDValue, SDValue> VectorLegalizer::ExpandLoad(SDNode *N) {
908 LoadSDNode *LD = cast<LoadSDNode>(Val: N);
909 return TLI.scalarizeVectorLoad(LD, DAG);
910}
911
912SDValue VectorLegalizer::ExpandStore(SDNode *N) {
913 StoreSDNode *ST = cast<StoreSDNode>(Val: N);
914 SDValue TF = TLI.scalarizeVectorStore(ST, DAG);
915 return TF;
916}
917
// Expand Node into a sequence of operations that have a better chance of
// being legal, appending the results to Results. Cases that 'return' have
// produced a complete expansion; cases that 'break' fall through to the
// elementwise unroll at the bottom as a last resort.
void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
  switch (Node->getOpcode()) {
  case ISD::LOAD: {
    std::pair<SDValue, SDValue> Tmp = ExpandLoad(N: Node);
    Results.push_back(Elt: Tmp.first);
    Results.push_back(Elt: Tmp.second);
    return;
  }
  case ISD::STORE:
    Results.push_back(Elt: ExpandStore(N: Node));
    return;
  case ISD::MERGE_VALUES:
    // Each result of a MERGE_VALUES node is simply its corresponding operand.
    for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
      Results.push_back(Elt: Node->getOperand(Num: i));
    return;
  case ISD::SIGN_EXTEND_INREG:
    if (SDValue Expanded = ExpandSEXTINREG(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::ANY_EXTEND_VECTOR_INREG:
    Results.push_back(Elt: ExpandANY_EXTEND_VECTOR_INREG(Node));
    return;
  case ISD::SIGN_EXTEND_VECTOR_INREG:
    Results.push_back(Elt: ExpandSIGN_EXTEND_VECTOR_INREG(Node));
    return;
  case ISD::ZERO_EXTEND_VECTOR_INREG:
    Results.push_back(Elt: ExpandZERO_EXTEND_VECTOR_INREG(Node));
    return;
  case ISD::BSWAP:
    if (SDValue Expanded = ExpandBSWAP(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_BSWAP:
    Results.push_back(Elt: TLI.expandVPBSWAP(N: Node, DAG));
    return;
  case ISD::VSELECT:
    if (SDValue Expanded = ExpandVSELECT(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_SELECT:
    if (SDValue Expanded = ExpandVP_SELECT(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_SREM:
  case ISD::VP_UREM:
    if (SDValue Expanded = ExpandVP_REM(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_FNEG:
    if (SDValue Expanded = ExpandVP_FNEG(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_FABS:
    if (SDValue Expanded = ExpandVP_FABS(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_FCOPYSIGN:
    if (SDValue Expanded = ExpandVP_FCOPYSIGN(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::SELECT:
    if (SDValue Expanded = ExpandSELECT(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::SELECT_CC: {
    // Scalable vectors can't be unrolled, so lower SELECT_CC to SETCC+SELECT.
    if (Node->getValueType(ResNo: 0).isScalableVector()) {
      EVT CondVT = TLI.getSetCCResultType(
          DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: Node->getValueType(ResNo: 0));
      SDValue SetCC =
          DAG.getNode(Opcode: ISD::SETCC, DL: SDLoc(Node), VT: CondVT, N1: Node->getOperand(Num: 0),
                      N2: Node->getOperand(Num: 1), N3: Node->getOperand(Num: 4));
      Results.push_back(Elt: DAG.getSelect(DL: SDLoc(Node), VT: Node->getValueType(ResNo: 0), Cond: SetCC,
                                      LHS: Node->getOperand(Num: 2),
                                      RHS: Node->getOperand(Num: 3)));
      return;
    }
    break;
  }
  case ISD::FP_TO_UINT:
    ExpandFP_TO_UINT(Node, Results);
    return;
  case ISD::UINT_TO_FP:
    ExpandUINT_TO_FLOAT(Node, Results);
    return;
  case ISD::FNEG:
    if (SDValue Expanded = ExpandFNEG(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::FABS:
    if (SDValue Expanded = ExpandFABS(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::FCOPYSIGN:
    if (SDValue Expanded = ExpandFCOPYSIGN(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::FSUB:
    ExpandFSUB(Node, Results);
    return;
  case ISD::SETCC:
  case ISD::VP_SETCC:
    ExpandSETCC(Node, Results);
    return;
  case ISD::ABS:
    if (SDValue Expanded = TLI.expandABS(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::ABDS:
  case ISD::ABDU:
    if (SDValue Expanded = TLI.expandABD(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::AVGCEILS:
  case ISD::AVGCEILU:
  case ISD::AVGFLOORS:
  case ISD::AVGFLOORU:
    if (SDValue Expanded = TLI.expandAVG(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::BITREVERSE:
    if (SDValue Expanded = ExpandBITREVERSE(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_BITREVERSE:
    if (SDValue Expanded = TLI.expandVPBITREVERSE(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::CTPOP:
    if (SDValue Expanded = TLI.expandCTPOP(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_CTPOP:
    if (SDValue Expanded = TLI.expandVPCTPOP(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::CTLZ:
  case ISD::CTLZ_ZERO_UNDEF:
    if (SDValue Expanded = TLI.expandCTLZ(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_CTLZ:
  case ISD::VP_CTLZ_ZERO_UNDEF:
    if (SDValue Expanded = TLI.expandVPCTLZ(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::CTTZ:
  case ISD::CTTZ_ZERO_UNDEF:
    if (SDValue Expanded = TLI.expandCTTZ(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_CTTZ:
  case ISD::VP_CTTZ_ZERO_UNDEF:
    if (SDValue Expanded = TLI.expandVPCTTZ(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::FSHL:
  case ISD::VP_FSHL:
  case ISD::FSHR:
  case ISD::VP_FSHR:
    if (SDValue Expanded = TLI.expandFunnelShift(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::ROTL:
  case ISD::ROTR:
    if (SDValue Expanded = TLI.expandROT(N: Node, AllowVectorOps: false /*AllowVectorOps*/, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::FMINNUM:
  case ISD::FMAXNUM:
    if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::FMINIMUM:
  case ISD::FMAXIMUM:
    Results.push_back(Elt: TLI.expandFMINIMUM_FMAXIMUM(N: Node, DAG));
    return;
  case ISD::FMINIMUMNUM:
  case ISD::FMAXIMUMNUM:
    Results.push_back(Elt: TLI.expandFMINIMUMNUM_FMAXIMUMNUM(N: Node, DAG));
    return;
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:
    if (SDValue Expanded = TLI.expandIntMINMAX(Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::UADDO:
  case ISD::USUBO:
    ExpandUADDSUBO(Node, Results);
    return;
  case ISD::SADDO:
  case ISD::SSUBO:
    ExpandSADDSUBO(Node, Results);
    return;
  case ISD::UMULO:
  case ISD::SMULO:
    ExpandMULO(Node, Results);
    return;
  case ISD::USUBSAT:
  case ISD::SSUBSAT:
  case ISD::UADDSAT:
  case ISD::SADDSAT:
    if (SDValue Expanded = TLI.expandAddSubSat(Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::USHLSAT:
  case ISD::SSHLSAT:
    if (SDValue Expanded = TLI.expandShlSat(Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT:
    // Expand the FP_TO_*_SAT if it is scalable to prevent it from unrolling below.
    if (Node->getValueType(ResNo: 0).isScalableVector()) {
      if (SDValue Expanded = TLI.expandFP_TO_INT_SAT(N: Node, DAG)) {
        Results.push_back(Elt: Expanded);
        return;
      }
    }
    break;
  case ISD::SMULFIX:
  case ISD::UMULFIX:
    if (SDValue Expanded = TLI.expandFixedPointMul(Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::SMULFIXSAT:
  case ISD::UMULFIXSAT:
    // FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet, not sure exactly
    // why. Maybe it results in worse codegen compared to the unroll for some
    // targets? This should probably be investigated. And if we still prefer to
    // unroll an explanation could be helpful.
    break;
  case ISD::SDIVFIX:
  case ISD::UDIVFIX:
    ExpandFixedPointDiv(Node, Results);
    return;
  case ISD::SDIVFIXSAT:
  case ISD::UDIVFIXSAT:
    break;
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)               \
  case ISD::STRICT_##DAGN:
#include "llvm/IR/ConstrainedOps.def"
    ExpandStrictFPOp(Node, Results);
    return;
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_MUL:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_UMIN:
  case ISD::VECREDUCE_FADD:
  case ISD::VECREDUCE_FMUL:
  case ISD::VECREDUCE_FMAX:
  case ISD::VECREDUCE_FMIN:
  case ISD::VECREDUCE_FMAXIMUM:
  case ISD::VECREDUCE_FMINIMUM:
    Results.push_back(Elt: TLI.expandVecReduce(Node, DAG));
    return;
  case ISD::PARTIAL_REDUCE_UMLA:
  case ISD::PARTIAL_REDUCE_SMLA:
  case ISD::PARTIAL_REDUCE_SUMLA:
    Results.push_back(Elt: TLI.expandPartialReduceMLA(Node, DAG));
    return;
  case ISD::VECREDUCE_SEQ_FADD:
  case ISD::VECREDUCE_SEQ_FMUL:
    Results.push_back(Elt: TLI.expandVecReduceSeq(Node, DAG));
    return;
  case ISD::SREM:
  case ISD::UREM:
    ExpandREM(Node, Results);
    return;
  case ISD::VP_MERGE:
    if (SDValue Expanded = ExpandVP_MERGE(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::FREM:
    if (tryExpandVecMathCall(Node, Call_F32: RTLIB::REM_F32, Call_F64: RTLIB::REM_F64,
                             Call_F80: RTLIB::REM_F80, Call_F128: RTLIB::REM_F128,
                             Call_PPCF128: RTLIB::REM_PPCF128, Results))
      return;

    break;
  case ISD::FSINCOS:
  case ISD::FSINCOSPI: {
    EVT VT = Node->getValueType(ResNo: 0).getVectorElementType();
    RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS
                            ? RTLIB::getSINCOS(RetVT: VT)
                            : RTLIB::getSINCOSPI(RetVT: VT);
    if (DAG.expandMultipleResultFPLibCall(LC, Node, Results))
      return;
    break;
  }
  case ISD::FMODF: {
    RTLIB::Libcall LC =
        RTLIB::getMODF(RetVT: Node->getValueType(ResNo: 0).getVectorElementType());
    if (DAG.expandMultipleResultFPLibCall(LC, Node, Results,
                                          /*CallRetResNo=*/0))
      return;
    break;
  }
  case ISD::VECTOR_COMPRESS:
    Results.push_back(Elt: TLI.expandVECTOR_COMPRESS(Node, DAG));
    return;
  case ISD::VECTOR_FIND_LAST_ACTIVE:
    Results.push_back(Elt: TLI.expandVectorFindLastActive(N: Node, DAG));
    return;
  case ISD::SCMP:
  case ISD::UCMP:
    Results.push_back(Elt: TLI.expandCMP(Node, DAG));
    return;

  case ISD::FADD:
  case ISD::FMUL:
  case ISD::FMA:
  case ISD::FDIV:
  case ISD::FCEIL:
  case ISD::FFLOOR:
  case ISD::FNEARBYINT:
  case ISD::FRINT:
  case ISD::FROUND:
  case ISD::FROUNDEVEN:
  case ISD::FTRUNC:
  case ISD::FSQRT:
    if (SDValue Expanded = TLI.expandVectorNaryOpBySplitting(Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  }

  // Anything that reached a 'break' above could not be expanded; fall back to
  // unrolling the vector operation into scalar operations.
  SDValue Unrolled = DAG.UnrollVectorOp(N: Node);
  if (Node->getNumValues() == 1) {
    Results.push_back(Elt: Unrolled);
  } else {
    assert(Node->getNumValues() == Unrolled->getNumValues() &&
           "VectorLegalizer Expand returned wrong number of results!");
    for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I)
      Results.push_back(Elt: Unrolled.getValue(R: I));
  }
}
1324
SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
  // Lower a select instruction where the condition is a scalar and the
  // operands are vectors. Lower this select to VSELECT and implement it
  // using XOR AND OR. The selector bit is broadcasted.
  // Returns SDValue() when the required ops aren't available, so the caller
  // falls back to unrolling.
  EVT VT = Node->getValueType(ResNo: 0);
  SDLoc DL(Node);

  SDValue Mask = Node->getOperand(Num: 0);
  SDValue Op1 = Node->getOperand(Num: 1);
  SDValue Op2 = Node->getOperand(Num: 2);

  assert(VT.isVector() && !Mask.getValueType().isVector()
         && Op1.getValueType() == Op2.getValueType() && "Invalid type");

  // If we can't even use the basic vector operations of
  // AND,OR,XOR, we will have to scalarize the op.
  // Notice that the operation may be 'promoted' which means that it is
  // 'bitcasted' to another type which is handled.
  // Also, we need to be able to construct a splat vector using either
  // BUILD_VECTOR or SPLAT_VECTOR.
  // FIXME: Should we also permit fixed-length SPLAT_VECTOR as a fallback to
  // BUILD_VECTOR?
  if (TLI.getOperationAction(Op: ISD::AND, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(Op: ISD::XOR, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(Op: ISD::OR, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(Op: VT.isFixedLengthVector() ? ISD::BUILD_VECTOR
                                                        : ISD::SPLAT_VECTOR,
                             VT) == TargetLowering::Expand)
    return SDValue();

  // Generate a mask operand.
  EVT MaskTy = VT.changeVectorElementTypeToInteger();

  // What is the size of each element in the vector mask.
  EVT BitTy = MaskTy.getScalarType();

  // Turn the scalar condition into an all-ones / all-zeros scalar of the mask
  // element type, ready to be splatted.
  Mask = DAG.getSelect(DL, VT: BitTy, Cond: Mask, LHS: DAG.getAllOnesConstant(DL, VT: BitTy),
                       RHS: DAG.getConstant(Val: 0, DL, VT: BitTy));

  // Broadcast the mask so that the entire vector is all one or all zero.
  Mask = DAG.getSplat(VT: MaskTy, DL, Op: Mask);

  // Bitcast the operands to be the same type as the mask.
  // This is needed when we select between FP types because
  // the mask is a vector of integers.
  Op1 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MaskTy, Operand: Op1);
  Op2 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MaskTy, Operand: Op2);

  SDValue NotMask = DAG.getNOT(DL, Val: Mask, VT: MaskTy);

  // (Op1 & Mask) | (Op2 & ~Mask) selects Op1 where the mask is all-ones and
  // Op2 elsewhere.
  Op1 = DAG.getNode(Opcode: ISD::AND, DL, VT: MaskTy, N1: Op1, N2: Mask);
  Op2 = DAG.getNode(Opcode: ISD::AND, DL, VT: MaskTy, N1: Op2, N2: NotMask);
  SDValue Val = DAG.getNode(Opcode: ISD::OR, DL, VT: MaskTy, N1: Op1, N2: Op2);
  return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: Node->getValueType(ResNo: 0), Operand: Val);
}
1380
1381SDValue VectorLegalizer::ExpandSEXTINREG(SDNode *Node) {
1382 EVT VT = Node->getValueType(ResNo: 0);
1383
1384 // Make sure that the SRA and SHL instructions are available.
1385 if (TLI.getOperationAction(Op: ISD::SRA, VT) == TargetLowering::Expand ||
1386 TLI.getOperationAction(Op: ISD::SHL, VT) == TargetLowering::Expand)
1387 return SDValue();
1388
1389 SDLoc DL(Node);
1390 EVT OrigTy = cast<VTSDNode>(Val: Node->getOperand(Num: 1))->getVT();
1391
1392 unsigned BW = VT.getScalarSizeInBits();
1393 unsigned OrigBW = OrigTy.getScalarSizeInBits();
1394 SDValue ShiftSz = DAG.getConstant(Val: BW - OrigBW, DL, VT);
1395
1396 SDValue Op = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Node->getOperand(Num: 0), N2: ShiftSz);
1397 return DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: Op, N2: ShiftSz);
1398}
1399
// Generically expand a vector anyext in register to a shuffle of the relevant
// lanes into the appropriate locations, with other lanes left undef.
SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node) {
  SDLoc DL(Node);
  EVT VT = Node->getValueType(ResNo: 0);
  int NumElements = VT.getVectorNumElements();
  SDValue Src = Node->getOperand(Num: 0);
  EVT SrcVT = Src.getValueType();
  int NumSrcElements = SrcVT.getVectorNumElements();

  // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
  // into a larger vector type.
  if (SrcVT.bitsLE(VT)) {
    assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
           "ANY_EXTEND_VECTOR_INREG vector size mismatch");
    NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
    SrcVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: SrcVT.getScalarType(),
                             NumElements: NumSrcElements);
    Src = DAG.getInsertSubvector(DL, Vec: DAG.getUNDEF(VT: SrcVT), SubVec: Src, Idx: 0);
  }

  // Build a base mask of undef shuffles.
  SmallVector<int, 16> ShuffleMask;
  ShuffleMask.resize(N: NumSrcElements, NV: -1);

  // Place the extended lanes into the correct locations.
  // Each destination element covers ExtLaneScale source lanes; on big-endian
  // targets the significant (low) part sits in the last of those lanes.
  int ExtLaneScale = NumSrcElements / NumElements;
  int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
  for (int i = 0; i < NumElements; ++i)
    ShuffleMask[i * ExtLaneScale + EndianOffset] = i;

  // Shuffle in the narrow element type, then reinterpret as the wide type.
  return DAG.getNode(
      Opcode: ISD::BITCAST, DL, VT,
      Operand: DAG.getVectorShuffle(VT: SrcVT, dl: DL, N1: Src, N2: DAG.getUNDEF(VT: SrcVT), Mask: ShuffleMask));
}
1435
1436SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node) {
1437 SDLoc DL(Node);
1438 EVT VT = Node->getValueType(ResNo: 0);
1439 SDValue Src = Node->getOperand(Num: 0);
1440 EVT SrcVT = Src.getValueType();
1441
1442 // First build an any-extend node which can be legalized above when we
1443 // recurse through it.
1444 SDValue Op = DAG.getNode(Opcode: ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Operand: Src);
1445
1446 // Now we need sign extend. Do this by shifting the elements. Even if these
1447 // aren't legal operations, they have a better chance of being legalized
1448 // without full scalarization than the sign extension does.
1449 unsigned EltWidth = VT.getScalarSizeInBits();
1450 unsigned SrcEltWidth = SrcVT.getScalarSizeInBits();
1451 SDValue ShiftAmount = DAG.getConstant(Val: EltWidth - SrcEltWidth, DL, VT);
1452 return DAG.getNode(Opcode: ISD::SRA, DL, VT,
1453 N1: DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Op, N2: ShiftAmount),
1454 N2: ShiftAmount);
1455}
1456
// Generically expand a vector zext in register to a shuffle of the relevant
// lanes into the appropriate locations, a blend of zero into the high bits,
// and a bitcast to the wider element type.
SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node) {
  SDLoc DL(Node);
  EVT VT = Node->getValueType(ResNo: 0);
  int NumElements = VT.getVectorNumElements();
  SDValue Src = Node->getOperand(Num: 0);
  EVT SrcVT = Src.getValueType();
  int NumSrcElements = SrcVT.getVectorNumElements();

  // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
  // into a larger vector type.
  if (SrcVT.bitsLE(VT)) {
    assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
           "ZERO_EXTEND_VECTOR_INREG vector size mismatch");
    NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
    SrcVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: SrcVT.getScalarType(),
                             NumElements: NumSrcElements);
    Src = DAG.getInsertSubvector(DL, Vec: DAG.getUNDEF(VT: SrcVT), SubVec: Src, Idx: 0);
  }

  // Build up a zero vector to blend into this one.
  SDValue Zero = DAG.getConstant(Val: 0, DL, VT: SrcVT);

  // Shuffle the incoming lanes into the correct position, and pull all other
  // lanes from the zero vector.
  // The mask starts as the identity over the zero vector (operand 1); source
  // lanes (offset by NumSrcElements, selecting operand 2) are then scattered
  // into the low (or high, on big-endian) part of each wide element.
  auto ShuffleMask = llvm::to_vector<16>(Range: llvm::seq<int>(Begin: 0, End: NumSrcElements));

  int ExtLaneScale = NumSrcElements / NumElements;
  int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
  for (int i = 0; i < NumElements; ++i)
    ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i;

  return DAG.getNode(Opcode: ISD::BITCAST, DL, VT,
                     Operand: DAG.getVectorShuffle(VT: SrcVT, dl: DL, N1: Zero, N2: Src, Mask: ShuffleMask));
}
1494
1495static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
1496 int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
1497 for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
1498 for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
1499 ShuffleMask.push_back(Elt: (I * ScalarSizeInBytes) + J);
1500}
1501
// Expand a vector BSWAP: prefer a byte shuffle when the mask is legal,
// otherwise a shift/mask sequence; returns SDValue() to let the caller unroll.
SDValue VectorLegalizer::ExpandBSWAP(SDNode *Node) {
  EVT VT = Node->getValueType(ResNo: 0);

  // Scalable vectors can't use shuffle expansion.
  if (VT.isScalableVector())
    return TLI.expandBSWAP(N: Node, DAG);

  // Generate a byte wise shuffle mask for the BSWAP.
  SmallVector<int, 16> ShuffleMask;
  createBSWAPShuffleMask(VT, ShuffleMask);
  EVT ByteVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: MVT::i8, NumElements: ShuffleMask.size());

  // Only emit a shuffle if the mask is legal.
  if (TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) {
    SDLoc DL(Node);
    // View the input as bytes, permute them, and view the result back.
    SDValue Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ByteVT, Operand: Node->getOperand(Num: 0));
    Op = DAG.getVectorShuffle(VT: ByteVT, dl: DL, N1: Op, N2: DAG.getUNDEF(VT: ByteVT), Mask: ShuffleMask);
    return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op);
  }

  // If we have the appropriate vector bit operations, it is better to use them
  // than unrolling and expanding each component.
  if (TLI.isOperationLegalOrCustom(Op: ISD::SHL, VT) &&
      TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT) &&
      TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT) &&
      TLI.isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT))
    return TLI.expandBSWAP(N: Node, DAG);

  // Otherwise let the caller unroll.
  return SDValue();
}
1533
// Expand a vector BITREVERSE, preferring (in order): generic expansion for
// scalable vectors, unrolling when the scalar op is available, a BSWAP-style
// byte shuffle plus a byte-wide BITREVERSE, then a shift/mask sequence.
SDValue VectorLegalizer::ExpandBITREVERSE(SDNode *Node) {
  EVT VT = Node->getValueType(ResNo: 0);

  // We can't unroll or use shuffles for scalable vectors.
  if (VT.isScalableVector())
    return TLI.expandBITREVERSE(N: Node, DAG);

  // If we have the scalar operation, it's probably cheaper to unroll it.
  if (TLI.isOperationLegalOrCustom(Op: ISD::BITREVERSE, VT: VT.getScalarType()))
    return SDValue();

  // If the vector element width is a whole number of bytes, test if its legal
  // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte
  // vector. This greatly reduces the number of bit shifts necessary.
  unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
  if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) {
    SmallVector<int, 16> BSWAPMask;
    createBSWAPShuffleMask(VT, ShuffleMask&: BSWAPMask);

    EVT ByteVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: MVT::i8, NumElements: BSWAPMask.size());
    // The byte-level BITREVERSE must itself be expressible, either directly
    // or via the shift/mask ops its expansion needs.
    if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) &&
        (TLI.isOperationLegalOrCustom(Op: ISD::BITREVERSE, VT: ByteVT) ||
         (TLI.isOperationLegalOrCustom(Op: ISD::SHL, VT: ByteVT) &&
          TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT: ByteVT) &&
          TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT: ByteVT) &&
          TLI.isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT: ByteVT)))) {
      SDLoc DL(Node);
      SDValue Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ByteVT, Operand: Node->getOperand(Num: 0));
      Op = DAG.getVectorShuffle(VT: ByteVT, dl: DL, N1: Op, N2: DAG.getUNDEF(VT: ByteVT),
                                Mask: BSWAPMask);
      Op = DAG.getNode(Opcode: ISD::BITREVERSE, DL, VT: ByteVT, Operand: Op);
      Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op);
      return Op;
    }
  }

  // If we have the appropriate vector bit operations, it is better to use them
  // than unrolling and expanding each component.
  if (TLI.isOperationLegalOrCustom(Op: ISD::SHL, VT) &&
      TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT) &&
      TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT) &&
      TLI.isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT))
    return TLI.expandBITREVERSE(N: Node, DAG);

  // Otherwise unroll.
  return SDValue();
}
1581
SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) {
  // Implement VSELECT in terms of XOR, AND, OR
  // on platforms which do not support blend natively.
  // Returns SDValue() when the logic-op expansion is unavailable or unsound,
  // so the caller falls back to unrolling.
  SDLoc DL(Node);

  SDValue Mask = Node->getOperand(Num: 0);
  SDValue Op1 = Node->getOperand(Num: 1);
  SDValue Op2 = Node->getOperand(Num: 2);

  EVT VT = Mask.getValueType();

  // If we can't even use the basic vector operations of
  // AND,OR,XOR, we will have to scalarize the op.
  // Notice that the operation may be 'promoted' which means that it is
  // 'bitcasted' to another type which is handled.
  if (TLI.getOperationAction(Op: ISD::AND, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(Op: ISD::XOR, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(Op: ISD::OR, VT) == TargetLowering::Expand)
    return SDValue();

  // This operation also isn't safe with AND, OR, XOR when the boolean type is
  // 0/1 and the select operands aren't also booleans, as we need an all-ones
  // vector constant to mask with.
  // FIXME: Sign extend 1 to all ones if that's legal on the target.
  auto BoolContents = TLI.getBooleanContents(Type: Op1.getValueType());
  if (BoolContents != TargetLowering::ZeroOrNegativeOneBooleanContent &&
      !(BoolContents == TargetLowering::ZeroOrOneBooleanContent &&
        Op1.getValueType().getVectorElementType() == MVT::i1))
    return SDValue();

  // If the mask and the type are different sizes, unroll the vector op. This
  // can occur when getSetCCResultType returns something that is different in
  // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
  if (VT.getSizeInBits() != Op1.getValueSizeInBits())
    return SDValue();

  // Bitcast the operands to be the same type as the mask.
  // This is needed when we select between FP types because
  // the mask is a vector of integers.
  Op1 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op1);
  Op2 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op2);

  SDValue NotMask = DAG.getNOT(DL, Val: Mask, VT);

  // (Op1 & Mask) | (Op2 & ~Mask): all-ones mask lanes select Op1, all-zeros
  // lanes select Op2.
  Op1 = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Op1, N2: Mask);
  Op2 = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Op2, N2: NotMask);
  SDValue Val = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Op1, N2: Op2);
  return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: Node->getValueType(ResNo: 0), Operand: Val);
}
1631
1632SDValue VectorLegalizer::ExpandVP_SELECT(SDNode *Node) {
1633 // Implement VP_SELECT in terms of VP_XOR, VP_AND and VP_OR on platforms which
1634 // do not support it natively.
1635 SDLoc DL(Node);
1636
1637 SDValue Mask = Node->getOperand(Num: 0);
1638 SDValue Op1 = Node->getOperand(Num: 1);
1639 SDValue Op2 = Node->getOperand(Num: 2);
1640 SDValue EVL = Node->getOperand(Num: 3);
1641
1642 EVT VT = Mask.getValueType();
1643
1644 // If we can't even use the basic vector operations of
1645 // VP_AND,VP_OR,VP_XOR, we will have to scalarize the op.
1646 if (TLI.getOperationAction(Op: ISD::VP_AND, VT) == TargetLowering::Expand ||
1647 TLI.getOperationAction(Op: ISD::VP_XOR, VT) == TargetLowering::Expand ||
1648 TLI.getOperationAction(Op: ISD::VP_OR, VT) == TargetLowering::Expand)
1649 return SDValue();
1650
1651 // This operation also isn't safe when the operands aren't also booleans.
1652 if (Op1.getValueType().getVectorElementType() != MVT::i1)
1653 return SDValue();
1654
1655 SDValue Ones = DAG.getAllOnesConstant(DL, VT);
1656 SDValue NotMask = DAG.getNode(Opcode: ISD::VP_XOR, DL, VT, N1: Mask, N2: Ones, N3: Ones, N4: EVL);
1657
1658 Op1 = DAG.getNode(Opcode: ISD::VP_AND, DL, VT, N1: Op1, N2: Mask, N3: Ones, N4: EVL);
1659 Op2 = DAG.getNode(Opcode: ISD::VP_AND, DL, VT, N1: Op2, N2: NotMask, N3: Ones, N4: EVL);
1660 return DAG.getNode(Opcode: ISD::VP_OR, DL, VT, N1: Op1, N2: Op2, N3: Ones, N4: EVL);
1661}
1662
SDValue VectorLegalizer::ExpandVP_MERGE(SDNode *Node) {
  // Implement VP_MERGE in terms of VSELECT. Construct a mask where vector
  // indices less than the EVL/pivot are true. Combine that with the original
  // mask for a full-length mask. Use a full-length VSELECT to select between
  // the true and false values.
  // Returns SDValue() when the pieces can't be built efficiently, so the
  // caller falls back to unrolling.
  SDLoc DL(Node);

  SDValue Mask = Node->getOperand(Num: 0);
  SDValue Op1 = Node->getOperand(Num: 1);
  SDValue Op2 = Node->getOperand(Num: 2);
  SDValue EVL = Node->getOperand(Num: 3);

  EVT MaskVT = Mask.getValueType();
  bool IsFixedLen = MaskVT.isFixedLengthVector();

  // Integer vector with the same element count as the mask, holding EVL-typed
  // lane indices for the index < EVL comparison.
  EVT EVLVecVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: EVL.getValueType(),
                                  EC: MaskVT.getVectorElementCount());

  // If we can't construct the EVL mask efficiently, it's better to unroll.
  if ((IsFixedLen &&
       !TLI.isOperationLegalOrCustom(Op: ISD::BUILD_VECTOR, VT: EVLVecVT)) ||
      (!IsFixedLen &&
       (!TLI.isOperationLegalOrCustom(Op: ISD::STEP_VECTOR, VT: EVLVecVT) ||
        !TLI.isOperationLegalOrCustom(Op: ISD::SPLAT_VECTOR, VT: EVLVecVT))))
    return SDValue();

  // If using a SETCC would result in a different type than the mask type,
  // unroll.
  if (TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(),
                             VT: EVLVecVT) != MaskVT)
    return SDValue();

  // EVLMask[i] = (i < EVL), computed as step_vector u< splat(EVL).
  SDValue StepVec = DAG.getStepVector(DL, ResVT: EVLVecVT);
  SDValue SplatEVL = DAG.getSplat(VT: EVLVecVT, DL, Op: EVL);
  SDValue EVLMask =
      DAG.getSetCC(DL, VT: MaskVT, LHS: StepVec, RHS: SplatEVL, Cond: ISD::CondCode::SETULT);

  // Lanes at or beyond EVL take the false value regardless of the mask.
  SDValue FullMask = DAG.getNode(Opcode: ISD::AND, DL, VT: MaskVT, N1: Mask, N2: EVLMask);
  return DAG.getSelect(DL, VT: Node->getValueType(ResNo: 0), Cond: FullMask, LHS: Op1, RHS: Op2);
}
1703
1704SDValue VectorLegalizer::ExpandVP_REM(SDNode *Node) {
1705 // Implement VP_SREM/UREM in terms of VP_SDIV/VP_UDIV, VP_MUL, VP_SUB.
1706 EVT VT = Node->getValueType(ResNo: 0);
1707
1708 unsigned DivOpc = Node->getOpcode() == ISD::VP_SREM ? ISD::VP_SDIV : ISD::VP_UDIV;
1709
1710 if (!TLI.isOperationLegalOrCustom(Op: DivOpc, VT) ||
1711 !TLI.isOperationLegalOrCustom(Op: ISD::VP_MUL, VT) ||
1712 !TLI.isOperationLegalOrCustom(Op: ISD::VP_SUB, VT))
1713 return SDValue();
1714
1715 SDLoc DL(Node);
1716
1717 SDValue Dividend = Node->getOperand(Num: 0);
1718 SDValue Divisor = Node->getOperand(Num: 1);
1719 SDValue Mask = Node->getOperand(Num: 2);
1720 SDValue EVL = Node->getOperand(Num: 3);
1721
1722 // X % Y -> X-X/Y*Y
1723 SDValue Div = DAG.getNode(Opcode: DivOpc, DL, VT, N1: Dividend, N2: Divisor, N3: Mask, N4: EVL);
1724 SDValue Mul = DAG.getNode(Opcode: ISD::VP_MUL, DL, VT, N1: Divisor, N2: Div, N3: Mask, N4: EVL);
1725 return DAG.getNode(Opcode: ISD::VP_SUB, DL, VT, N1: Dividend, N2: Mul, N3: Mask, N4: EVL);
1726}
1727
1728SDValue VectorLegalizer::ExpandVP_FNEG(SDNode *Node) {
1729 EVT VT = Node->getValueType(ResNo: 0);
1730 EVT IntVT = VT.changeVectorElementTypeToInteger();
1731
1732 if (!TLI.isOperationLegalOrCustom(Op: ISD::VP_XOR, VT: IntVT))
1733 return SDValue();
1734
1735 SDValue Mask = Node->getOperand(Num: 1);
1736 SDValue EVL = Node->getOperand(Num: 2);
1737
1738 SDLoc DL(Node);
1739 SDValue Cast = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 0));
1740 SDValue SignMask = DAG.getConstant(
1741 Val: APInt::getSignMask(BitWidth: IntVT.getScalarSizeInBits()), DL, VT: IntVT);
1742 SDValue Xor = DAG.getNode(Opcode: ISD::VP_XOR, DL, VT: IntVT, N1: Cast, N2: SignMask, N3: Mask, N4: EVL);
1743 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Xor);
1744}
1745
1746SDValue VectorLegalizer::ExpandVP_FABS(SDNode *Node) {
1747 EVT VT = Node->getValueType(ResNo: 0);
1748 EVT IntVT = VT.changeVectorElementTypeToInteger();
1749
1750 if (!TLI.isOperationLegalOrCustom(Op: ISD::VP_AND, VT: IntVT))
1751 return SDValue();
1752
1753 SDValue Mask = Node->getOperand(Num: 1);
1754 SDValue EVL = Node->getOperand(Num: 2);
1755
1756 SDLoc DL(Node);
1757 SDValue Cast = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 0));
1758 SDValue ClearSignMask = DAG.getConstant(
1759 Val: APInt::getSignedMaxValue(numBits: IntVT.getScalarSizeInBits()), DL, VT: IntVT);
1760 SDValue ClearSign =
1761 DAG.getNode(Opcode: ISD::VP_AND, DL, VT: IntVT, N1: Cast, N2: ClearSignMask, N3: Mask, N4: EVL);
1762 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: ClearSign);
1763}
1764
1765SDValue VectorLegalizer::ExpandVP_FCOPYSIGN(SDNode *Node) {
1766 EVT VT = Node->getValueType(ResNo: 0);
1767
1768 if (VT != Node->getOperand(Num: 1).getValueType())
1769 return SDValue();
1770
1771 EVT IntVT = VT.changeVectorElementTypeToInteger();
1772 if (!TLI.isOperationLegalOrCustom(Op: ISD::VP_AND, VT: IntVT) ||
1773 !TLI.isOperationLegalOrCustom(Op: ISD::VP_XOR, VT: IntVT))
1774 return SDValue();
1775
1776 SDValue Mask = Node->getOperand(Num: 2);
1777 SDValue EVL = Node->getOperand(Num: 3);
1778
1779 SDLoc DL(Node);
1780 SDValue Mag = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 0));
1781 SDValue Sign = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 1));
1782
1783 SDValue SignMask = DAG.getConstant(
1784 Val: APInt::getSignMask(BitWidth: IntVT.getScalarSizeInBits()), DL, VT: IntVT);
1785 SDValue SignBit =
1786 DAG.getNode(Opcode: ISD::VP_AND, DL, VT: IntVT, N1: Sign, N2: SignMask, N3: Mask, N4: EVL);
1787
1788 SDValue ClearSignMask = DAG.getConstant(
1789 Val: APInt::getSignedMaxValue(numBits: IntVT.getScalarSizeInBits()), DL, VT: IntVT);
1790 SDValue ClearedSign =
1791 DAG.getNode(Opcode: ISD::VP_AND, DL, VT: IntVT, N1: Mag, N2: ClearSignMask, N3: Mask, N4: EVL);
1792
1793 SDValue CopiedSign = DAG.getNode(Opcode: ISD::VP_OR, DL, VT: IntVT, N1: ClearedSign, N2: SignBit,
1794 N3: Mask, N4: EVL, Flags: SDNodeFlags::Disjoint);
1795
1796 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: CopiedSign);
1797}
1798
1799void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node,
1800 SmallVectorImpl<SDValue> &Results) {
1801 // Attempt to expand using TargetLowering.
1802 SDValue Result, Chain;
1803 if (TLI.expandFP_TO_UINT(N: Node, Result, Chain, DAG)) {
1804 Results.push_back(Elt: Result);
1805 if (Node->isStrictFPOpcode())
1806 Results.push_back(Elt: Chain);
1807 return;
1808 }
1809
1810 // Otherwise go ahead and unroll.
1811 if (Node->isStrictFPOpcode()) {
1812 UnrollStrictFPOp(Node, Results);
1813 return;
1814 }
1815
1816 Results.push_back(Elt: DAG.UnrollVectorOp(N: Node));
1817}
1818
// Expand a vector (STRICT_)UINT_TO_FP when no direct lowering exists.
// Strategy, in order of preference:
//   1. The generic TargetLowering expansion.
//   2. Unroll when the SINT_TO_FP/SRL building blocks are unavailable.
//   3. When FMUL is unavailable for the destination type (e.g. f16), convert
//      through a wider float type and round down to the destination.
//   4. Otherwise split each element into two half-words, convert both halves
//      with the signed conversion, scale the high half back up, and add.
void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node,
                                          SmallVectorImpl<SDValue> &Results) {
  bool IsStrict = Node->isStrictFPOpcode();
  // Strict nodes carry the chain as operand 0, shifting the source operand.
  unsigned OpNo = IsStrict ? 1 : 0;
  SDValue Src = Node->getOperand(Num: OpNo);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(ResNo: 0);
  SDLoc DL(Node);

  // Attempt to expand using TargetLowering.
  SDValue Result;
  SDValue Chain;
  if (TLI.expandUINT_TO_FP(N: Node, Result, Chain, DAG)) {
    Results.push_back(Elt: Result);
    // Strict FP nodes also produce an output chain.
    if (IsStrict)
      Results.push_back(Elt: Chain);
    return;
  }

  // Make sure that the SINT_TO_FP and SRL instructions are available.
  if (((!IsStrict && TLI.getOperationAction(Op: ISD::SINT_TO_FP, VT: SrcVT) ==
                         TargetLowering::Expand) ||
       (IsStrict && TLI.getOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: SrcVT) ==
                        TargetLowering::Expand)) ||
      TLI.getOperationAction(Op: ISD::SRL, VT: SrcVT) == TargetLowering::Expand) {
    // Can't build the half-word expansion below; scalarize instead.
    if (IsStrict) {
      UnrollStrictFPOp(Node, Results);
      return;
    }

    Results.push_back(Elt: DAG.UnrollVectorOp(N: Node));
    return;
  }

  unsigned BW = SrcVT.getScalarSizeInBits();
  assert((BW == 64 || BW == 32) &&
         "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");

  // If STRICT_/FMUL is not supported by the target (in case of f16) replace the
  // UINT_TO_FP with a larger float and round to the smaller type
  if ((!IsStrict && !TLI.isOperationLegalOrCustom(Op: ISD::FMUL, VT: DstVT)) ||
      (IsStrict && !TLI.isOperationLegalOrCustom(Op: ISD::STRICT_FMUL, VT: DstVT))) {
    // Pick an intermediate float type matching the source width.
    EVT FPVT = BW == 32 ? MVT::f32 : MVT::f64;
    SDValue UIToFP;
    SDValue Result;
    SDValue TargetZero = DAG.getIntPtrConstant(Val: 0, DL, /*isTarget=*/true);
    EVT FloatVecVT = SrcVT.changeVectorElementType(EltVT: FPVT);
    if (IsStrict) {
      UIToFP = DAG.getNode(Opcode: ISD::STRICT_UINT_TO_FP, DL, ResultTys: {FloatVecVT, MVT::Other},
                           Ops: {Node->getOperand(Num: 0), Src});
      Result = DAG.getNode(Opcode: ISD::STRICT_FP_ROUND, DL, ResultTys: {DstVT, MVT::Other},
                           Ops: {Node->getOperand(Num: 0), UIToFP, TargetZero});
      Results.push_back(Elt: Result);
      Results.push_back(Elt: Result.getValue(R: 1));
    } else {
      UIToFP = DAG.getNode(Opcode: ISD::UINT_TO_FP, DL, VT: FloatVecVT, Operand: Src);
      Result = DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT: DstVT, N1: UIToFP, N2: TargetZero);
      Results.push_back(Elt: Result);
    }

    return;
  }

  // Shift amount used to extract the high half of each element.
  SDValue HalfWord = DAG.getConstant(Val: BW / 2, DL, VT: SrcVT);

  // Constants to clear the upper part of the word.
  // Notice that we can also use SHL+SHR, but using a constant is slightly
  // faster on x86.
  uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
  SDValue HalfWordMask = DAG.getConstant(Val: HWMask, DL, VT: SrcVT);

  // Two to the power of half-word-size.
  SDValue TWOHW = DAG.getConstantFP(Val: 1ULL << (BW / 2), DL, VT: DstVT);

  // Clear upper part of LO, lower HI
  SDValue HI = DAG.getNode(Opcode: ISD::SRL, DL, VT: SrcVT, N1: Src, N2: HalfWord);
  SDValue LO = DAG.getNode(Opcode: ISD::AND, DL, VT: SrcVT, N1: Src, N2: HalfWordMask);

  if (IsStrict) {
    // Convert hi and lo to floats
    // Convert the hi part back to the upper values
    // TODO: Can any fast-math-flags be set on these nodes?
    SDValue fHI = DAG.getNode(Opcode: ISD::STRICT_SINT_TO_FP, DL, ResultTys: {DstVT, MVT::Other},
                              Ops: {Node->getOperand(Num: 0), HI});
    fHI = DAG.getNode(Opcode: ISD::STRICT_FMUL, DL, ResultTys: {DstVT, MVT::Other},
                      Ops: {fHI.getValue(R: 1), fHI, TWOHW});
    SDValue fLO = DAG.getNode(Opcode: ISD::STRICT_SINT_TO_FP, DL, ResultTys: {DstVT, MVT::Other},
                              Ops: {Node->getOperand(Num: 0), LO});

    // Merge the two conversion chains before the final add.
    SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, N1: fHI.getValue(R: 1),
                             N2: fLO.getValue(R: 1));

    // Add the two halves
    SDValue Result =
        DAG.getNode(Opcode: ISD::STRICT_FADD, DL, ResultTys: {DstVT, MVT::Other}, Ops: {TF, fHI, fLO});

    Results.push_back(Elt: Result);
    Results.push_back(Elt: Result.getValue(R: 1));
    return;
  }

  // Convert hi and lo to floats
  // Convert the hi part back to the upper values
  // TODO: Can any fast-math-flags be set on these nodes?
  SDValue fHI = DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT: DstVT, Operand: HI);
  fHI = DAG.getNode(Opcode: ISD::FMUL, DL, VT: DstVT, N1: fHI, N2: TWOHW);
  SDValue fLO = DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT: DstVT, Operand: LO);

  // Add the two halves
  Results.push_back(Elt: DAG.getNode(Opcode: ISD::FADD, DL, VT: DstVT, N1: fHI, N2: fLO));
}
1930
1931SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
1932 EVT VT = Node->getValueType(ResNo: 0);
1933 EVT IntVT = VT.changeVectorElementTypeToInteger();
1934
1935 if (!TLI.isOperationLegalOrCustom(Op: ISD::XOR, VT: IntVT))
1936 return SDValue();
1937
1938 // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
1939 if (!TLI.isOperationLegalOrCustomOrPromote(Op: ISD::FSUB, VT) &&
1940 !VT.isScalableVector())
1941 return SDValue();
1942
1943 SDLoc DL(Node);
1944 SDValue Cast = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 0));
1945 SDValue SignMask = DAG.getConstant(
1946 Val: APInt::getSignMask(BitWidth: IntVT.getScalarSizeInBits()), DL, VT: IntVT);
1947 SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL, VT: IntVT, N1: Cast, N2: SignMask);
1948 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Xor);
1949}
1950
1951SDValue VectorLegalizer::ExpandFABS(SDNode *Node) {
1952 EVT VT = Node->getValueType(ResNo: 0);
1953 EVT IntVT = VT.changeVectorElementTypeToInteger();
1954
1955 if (!TLI.isOperationLegalOrCustom(Op: ISD::AND, VT: IntVT))
1956 return SDValue();
1957
1958 // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
1959 if (!TLI.isOperationLegalOrCustomOrPromote(Op: ISD::FSUB, VT) &&
1960 !VT.isScalableVector())
1961 return SDValue();
1962
1963 SDLoc DL(Node);
1964 SDValue Cast = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 0));
1965 SDValue ClearSignMask = DAG.getConstant(
1966 Val: APInt::getSignedMaxValue(numBits: IntVT.getScalarSizeInBits()), DL, VT: IntVT);
1967 SDValue ClearedSign = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: Cast, N2: ClearSignMask);
1968 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: ClearedSign);
1969}
1970
1971SDValue VectorLegalizer::ExpandFCOPYSIGN(SDNode *Node) {
1972 EVT VT = Node->getValueType(ResNo: 0);
1973 EVT IntVT = VT.changeVectorElementTypeToInteger();
1974
1975 if (VT != Node->getOperand(Num: 1).getValueType() ||
1976 !TLI.isOperationLegalOrCustom(Op: ISD::AND, VT: IntVT) ||
1977 !TLI.isOperationLegalOrCustom(Op: ISD::OR, VT: IntVT))
1978 return SDValue();
1979
1980 // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
1981 if (!TLI.isOperationLegalOrCustomOrPromote(Op: ISD::FSUB, VT) &&
1982 !VT.isScalableVector())
1983 return SDValue();
1984
1985 SDLoc DL(Node);
1986 SDValue Mag = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 0));
1987 SDValue Sign = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 1));
1988
1989 SDValue SignMask = DAG.getConstant(
1990 Val: APInt::getSignMask(BitWidth: IntVT.getScalarSizeInBits()), DL, VT: IntVT);
1991 SDValue SignBit = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: Sign, N2: SignMask);
1992
1993 SDValue ClearSignMask = DAG.getConstant(
1994 Val: APInt::getSignedMaxValue(numBits: IntVT.getScalarSizeInBits()), DL, VT: IntVT);
1995 SDValue ClearedSign = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: Mag, N2: ClearSignMask);
1996
1997 SDValue CopiedSign = DAG.getNode(Opcode: ISD::OR, DL, VT: IntVT, N1: ClearedSign, N2: SignBit,
1998 Flags: SDNodeFlags::Disjoint);
1999
2000 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: CopiedSign);
2001}
2002
2003void VectorLegalizer::ExpandFSUB(SDNode *Node,
2004 SmallVectorImpl<SDValue> &Results) {
2005 // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal,
2006 // we can defer this to operation legalization where it will be lowered as
2007 // a+(-b).
2008 EVT VT = Node->getValueType(ResNo: 0);
2009 if (TLI.isOperationLegalOrCustom(Op: ISD::FNEG, VT) &&
2010 TLI.isOperationLegalOrCustom(Op: ISD::FADD, VT))
2011 return; // Defer to LegalizeDAG
2012
2013 if (SDValue Expanded = TLI.expandVectorNaryOpBySplitting(Node, DAG)) {
2014 Results.push_back(Elt: Expanded);
2015 return;
2016 }
2017
2018 SDValue Tmp = DAG.UnrollVectorOp(N: Node);
2019 Results.push_back(Elt: Tmp);
2020}
2021
// Expand a vector SETCC (plain, strict, or VP flavor) whose condition code
// is not supported for the operand type. Falls back to unrolling when the
// condition code itself needs no expansion, and to SELECT_CC when
// LegalizeSetCCCondCode cannot rewrite the comparison.
void VectorLegalizer::ExpandSETCC(SDNode *Node,
                                  SmallVectorImpl<SDValue> &Results) {
  bool NeedInvert = false;
  bool IsVP = Node->getOpcode() == ISD::VP_SETCC;
  bool IsStrict = Node->getOpcode() == ISD::STRICT_FSETCC ||
                  Node->getOpcode() == ISD::STRICT_FSETCCS;
  bool IsSignaling = Node->getOpcode() == ISD::STRICT_FSETCCS;
  // Strict nodes carry the chain as operand 0; the compare operands follow.
  unsigned Offset = IsStrict ? 1 : 0;

  SDValue Chain = IsStrict ? Node->getOperand(Num: 0) : SDValue();
  SDValue LHS = Node->getOperand(Num: 0 + Offset);
  SDValue RHS = Node->getOperand(Num: 1 + Offset);
  SDValue CC = Node->getOperand(Num: 2 + Offset);

  MVT OpVT = LHS.getSimpleValueType();
  ISD::CondCode CCCode = cast<CondCodeSDNode>(Val&: CC)->get();

  // If the condition code itself does not require expansion, the problem lies
  // elsewhere: scalarize the whole comparison instead.
  if (TLI.getCondCodeAction(CC: CCCode, VT: OpVT) != TargetLowering::Expand) {
    if (IsStrict) {
      UnrollStrictFPOp(Node, Results);
      return;
    }
    Results.push_back(Elt: UnrollVSETCC(Node));
    return;
  }

  // VP nodes additionally carry a mask and an explicit vector length.
  SDValue Mask, EVL;
  if (IsVP) {
    Mask = Node->getOperand(Num: 3 + Offset);
    EVL = Node->getOperand(Num: 4 + Offset);
  }

  SDLoc dl(Node);
  // This may swap LHS/RHS, invert the condition (setting NeedInvert), or
  // fully materialize the comparison; CC is cleared when no new SETCC node is
  // needed afterwards.
  bool Legalized =
      TLI.LegalizeSetCCCondCode(DAG, VT: Node->getValueType(ResNo: 0), LHS, RHS, CC, Mask,
                                EVL, NeedInvert, dl, Chain, IsSignaling);

  if (Legalized) {
    // If we expanded the SETCC by swapping LHS and RHS, or by inverting the
    // condition code, create a new SETCC node.
    if (CC.getNode()) {
      if (IsStrict) {
        LHS = DAG.getNode(Opcode: Node->getOpcode(), DL: dl, VTList: Node->getVTList(),
                          Ops: {Chain, LHS, RHS, CC}, Flags: Node->getFlags());
        Chain = LHS.getValue(R: 1);
      } else if (IsVP) {
        LHS = DAG.getNode(Opcode: ISD::VP_SETCC, DL: dl, VT: Node->getValueType(ResNo: 0),
                          Ops: {LHS, RHS, CC, Mask, EVL}, Flags: Node->getFlags());
      } else {
        LHS = DAG.getNode(Opcode: ISD::SETCC, DL: dl, VT: Node->getValueType(ResNo: 0), N1: LHS, N2: RHS, N3: CC,
                          Flags: Node->getFlags());
      }
    }

    // If we expanded the SETCC by inverting the condition code, then wrap
    // the existing SETCC in a NOT to restore the intended condition.
    if (NeedInvert) {
      if (!IsVP)
        LHS = DAG.getLogicalNOT(DL: dl, Val: LHS, VT: LHS->getValueType(ResNo: 0));
      else
        LHS = DAG.getVPLogicalNOT(DL: dl, Val: LHS, Mask, EVL, VT: LHS->getValueType(ResNo: 0));
    }
  } else {
    assert(!IsStrict && "Don't know how to expand for strict nodes.");

    // Otherwise, SETCC for the given comparison type must be completely
    // illegal; expand it into a SELECT_CC.
    EVT VT = Node->getValueType(ResNo: 0);
    LHS =
        DAG.getNode(Opcode: ISD::SELECT_CC, DL: dl, VT, N1: LHS, N2: RHS,
                    N3: DAG.getBoolConstant(V: true, DL: dl, VT, OpVT: LHS.getValueType()),
                    N4: DAG.getBoolConstant(V: false, DL: dl, VT, OpVT: LHS.getValueType()), N5: CC);
    LHS->setFlags(Node->getFlags());
  }

  Results.push_back(Elt: LHS);
  // Strict nodes also hand back the (possibly updated) chain.
  if (IsStrict)
    Results.push_back(Elt: Chain);
}
2101
2102void VectorLegalizer::ExpandUADDSUBO(SDNode *Node,
2103 SmallVectorImpl<SDValue> &Results) {
2104 SDValue Result, Overflow;
2105 TLI.expandUADDSUBO(Node, Result, Overflow, DAG);
2106 Results.push_back(Elt: Result);
2107 Results.push_back(Elt: Overflow);
2108}
2109
2110void VectorLegalizer::ExpandSADDSUBO(SDNode *Node,
2111 SmallVectorImpl<SDValue> &Results) {
2112 SDValue Result, Overflow;
2113 TLI.expandSADDSUBO(Node, Result, Overflow, DAG);
2114 Results.push_back(Elt: Result);
2115 Results.push_back(Elt: Overflow);
2116}
2117
2118void VectorLegalizer::ExpandMULO(SDNode *Node,
2119 SmallVectorImpl<SDValue> &Results) {
2120 SDValue Result, Overflow;
2121 if (!TLI.expandMULO(Node, Result, Overflow, DAG))
2122 std::tie(args&: Result, args&: Overflow) = DAG.UnrollVectorOverflowOp(N: Node);
2123
2124 Results.push_back(Elt: Result);
2125 Results.push_back(Elt: Overflow);
2126}
2127
2128void VectorLegalizer::ExpandFixedPointDiv(SDNode *Node,
2129 SmallVectorImpl<SDValue> &Results) {
2130 SDNode *N = Node;
2131 if (SDValue Expanded = TLI.expandFixedPointDiv(Opcode: N->getOpcode(), dl: SDLoc(N),
2132 LHS: N->getOperand(Num: 0), RHS: N->getOperand(Num: 1), Scale: N->getConstantOperandVal(Num: 2), DAG))
2133 Results.push_back(Elt: Expanded);
2134}
2135
2136void VectorLegalizer::ExpandStrictFPOp(SDNode *Node,
2137 SmallVectorImpl<SDValue> &Results) {
2138 if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP) {
2139 ExpandUINT_TO_FLOAT(Node, Results);
2140 return;
2141 }
2142 if (Node->getOpcode() == ISD::STRICT_FP_TO_UINT) {
2143 ExpandFP_TO_UINT(Node, Results);
2144 return;
2145 }
2146
2147 if (Node->getOpcode() == ISD::STRICT_FSETCC ||
2148 Node->getOpcode() == ISD::STRICT_FSETCCS) {
2149 ExpandSETCC(Node, Results);
2150 return;
2151 }
2152
2153 UnrollStrictFPOp(Node, Results);
2154}
2155
2156void VectorLegalizer::ExpandREM(SDNode *Node,
2157 SmallVectorImpl<SDValue> &Results) {
2158 assert((Node->getOpcode() == ISD::SREM || Node->getOpcode() == ISD::UREM) &&
2159 "Expected REM node");
2160
2161 SDValue Result;
2162 if (!TLI.expandREM(Node, Result, DAG))
2163 Result = DAG.UnrollVectorOp(N: Node);
2164 Results.push_back(Elt: Result);
2165}
2166
// Try to expand libm nodes into vector math routine calls. Callers provide the
// LibFunc equivalent of the passed in Node, which is used to lookup mappings
// within TargetLibraryInfo. The only mappings considered are those where the
// result and all operands are the same vector type. While predicated nodes are
// not supported, we will emit calls to masked routines by passing in an all
// true mask.
//
// Returns true (and appends the call result to Results) only when a suitable
// vector variant exists and its VFABI signature matches the node.
bool VectorLegalizer::tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC,
                                           SmallVectorImpl<SDValue> &Results) {
  // Chain must be propagated but currently strict fp operations are down
  // converted to their none strict counterpart.
  assert(!Node->isStrictFPOpcode() && "Unexpected strict fp operation!");

  const char *LCName = TLI.getLibcallName(Call: LC);
  if (!LCName)
    return false;
  LLVM_DEBUG(dbgs() << "Looking for vector variant of " << LCName << "\n");

  EVT VT = Node->getValueType(ResNo: 0);
  ElementCount VL = VT.getVectorElementCount();

  // Lookup a vector function equivalent to the specified libcall. Prefer
  // unmasked variants but we will generate a mask if need be.
  const TargetLibraryInfo &TLibInfo = DAG.getLibInfo();
  const VecDesc *VD = TLibInfo.getVectorMappingInfo(F: LCName, VF: VL, Masked: false);
  if (!VD)
    VD = TLibInfo.getVectorMappingInfo(F: LCName, VF: VL, /*Masked=*/true);
  if (!VD)
    return false;

  LLVMContext *Ctx = DAG.getContext();
  Type *Ty = VT.getTypeForEVT(Context&: *Ctx);
  Type *ScalarTy = Ty->getScalarType();

  // Construct a scalar function type based on Node's operands.
  SmallVector<Type *, 8> ArgTys;
  for (unsigned i = 0; i < Node->getNumOperands(); ++i) {
    assert(Node->getOperand(i).getValueType() == VT &&
           "Expected matching vector types!");
    ArgTys.push_back(Elt: ScalarTy);
  }
  FunctionType *ScalarFTy = FunctionType::get(Result: ScalarTy, Params: ArgTys, isVarArg: false);

  // Generate call information for the vector function.
  const std::string MangledName = VD->getVectorFunctionABIVariantString();
  auto OptVFInfo = VFABI::tryDemangleForVFABI(MangledName, FTy: ScalarFTy);
  if (!OptVFInfo)
    return false;

  LLVM_DEBUG(dbgs() << "Found vector variant " << VD->getVectorFnName()
                    << "\n");

  // Sanity check just in case OptVFInfo has unexpected parameters.
  if (OptVFInfo->Shape.Parameters.size() !=
      Node->getNumOperands() + VD->isMasked())
    return false;

  // Collect vector call operands.

  SDLoc DL(Node);
  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;
  Entry.IsSExt = false;
  Entry.IsZExt = false;

  unsigned OpNum = 0;
  for (auto &VFParam : OptVFInfo->Shape.Parameters) {
    // Masked variants take an extra predicate operand; pass an all-true mask.
    if (VFParam.ParamKind == VFParamKind::GlobalPredicate) {
      EVT MaskVT = TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: *Ctx, VT);
      Entry.Node = DAG.getBoolConstant(V: true, DL, VT: MaskVT, OpVT: VT);
      Entry.Ty = MaskVT.getTypeForEVT(Context&: *Ctx);
      Args.push_back(x: Entry);
      continue;
    }

    // Only vector operands are supported.
    if (VFParam.ParamKind != VFParamKind::Vector)
      return false;

    Entry.Node = Node->getOperand(Num: OpNum++);
    Entry.Ty = Ty;
    Args.push_back(x: Entry);
  }

  // Emit a call to the vector function.
  SDValue Callee = DAG.getExternalSymbol(Sym: VD->getVectorFnName().data(),
                                         VT: TLI.getPointerTy(DL: DAG.getDataLayout()));
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CC: CallingConv::C, ResultType: Ty, Target: Callee, ArgsList: std::move(Args));

  std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
  Results.push_back(Elt: CallResult.first);
  return true;
}
2262
2263/// Try to expand the node to a vector libcall based on the result type.
2264bool VectorLegalizer::tryExpandVecMathCall(
2265 SDNode *Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
2266 RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
2267 RTLIB::Libcall Call_PPCF128, SmallVectorImpl<SDValue> &Results) {
2268 RTLIB::Libcall LC = RTLIB::getFPLibCall(
2269 VT: Node->getValueType(ResNo: 0).getVectorElementType(), Call_F32, Call_F64,
2270 Call_F80, Call_F128, Call_PPCF128);
2271
2272 if (LC == RTLIB::UNKNOWN_LIBCALL)
2273 return false;
2274
2275 return tryExpandVecMathCall(Node, LC, Results);
2276}
2277
// Scalarize a strict FP vector operation: extract each lane, perform the
// scalar strict operation with the incoming chain threaded into every lane,
// then rebuild the result vector and merge the per-lane output chains with a
// TokenFactor. Pushes the value result and the merged chain onto Results.
void VectorLegalizer::UnrollStrictFPOp(SDNode *Node,
                                       SmallVectorImpl<SDValue> &Results) {
  EVT VT = Node->getValueType(ResNo: 0);
  EVT EltVT = VT.getVectorElementType();
  unsigned NumElems = VT.getVectorNumElements();
  unsigned NumOpers = Node->getNumOperands();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Strict compares produce a setcc-typed scalar, widened back below.
  EVT TmpEltVT = EltVT;
  if (Node->getOpcode() == ISD::STRICT_FSETCC ||
      Node->getOpcode() == ISD::STRICT_FSETCCS)
    TmpEltVT = TLI.getSetCCResultType(DL: DAG.getDataLayout(),
                                      Context&: *DAG.getContext(), VT: TmpEltVT);

  EVT ValueVTs[] = {TmpEltVT, MVT::Other};
  SDValue Chain = Node->getOperand(Num: 0);
  SDLoc dl(Node);

  SmallVector<SDValue, 32> OpValues;
  SmallVector<SDValue, 32> OpChains;
  for (unsigned i = 0; i < NumElems; ++i) {
    SmallVector<SDValue, 4> Opers;
    SDValue Idx = DAG.getVectorIdxConstant(Val: i, DL: dl);

    // The Chain is the first operand.
    Opers.push_back(Elt: Chain);

    // Now process the remaining operands.
    for (unsigned j = 1; j < NumOpers; ++j) {
      SDValue Oper = Node->getOperand(Num: j);
      EVT OperVT = Oper.getValueType();

      // Extract lane i from vector operands; scalar operands pass through.
      if (OperVT.isVector())
        Oper = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl,
                           VT: OperVT.getVectorElementType(), N1: Oper, N2: Idx);

      Opers.push_back(Elt: Oper);
    }

    SDValue ScalarOp = DAG.getNode(Opcode: Node->getOpcode(), DL: dl, ResultTys: ValueVTs, Ops: Opers);
    SDValue ScalarResult = ScalarOp.getValue(R: 0);
    SDValue ScalarChain = ScalarOp.getValue(R: 1);

    // Widen the boolean compare result into an all-ones/zero element of the
    // result vector's element type.
    if (Node->getOpcode() == ISD::STRICT_FSETCC ||
        Node->getOpcode() == ISD::STRICT_FSETCCS)
      ScalarResult = DAG.getSelect(DL: dl, VT: EltVT, Cond: ScalarResult,
                                   LHS: DAG.getAllOnesConstant(DL: dl, VT: EltVT),
                                   RHS: DAG.getConstant(Val: 0, DL: dl, VT: EltVT));

    OpValues.push_back(Elt: ScalarResult);
    OpChains.push_back(Elt: ScalarChain);
  }

  SDValue Result = DAG.getBuildVector(VT, DL: dl, Ops: OpValues);
  SDValue NewChain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: OpChains);

  Results.push_back(Elt: Result);
  Results.push_back(Elt: NewChain);
}
2337
2338SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) {
2339 EVT VT = Node->getValueType(ResNo: 0);
2340 unsigned NumElems = VT.getVectorNumElements();
2341 EVT EltVT = VT.getVectorElementType();
2342 SDValue LHS = Node->getOperand(Num: 0);
2343 SDValue RHS = Node->getOperand(Num: 1);
2344 SDValue CC = Node->getOperand(Num: 2);
2345 EVT TmpEltVT = LHS.getValueType().getVectorElementType();
2346 SDLoc dl(Node);
2347 SmallVector<SDValue, 8> Ops(NumElems);
2348 for (unsigned i = 0; i < NumElems; ++i) {
2349 SDValue LHSElem = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: TmpEltVT, N1: LHS,
2350 N2: DAG.getVectorIdxConstant(Val: i, DL: dl));
2351 SDValue RHSElem = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: TmpEltVT, N1: RHS,
2352 N2: DAG.getVectorIdxConstant(Val: i, DL: dl));
2353 // FIXME: We should use i1 setcc + boolext here, but it causes regressions.
2354 Ops[i] = DAG.getNode(Opcode: ISD::SETCC, DL: dl,
2355 VT: TLI.getSetCCResultType(DL: DAG.getDataLayout(),
2356 Context&: *DAG.getContext(), VT: TmpEltVT),
2357 N1: LHSElem, N2: RHSElem, N3: CC);
2358 Ops[i] = DAG.getSelect(DL: dl, VT: EltVT, Cond: Ops[i],
2359 LHS: DAG.getBoolConstant(V: true, DL: dl, VT: EltVT, OpVT: VT),
2360 RHS: DAG.getConstant(Val: 0, DL: dl, VT: EltVT));
2361 }
2362 return DAG.getBuildVector(VT, DL: dl, Ops);
2363}
2364
/// Entry point called from the SelectionDAG legalization pipeline. Runs the
/// vector legalizer over this DAG; returns true if any node was changed.
bool SelectionDAG::LegalizeVectors() {
  return VectorLegalizer(*this).Run();
}
2368