//===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the SelectionDAG::LegalizeVectors method.
//
// The vector legalizer looks for vector operations which might need to be
// scalarized and legalizes them. This is a separate step from Legalize because
// scalarizing can introduce illegal types.  For example, suppose we have an
// ISD::SDIV of type v2i64 on x86-32.  The type is legal (for example, addition
// on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
// operation, which introduces nodes with the illegal type i64 which must be
// expanded.  Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
// the operation must be unrolled, which introduces nodes with the illegal
// type i8 which must be promoted.
//
// This does not legalize vector manipulations like ISD::BUILD_VECTOR,
// or operations that happen to take a vector which are custom-lowered;
// the legalization for such operations never produces nodes
// with illegal types, so it's okay to put off legalizing them until
// SelectionDAG::Legalize runs.
//
//===----------------------------------------------------------------------===//

29#include "llvm/ADT/DenseMap.h"
30#include "llvm/ADT/SmallVector.h"
31#include "llvm/Analysis/TargetLibraryInfo.h"
32#include "llvm/Analysis/VectorUtils.h"
33#include "llvm/CodeGen/ISDOpcodes.h"
34#include "llvm/CodeGen/SelectionDAG.h"
35#include "llvm/CodeGen/SelectionDAGNodes.h"
36#include "llvm/CodeGen/TargetLowering.h"
37#include "llvm/CodeGen/ValueTypes.h"
38#include "llvm/CodeGenTypes/MachineValueType.h"
39#include "llvm/IR/DataLayout.h"
40#include "llvm/Support/Casting.h"
41#include "llvm/Support/Compiler.h"
42#include "llvm/Support/Debug.h"
43#include "llvm/Support/ErrorHandling.h"
44#include <cassert>
45#include <cstdint>
46#include <iterator>
47#include <utility>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "legalizevectorops"
52
53namespace {
54
55class VectorLegalizer {
56 SelectionDAG& DAG;
57 const TargetLowering &TLI;
58 bool Changed = false; // Keep track of whether anything changed
59
60 /// For nodes that are of legal width, and that have more than one use, this
61 /// map indicates what regularized operand to use. This allows us to avoid
62 /// legalizing the same thing more than once.
63 SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes;
64
65 /// Adds a node to the translation cache.
66 void AddLegalizedOperand(SDValue From, SDValue To) {
67 LegalizedNodes.insert(KV: std::make_pair(x&: From, y&: To));
68 // If someone requests legalization of the new node, return itself.
69 if (From != To)
70 LegalizedNodes.insert(KV: std::make_pair(x&: To, y&: To));
71 }
72
73 /// Legalizes the given node.
74 SDValue LegalizeOp(SDValue Op);
75
76 /// Assuming the node is legal, "legalize" the results.
77 SDValue TranslateLegalizeResults(SDValue Op, SDNode *Result);
78
79 /// Make sure Results are legal and update the translation cache.
80 SDValue RecursivelyLegalizeResults(SDValue Op,
81 MutableArrayRef<SDValue> Results);
82
83 /// Wrapper to interface LowerOperation with a vector of Results.
84 /// Returns false if the target wants to use default expansion. Otherwise
85 /// returns true. If return is true and the Results are empty, then the
86 /// target wants to keep the input node as is.
87 bool LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results);
88
89 /// Implements unrolling a VSETCC.
90 SDValue UnrollVSETCC(SDNode *Node);
91
92 /// Implement expand-based legalization of vector operations.
93 ///
94 /// This is just a high-level routine to dispatch to specific code paths for
95 /// operations to legalize them.
96 void Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results);
97
98 /// Implements expansion for FP_TO_UINT; falls back to UnrollVectorOp if
99 /// FP_TO_SINT isn't legal.
100 void ExpandFP_TO_UINT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
101
102 /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
103 /// SINT_TO_FLOAT and SHR on vectors isn't legal.
104 void ExpandUINT_TO_FLOAT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
105
106 /// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
107 SDValue ExpandSEXTINREG(SDNode *Node);
108
109 /// Implement expansion for ANY_EXTEND_VECTOR_INREG.
110 ///
111 /// Shuffles the low lanes of the operand into place and bitcasts to the proper
112 /// type. The contents of the bits in the extended part of each element are
113 /// undef.
114 SDValue ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node);
115
116 /// Implement expansion for SIGN_EXTEND_VECTOR_INREG.
117 ///
118 /// Shuffles the low lanes of the operand into place, bitcasts to the proper
119 /// type, then shifts left and arithmetic shifts right to introduce a sign
120 /// extension.
121 SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node);
122
123 /// Implement expansion for ZERO_EXTEND_VECTOR_INREG.
124 ///
125 /// Shuffles the low lanes of the operand into place and blends zeros into
126 /// the remaining lanes, finally bitcasting to the proper type.
127 SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node);
128
129 /// Expand bswap of vectors into a shuffle if legal.
130 SDValue ExpandBSWAP(SDNode *Node);
131
132 /// Implement vselect in terms of XOR, AND, OR when blend is not
133 /// supported by the target.
134 SDValue ExpandVSELECT(SDNode *Node);
135 SDValue ExpandVP_SELECT(SDNode *Node);
136 SDValue ExpandVP_MERGE(SDNode *Node);
137 SDValue ExpandVP_REM(SDNode *Node);
138 SDValue ExpandSELECT(SDNode *Node);
139 std::pair<SDValue, SDValue> ExpandLoad(SDNode *N);
140 SDValue ExpandStore(SDNode *N);
141 SDValue ExpandFNEG(SDNode *Node);
142 void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results);
143 void ExpandSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
144 void ExpandBITREVERSE(SDNode *Node, SmallVectorImpl<SDValue> &Results);
145 void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
146 void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
147 void ExpandMULO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
148 void ExpandFixedPointDiv(SDNode *Node, SmallVectorImpl<SDValue> &Results);
149 void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
150 void ExpandREM(SDNode *Node, SmallVectorImpl<SDValue> &Results);
151
152 bool tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC,
153 SmallVectorImpl<SDValue> &Results);
154 bool tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall Call_F32,
155 RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
156 RTLIB::Libcall Call_F128,
157 RTLIB::Libcall Call_PPCF128,
158 SmallVectorImpl<SDValue> &Results);
159
160 void UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
161
162 /// Implements vector promotion.
163 ///
164 /// This is essentially just bitcasting the operands to a different type and
165 /// bitcasting the result back to the original type.
166 void Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results);
167
168 /// Implements [SU]INT_TO_FP vector promotion.
169 ///
170 /// This is a [zs]ext of the input operand to a larger integer type.
171 void PromoteINT_TO_FP(SDNode *Node, SmallVectorImpl<SDValue> &Results);
172
173 /// Implements FP_TO_[SU]INT vector promotion of the result type.
174 ///
175 /// It is promoted to a larger integer type. The result is then
176 /// truncated back to the original type.
177 void PromoteFP_TO_INT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
178
179 /// Implements vector setcc operation promotion.
180 ///
181 /// All vector operands are promoted to a vector type with larger element
182 /// type.
183 void PromoteSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
184
185 void PromoteSTRICT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
186
187public:
188 VectorLegalizer(SelectionDAG& dag) :
189 DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
190
191 /// Begin legalizer the vector operations in the DAG.
192 bool Run();
193};
194
195} // end anonymous namespace
196
197bool VectorLegalizer::Run() {
198 // Before we start legalizing vector nodes, check if there are any vectors.
199 bool HasVectors = false;
200 for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
201 E = std::prev(x: DAG.allnodes_end()); I != std::next(x: E); ++I) {
202 // Check if the values of the nodes contain vectors. We don't need to check
203 // the operands because we are going to check their values at some point.
204 HasVectors = llvm::any_of(Range: I->values(), P: [](EVT T) { return T.isVector(); });
205
206 // If we found a vector node we can start the legalization.
207 if (HasVectors)
208 break;
209 }
210
211 // If this basic block has no vectors then no need to legalize vectors.
212 if (!HasVectors)
213 return false;
214
215 // The legalize process is inherently a bottom-up recursive process (users
216 // legalize their uses before themselves). Given infinite stack space, we
217 // could just start legalizing on the root and traverse the whole graph. In
218 // practice however, this causes us to run out of stack space on large basic
219 // blocks. To avoid this problem, compute an ordering of the nodes where each
220 // node is only legalized after all of its operands are legalized.
221 DAG.AssignTopologicalOrder();
222 for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
223 E = std::prev(x: DAG.allnodes_end()); I != std::next(x: E); ++I)
224 LegalizeOp(Op: SDValue(&*I, 0));
225
226 // Finally, it's possible the root changed. Get the new root.
227 SDValue OldRoot = DAG.getRoot();
228 assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
229 DAG.setRoot(LegalizedNodes[OldRoot]);
230
231 LegalizedNodes.clear();
232
233 // Remove dead nodes now.
234 DAG.RemoveDeadNodes();
235
236 return Changed;
237}
238
239SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDNode *Result) {
240 assert(Op->getNumValues() == Result->getNumValues() &&
241 "Unexpected number of results");
242 // Generic legalization: just pass the operand through.
243 for (unsigned i = 0, e = Op->getNumValues(); i != e; ++i)
244 AddLegalizedOperand(From: Op.getValue(R: i), To: SDValue(Result, i));
245 return SDValue(Result, Op.getResNo());
246}
247
248SDValue
249VectorLegalizer::RecursivelyLegalizeResults(SDValue Op,
250 MutableArrayRef<SDValue> Results) {
251 assert(Results.size() == Op->getNumValues() &&
252 "Unexpected number of results");
253 // Make sure that the generated code is itself legal.
254 for (unsigned i = 0, e = Results.size(); i != e; ++i) {
255 Results[i] = LegalizeOp(Op: Results[i]);
256 AddLegalizedOperand(From: Op.getValue(R: i), To: Results[i]);
257 }
258
259 return Results[Op.getResNo()];
260}
261
262SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
263 // Note that LegalizeOp may be reentered even from single-use nodes, which
264 // means that we always must cache transformed nodes.
265 DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Val: Op);
266 if (I != LegalizedNodes.end()) return I->second;
267
268 // Legalize the operands
269 SmallVector<SDValue, 8> Ops;
270 for (const SDValue &Oper : Op->op_values())
271 Ops.push_back(Elt: LegalizeOp(Op: Oper));
272
273 SDNode *Node = DAG.UpdateNodeOperands(N: Op.getNode(), Ops);
274
275 bool HasVectorValueOrOp =
276 llvm::any_of(Range: Node->values(), P: [](EVT T) { return T.isVector(); }) ||
277 llvm::any_of(Range: Node->op_values(),
278 P: [](SDValue O) { return O.getValueType().isVector(); });
279 if (!HasVectorValueOrOp)
280 return TranslateLegalizeResults(Op, Result: Node);
281
282 TargetLowering::LegalizeAction Action = TargetLowering::Legal;
283 EVT ValVT;
284 switch (Op.getOpcode()) {
285 default:
286 return TranslateLegalizeResults(Op, Result: Node);
287 case ISD::LOAD: {
288 LoadSDNode *LD = cast<LoadSDNode>(Val: Node);
289 ISD::LoadExtType ExtType = LD->getExtensionType();
290 EVT LoadedVT = LD->getMemoryVT();
291 if (LoadedVT.isVector() && ExtType != ISD::NON_EXTLOAD)
292 Action = TLI.getLoadExtAction(ExtType, ValVT: LD->getValueType(ResNo: 0), MemVT: LoadedVT);
293 break;
294 }
295 case ISD::STORE: {
296 StoreSDNode *ST = cast<StoreSDNode>(Val: Node);
297 EVT StVT = ST->getMemoryVT();
298 MVT ValVT = ST->getValue().getSimpleValueType();
299 if (StVT.isVector() && ST->isTruncatingStore())
300 Action = TLI.getTruncStoreAction(ValVT, MemVT: StVT);
301 break;
302 }
303 case ISD::MERGE_VALUES:
304 Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: Node->getValueType(ResNo: 0));
305 // This operation lies about being legal: when it claims to be legal,
306 // it should actually be expanded.
307 if (Action == TargetLowering::Legal)
308 Action = TargetLowering::Expand;
309 break;
310#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
311 case ISD::STRICT_##DAGN:
312#include "llvm/IR/ConstrainedOps.def"
313 ValVT = Node->getValueType(ResNo: 0);
314 if (Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
315 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
316 ValVT = Node->getOperand(Num: 1).getValueType();
317 if (Op.getOpcode() == ISD::STRICT_FSETCC ||
318 Op.getOpcode() == ISD::STRICT_FSETCCS) {
319 MVT OpVT = Node->getOperand(Num: 1).getSimpleValueType();
320 ISD::CondCode CCCode = cast<CondCodeSDNode>(Val: Node->getOperand(Num: 3))->get();
321 Action = TLI.getCondCodeAction(CC: CCCode, VT: OpVT);
322 if (Action == TargetLowering::Legal)
323 Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: OpVT);
324 } else {
325 Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: ValVT);
326 }
327 // If we're asked to expand a strict vector floating-point operation,
328 // by default we're going to simply unroll it. That is usually the
329 // best approach, except in the case where the resulting strict (scalar)
330 // operations would themselves use the fallback mutation to non-strict.
331 // In that specific case, just do the fallback on the vector op.
332 if (Action == TargetLowering::Expand && !TLI.isStrictFPEnabled() &&
333 TLI.getStrictFPOperationAction(Op: Node->getOpcode(), VT: ValVT) ==
334 TargetLowering::Legal) {
335 EVT EltVT = ValVT.getVectorElementType();
336 if (TLI.getOperationAction(Op: Node->getOpcode(), VT: EltVT)
337 == TargetLowering::Expand &&
338 TLI.getStrictFPOperationAction(Op: Node->getOpcode(), VT: EltVT)
339 == TargetLowering::Legal)
340 Action = TargetLowering::Legal;
341 }
342 break;
343 case ISD::ADD:
344 case ISD::SUB:
345 case ISD::MUL:
346 case ISD::MULHS:
347 case ISD::MULHU:
348 case ISD::SDIV:
349 case ISD::UDIV:
350 case ISD::SREM:
351 case ISD::UREM:
352 case ISD::SDIVREM:
353 case ISD::UDIVREM:
354 case ISD::FADD:
355 case ISD::FSUB:
356 case ISD::FMUL:
357 case ISD::FDIV:
358 case ISD::FREM:
359 case ISD::AND:
360 case ISD::OR:
361 case ISD::XOR:
362 case ISD::SHL:
363 case ISD::SRA:
364 case ISD::SRL:
365 case ISD::FSHL:
366 case ISD::FSHR:
367 case ISD::ROTL:
368 case ISD::ROTR:
369 case ISD::ABS:
370 case ISD::ABDS:
371 case ISD::ABDU:
372 case ISD::AVGCEILS:
373 case ISD::AVGCEILU:
374 case ISD::AVGFLOORS:
375 case ISD::AVGFLOORU:
376 case ISD::BSWAP:
377 case ISD::BITREVERSE:
378 case ISD::CTLZ:
379 case ISD::CTTZ:
380 case ISD::CTLZ_ZERO_UNDEF:
381 case ISD::CTTZ_ZERO_UNDEF:
382 case ISD::CTPOP:
383 case ISD::SELECT:
384 case ISD::VSELECT:
385 case ISD::SELECT_CC:
386 case ISD::ZERO_EXTEND:
387 case ISD::ANY_EXTEND:
388 case ISD::TRUNCATE:
389 case ISD::SIGN_EXTEND:
390 case ISD::FP_TO_SINT:
391 case ISD::FP_TO_UINT:
392 case ISD::FNEG:
393 case ISD::FABS:
394 case ISD::FMINNUM:
395 case ISD::FMAXNUM:
396 case ISD::FMINNUM_IEEE:
397 case ISD::FMAXNUM_IEEE:
398 case ISD::FMINIMUM:
399 case ISD::FMAXIMUM:
400 case ISD::FCOPYSIGN:
401 case ISD::FSQRT:
402 case ISD::FSIN:
403 case ISD::FCOS:
404 case ISD::FTAN:
405 case ISD::FASIN:
406 case ISD::FACOS:
407 case ISD::FATAN:
408 case ISD::FSINH:
409 case ISD::FCOSH:
410 case ISD::FTANH:
411 case ISD::FLDEXP:
412 case ISD::FPOWI:
413 case ISD::FPOW:
414 case ISD::FLOG:
415 case ISD::FLOG2:
416 case ISD::FLOG10:
417 case ISD::FEXP:
418 case ISD::FEXP2:
419 case ISD::FEXP10:
420 case ISD::FCEIL:
421 case ISD::FTRUNC:
422 case ISD::FRINT:
423 case ISD::FNEARBYINT:
424 case ISD::FROUND:
425 case ISD::FROUNDEVEN:
426 case ISD::FFLOOR:
427 case ISD::FP_ROUND:
428 case ISD::FP_EXTEND:
429 case ISD::FPTRUNC_ROUND:
430 case ISD::FMA:
431 case ISD::SIGN_EXTEND_INREG:
432 case ISD::ANY_EXTEND_VECTOR_INREG:
433 case ISD::SIGN_EXTEND_VECTOR_INREG:
434 case ISD::ZERO_EXTEND_VECTOR_INREG:
435 case ISD::SMIN:
436 case ISD::SMAX:
437 case ISD::UMIN:
438 case ISD::UMAX:
439 case ISD::SMUL_LOHI:
440 case ISD::UMUL_LOHI:
441 case ISD::SADDO:
442 case ISD::UADDO:
443 case ISD::SSUBO:
444 case ISD::USUBO:
445 case ISD::SMULO:
446 case ISD::UMULO:
447 case ISD::FCANONICALIZE:
448 case ISD::FFREXP:
449 case ISD::SADDSAT:
450 case ISD::UADDSAT:
451 case ISD::SSUBSAT:
452 case ISD::USUBSAT:
453 case ISD::SSHLSAT:
454 case ISD::USHLSAT:
455 case ISD::FP_TO_SINT_SAT:
456 case ISD::FP_TO_UINT_SAT:
457 case ISD::MGATHER:
458 case ISD::VECTOR_COMPRESS:
459 case ISD::SCMP:
460 case ISD::UCMP:
461 Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: Node->getValueType(ResNo: 0));
462 break;
463 case ISD::SMULFIX:
464 case ISD::SMULFIXSAT:
465 case ISD::UMULFIX:
466 case ISD::UMULFIXSAT:
467 case ISD::SDIVFIX:
468 case ISD::SDIVFIXSAT:
469 case ISD::UDIVFIX:
470 case ISD::UDIVFIXSAT: {
471 unsigned Scale = Node->getConstantOperandVal(Num: 2);
472 Action = TLI.getFixedPointOperationAction(Op: Node->getOpcode(),
473 VT: Node->getValueType(ResNo: 0), Scale);
474 break;
475 }
476 case ISD::LRINT:
477 case ISD::LLRINT:
478 case ISD::SINT_TO_FP:
479 case ISD::UINT_TO_FP:
480 case ISD::VECREDUCE_ADD:
481 case ISD::VECREDUCE_MUL:
482 case ISD::VECREDUCE_AND:
483 case ISD::VECREDUCE_OR:
484 case ISD::VECREDUCE_XOR:
485 case ISD::VECREDUCE_SMAX:
486 case ISD::VECREDUCE_SMIN:
487 case ISD::VECREDUCE_UMAX:
488 case ISD::VECREDUCE_UMIN:
489 case ISD::VECREDUCE_FADD:
490 case ISD::VECREDUCE_FMUL:
491 case ISD::VECREDUCE_FMAX:
492 case ISD::VECREDUCE_FMIN:
493 case ISD::VECREDUCE_FMAXIMUM:
494 case ISD::VECREDUCE_FMINIMUM:
495 Action = TLI.getOperationAction(Op: Node->getOpcode(),
496 VT: Node->getOperand(Num: 0).getValueType());
497 break;
498 case ISD::VECREDUCE_SEQ_FADD:
499 case ISD::VECREDUCE_SEQ_FMUL:
500 Action = TLI.getOperationAction(Op: Node->getOpcode(),
501 VT: Node->getOperand(Num: 1).getValueType());
502 break;
503 case ISD::SETCC: {
504 MVT OpVT = Node->getOperand(Num: 0).getSimpleValueType();
505 ISD::CondCode CCCode = cast<CondCodeSDNode>(Val: Node->getOperand(Num: 2))->get();
506 Action = TLI.getCondCodeAction(CC: CCCode, VT: OpVT);
507 if (Action == TargetLowering::Legal)
508 Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: OpVT);
509 break;
510 }
511
512#define BEGIN_REGISTER_VP_SDNODE(VPID, LEGALPOS, ...) \
513 case ISD::VPID: { \
514 EVT LegalizeVT = LEGALPOS < 0 ? Node->getValueType(-(1 + LEGALPOS)) \
515 : Node->getOperand(LEGALPOS).getValueType(); \
516 if (ISD::VPID == ISD::VP_SETCC) { \
517 ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get(); \
518 Action = TLI.getCondCodeAction(CCCode, LegalizeVT.getSimpleVT()); \
519 if (Action != TargetLowering::Legal) \
520 break; \
521 } \
522 /* Defer non-vector results to LegalizeDAG. */ \
523 if (!Node->getValueType(0).isVector() && \
524 Node->getValueType(0) != MVT::Other) { \
525 Action = TargetLowering::Legal; \
526 break; \
527 } \
528 Action = TLI.getOperationAction(Node->getOpcode(), LegalizeVT); \
529 } break;
530#include "llvm/IR/VPIntrinsics.def"
531 }
532
533 LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
534
535 SmallVector<SDValue, 8> ResultVals;
536 switch (Action) {
537 default: llvm_unreachable("This action is not supported yet!");
538 case TargetLowering::Promote:
539 assert((Op.getOpcode() != ISD::LOAD && Op.getOpcode() != ISD::STORE) &&
540 "This action is not supported yet!");
541 LLVM_DEBUG(dbgs() << "Promoting\n");
542 Promote(Node, Results&: ResultVals);
543 assert(!ResultVals.empty() && "No results for promotion?");
544 break;
545 case TargetLowering::Legal:
546 LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
547 break;
548 case TargetLowering::Custom:
549 LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
550 if (LowerOperationWrapper(N: Node, Results&: ResultVals))
551 break;
552 LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
553 [[fallthrough]];
554 case TargetLowering::Expand:
555 LLVM_DEBUG(dbgs() << "Expanding\n");
556 Expand(Node, Results&: ResultVals);
557 break;
558 }
559
560 if (ResultVals.empty())
561 return TranslateLegalizeResults(Op, Result: Node);
562
563 Changed = true;
564 return RecursivelyLegalizeResults(Op, Results: ResultVals);
565}
566
567// FIXME: This is very similar to TargetLowering::LowerOperationWrapper. Can we
568// merge them somehow?
569bool VectorLegalizer::LowerOperationWrapper(SDNode *Node,
570 SmallVectorImpl<SDValue> &Results) {
571 SDValue Res = TLI.LowerOperation(Op: SDValue(Node, 0), DAG);
572
573 if (!Res.getNode())
574 return false;
575
576 if (Res == SDValue(Node, 0))
577 return true;
578
579 // If the original node has one result, take the return value from
580 // LowerOperation as is. It might not be result number 0.
581 if (Node->getNumValues() == 1) {
582 Results.push_back(Elt: Res);
583 return true;
584 }
585
586 // If the original node has multiple results, then the return node should
587 // have the same number of results.
588 assert((Node->getNumValues() == Res->getNumValues()) &&
589 "Lowering returned the wrong number of results!");
590
591 // Places new result values base on N result number.
592 for (unsigned I = 0, E = Node->getNumValues(); I != E; ++I)
593 Results.push_back(Elt: Res.getValue(R: I));
594
595 return true;
596}
597
598void VectorLegalizer::PromoteSETCC(SDNode *Node,
599 SmallVectorImpl<SDValue> &Results) {
600 MVT VecVT = Node->getOperand(Num: 0).getSimpleValueType();
601 MVT NewVecVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT: VecVT);
602
603 unsigned ExtOp = VecVT.isFloatingPoint() ? ISD::FP_EXTEND : ISD::ANY_EXTEND;
604
605 SDLoc DL(Node);
606 SmallVector<SDValue, 5> Operands(Node->getNumOperands());
607
608 Operands[0] = DAG.getNode(Opcode: ExtOp, DL, VT: NewVecVT, Operand: Node->getOperand(Num: 0));
609 Operands[1] = DAG.getNode(Opcode: ExtOp, DL, VT: NewVecVT, Operand: Node->getOperand(Num: 1));
610 Operands[2] = Node->getOperand(Num: 2);
611
612 if (Node->getOpcode() == ISD::VP_SETCC) {
613 Operands[3] = Node->getOperand(Num: 3); // mask
614 Operands[4] = Node->getOperand(Num: 4); // evl
615 }
616
617 SDValue Res = DAG.getNode(Opcode: Node->getOpcode(), DL, VT: Node->getSimpleValueType(ResNo: 0),
618 Ops: Operands, Flags: Node->getFlags());
619
620 Results.push_back(Elt: Res);
621}
622
623void VectorLegalizer::PromoteSTRICT(SDNode *Node,
624 SmallVectorImpl<SDValue> &Results) {
625 MVT VecVT = Node->getOperand(Num: 1).getSimpleValueType();
626 MVT NewVecVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT: VecVT);
627
628 assert(VecVT.isFloatingPoint());
629
630 SDLoc DL(Node);
631 SmallVector<SDValue, 5> Operands(Node->getNumOperands());
632 SmallVector<SDValue, 2> Chains;
633
634 for (unsigned j = 1; j != Node->getNumOperands(); ++j)
635 if (Node->getOperand(Num: j).getValueType().isVector() &&
636 !(ISD::isVPOpcode(Opcode: Node->getOpcode()) &&
637 ISD::getVPMaskIdx(Opcode: Node->getOpcode()) == j)) // Skip mask operand.
638 {
639 // promote the vector operand.
640 SDValue Ext =
641 DAG.getNode(Opcode: ISD::STRICT_FP_EXTEND, DL, ResultTys: {NewVecVT, MVT::Other},
642 Ops: {Node->getOperand(Num: 0), Node->getOperand(Num: j)});
643 Operands[j] = Ext.getValue(R: 0);
644 Chains.push_back(Elt: Ext.getValue(R: 1));
645 } else
646 Operands[j] = Node->getOperand(Num: j); // Skip no vector operand.
647
648 SDVTList VTs = DAG.getVTList(VT1: NewVecVT, VT2: Node->getValueType(ResNo: 1));
649
650 Operands[0] = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: Chains);
651
652 SDValue Res =
653 DAG.getNode(Opcode: Node->getOpcode(), DL, VTList: VTs, Ops: Operands, Flags: Node->getFlags());
654
655 SDValue Round =
656 DAG.getNode(Opcode: ISD::STRICT_FP_ROUND, DL, ResultTys: {VecVT, MVT::Other},
657 Ops: {Res.getValue(R: 1), Res.getValue(R: 0),
658 DAG.getIntPtrConstant(Val: 0, DL, /*isTarget=*/true)});
659
660 Results.push_back(Elt: Round.getValue(R: 0));
661 Results.push_back(Elt: Round.getValue(R: 1));
662}
663
664void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
665 // For a few operations there is a specific concept for promotion based on
666 // the operand's type.
667 switch (Node->getOpcode()) {
668 case ISD::SINT_TO_FP:
669 case ISD::UINT_TO_FP:
670 case ISD::STRICT_SINT_TO_FP:
671 case ISD::STRICT_UINT_TO_FP:
672 // "Promote" the operation by extending the operand.
673 PromoteINT_TO_FP(Node, Results);
674 return;
675 case ISD::FP_TO_UINT:
676 case ISD::FP_TO_SINT:
677 case ISD::STRICT_FP_TO_UINT:
678 case ISD::STRICT_FP_TO_SINT:
679 // Promote the operation by extending the operand.
680 PromoteFP_TO_INT(Node, Results);
681 return;
682 case ISD::VP_SETCC:
683 case ISD::SETCC:
684 // Promote the operation by extending the operand.
685 PromoteSETCC(Node, Results);
686 return;
687 case ISD::STRICT_FADD:
688 case ISD::STRICT_FSUB:
689 case ISD::STRICT_FMUL:
690 case ISD::STRICT_FDIV:
691 case ISD::STRICT_FSQRT:
692 case ISD::STRICT_FMA:
693 PromoteSTRICT(Node, Results);
694 return;
695 case ISD::FP_ROUND:
696 case ISD::FP_EXTEND:
697 // These operations are used to do promotion so they can't be promoted
698 // themselves.
699 llvm_unreachable("Don't know how to promote this operation!");
700 }
701
702 // There are currently two cases of vector promotion:
703 // 1) Bitcasting a vector of integers to a different type to a vector of the
704 // same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64.
705 // 2) Extending a vector of floats to a vector of the same number of larger
706 // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32.
707 assert(Node->getNumValues() == 1 &&
708 "Can't promote a vector with multiple results!");
709 MVT VT = Node->getSimpleValueType(ResNo: 0);
710 MVT NVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT);
711 SDLoc dl(Node);
712 SmallVector<SDValue, 4> Operands(Node->getNumOperands());
713
714 for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
715 // Do not promote the mask operand of a VP OP.
716 bool SkipPromote = ISD::isVPOpcode(Opcode: Node->getOpcode()) &&
717 ISD::getVPMaskIdx(Opcode: Node->getOpcode()) == j;
718 if (Node->getOperand(Num: j).getValueType().isVector() && !SkipPromote)
719 if (Node->getOperand(Num: j)
720 .getValueType()
721 .getVectorElementType()
722 .isFloatingPoint() &&
723 NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())
724 Operands[j] = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: NVT, Operand: Node->getOperand(Num: j));
725 else
726 Operands[j] = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: NVT, Operand: Node->getOperand(Num: j));
727 else
728 Operands[j] = Node->getOperand(Num: j);
729 }
730
731 SDValue Res =
732 DAG.getNode(Opcode: Node->getOpcode(), DL: dl, VT: NVT, Ops: Operands, Flags: Node->getFlags());
733
734 if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
735 (VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
736 NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
737 Res = DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT, N1: Res,
738 N2: DAG.getIntPtrConstant(Val: 0, DL: dl, /*isTarget=*/true));
739 else
740 Res = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT, Operand: Res);
741
742 Results.push_back(Elt: Res);
743}
744
745void VectorLegalizer::PromoteINT_TO_FP(SDNode *Node,
746 SmallVectorImpl<SDValue> &Results) {
747 // INT_TO_FP operations may require the input operand be promoted even
748 // when the type is otherwise legal.
749 bool IsStrict = Node->isStrictFPOpcode();
750 MVT VT = Node->getOperand(Num: IsStrict ? 1 : 0).getSimpleValueType();
751 MVT NVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT);
752 assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
753 "Vectors have different number of elements!");
754
755 SDLoc dl(Node);
756 SmallVector<SDValue, 4> Operands(Node->getNumOperands());
757
758 unsigned Opc = (Node->getOpcode() == ISD::UINT_TO_FP ||
759 Node->getOpcode() == ISD::STRICT_UINT_TO_FP)
760 ? ISD::ZERO_EXTEND
761 : ISD::SIGN_EXTEND;
762 for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
763 if (Node->getOperand(Num: j).getValueType().isVector())
764 Operands[j] = DAG.getNode(Opcode: Opc, DL: dl, VT: NVT, Operand: Node->getOperand(Num: j));
765 else
766 Operands[j] = Node->getOperand(Num: j);
767 }
768
769 if (IsStrict) {
770 SDValue Res = DAG.getNode(Opcode: Node->getOpcode(), DL: dl,
771 ResultTys: {Node->getValueType(ResNo: 0), MVT::Other}, Ops: Operands);
772 Results.push_back(Elt: Res);
773 Results.push_back(Elt: Res.getValue(R: 1));
774 return;
775 }
776
777 SDValue Res =
778 DAG.getNode(Opcode: Node->getOpcode(), DL: dl, VT: Node->getValueType(ResNo: 0), Ops: Operands);
779 Results.push_back(Elt: Res);
780}
781
782// For FP_TO_INT we promote the result type to a vector type with wider
783// elements and then truncate the result. This is different from the default
784// PromoteVector which uses bitcast to promote thus assumning that the
785// promoted vector type has the same overall size.
786void VectorLegalizer::PromoteFP_TO_INT(SDNode *Node,
787 SmallVectorImpl<SDValue> &Results) {
788 MVT VT = Node->getSimpleValueType(ResNo: 0);
789 MVT NVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT);
790 bool IsStrict = Node->isStrictFPOpcode();
791 assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
792 "Vectors have different number of elements!");
793
794 unsigned NewOpc = Node->getOpcode();
795 // Change FP_TO_UINT to FP_TO_SINT if possible.
796 // TODO: Should we only do this if FP_TO_UINT itself isn't legal?
797 if (NewOpc == ISD::FP_TO_UINT &&
798 TLI.isOperationLegalOrCustom(Op: ISD::FP_TO_SINT, VT: NVT))
799 NewOpc = ISD::FP_TO_SINT;
800
801 if (NewOpc == ISD::STRICT_FP_TO_UINT &&
802 TLI.isOperationLegalOrCustom(Op: ISD::STRICT_FP_TO_SINT, VT: NVT))
803 NewOpc = ISD::STRICT_FP_TO_SINT;
804
805 SDLoc dl(Node);
806 SDValue Promoted, Chain;
807 if (IsStrict) {
808 Promoted = DAG.getNode(Opcode: NewOpc, DL: dl, ResultTys: {NVT, MVT::Other},
809 Ops: {Node->getOperand(Num: 0), Node->getOperand(Num: 1)});
810 Chain = Promoted.getValue(R: 1);
811 } else
812 Promoted = DAG.getNode(Opcode: NewOpc, DL: dl, VT: NVT, Operand: Node->getOperand(Num: 0));
813
814 // Assert that the converted value fits in the original type. If it doesn't
815 // (eg: because the value being converted is too big), then the result of the
816 // original operation was undefined anyway, so the assert is still correct.
817 if (Node->getOpcode() == ISD::FP_TO_UINT ||
818 Node->getOpcode() == ISD::STRICT_FP_TO_UINT)
819 NewOpc = ISD::AssertZext;
820 else
821 NewOpc = ISD::AssertSext;
822
823 Promoted = DAG.getNode(Opcode: NewOpc, DL: dl, VT: NVT, N1: Promoted,
824 N2: DAG.getValueType(VT.getScalarType()));
825 Promoted = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Promoted);
826 Results.push_back(Elt: Promoted);
827 if (IsStrict)
828 Results.push_back(Elt: Chain);
829}
830
831std::pair<SDValue, SDValue> VectorLegalizer::ExpandLoad(SDNode *N) {
832 LoadSDNode *LD = cast<LoadSDNode>(Val: N);
833 return TLI.scalarizeVectorLoad(LD, DAG);
834}
835
836SDValue VectorLegalizer::ExpandStore(SDNode *N) {
837 StoreSDNode *ST = cast<StoreSDNode>(Val: N);
838 SDValue TF = TLI.scalarizeVectorStore(ST, DAG);
839 return TF;
840}
841
842void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
843 switch (Node->getOpcode()) {
844 case ISD::LOAD: {
845 std::pair<SDValue, SDValue> Tmp = ExpandLoad(N: Node);
846 Results.push_back(Elt: Tmp.first);
847 Results.push_back(Elt: Tmp.second);
848 return;
849 }
850 case ISD::STORE:
851 Results.push_back(Elt: ExpandStore(N: Node));
852 return;
853 case ISD::MERGE_VALUES:
854 for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
855 Results.push_back(Elt: Node->getOperand(Num: i));
856 return;
857 case ISD::SIGN_EXTEND_INREG:
858 Results.push_back(Elt: ExpandSEXTINREG(Node));
859 return;
860 case ISD::ANY_EXTEND_VECTOR_INREG:
861 Results.push_back(Elt: ExpandANY_EXTEND_VECTOR_INREG(Node));
862 return;
863 case ISD::SIGN_EXTEND_VECTOR_INREG:
864 Results.push_back(Elt: ExpandSIGN_EXTEND_VECTOR_INREG(Node));
865 return;
866 case ISD::ZERO_EXTEND_VECTOR_INREG:
867 Results.push_back(Elt: ExpandZERO_EXTEND_VECTOR_INREG(Node));
868 return;
869 case ISD::BSWAP:
870 Results.push_back(Elt: ExpandBSWAP(Node));
871 return;
872 case ISD::VP_BSWAP:
873 Results.push_back(Elt: TLI.expandVPBSWAP(N: Node, DAG));
874 return;
875 case ISD::VSELECT:
876 Results.push_back(Elt: ExpandVSELECT(Node));
877 return;
878 case ISD::VP_SELECT:
879 Results.push_back(Elt: ExpandVP_SELECT(Node));
880 return;
881 case ISD::VP_SREM:
882 case ISD::VP_UREM:
883 if (SDValue Expanded = ExpandVP_REM(Node)) {
884 Results.push_back(Elt: Expanded);
885 return;
886 }
887 break;
888 case ISD::SELECT:
889 Results.push_back(Elt: ExpandSELECT(Node));
890 return;
891 case ISD::SELECT_CC: {
892 if (Node->getValueType(ResNo: 0).isScalableVector()) {
893 EVT CondVT = TLI.getSetCCResultType(
894 DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: Node->getValueType(ResNo: 0));
895 SDValue SetCC =
896 DAG.getNode(Opcode: ISD::SETCC, DL: SDLoc(Node), VT: CondVT, N1: Node->getOperand(Num: 0),
897 N2: Node->getOperand(Num: 1), N3: Node->getOperand(Num: 4));
898 Results.push_back(Elt: DAG.getSelect(DL: SDLoc(Node), VT: Node->getValueType(ResNo: 0), Cond: SetCC,
899 LHS: Node->getOperand(Num: 2),
900 RHS: Node->getOperand(Num: 3)));
901 return;
902 }
903 break;
904 }
905 case ISD::FP_TO_UINT:
906 ExpandFP_TO_UINT(Node, Results);
907 return;
908 case ISD::UINT_TO_FP:
909 ExpandUINT_TO_FLOAT(Node, Results);
910 return;
911 case ISD::FNEG:
912 Results.push_back(Elt: ExpandFNEG(Node));
913 return;
914 case ISD::FSUB:
915 ExpandFSUB(Node, Results);
916 return;
917 case ISD::SETCC:
918 case ISD::VP_SETCC:
919 ExpandSETCC(Node, Results);
920 return;
921 case ISD::ABS:
922 if (SDValue Expanded = TLI.expandABS(N: Node, DAG)) {
923 Results.push_back(Elt: Expanded);
924 return;
925 }
926 break;
927 case ISD::ABDS:
928 case ISD::ABDU:
929 if (SDValue Expanded = TLI.expandABD(N: Node, DAG)) {
930 Results.push_back(Elt: Expanded);
931 return;
932 }
933 break;
934 case ISD::AVGCEILS:
935 case ISD::AVGCEILU:
936 case ISD::AVGFLOORS:
937 case ISD::AVGFLOORU:
938 if (SDValue Expanded = TLI.expandAVG(N: Node, DAG)) {
939 Results.push_back(Elt: Expanded);
940 return;
941 }
942 break;
943 case ISD::BITREVERSE:
944 ExpandBITREVERSE(Node, Results);
945 return;
946 case ISD::VP_BITREVERSE:
947 if (SDValue Expanded = TLI.expandVPBITREVERSE(N: Node, DAG)) {
948 Results.push_back(Elt: Expanded);
949 return;
950 }
951 break;
952 case ISD::CTPOP:
953 if (SDValue Expanded = TLI.expandCTPOP(N: Node, DAG)) {
954 Results.push_back(Elt: Expanded);
955 return;
956 }
957 break;
958 case ISD::VP_CTPOP:
959 if (SDValue Expanded = TLI.expandVPCTPOP(N: Node, DAG)) {
960 Results.push_back(Elt: Expanded);
961 return;
962 }
963 break;
964 case ISD::CTLZ:
965 case ISD::CTLZ_ZERO_UNDEF:
966 if (SDValue Expanded = TLI.expandCTLZ(N: Node, DAG)) {
967 Results.push_back(Elt: Expanded);
968 return;
969 }
970 break;
971 case ISD::VP_CTLZ:
972 case ISD::VP_CTLZ_ZERO_UNDEF:
973 if (SDValue Expanded = TLI.expandVPCTLZ(N: Node, DAG)) {
974 Results.push_back(Elt: Expanded);
975 return;
976 }
977 break;
978 case ISD::CTTZ:
979 case ISD::CTTZ_ZERO_UNDEF:
980 if (SDValue Expanded = TLI.expandCTTZ(N: Node, DAG)) {
981 Results.push_back(Elt: Expanded);
982 return;
983 }
984 break;
985 case ISD::VP_CTTZ:
986 case ISD::VP_CTTZ_ZERO_UNDEF:
987 if (SDValue Expanded = TLI.expandVPCTTZ(N: Node, DAG)) {
988 Results.push_back(Elt: Expanded);
989 return;
990 }
991 break;
992 case ISD::FSHL:
993 case ISD::VP_FSHL:
994 case ISD::FSHR:
995 case ISD::VP_FSHR:
996 if (SDValue Expanded = TLI.expandFunnelShift(N: Node, DAG)) {
997 Results.push_back(Elt: Expanded);
998 return;
999 }
1000 break;
1001 case ISD::ROTL:
1002 case ISD::ROTR:
1003 if (SDValue Expanded = TLI.expandROT(N: Node, AllowVectorOps: false /*AllowVectorOps*/, DAG)) {
1004 Results.push_back(Elt: Expanded);
1005 return;
1006 }
1007 break;
1008 case ISD::FMINNUM:
1009 case ISD::FMAXNUM:
1010 if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(N: Node, DAG)) {
1011 Results.push_back(Elt: Expanded);
1012 return;
1013 }
1014 break;
1015 case ISD::FMINIMUM:
1016 case ISD::FMAXIMUM:
1017 Results.push_back(Elt: TLI.expandFMINIMUM_FMAXIMUM(N: Node, DAG));
1018 return;
1019 case ISD::SMIN:
1020 case ISD::SMAX:
1021 case ISD::UMIN:
1022 case ISD::UMAX:
1023 if (SDValue Expanded = TLI.expandIntMINMAX(Node, DAG)) {
1024 Results.push_back(Elt: Expanded);
1025 return;
1026 }
1027 break;
1028 case ISD::UADDO:
1029 case ISD::USUBO:
1030 ExpandUADDSUBO(Node, Results);
1031 return;
1032 case ISD::SADDO:
1033 case ISD::SSUBO:
1034 ExpandSADDSUBO(Node, Results);
1035 return;
1036 case ISD::UMULO:
1037 case ISD::SMULO:
1038 ExpandMULO(Node, Results);
1039 return;
1040 case ISD::USUBSAT:
1041 case ISD::SSUBSAT:
1042 case ISD::UADDSAT:
1043 case ISD::SADDSAT:
1044 if (SDValue Expanded = TLI.expandAddSubSat(Node, DAG)) {
1045 Results.push_back(Elt: Expanded);
1046 return;
1047 }
1048 break;
1049 case ISD::USHLSAT:
1050 case ISD::SSHLSAT:
1051 if (SDValue Expanded = TLI.expandShlSat(Node, DAG)) {
1052 Results.push_back(Elt: Expanded);
1053 return;
1054 }
1055 break;
1056 case ISD::FP_TO_SINT_SAT:
1057 case ISD::FP_TO_UINT_SAT:
1058 // Expand the fpsosisat if it is scalable to prevent it from unrolling below.
1059 if (Node->getValueType(ResNo: 0).isScalableVector()) {
1060 if (SDValue Expanded = TLI.expandFP_TO_INT_SAT(N: Node, DAG)) {
1061 Results.push_back(Elt: Expanded);
1062 return;
1063 }
1064 }
1065 break;
1066 case ISD::SMULFIX:
1067 case ISD::UMULFIX:
1068 if (SDValue Expanded = TLI.expandFixedPointMul(Node, DAG)) {
1069 Results.push_back(Elt: Expanded);
1070 return;
1071 }
1072 break;
1073 case ISD::SMULFIXSAT:
1074 case ISD::UMULFIXSAT:
1075 // FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet, not sure exactly
1076 // why. Maybe it results in worse codegen compared to the unroll for some
1077 // targets? This should probably be investigated. And if we still prefer to
1078 // unroll an explanation could be helpful.
1079 break;
1080 case ISD::SDIVFIX:
1081 case ISD::UDIVFIX:
1082 ExpandFixedPointDiv(Node, Results);
1083 return;
1084 case ISD::SDIVFIXSAT:
1085 case ISD::UDIVFIXSAT:
1086 break;
1087#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
1088 case ISD::STRICT_##DAGN:
1089#include "llvm/IR/ConstrainedOps.def"
1090 ExpandStrictFPOp(Node, Results);
1091 return;
1092 case ISD::VECREDUCE_ADD:
1093 case ISD::VECREDUCE_MUL:
1094 case ISD::VECREDUCE_AND:
1095 case ISD::VECREDUCE_OR:
1096 case ISD::VECREDUCE_XOR:
1097 case ISD::VECREDUCE_SMAX:
1098 case ISD::VECREDUCE_SMIN:
1099 case ISD::VECREDUCE_UMAX:
1100 case ISD::VECREDUCE_UMIN:
1101 case ISD::VECREDUCE_FADD:
1102 case ISD::VECREDUCE_FMUL:
1103 case ISD::VECREDUCE_FMAX:
1104 case ISD::VECREDUCE_FMIN:
1105 case ISD::VECREDUCE_FMAXIMUM:
1106 case ISD::VECREDUCE_FMINIMUM:
1107 Results.push_back(Elt: TLI.expandVecReduce(Node, DAG));
1108 return;
1109 case ISD::VECREDUCE_SEQ_FADD:
1110 case ISD::VECREDUCE_SEQ_FMUL:
1111 Results.push_back(Elt: TLI.expandVecReduceSeq(Node, DAG));
1112 return;
1113 case ISD::SREM:
1114 case ISD::UREM:
1115 ExpandREM(Node, Results);
1116 return;
1117 case ISD::VP_MERGE:
1118 Results.push_back(Elt: ExpandVP_MERGE(Node));
1119 return;
1120 case ISD::FREM:
1121 if (tryExpandVecMathCall(Node, Call_F32: RTLIB::REM_F32, Call_F64: RTLIB::REM_F64,
1122 Call_F80: RTLIB::REM_F80, Call_F128: RTLIB::REM_F128,
1123 Call_PPCF128: RTLIB::REM_PPCF128, Results))
1124 return;
1125
1126 break;
1127 case ISD::VECTOR_COMPRESS:
1128 Results.push_back(Elt: TLI.expandVECTOR_COMPRESS(Node, DAG));
1129 return;
1130 }
1131
1132 SDValue Unrolled = DAG.UnrollVectorOp(N: Node);
1133 if (Node->getNumValues() == 1) {
1134 Results.push_back(Elt: Unrolled);
1135 } else {
1136 assert(Node->getNumValues() == Unrolled->getNumValues() &&
1137 "VectorLegalizer Expand returned wrong number of results!");
1138 for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I)
1139 Results.push_back(Elt: Unrolled.getValue(R: I));
1140 }
1141}
1142
1143SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
1144 // Lower a select instruction where the condition is a scalar and the
1145 // operands are vectors. Lower this select to VSELECT and implement it
1146 // using XOR AND OR. The selector bit is broadcasted.
1147 EVT VT = Node->getValueType(ResNo: 0);
1148 SDLoc DL(Node);
1149
1150 SDValue Mask = Node->getOperand(Num: 0);
1151 SDValue Op1 = Node->getOperand(Num: 1);
1152 SDValue Op2 = Node->getOperand(Num: 2);
1153
1154 assert(VT.isVector() && !Mask.getValueType().isVector()
1155 && Op1.getValueType() == Op2.getValueType() && "Invalid type");
1156
1157 // If we can't even use the basic vector operations of
1158 // AND,OR,XOR, we will have to scalarize the op.
1159 // Notice that the operation may be 'promoted' which means that it is
1160 // 'bitcasted' to another type which is handled.
1161 // Also, we need to be able to construct a splat vector using either
1162 // BUILD_VECTOR or SPLAT_VECTOR.
1163 // FIXME: Should we also permit fixed-length SPLAT_VECTOR as a fallback to
1164 // BUILD_VECTOR?
1165 if (TLI.getOperationAction(Op: ISD::AND, VT) == TargetLowering::Expand ||
1166 TLI.getOperationAction(Op: ISD::XOR, VT) == TargetLowering::Expand ||
1167 TLI.getOperationAction(Op: ISD::OR, VT) == TargetLowering::Expand ||
1168 TLI.getOperationAction(Op: VT.isFixedLengthVector() ? ISD::BUILD_VECTOR
1169 : ISD::SPLAT_VECTOR,
1170 VT) == TargetLowering::Expand)
1171 return DAG.UnrollVectorOp(N: Node);
1172
1173 // Generate a mask operand.
1174 EVT MaskTy = VT.changeVectorElementTypeToInteger();
1175
1176 // What is the size of each element in the vector mask.
1177 EVT BitTy = MaskTy.getScalarType();
1178
1179 Mask = DAG.getSelect(DL, VT: BitTy, Cond: Mask, LHS: DAG.getAllOnesConstant(DL, VT: BitTy),
1180 RHS: DAG.getConstant(Val: 0, DL, VT: BitTy));
1181
1182 // Broadcast the mask so that the entire vector is all one or all zero.
1183 Mask = DAG.getSplat(VT: MaskTy, DL, Op: Mask);
1184
1185 // Bitcast the operands to be the same type as the mask.
1186 // This is needed when we select between FP types because
1187 // the mask is a vector of integers.
1188 Op1 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MaskTy, Operand: Op1);
1189 Op2 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MaskTy, Operand: Op2);
1190
1191 SDValue NotMask = DAG.getNOT(DL, Val: Mask, VT: MaskTy);
1192
1193 Op1 = DAG.getNode(Opcode: ISD::AND, DL, VT: MaskTy, N1: Op1, N2: Mask);
1194 Op2 = DAG.getNode(Opcode: ISD::AND, DL, VT: MaskTy, N1: Op2, N2: NotMask);
1195 SDValue Val = DAG.getNode(Opcode: ISD::OR, DL, VT: MaskTy, N1: Op1, N2: Op2);
1196 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: Node->getValueType(ResNo: 0), Operand: Val);
1197}
1198
1199SDValue VectorLegalizer::ExpandSEXTINREG(SDNode *Node) {
1200 EVT VT = Node->getValueType(ResNo: 0);
1201
1202 // Make sure that the SRA and SHL instructions are available.
1203 if (TLI.getOperationAction(Op: ISD::SRA, VT) == TargetLowering::Expand ||
1204 TLI.getOperationAction(Op: ISD::SHL, VT) == TargetLowering::Expand)
1205 return DAG.UnrollVectorOp(N: Node);
1206
1207 SDLoc DL(Node);
1208 EVT OrigTy = cast<VTSDNode>(Val: Node->getOperand(Num: 1))->getVT();
1209
1210 unsigned BW = VT.getScalarSizeInBits();
1211 unsigned OrigBW = OrigTy.getScalarSizeInBits();
1212 SDValue ShiftSz = DAG.getConstant(Val: BW - OrigBW, DL, VT);
1213
1214 SDValue Op = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Node->getOperand(Num: 0), N2: ShiftSz);
1215 return DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: Op, N2: ShiftSz);
1216}
1217
1218// Generically expand a vector anyext in register to a shuffle of the relevant
1219// lanes into the appropriate locations, with other lanes left undef.
1220SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node) {
1221 SDLoc DL(Node);
1222 EVT VT = Node->getValueType(ResNo: 0);
1223 int NumElements = VT.getVectorNumElements();
1224 SDValue Src = Node->getOperand(Num: 0);
1225 EVT SrcVT = Src.getValueType();
1226 int NumSrcElements = SrcVT.getVectorNumElements();
1227
1228 // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
1229 // into a larger vector type.
1230 if (SrcVT.bitsLE(VT)) {
1231 assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
1232 "ANY_EXTEND_VECTOR_INREG vector size mismatch");
1233 NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
1234 SrcVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: SrcVT.getScalarType(),
1235 NumElements: NumSrcElements);
1236 Src = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: SrcVT, N1: DAG.getUNDEF(VT: SrcVT),
1237 N2: Src, N3: DAG.getVectorIdxConstant(Val: 0, DL));
1238 }
1239
1240 // Build a base mask of undef shuffles.
1241 SmallVector<int, 16> ShuffleMask;
1242 ShuffleMask.resize(N: NumSrcElements, NV: -1);
1243
1244 // Place the extended lanes into the correct locations.
1245 int ExtLaneScale = NumSrcElements / NumElements;
1246 int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
1247 for (int i = 0; i < NumElements; ++i)
1248 ShuffleMask[i * ExtLaneScale + EndianOffset] = i;
1249
1250 return DAG.getNode(
1251 Opcode: ISD::BITCAST, DL, VT,
1252 Operand: DAG.getVectorShuffle(VT: SrcVT, dl: DL, N1: Src, N2: DAG.getUNDEF(VT: SrcVT), Mask: ShuffleMask));
1253}
1254
1255SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node) {
1256 SDLoc DL(Node);
1257 EVT VT = Node->getValueType(ResNo: 0);
1258 SDValue Src = Node->getOperand(Num: 0);
1259 EVT SrcVT = Src.getValueType();
1260
1261 // First build an any-extend node which can be legalized above when we
1262 // recurse through it.
1263 SDValue Op = DAG.getNode(Opcode: ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Operand: Src);
1264
1265 // Now we need sign extend. Do this by shifting the elements. Even if these
1266 // aren't legal operations, they have a better chance of being legalized
1267 // without full scalarization than the sign extension does.
1268 unsigned EltWidth = VT.getScalarSizeInBits();
1269 unsigned SrcEltWidth = SrcVT.getScalarSizeInBits();
1270 SDValue ShiftAmount = DAG.getConstant(Val: EltWidth - SrcEltWidth, DL, VT);
1271 return DAG.getNode(Opcode: ISD::SRA, DL, VT,
1272 N1: DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Op, N2: ShiftAmount),
1273 N2: ShiftAmount);
1274}
1275
1276// Generically expand a vector zext in register to a shuffle of the relevant
1277// lanes into the appropriate locations, a blend of zero into the high bits,
1278// and a bitcast to the wider element type.
1279SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node) {
1280 SDLoc DL(Node);
1281 EVT VT = Node->getValueType(ResNo: 0);
1282 int NumElements = VT.getVectorNumElements();
1283 SDValue Src = Node->getOperand(Num: 0);
1284 EVT SrcVT = Src.getValueType();
1285 int NumSrcElements = SrcVT.getVectorNumElements();
1286
1287 // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
1288 // into a larger vector type.
1289 if (SrcVT.bitsLE(VT)) {
1290 assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
1291 "ZERO_EXTEND_VECTOR_INREG vector size mismatch");
1292 NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
1293 SrcVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: SrcVT.getScalarType(),
1294 NumElements: NumSrcElements);
1295 Src = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: SrcVT, N1: DAG.getUNDEF(VT: SrcVT),
1296 N2: Src, N3: DAG.getVectorIdxConstant(Val: 0, DL));
1297 }
1298
1299 // Build up a zero vector to blend into this one.
1300 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: SrcVT);
1301
1302 // Shuffle the incoming lanes into the correct position, and pull all other
1303 // lanes from the zero vector.
1304 auto ShuffleMask = llvm::to_vector<16>(Range: llvm::seq<int>(Begin: 0, End: NumSrcElements));
1305
1306 int ExtLaneScale = NumSrcElements / NumElements;
1307 int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
1308 for (int i = 0; i < NumElements; ++i)
1309 ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i;
1310
1311 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT,
1312 Operand: DAG.getVectorShuffle(VT: SrcVT, dl: DL, N1: Zero, N2: Src, Mask: ShuffleMask));
1313}
1314
1315static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
1316 int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
1317 for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
1318 for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
1319 ShuffleMask.push_back(Elt: (I * ScalarSizeInBytes) + J);
1320}
1321
1322SDValue VectorLegalizer::ExpandBSWAP(SDNode *Node) {
1323 EVT VT = Node->getValueType(ResNo: 0);
1324
1325 // Scalable vectors can't use shuffle expansion.
1326 if (VT.isScalableVector())
1327 return TLI.expandBSWAP(N: Node, DAG);
1328
1329 // Generate a byte wise shuffle mask for the BSWAP.
1330 SmallVector<int, 16> ShuffleMask;
1331 createBSWAPShuffleMask(VT, ShuffleMask);
1332 EVT ByteVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: MVT::i8, NumElements: ShuffleMask.size());
1333
1334 // Only emit a shuffle if the mask is legal.
1335 if (TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) {
1336 SDLoc DL(Node);
1337 SDValue Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ByteVT, Operand: Node->getOperand(Num: 0));
1338 Op = DAG.getVectorShuffle(VT: ByteVT, dl: DL, N1: Op, N2: DAG.getUNDEF(VT: ByteVT), Mask: ShuffleMask);
1339 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op);
1340 }
1341
1342 // If we have the appropriate vector bit operations, it is better to use them
1343 // than unrolling and expanding each component.
1344 if (TLI.isOperationLegalOrCustom(Op: ISD::SHL, VT) &&
1345 TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT) &&
1346 TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT) &&
1347 TLI.isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT))
1348 return TLI.expandBSWAP(N: Node, DAG);
1349
1350 // Otherwise unroll.
1351 return DAG.UnrollVectorOp(N: Node);
1352}
1353
1354void VectorLegalizer::ExpandBITREVERSE(SDNode *Node,
1355 SmallVectorImpl<SDValue> &Results) {
1356 EVT VT = Node->getValueType(ResNo: 0);
1357
1358 // We can't unroll or use shuffles for scalable vectors.
1359 if (VT.isScalableVector()) {
1360 Results.push_back(Elt: TLI.expandBITREVERSE(N: Node, DAG));
1361 return;
1362 }
1363
1364 // If we have the scalar operation, it's probably cheaper to unroll it.
1365 if (TLI.isOperationLegalOrCustom(Op: ISD::BITREVERSE, VT: VT.getScalarType())) {
1366 SDValue Tmp = DAG.UnrollVectorOp(N: Node);
1367 Results.push_back(Elt: Tmp);
1368 return;
1369 }
1370
1371 // If the vector element width is a whole number of bytes, test if its legal
1372 // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte
1373 // vector. This greatly reduces the number of bit shifts necessary.
1374 unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
1375 if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) {
1376 SmallVector<int, 16> BSWAPMask;
1377 createBSWAPShuffleMask(VT, ShuffleMask&: BSWAPMask);
1378
1379 EVT ByteVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: MVT::i8, NumElements: BSWAPMask.size());
1380 if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) &&
1381 (TLI.isOperationLegalOrCustom(Op: ISD::BITREVERSE, VT: ByteVT) ||
1382 (TLI.isOperationLegalOrCustom(Op: ISD::SHL, VT: ByteVT) &&
1383 TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT: ByteVT) &&
1384 TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT: ByteVT) &&
1385 TLI.isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT: ByteVT)))) {
1386 SDLoc DL(Node);
1387 SDValue Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ByteVT, Operand: Node->getOperand(Num: 0));
1388 Op = DAG.getVectorShuffle(VT: ByteVT, dl: DL, N1: Op, N2: DAG.getUNDEF(VT: ByteVT),
1389 Mask: BSWAPMask);
1390 Op = DAG.getNode(Opcode: ISD::BITREVERSE, DL, VT: ByteVT, Operand: Op);
1391 Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op);
1392 Results.push_back(Elt: Op);
1393 return;
1394 }
1395 }
1396
1397 // If we have the appropriate vector bit operations, it is better to use them
1398 // than unrolling and expanding each component.
1399 if (TLI.isOperationLegalOrCustom(Op: ISD::SHL, VT) &&
1400 TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT) &&
1401 TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT) &&
1402 TLI.isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT)) {
1403 Results.push_back(Elt: TLI.expandBITREVERSE(N: Node, DAG));
1404 return;
1405 }
1406
1407 // Otherwise unroll.
1408 SDValue Tmp = DAG.UnrollVectorOp(N: Node);
1409 Results.push_back(Elt: Tmp);
1410}
1411
1412SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) {
1413 // Implement VSELECT in terms of XOR, AND, OR
1414 // on platforms which do not support blend natively.
1415 SDLoc DL(Node);
1416
1417 SDValue Mask = Node->getOperand(Num: 0);
1418 SDValue Op1 = Node->getOperand(Num: 1);
1419 SDValue Op2 = Node->getOperand(Num: 2);
1420
1421 EVT VT = Mask.getValueType();
1422
1423 // If we can't even use the basic vector operations of
1424 // AND,OR,XOR, we will have to scalarize the op.
1425 // Notice that the operation may be 'promoted' which means that it is
1426 // 'bitcasted' to another type which is handled.
1427 if (TLI.getOperationAction(Op: ISD::AND, VT) == TargetLowering::Expand ||
1428 TLI.getOperationAction(Op: ISD::XOR, VT) == TargetLowering::Expand ||
1429 TLI.getOperationAction(Op: ISD::OR, VT) == TargetLowering::Expand)
1430 return DAG.UnrollVectorOp(N: Node);
1431
1432 // This operation also isn't safe with AND, OR, XOR when the boolean type is
1433 // 0/1 and the select operands aren't also booleans, as we need an all-ones
1434 // vector constant to mask with.
1435 // FIXME: Sign extend 1 to all ones if that's legal on the target.
1436 auto BoolContents = TLI.getBooleanContents(Type: Op1.getValueType());
1437 if (BoolContents != TargetLowering::ZeroOrNegativeOneBooleanContent &&
1438 !(BoolContents == TargetLowering::ZeroOrOneBooleanContent &&
1439 Op1.getValueType().getVectorElementType() == MVT::i1))
1440 return DAG.UnrollVectorOp(N: Node);
1441
1442 // If the mask and the type are different sizes, unroll the vector op. This
1443 // can occur when getSetCCResultType returns something that is different in
1444 // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
1445 if (VT.getSizeInBits() != Op1.getValueSizeInBits())
1446 return DAG.UnrollVectorOp(N: Node);
1447
1448 // Bitcast the operands to be the same type as the mask.
1449 // This is needed when we select between FP types because
1450 // the mask is a vector of integers.
1451 Op1 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op1);
1452 Op2 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op2);
1453
1454 SDValue NotMask = DAG.getNOT(DL, Val: Mask, VT);
1455
1456 Op1 = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Op1, N2: Mask);
1457 Op2 = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Op2, N2: NotMask);
1458 SDValue Val = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Op1, N2: Op2);
1459 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: Node->getValueType(ResNo: 0), Operand: Val);
1460}
1461
1462SDValue VectorLegalizer::ExpandVP_SELECT(SDNode *Node) {
1463 // Implement VP_SELECT in terms of VP_XOR, VP_AND and VP_OR on platforms which
1464 // do not support it natively.
1465 SDLoc DL(Node);
1466
1467 SDValue Mask = Node->getOperand(Num: 0);
1468 SDValue Op1 = Node->getOperand(Num: 1);
1469 SDValue Op2 = Node->getOperand(Num: 2);
1470 SDValue EVL = Node->getOperand(Num: 3);
1471
1472 EVT VT = Mask.getValueType();
1473
1474 // If we can't even use the basic vector operations of
1475 // VP_AND,VP_OR,VP_XOR, we will have to scalarize the op.
1476 if (TLI.getOperationAction(Op: ISD::VP_AND, VT) == TargetLowering::Expand ||
1477 TLI.getOperationAction(Op: ISD::VP_XOR, VT) == TargetLowering::Expand ||
1478 TLI.getOperationAction(Op: ISD::VP_OR, VT) == TargetLowering::Expand)
1479 return DAG.UnrollVectorOp(N: Node);
1480
1481 // This operation also isn't safe when the operands aren't also booleans.
1482 if (Op1.getValueType().getVectorElementType() != MVT::i1)
1483 return DAG.UnrollVectorOp(N: Node);
1484
1485 SDValue Ones = DAG.getAllOnesConstant(DL, VT);
1486 SDValue NotMask = DAG.getNode(Opcode: ISD::VP_XOR, DL, VT, N1: Mask, N2: Ones, N3: Ones, N4: EVL);
1487
1488 Op1 = DAG.getNode(Opcode: ISD::VP_AND, DL, VT, N1: Op1, N2: Mask, N3: Ones, N4: EVL);
1489 Op2 = DAG.getNode(Opcode: ISD::VP_AND, DL, VT, N1: Op2, N2: NotMask, N3: Ones, N4: EVL);
1490 return DAG.getNode(Opcode: ISD::VP_OR, DL, VT, N1: Op1, N2: Op2, N3: Ones, N4: EVL);
1491}
1492
1493SDValue VectorLegalizer::ExpandVP_MERGE(SDNode *Node) {
1494 // Implement VP_MERGE in terms of VSELECT. Construct a mask where vector
1495 // indices less than the EVL/pivot are true. Combine that with the original
1496 // mask for a full-length mask. Use a full-length VSELECT to select between
1497 // the true and false values.
1498 SDLoc DL(Node);
1499
1500 SDValue Mask = Node->getOperand(Num: 0);
1501 SDValue Op1 = Node->getOperand(Num: 1);
1502 SDValue Op2 = Node->getOperand(Num: 2);
1503 SDValue EVL = Node->getOperand(Num: 3);
1504
1505 EVT MaskVT = Mask.getValueType();
1506 bool IsFixedLen = MaskVT.isFixedLengthVector();
1507
1508 EVT EVLVecVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: EVL.getValueType(),
1509 EC: MaskVT.getVectorElementCount());
1510
1511 // If we can't construct the EVL mask efficiently, it's better to unroll.
1512 if ((IsFixedLen &&
1513 !TLI.isOperationLegalOrCustom(Op: ISD::BUILD_VECTOR, VT: EVLVecVT)) ||
1514 (!IsFixedLen &&
1515 (!TLI.isOperationLegalOrCustom(Op: ISD::STEP_VECTOR, VT: EVLVecVT) ||
1516 !TLI.isOperationLegalOrCustom(Op: ISD::SPLAT_VECTOR, VT: EVLVecVT))))
1517 return DAG.UnrollVectorOp(N: Node);
1518
1519 // If using a SETCC would result in a different type than the mask type,
1520 // unroll.
1521 if (TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(),
1522 VT: EVLVecVT) != MaskVT)
1523 return DAG.UnrollVectorOp(N: Node);
1524
1525 SDValue StepVec = DAG.getStepVector(DL, ResVT: EVLVecVT);
1526 SDValue SplatEVL = DAG.getSplat(VT: EVLVecVT, DL, Op: EVL);
1527 SDValue EVLMask =
1528 DAG.getSetCC(DL, VT: MaskVT, LHS: StepVec, RHS: SplatEVL, Cond: ISD::CondCode::SETULT);
1529
1530 SDValue FullMask = DAG.getNode(Opcode: ISD::AND, DL, VT: MaskVT, N1: Mask, N2: EVLMask);
1531 return DAG.getSelect(DL, VT: Node->getValueType(ResNo: 0), Cond: FullMask, LHS: Op1, RHS: Op2);
1532}
1533
1534SDValue VectorLegalizer::ExpandVP_REM(SDNode *Node) {
1535 // Implement VP_SREM/UREM in terms of VP_SDIV/VP_UDIV, VP_MUL, VP_SUB.
1536 EVT VT = Node->getValueType(ResNo: 0);
1537
1538 unsigned DivOpc = Node->getOpcode() == ISD::VP_SREM ? ISD::VP_SDIV : ISD::VP_UDIV;
1539
1540 if (!TLI.isOperationLegalOrCustom(Op: DivOpc, VT) ||
1541 !TLI.isOperationLegalOrCustom(Op: ISD::VP_MUL, VT) ||
1542 !TLI.isOperationLegalOrCustom(Op: ISD::VP_SUB, VT))
1543 return SDValue();
1544
1545 SDLoc DL(Node);
1546
1547 SDValue Dividend = Node->getOperand(Num: 0);
1548 SDValue Divisor = Node->getOperand(Num: 1);
1549 SDValue Mask = Node->getOperand(Num: 2);
1550 SDValue EVL = Node->getOperand(Num: 3);
1551
1552 // X % Y -> X-X/Y*Y
1553 SDValue Div = DAG.getNode(Opcode: DivOpc, DL, VT, N1: Dividend, N2: Divisor, N3: Mask, N4: EVL);
1554 SDValue Mul = DAG.getNode(Opcode: ISD::VP_MUL, DL, VT, N1: Divisor, N2: Div, N3: Mask, N4: EVL);
1555 return DAG.getNode(Opcode: ISD::VP_SUB, DL, VT, N1: Dividend, N2: Mul, N3: Mask, N4: EVL);
1556}
1557
1558void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node,
1559 SmallVectorImpl<SDValue> &Results) {
1560 // Attempt to expand using TargetLowering.
1561 SDValue Result, Chain;
1562 if (TLI.expandFP_TO_UINT(N: Node, Result, Chain, DAG)) {
1563 Results.push_back(Elt: Result);
1564 if (Node->isStrictFPOpcode())
1565 Results.push_back(Elt: Chain);
1566 return;
1567 }
1568
1569 // Otherwise go ahead and unroll.
1570 if (Node->isStrictFPOpcode()) {
1571 UnrollStrictFPOp(Node, Results);
1572 return;
1573 }
1574
1575 Results.push_back(Elt: DAG.UnrollVectorOp(N: Node));
1576}
1577
1578void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node,
1579 SmallVectorImpl<SDValue> &Results) {
1580 bool IsStrict = Node->isStrictFPOpcode();
1581 unsigned OpNo = IsStrict ? 1 : 0;
1582 SDValue Src = Node->getOperand(Num: OpNo);
1583 EVT VT = Src.getValueType();
1584 SDLoc DL(Node);
1585
1586 // Attempt to expand using TargetLowering.
1587 SDValue Result;
1588 SDValue Chain;
1589 if (TLI.expandUINT_TO_FP(N: Node, Result, Chain, DAG)) {
1590 Results.push_back(Elt: Result);
1591 if (IsStrict)
1592 Results.push_back(Elt: Chain);
1593 return;
1594 }
1595
1596 // Make sure that the SINT_TO_FP and SRL instructions are available.
1597 if (((!IsStrict && TLI.getOperationAction(Op: ISD::SINT_TO_FP, VT) ==
1598 TargetLowering::Expand) ||
1599 (IsStrict && TLI.getOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT) ==
1600 TargetLowering::Expand)) ||
1601 TLI.getOperationAction(Op: ISD::SRL, VT) == TargetLowering::Expand) {
1602 if (IsStrict) {
1603 UnrollStrictFPOp(Node, Results);
1604 return;
1605 }
1606
1607 Results.push_back(Elt: DAG.UnrollVectorOp(N: Node));
1608 return;
1609 }
1610
1611 unsigned BW = VT.getScalarSizeInBits();
1612 assert((BW == 64 || BW == 32) &&
1613 "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
1614
1615 SDValue HalfWord = DAG.getConstant(Val: BW / 2, DL, VT);
1616
1617 // Constants to clear the upper part of the word.
1618 // Notice that we can also use SHL+SHR, but using a constant is slightly
1619 // faster on x86.
1620 uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
1621 SDValue HalfWordMask = DAG.getConstant(Val: HWMask, DL, VT);
1622
1623 // Two to the power of half-word-size.
1624 SDValue TWOHW =
1625 DAG.getConstantFP(Val: 1ULL << (BW / 2), DL, VT: Node->getValueType(ResNo: 0));
1626
1627 // Clear upper part of LO, lower HI
1628 SDValue HI = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Src, N2: HalfWord);
1629 SDValue LO = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Src, N2: HalfWordMask);
1630
1631 if (IsStrict) {
1632 // Convert hi and lo to floats
1633 // Convert the hi part back to the upper values
1634 // TODO: Can any fast-math-flags be set on these nodes?
1635 SDValue fHI = DAG.getNode(Opcode: ISD::STRICT_SINT_TO_FP, DL,
1636 ResultTys: {Node->getValueType(ResNo: 0), MVT::Other},
1637 Ops: {Node->getOperand(Num: 0), HI});
1638 fHI = DAG.getNode(Opcode: ISD::STRICT_FMUL, DL, ResultTys: {Node->getValueType(ResNo: 0), MVT::Other},
1639 Ops: {fHI.getValue(R: 1), fHI, TWOHW});
1640 SDValue fLO = DAG.getNode(Opcode: ISD::STRICT_SINT_TO_FP, DL,
1641 ResultTys: {Node->getValueType(ResNo: 0), MVT::Other},
1642 Ops: {Node->getOperand(Num: 0), LO});
1643
1644 SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, N1: fHI.getValue(R: 1),
1645 N2: fLO.getValue(R: 1));
1646
1647 // Add the two halves
1648 SDValue Result =
1649 DAG.getNode(Opcode: ISD::STRICT_FADD, DL, ResultTys: {Node->getValueType(ResNo: 0), MVT::Other},
1650 Ops: {TF, fHI, fLO});
1651
1652 Results.push_back(Elt: Result);
1653 Results.push_back(Elt: Result.getValue(R: 1));
1654 return;
1655 }
1656
1657 // Convert hi and lo to floats
1658 // Convert the hi part back to the upper values
1659 // TODO: Can any fast-math-flags be set on these nodes?
1660 SDValue fHI = DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT: Node->getValueType(ResNo: 0), Operand: HI);
1661 fHI = DAG.getNode(Opcode: ISD::FMUL, DL, VT: Node->getValueType(ResNo: 0), N1: fHI, N2: TWOHW);
1662 SDValue fLO = DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT: Node->getValueType(ResNo: 0), Operand: LO);
1663
1664 // Add the two halves
1665 Results.push_back(
1666 Elt: DAG.getNode(Opcode: ISD::FADD, DL, VT: Node->getValueType(ResNo: 0), N1: fHI, N2: fLO));
1667}
1668
1669SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
1670 if (TLI.isOperationLegalOrCustom(Op: ISD::FSUB, VT: Node->getValueType(ResNo: 0))) {
1671 SDLoc DL(Node);
1672 SDValue Zero = DAG.getConstantFP(Val: -0.0, DL, VT: Node->getValueType(ResNo: 0));
1673 // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB.
1674 return DAG.getNode(Opcode: ISD::FSUB, DL, VT: Node->getValueType(ResNo: 0), N1: Zero,
1675 N2: Node->getOperand(Num: 0));
1676 }
1677 return DAG.UnrollVectorOp(N: Node);
1678}
1679
1680void VectorLegalizer::ExpandFSUB(SDNode *Node,
1681 SmallVectorImpl<SDValue> &Results) {
1682 // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal,
1683 // we can defer this to operation legalization where it will be lowered as
1684 // a+(-b).
1685 EVT VT = Node->getValueType(ResNo: 0);
1686 if (TLI.isOperationLegalOrCustom(Op: ISD::FNEG, VT) &&
1687 TLI.isOperationLegalOrCustom(Op: ISD::FADD, VT))
1688 return; // Defer to LegalizeDAG
1689
1690 SDValue Tmp = DAG.UnrollVectorOp(N: Node);
1691 Results.push_back(Elt: Tmp);
1692}
1693
1694void VectorLegalizer::ExpandSETCC(SDNode *Node,
1695 SmallVectorImpl<SDValue> &Results) {
1696 bool NeedInvert = false;
1697 bool IsVP = Node->getOpcode() == ISD::VP_SETCC;
1698 bool IsStrict = Node->getOpcode() == ISD::STRICT_FSETCC ||
1699 Node->getOpcode() == ISD::STRICT_FSETCCS;
1700 bool IsSignaling = Node->getOpcode() == ISD::STRICT_FSETCCS;
1701 unsigned Offset = IsStrict ? 1 : 0;
1702
1703 SDValue Chain = IsStrict ? Node->getOperand(Num: 0) : SDValue();
1704 SDValue LHS = Node->getOperand(Num: 0 + Offset);
1705 SDValue RHS = Node->getOperand(Num: 1 + Offset);
1706 SDValue CC = Node->getOperand(Num: 2 + Offset);
1707
1708 MVT OpVT = LHS.getSimpleValueType();
1709 ISD::CondCode CCCode = cast<CondCodeSDNode>(Val&: CC)->get();
1710
1711 if (TLI.getCondCodeAction(CC: CCCode, VT: OpVT) != TargetLowering::Expand) {
1712 if (IsStrict) {
1713 UnrollStrictFPOp(Node, Results);
1714 return;
1715 }
1716 Results.push_back(Elt: UnrollVSETCC(Node));
1717 return;
1718 }
1719
1720 SDValue Mask, EVL;
1721 if (IsVP) {
1722 Mask = Node->getOperand(Num: 3 + Offset);
1723 EVL = Node->getOperand(Num: 4 + Offset);
1724 }
1725
1726 SDLoc dl(Node);
1727 bool Legalized =
1728 TLI.LegalizeSetCCCondCode(DAG, VT: Node->getValueType(ResNo: 0), LHS, RHS, CC, Mask,
1729 EVL, NeedInvert, dl, Chain, IsSignaling);
1730
1731 if (Legalized) {
1732 // If we expanded the SETCC by swapping LHS and RHS, or by inverting the
1733 // condition code, create a new SETCC node.
1734 if (CC.getNode()) {
1735 if (IsStrict) {
1736 LHS = DAG.getNode(Opcode: Node->getOpcode(), DL: dl, VTList: Node->getVTList(),
1737 Ops: {Chain, LHS, RHS, CC}, Flags: Node->getFlags());
1738 Chain = LHS.getValue(R: 1);
1739 } else if (IsVP) {
1740 LHS = DAG.getNode(Opcode: ISD::VP_SETCC, DL: dl, VT: Node->getValueType(ResNo: 0),
1741 Ops: {LHS, RHS, CC, Mask, EVL}, Flags: Node->getFlags());
1742 } else {
1743 LHS = DAG.getNode(Opcode: ISD::SETCC, DL: dl, VT: Node->getValueType(ResNo: 0), N1: LHS, N2: RHS, N3: CC,
1744 Flags: Node->getFlags());
1745 }
1746 }
1747
1748 // If we expanded the SETCC by inverting the condition code, then wrap
1749 // the existing SETCC in a NOT to restore the intended condition.
1750 if (NeedInvert) {
1751 if (!IsVP)
1752 LHS = DAG.getLogicalNOT(DL: dl, Val: LHS, VT: LHS->getValueType(ResNo: 0));
1753 else
1754 LHS = DAG.getVPLogicalNOT(DL: dl, Val: LHS, Mask, EVL, VT: LHS->getValueType(ResNo: 0));
1755 }
1756 } else {
1757 assert(!IsStrict && "Don't know how to expand for strict nodes.");
1758
1759 // Otherwise, SETCC for the given comparison type must be completely
1760 // illegal; expand it into a SELECT_CC.
1761 EVT VT = Node->getValueType(ResNo: 0);
1762 LHS =
1763 DAG.getNode(Opcode: ISD::SELECT_CC, DL: dl, VT, N1: LHS, N2: RHS,
1764 N3: DAG.getBoolConstant(V: true, DL: dl, VT, OpVT: LHS.getValueType()),
1765 N4: DAG.getBoolConstant(V: false, DL: dl, VT, OpVT: LHS.getValueType()), N5: CC);
1766 LHS->setFlags(Node->getFlags());
1767 }
1768
1769 Results.push_back(Elt: LHS);
1770 if (IsStrict)
1771 Results.push_back(Elt: Chain);
1772}
1773
1774void VectorLegalizer::ExpandUADDSUBO(SDNode *Node,
1775 SmallVectorImpl<SDValue> &Results) {
1776 SDValue Result, Overflow;
1777 TLI.expandUADDSUBO(Node, Result, Overflow, DAG);
1778 Results.push_back(Elt: Result);
1779 Results.push_back(Elt: Overflow);
1780}
1781
1782void VectorLegalizer::ExpandSADDSUBO(SDNode *Node,
1783 SmallVectorImpl<SDValue> &Results) {
1784 SDValue Result, Overflow;
1785 TLI.expandSADDSUBO(Node, Result, Overflow, DAG);
1786 Results.push_back(Elt: Result);
1787 Results.push_back(Elt: Overflow);
1788}
1789
1790void VectorLegalizer::ExpandMULO(SDNode *Node,
1791 SmallVectorImpl<SDValue> &Results) {
1792 SDValue Result, Overflow;
1793 if (!TLI.expandMULO(Node, Result, Overflow, DAG))
1794 std::tie(args&: Result, args&: Overflow) = DAG.UnrollVectorOverflowOp(N: Node);
1795
1796 Results.push_back(Elt: Result);
1797 Results.push_back(Elt: Overflow);
1798}
1799
1800void VectorLegalizer::ExpandFixedPointDiv(SDNode *Node,
1801 SmallVectorImpl<SDValue> &Results) {
1802 SDNode *N = Node;
1803 if (SDValue Expanded = TLI.expandFixedPointDiv(Opcode: N->getOpcode(), dl: SDLoc(N),
1804 LHS: N->getOperand(Num: 0), RHS: N->getOperand(Num: 1), Scale: N->getConstantOperandVal(Num: 2), DAG))
1805 Results.push_back(Elt: Expanded);
1806}
1807
1808void VectorLegalizer::ExpandStrictFPOp(SDNode *Node,
1809 SmallVectorImpl<SDValue> &Results) {
1810 if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP) {
1811 ExpandUINT_TO_FLOAT(Node, Results);
1812 return;
1813 }
1814 if (Node->getOpcode() == ISD::STRICT_FP_TO_UINT) {
1815 ExpandFP_TO_UINT(Node, Results);
1816 return;
1817 }
1818
1819 if (Node->getOpcode() == ISD::STRICT_FSETCC ||
1820 Node->getOpcode() == ISD::STRICT_FSETCCS) {
1821 ExpandSETCC(Node, Results);
1822 return;
1823 }
1824
1825 UnrollStrictFPOp(Node, Results);
1826}
1827
1828void VectorLegalizer::ExpandREM(SDNode *Node,
1829 SmallVectorImpl<SDValue> &Results) {
1830 assert((Node->getOpcode() == ISD::SREM || Node->getOpcode() == ISD::UREM) &&
1831 "Expected REM node");
1832
1833 SDValue Result;
1834 if (!TLI.expandREM(Node, Result, DAG))
1835 Result = DAG.UnrollVectorOp(N: Node);
1836 Results.push_back(Elt: Result);
1837}
1838
1839// Try to expand libm nodes into vector math routine calls. Callers provide the
1840// LibFunc equivalent of the passed in Node, which is used to lookup mappings
1841// within TargetLibraryInfo. The only mappings considered are those where the
1842// result and all operands are the same vector type. While predicated nodes are
1843// not supported, we will emit calls to masked routines by passing in an all
1844// true mask.
1845bool VectorLegalizer::tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC,
1846 SmallVectorImpl<SDValue> &Results) {
1847 // Chain must be propagated but currently strict fp operations are down
1848 // converted to their none strict counterpart.
1849 assert(!Node->isStrictFPOpcode() && "Unexpected strict fp operation!");
1850
1851 const char *LCName = TLI.getLibcallName(Call: LC);
1852 if (!LCName)
1853 return false;
1854 LLVM_DEBUG(dbgs() << "Looking for vector variant of " << LCName << "\n");
1855
1856 EVT VT = Node->getValueType(ResNo: 0);
1857 ElementCount VL = VT.getVectorElementCount();
1858
1859 // Lookup a vector function equivalent to the specified libcall. Prefer
1860 // unmasked variants but we will generate a mask if need be.
1861 const TargetLibraryInfo &TLibInfo = DAG.getLibInfo();
1862 const VecDesc *VD = TLibInfo.getVectorMappingInfo(F: LCName, VF: VL, Masked: false);
1863 if (!VD)
1864 VD = TLibInfo.getVectorMappingInfo(F: LCName, VF: VL, /*Masked=*/true);
1865 if (!VD)
1866 return false;
1867
1868 LLVMContext *Ctx = DAG.getContext();
1869 Type *Ty = VT.getTypeForEVT(Context&: *Ctx);
1870 Type *ScalarTy = Ty->getScalarType();
1871
1872 // Construct a scalar function type based on Node's operands.
1873 SmallVector<Type *, 8> ArgTys;
1874 for (unsigned i = 0; i < Node->getNumOperands(); ++i) {
1875 assert(Node->getOperand(i).getValueType() == VT &&
1876 "Expected matching vector types!");
1877 ArgTys.push_back(Elt: ScalarTy);
1878 }
1879 FunctionType *ScalarFTy = FunctionType::get(Result: ScalarTy, Params: ArgTys, isVarArg: false);
1880
1881 // Generate call information for the vector function.
1882 const std::string MangledName = VD->getVectorFunctionABIVariantString();
1883 auto OptVFInfo = VFABI::tryDemangleForVFABI(MangledName, FTy: ScalarFTy);
1884 if (!OptVFInfo)
1885 return false;
1886
1887 LLVM_DEBUG(dbgs() << "Found vector variant " << VD->getVectorFnName()
1888 << "\n");
1889
1890 // Sanity check just in case OptVFInfo has unexpected parameters.
1891 if (OptVFInfo->Shape.Parameters.size() !=
1892 Node->getNumOperands() + VD->isMasked())
1893 return false;
1894
1895 // Collect vector call operands.
1896
1897 SDLoc DL(Node);
1898 TargetLowering::ArgListTy Args;
1899 TargetLowering::ArgListEntry Entry;
1900 Entry.IsSExt = false;
1901 Entry.IsZExt = false;
1902
1903 unsigned OpNum = 0;
1904 for (auto &VFParam : OptVFInfo->Shape.Parameters) {
1905 if (VFParam.ParamKind == VFParamKind::GlobalPredicate) {
1906 EVT MaskVT = TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: *Ctx, VT);
1907 Entry.Node = DAG.getBoolConstant(V: true, DL, VT: MaskVT, OpVT: VT);
1908 Entry.Ty = MaskVT.getTypeForEVT(Context&: *Ctx);
1909 Args.push_back(x: Entry);
1910 continue;
1911 }
1912
1913 // Only vector operands are supported.
1914 if (VFParam.ParamKind != VFParamKind::Vector)
1915 return false;
1916
1917 Entry.Node = Node->getOperand(Num: OpNum++);
1918 Entry.Ty = Ty;
1919 Args.push_back(x: Entry);
1920 }
1921
1922 // Emit a call to the vector function.
1923 SDValue Callee = DAG.getExternalSymbol(Sym: VD->getVectorFnName().data(),
1924 VT: TLI.getPointerTy(DL: DAG.getDataLayout()));
1925 TargetLowering::CallLoweringInfo CLI(DAG);
1926 CLI.setDebugLoc(DL)
1927 .setChain(DAG.getEntryNode())
1928 .setLibCallee(CC: CallingConv::C, ResultType: Ty, Target: Callee, ArgsList: std::move(Args));
1929
1930 std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
1931 Results.push_back(Elt: CallResult.first);
1932 return true;
1933}
1934
1935/// Try to expand the node to a vector libcall based on the result type.
1936bool VectorLegalizer::tryExpandVecMathCall(
1937 SDNode *Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
1938 RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
1939 RTLIB::Libcall Call_PPCF128, SmallVectorImpl<SDValue> &Results) {
1940 RTLIB::Libcall LC = RTLIB::getFPLibCall(
1941 VT: Node->getValueType(ResNo: 0).getVectorElementType(), Call_F32, Call_F64,
1942 Call_F80, Call_F128, Call_PPCF128);
1943
1944 if (LC == RTLIB::UNKNOWN_LIBCALL)
1945 return false;
1946
1947 return tryExpandVecMathCall(Node, LC, Results);
1948}
1949
1950void VectorLegalizer::UnrollStrictFPOp(SDNode *Node,
1951 SmallVectorImpl<SDValue> &Results) {
1952 EVT VT = Node->getValueType(ResNo: 0);
1953 EVT EltVT = VT.getVectorElementType();
1954 unsigned NumElems = VT.getVectorNumElements();
1955 unsigned NumOpers = Node->getNumOperands();
1956 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1957
1958 EVT TmpEltVT = EltVT;
1959 if (Node->getOpcode() == ISD::STRICT_FSETCC ||
1960 Node->getOpcode() == ISD::STRICT_FSETCCS)
1961 TmpEltVT = TLI.getSetCCResultType(DL: DAG.getDataLayout(),
1962 Context&: *DAG.getContext(), VT: TmpEltVT);
1963
1964 EVT ValueVTs[] = {TmpEltVT, MVT::Other};
1965 SDValue Chain = Node->getOperand(Num: 0);
1966 SDLoc dl(Node);
1967
1968 SmallVector<SDValue, 32> OpValues;
1969 SmallVector<SDValue, 32> OpChains;
1970 for (unsigned i = 0; i < NumElems; ++i) {
1971 SmallVector<SDValue, 4> Opers;
1972 SDValue Idx = DAG.getVectorIdxConstant(Val: i, DL: dl);
1973
1974 // The Chain is the first operand.
1975 Opers.push_back(Elt: Chain);
1976
1977 // Now process the remaining operands.
1978 for (unsigned j = 1; j < NumOpers; ++j) {
1979 SDValue Oper = Node->getOperand(Num: j);
1980 EVT OperVT = Oper.getValueType();
1981
1982 if (OperVT.isVector())
1983 Oper = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl,
1984 VT: OperVT.getVectorElementType(), N1: Oper, N2: Idx);
1985
1986 Opers.push_back(Elt: Oper);
1987 }
1988
1989 SDValue ScalarOp = DAG.getNode(Opcode: Node->getOpcode(), DL: dl, ResultTys: ValueVTs, Ops: Opers);
1990 SDValue ScalarResult = ScalarOp.getValue(R: 0);
1991 SDValue ScalarChain = ScalarOp.getValue(R: 1);
1992
1993 if (Node->getOpcode() == ISD::STRICT_FSETCC ||
1994 Node->getOpcode() == ISD::STRICT_FSETCCS)
1995 ScalarResult = DAG.getSelect(DL: dl, VT: EltVT, Cond: ScalarResult,
1996 LHS: DAG.getAllOnesConstant(DL: dl, VT: EltVT),
1997 RHS: DAG.getConstant(Val: 0, DL: dl, VT: EltVT));
1998
1999 OpValues.push_back(Elt: ScalarResult);
2000 OpChains.push_back(Elt: ScalarChain);
2001 }
2002
2003 SDValue Result = DAG.getBuildVector(VT, DL: dl, Ops: OpValues);
2004 SDValue NewChain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: OpChains);
2005
2006 Results.push_back(Elt: Result);
2007 Results.push_back(Elt: NewChain);
2008}
2009
2010SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) {
2011 EVT VT = Node->getValueType(ResNo: 0);
2012 unsigned NumElems = VT.getVectorNumElements();
2013 EVT EltVT = VT.getVectorElementType();
2014 SDValue LHS = Node->getOperand(Num: 0);
2015 SDValue RHS = Node->getOperand(Num: 1);
2016 SDValue CC = Node->getOperand(Num: 2);
2017 EVT TmpEltVT = LHS.getValueType().getVectorElementType();
2018 SDLoc dl(Node);
2019 SmallVector<SDValue, 8> Ops(NumElems);
2020 for (unsigned i = 0; i < NumElems; ++i) {
2021 SDValue LHSElem = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: TmpEltVT, N1: LHS,
2022 N2: DAG.getVectorIdxConstant(Val: i, DL: dl));
2023 SDValue RHSElem = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: TmpEltVT, N1: RHS,
2024 N2: DAG.getVectorIdxConstant(Val: i, DL: dl));
2025 Ops[i] = DAG.getNode(Opcode: ISD::SETCC, DL: dl,
2026 VT: TLI.getSetCCResultType(DL: DAG.getDataLayout(),
2027 Context&: *DAG.getContext(), VT: TmpEltVT),
2028 N1: LHSElem, N2: RHSElem, N3: CC);
2029 Ops[i] = DAG.getSelect(DL: dl, VT: EltVT, Cond: Ops[i], LHS: DAG.getAllOnesConstant(DL: dl, VT: EltVT),
2030 RHS: DAG.getConstant(Val: 0, DL: dl, VT: EltVT));
2031 }
2032 return DAG.getBuildVector(VT, DL: dl, Ops);
2033}
2034
2035bool SelectionDAG::LegalizeVectors() {
2036 return VectorLegalizer(*this).Run();
2037}
2038