//===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the SelectionDAG::LegalizeVectors method.
//
// The vector legalizer looks for vector operations which might need to be
// scalarized and legalizes them. This is a separate step from Legalize because
// scalarizing can introduce illegal types. For example, suppose we have an
// ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition
// on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
// operation, which introduces nodes with the illegal type i64 which must be
// expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
// the operation must be unrolled, which introduces nodes with the illegal
// type i8 which must be promoted.
//
// This does not legalize vector manipulations like ISD::BUILD_VECTOR,
// or operations that happen to take a vector which are custom-lowered;
// the legalization for such operations never produces nodes
// with illegal types, so it's okay to put off legalizing them until
// SelectionDAG::Legalize runs.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

49using namespace llvm;
50
51#define DEBUG_TYPE "legalizevectorops"
52
53namespace {
54
55class VectorLegalizer {
56 SelectionDAG& DAG;
57 const TargetLowering &TLI;
58 bool Changed = false; // Keep track of whether anything changed
59
60 /// For nodes that are of legal width, and that have more than one use, this
61 /// map indicates what regularized operand to use. This allows us to avoid
62 /// legalizing the same thing more than once.
63 SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes;
64
65 /// Adds a node to the translation cache.
66 void AddLegalizedOperand(SDValue From, SDValue To) {
67 LegalizedNodes.insert(KV: std::make_pair(x&: From, y&: To));
68 // If someone requests legalization of the new node, return itself.
69 if (From != To)
70 LegalizedNodes.insert(KV: std::make_pair(x&: To, y&: To));
71 }
72
73 /// Legalizes the given node.
74 SDValue LegalizeOp(SDValue Op);
75
76 /// Assuming the node is legal, "legalize" the results.
77 SDValue TranslateLegalizeResults(SDValue Op, SDNode *Result);
78
79 /// Make sure Results are legal and update the translation cache.
80 SDValue RecursivelyLegalizeResults(SDValue Op,
81 MutableArrayRef<SDValue> Results);
82
83 /// Wrapper to interface LowerOperation with a vector of Results.
84 /// Returns false if the target wants to use default expansion. Otherwise
85 /// returns true. If return is true and the Results are empty, then the
86 /// target wants to keep the input node as is.
87 bool LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results);
88
89 /// Implements unrolling a VSETCC.
90 SDValue UnrollVSETCC(SDNode *Node);
91
92 /// Implement expand-based legalization of vector operations.
93 ///
94 /// This is just a high-level routine to dispatch to specific code paths for
95 /// operations to legalize them.
96 void Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results);
97
98 /// Implements expansion for FP_TO_UINT; falls back to UnrollVectorOp if
99 /// FP_TO_SINT isn't legal.
100 void ExpandFP_TO_UINT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
101
102 /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
103 /// SINT_TO_FLOAT and SHR on vectors isn't legal.
104 void ExpandUINT_TO_FLOAT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
105
106 /// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
107 SDValue ExpandSEXTINREG(SDNode *Node);
108
109 /// Implement expansion for ANY_EXTEND_VECTOR_INREG.
110 ///
111 /// Shuffles the low lanes of the operand into place and bitcasts to the proper
112 /// type. The contents of the bits in the extended part of each element are
113 /// undef.
114 SDValue ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node);
115
116 /// Implement expansion for SIGN_EXTEND_VECTOR_INREG.
117 ///
118 /// Shuffles the low lanes of the operand into place, bitcasts to the proper
119 /// type, then shifts left and arithmetic shifts right to introduce a sign
120 /// extension.
121 SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node);
122
123 /// Implement expansion for ZERO_EXTEND_VECTOR_INREG.
124 ///
125 /// Shuffles the low lanes of the operand into place and blends zeros into
126 /// the remaining lanes, finally bitcasting to the proper type.
127 SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node);
128
129 /// Expand bswap of vectors into a shuffle if legal.
130 SDValue ExpandBSWAP(SDNode *Node);
131
132 /// Implement vselect in terms of XOR, AND, OR when blend is not
133 /// supported by the target.
134 SDValue ExpandVSELECT(SDNode *Node);
135 SDValue ExpandVP_SELECT(SDNode *Node);
136 SDValue ExpandVP_MERGE(SDNode *Node);
137 SDValue ExpandVP_REM(SDNode *Node);
138 SDValue ExpandVP_FNEG(SDNode *Node);
139 SDValue ExpandVP_FABS(SDNode *Node);
140 SDValue ExpandVP_FCOPYSIGN(SDNode *Node);
141 SDValue ExpandLOOP_DEPENDENCE_MASK(SDNode *N);
142 SDValue ExpandSELECT(SDNode *Node);
143 std::pair<SDValue, SDValue> ExpandLoad(SDNode *N);
144 SDValue ExpandStore(SDNode *N);
145 SDValue ExpandFNEG(SDNode *Node);
146 SDValue ExpandFABS(SDNode *Node);
147 SDValue ExpandFCOPYSIGN(SDNode *Node);
148 void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results);
149 void ExpandSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
150 SDValue ExpandBITREVERSE(SDNode *Node);
151 void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
152 void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
153 void ExpandMULO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
154 void ExpandFixedPointDiv(SDNode *Node, SmallVectorImpl<SDValue> &Results);
155 void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
156 void ExpandREM(SDNode *Node, SmallVectorImpl<SDValue> &Results);
157
158 bool tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC,
159 SmallVectorImpl<SDValue> &Results);
160
161 void UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
162
163 /// Implements vector promotion.
164 ///
165 /// This is essentially just bitcasting the operands to a different type and
166 /// bitcasting the result back to the original type.
167 void Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results);
168
169 /// Implements [SU]INT_TO_FP vector promotion.
170 ///
171 /// This is a [zs]ext of the input operand to a larger integer type.
172 void PromoteINT_TO_FP(SDNode *Node, SmallVectorImpl<SDValue> &Results);
173
174 /// Implements FP_TO_[SU]INT vector promotion of the result type.
175 ///
176 /// It is promoted to a larger integer type. The result is then
177 /// truncated back to the original type.
178 void PromoteFP_TO_INT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
179
180 /// Implements vector setcc operation promotion.
181 ///
182 /// All vector operands are promoted to a vector type with larger element
183 /// type.
184 void PromoteSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
185
186 void PromoteSTRICT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
187
188 /// Calculate the reduction using a type of higher precision and round the
189 /// result to match the original type. Setting NonArithmetic signifies the
190 /// rounding of the result does not affect its value.
191 void PromoteFloatVECREDUCE(SDNode *Node, SmallVectorImpl<SDValue> &Results,
192 bool NonArithmetic);
193
194 void PromoteVECTOR_COMPRESS(SDNode *Node, SmallVectorImpl<SDValue> &Results);
195
196public:
197 VectorLegalizer(SelectionDAG& dag) :
198 DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
199
200 /// Begin legalizer the vector operations in the DAG.
201 bool Run();
202};
203
204} // end anonymous namespace
205
206bool VectorLegalizer::Run() {
207 // Before we start legalizing vector nodes, check if there are any vectors.
208 bool HasVectors = false;
209 for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
210 E = std::prev(x: DAG.allnodes_end()); I != std::next(x: E); ++I) {
211 // Check if the values of the nodes contain vectors. We don't need to check
212 // the operands because we are going to check their values at some point.
213 HasVectors = llvm::any_of(Range: I->values(), P: [](EVT T) { return T.isVector(); });
214
215 // If we found a vector node we can start the legalization.
216 if (HasVectors)
217 break;
218 }
219
220 // If this basic block has no vectors then no need to legalize vectors.
221 if (!HasVectors)
222 return false;
223
224 // The legalize process is inherently a bottom-up recursive process (users
225 // legalize their uses before themselves). Given infinite stack space, we
226 // could just start legalizing on the root and traverse the whole graph. In
227 // practice however, this causes us to run out of stack space on large basic
228 // blocks. To avoid this problem, compute an ordering of the nodes where each
229 // node is only legalized after all of its operands are legalized.
230 DAG.AssignTopologicalOrder();
231 for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
232 E = std::prev(x: DAG.allnodes_end()); I != std::next(x: E); ++I)
233 LegalizeOp(Op: SDValue(&*I, 0));
234
235 // Finally, it's possible the root changed. Get the new root.
236 SDValue OldRoot = DAG.getRoot();
237 assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
238 DAG.setRoot(LegalizedNodes[OldRoot]);
239
240 LegalizedNodes.clear();
241
242 // Remove dead nodes now.
243 DAG.RemoveDeadNodes();
244
245 return Changed;
246}
247
248SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDNode *Result) {
249 assert(Op->getNumValues() == Result->getNumValues() &&
250 "Unexpected number of results");
251 // Generic legalization: just pass the operand through.
252 for (unsigned i = 0, e = Op->getNumValues(); i != e; ++i)
253 AddLegalizedOperand(From: Op.getValue(R: i), To: SDValue(Result, i));
254 return SDValue(Result, Op.getResNo());
255}
256
257SDValue
258VectorLegalizer::RecursivelyLegalizeResults(SDValue Op,
259 MutableArrayRef<SDValue> Results) {
260 assert(Results.size() == Op->getNumValues() &&
261 "Unexpected number of results");
262 // Make sure that the generated code is itself legal.
263 for (unsigned i = 0, e = Results.size(); i != e; ++i) {
264 Results[i] = LegalizeOp(Op: Results[i]);
265 AddLegalizedOperand(From: Op.getValue(R: i), To: Results[i]);
266 }
267
268 return Results[Op.getResNo()];
269}
270
271SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
272 // Note that LegalizeOp may be reentered even from single-use nodes, which
273 // means that we always must cache transformed nodes.
274 DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Val: Op);
275 if (I != LegalizedNodes.end()) return I->second;
276
277 // Legalize the operands
278 SmallVector<SDValue, 8> Ops;
279 for (const SDValue &Oper : Op->op_values())
280 Ops.push_back(Elt: LegalizeOp(Op: Oper));
281
282 SDNode *Node = DAG.UpdateNodeOperands(N: Op.getNode(), Ops);
283
284 bool HasVectorValueOrOp =
285 llvm::any_of(Range: Node->values(), P: [](EVT T) { return T.isVector(); }) ||
286 llvm::any_of(Range: Node->op_values(),
287 P: [](SDValue O) { return O.getValueType().isVector(); });
288 if (!HasVectorValueOrOp)
289 return TranslateLegalizeResults(Op, Result: Node);
290
291 TargetLowering::LegalizeAction Action = TargetLowering::Legal;
292 EVT ValVT;
293 switch (Op.getOpcode()) {
294 default:
295 return TranslateLegalizeResults(Op, Result: Node);
296 case ISD::LOAD: {
297 LoadSDNode *LD = cast<LoadSDNode>(Val: Node);
298 ISD::LoadExtType ExtType = LD->getExtensionType();
299 EVT LoadedVT = LD->getMemoryVT();
300 if (LoadedVT.isVector() && ExtType != ISD::NON_EXTLOAD)
301 Action = TLI.getLoadAction(ValVT: LD->getValueType(ResNo: 0), MemVT: LoadedVT, Alignment: LD->getAlign(),
302 AddrSpace: LD->getAddressSpace(), ExtType, Atomic: false);
303 break;
304 }
305 case ISD::STORE: {
306 StoreSDNode *ST = cast<StoreSDNode>(Val: Node);
307 EVT StVT = ST->getMemoryVT();
308 MVT ValVT = ST->getValue().getSimpleValueType();
309 if (StVT.isVector() && ST->isTruncatingStore())
310 Action = TLI.getTruncStoreAction(ValVT, MemVT: StVT, Alignment: ST->getAlign(),
311 AddrSpace: ST->getAddressSpace());
312 break;
313 }
314 case ISD::MERGE_VALUES:
315 Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: Node->getValueType(ResNo: 0));
316 // This operation lies about being legal: when it claims to be legal,
317 // it should actually be expanded.
318 if (Action == TargetLowering::Legal)
319 Action = TargetLowering::Expand;
320 break;
321#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
322 case ISD::STRICT_##DAGN:
323#include "llvm/IR/ConstrainedOps.def"
324 ValVT = Node->getValueType(ResNo: 0);
325 if (Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
326 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
327 ValVT = Node->getOperand(Num: 1).getValueType();
328 if (Op.getOpcode() == ISD::STRICT_FSETCC ||
329 Op.getOpcode() == ISD::STRICT_FSETCCS) {
330 MVT OpVT = Node->getOperand(Num: 1).getSimpleValueType();
331 ISD::CondCode CCCode = cast<CondCodeSDNode>(Val: Node->getOperand(Num: 3))->get();
332 Action = TLI.getCondCodeAction(CC: CCCode, VT: OpVT);
333 if (Action == TargetLowering::Legal)
334 Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: OpVT);
335 } else {
336 Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: ValVT);
337 }
338 // If we're asked to expand a strict vector floating-point operation,
339 // by default we're going to simply unroll it. That is usually the
340 // best approach, except in the case where the resulting strict (scalar)
341 // operations would themselves use the fallback mutation to non-strict.
342 // In that specific case, just do the fallback on the vector op.
343 if (Action == TargetLowering::Expand && !TLI.isStrictFPEnabled() &&
344 TLI.getStrictFPOperationAction(Op: Node->getOpcode(), VT: ValVT) ==
345 TargetLowering::Legal) {
346 EVT EltVT = ValVT.getVectorElementType();
347 if (TLI.getOperationAction(Op: Node->getOpcode(), VT: EltVT)
348 == TargetLowering::Expand &&
349 TLI.getStrictFPOperationAction(Op: Node->getOpcode(), VT: EltVT)
350 == TargetLowering::Legal)
351 Action = TargetLowering::Legal;
352 }
353 break;
354 case ISD::ADD:
355 case ISD::SUB:
356 case ISD::MUL:
357 case ISD::MULHS:
358 case ISD::MULHU:
359 case ISD::SDIV:
360 case ISD::UDIV:
361 case ISD::SREM:
362 case ISD::UREM:
363 case ISD::SDIVREM:
364 case ISD::UDIVREM:
365 case ISD::FADD:
366 case ISD::FSUB:
367 case ISD::FMUL:
368 case ISD::FDIV:
369 case ISD::FREM:
370 case ISD::AND:
371 case ISD::OR:
372 case ISD::XOR:
373 case ISD::SHL:
374 case ISD::SRA:
375 case ISD::SRL:
376 case ISD::FSHL:
377 case ISD::FSHR:
378 case ISD::ROTL:
379 case ISD::ROTR:
380 case ISD::ABS:
381 case ISD::ABDS:
382 case ISD::ABDU:
383 case ISD::AVGCEILS:
384 case ISD::AVGCEILU:
385 case ISD::AVGFLOORS:
386 case ISD::AVGFLOORU:
387 case ISD::BSWAP:
388 case ISD::BITREVERSE:
389 case ISD::CTLZ:
390 case ISD::CTTZ:
391 case ISD::CTLZ_ZERO_UNDEF:
392 case ISD::CTTZ_ZERO_UNDEF:
393 case ISD::CTPOP:
394 case ISD::CLMUL:
395 case ISD::CLMULH:
396 case ISD::CLMULR:
397 case ISD::SELECT:
398 case ISD::VSELECT:
399 case ISD::SELECT_CC:
400 case ISD::ZERO_EXTEND:
401 case ISD::ANY_EXTEND:
402 case ISD::TRUNCATE:
403 case ISD::SIGN_EXTEND:
404 case ISD::FP_TO_SINT:
405 case ISD::FP_TO_UINT:
406 case ISD::FNEG:
407 case ISD::FABS:
408 case ISD::FMINNUM:
409 case ISD::FMAXNUM:
410 case ISD::FMINNUM_IEEE:
411 case ISD::FMAXNUM_IEEE:
412 case ISD::FMINIMUM:
413 case ISD::FMAXIMUM:
414 case ISD::FMINIMUMNUM:
415 case ISD::FMAXIMUMNUM:
416 case ISD::FCOPYSIGN:
417 case ISD::FSQRT:
418 case ISD::FSIN:
419 case ISD::FCOS:
420 case ISD::FTAN:
421 case ISD::FASIN:
422 case ISD::FACOS:
423 case ISD::FATAN:
424 case ISD::FATAN2:
425 case ISD::FSINH:
426 case ISD::FCOSH:
427 case ISD::FTANH:
428 case ISD::FLDEXP:
429 case ISD::FPOWI:
430 case ISD::FPOW:
431 case ISD::FCBRT:
432 case ISD::FLOG:
433 case ISD::FLOG2:
434 case ISD::FLOG10:
435 case ISD::FEXP:
436 case ISD::FEXP2:
437 case ISD::FEXP10:
438 case ISD::FCEIL:
439 case ISD::FTRUNC:
440 case ISD::FRINT:
441 case ISD::FNEARBYINT:
442 case ISD::FROUND:
443 case ISD::FROUNDEVEN:
444 case ISD::FFLOOR:
445 case ISD::FP_ROUND:
446 case ISD::FP_EXTEND:
447 case ISD::FPTRUNC_ROUND:
448 case ISD::FMA:
449 case ISD::SIGN_EXTEND_INREG:
450 case ISD::ANY_EXTEND_VECTOR_INREG:
451 case ISD::SIGN_EXTEND_VECTOR_INREG:
452 case ISD::ZERO_EXTEND_VECTOR_INREG:
453 case ISD::SMIN:
454 case ISD::SMAX:
455 case ISD::UMIN:
456 case ISD::UMAX:
457 case ISD::SMUL_LOHI:
458 case ISD::UMUL_LOHI:
459 case ISD::SADDO:
460 case ISD::UADDO:
461 case ISD::SSUBO:
462 case ISD::USUBO:
463 case ISD::SMULO:
464 case ISD::UMULO:
465 case ISD::CONVERT_FROM_ARBITRARY_FP:
466 case ISD::FCANONICALIZE:
467 case ISD::FFREXP:
468 case ISD::FMODF:
469 case ISD::FSINCOS:
470 case ISD::FSINCOSPI:
471 case ISD::SADDSAT:
472 case ISD::UADDSAT:
473 case ISD::SSUBSAT:
474 case ISD::USUBSAT:
475 case ISD::SSHLSAT:
476 case ISD::USHLSAT:
477 case ISD::FP_TO_SINT_SAT:
478 case ISD::FP_TO_UINT_SAT:
479 case ISD::MGATHER:
480 case ISD::VECTOR_COMPRESS:
481 case ISD::SCMP:
482 case ISD::UCMP:
483 case ISD::LOOP_DEPENDENCE_WAR_MASK:
484 case ISD::LOOP_DEPENDENCE_RAW_MASK:
485 Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: Node->getValueType(ResNo: 0));
486 break;
487 case ISD::SMULFIX:
488 case ISD::SMULFIXSAT:
489 case ISD::UMULFIX:
490 case ISD::UMULFIXSAT:
491 case ISD::SDIVFIX:
492 case ISD::SDIVFIXSAT:
493 case ISD::UDIVFIX:
494 case ISD::UDIVFIXSAT: {
495 unsigned Scale = Node->getConstantOperandVal(Num: 2);
496 Action = TLI.getFixedPointOperationAction(Op: Node->getOpcode(),
497 VT: Node->getValueType(ResNo: 0), Scale);
498 break;
499 }
500 case ISD::LROUND:
501 case ISD::LLROUND:
502 case ISD::LRINT:
503 case ISD::LLRINT:
504 case ISD::SINT_TO_FP:
505 case ISD::UINT_TO_FP:
506 case ISD::VECREDUCE_ADD:
507 case ISD::VECREDUCE_MUL:
508 case ISD::VECREDUCE_AND:
509 case ISD::VECREDUCE_OR:
510 case ISD::VECREDUCE_XOR:
511 case ISD::VECREDUCE_SMAX:
512 case ISD::VECREDUCE_SMIN:
513 case ISD::VECREDUCE_UMAX:
514 case ISD::VECREDUCE_UMIN:
515 case ISD::VECREDUCE_FADD:
516 case ISD::VECREDUCE_FMAX:
517 case ISD::VECREDUCE_FMAXIMUM:
518 case ISD::VECREDUCE_FMIN:
519 case ISD::VECREDUCE_FMINIMUM:
520 case ISD::VECREDUCE_FMUL:
521 case ISD::CTTZ_ELTS:
522 case ISD::CTTZ_ELTS_ZERO_POISON:
523 case ISD::VECTOR_FIND_LAST_ACTIVE:
524 Action = TLI.getOperationAction(Op: Node->getOpcode(),
525 VT: Node->getOperand(Num: 0).getValueType());
526 break;
527 case ISD::VECREDUCE_SEQ_FADD:
528 case ISD::VECREDUCE_SEQ_FMUL:
529 Action = TLI.getOperationAction(Op: Node->getOpcode(),
530 VT: Node->getOperand(Num: 1).getValueType());
531 break;
532 case ISD::SETCC: {
533 MVT OpVT = Node->getOperand(Num: 0).getSimpleValueType();
534 ISD::CondCode CCCode = cast<CondCodeSDNode>(Val: Node->getOperand(Num: 2))->get();
535 Action = TLI.getCondCodeAction(CC: CCCode, VT: OpVT);
536 if (Action == TargetLowering::Legal)
537 Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: OpVT);
538 break;
539 }
540 case ISD::PARTIAL_REDUCE_UMLA:
541 case ISD::PARTIAL_REDUCE_SMLA:
542 case ISD::PARTIAL_REDUCE_SUMLA:
543 case ISD::PARTIAL_REDUCE_FMLA:
544 Action =
545 TLI.getPartialReduceMLAAction(Opc: Op.getOpcode(), AccVT: Node->getValueType(ResNo: 0),
546 InputVT: Node->getOperand(Num: 1).getValueType());
547 break;
548
549#define BEGIN_REGISTER_VP_SDNODE(VPID, LEGALPOS, ...) \
550 case ISD::VPID: { \
551 EVT LegalizeVT = LEGALPOS < 0 ? Node->getValueType(-(1 + LEGALPOS)) \
552 : Node->getOperand(LEGALPOS).getValueType(); \
553 if (ISD::VPID == ISD::VP_SETCC) { \
554 ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get(); \
555 Action = TLI.getCondCodeAction(CCCode, LegalizeVT.getSimpleVT()); \
556 if (Action != TargetLowering::Legal) \
557 break; \
558 } \
559 /* Defer non-vector results to LegalizeDAG. */ \
560 if (!Node->getValueType(0).isVector() && \
561 Node->getValueType(0) != MVT::Other) { \
562 Action = TargetLowering::Legal; \
563 break; \
564 } \
565 Action = TLI.getOperationAction(Node->getOpcode(), LegalizeVT); \
566 } break;
567#include "llvm/IR/VPIntrinsics.def"
568 }
569
570 LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
571
572 SmallVector<SDValue, 8> ResultVals;
573 switch (Action) {
574 default: llvm_unreachable("This action is not supported yet!");
575 case TargetLowering::Promote:
576 assert((Op.getOpcode() != ISD::LOAD && Op.getOpcode() != ISD::STORE) &&
577 "This action is not supported yet!");
578 LLVM_DEBUG(dbgs() << "Promoting\n");
579 Promote(Node, Results&: ResultVals);
580 assert(!ResultVals.empty() && "No results for promotion?");
581 break;
582 case TargetLowering::Legal:
583 LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
584 break;
585 case TargetLowering::Custom:
586 LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
587 if (LowerOperationWrapper(N: Node, Results&: ResultVals))
588 break;
589 LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
590 [[fallthrough]];
591 case TargetLowering::Expand:
592 LLVM_DEBUG(dbgs() << "Expanding\n");
593 Expand(Node, Results&: ResultVals);
594 break;
595 }
596
597 if (ResultVals.empty())
598 return TranslateLegalizeResults(Op, Result: Node);
599
600 Changed = true;
601 return RecursivelyLegalizeResults(Op, Results: ResultVals);
602}
603
604// FIXME: This is very similar to TargetLowering::LowerOperationWrapper. Can we
605// merge them somehow?
606bool VectorLegalizer::LowerOperationWrapper(SDNode *Node,
607 SmallVectorImpl<SDValue> &Results) {
608 SDValue Res = TLI.LowerOperation(Op: SDValue(Node, 0), DAG);
609
610 if (!Res.getNode())
611 return false;
612
613 if (Res == SDValue(Node, 0))
614 return true;
615
616 // If the original node has one result, take the return value from
617 // LowerOperation as is. It might not be result number 0.
618 if (Node->getNumValues() == 1) {
619 Results.push_back(Elt: Res);
620 return true;
621 }
622
623 // If the original node has multiple results, then the return node should
624 // have the same number of results.
625 assert((Node->getNumValues() == Res->getNumValues()) &&
626 "Lowering returned the wrong number of results!");
627
628 // Places new result values base on N result number.
629 for (unsigned I = 0, E = Node->getNumValues(); I != E; ++I)
630 Results.push_back(Elt: Res.getValue(R: I));
631
632 return true;
633}
634
635void VectorLegalizer::PromoteSETCC(SDNode *Node,
636 SmallVectorImpl<SDValue> &Results) {
637 MVT VecVT = Node->getOperand(Num: 0).getSimpleValueType();
638 MVT NewVecVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT: VecVT);
639
640 unsigned ExtOp = VecVT.isFloatingPoint() ? ISD::FP_EXTEND : ISD::ANY_EXTEND;
641
642 SDLoc DL(Node);
643 SmallVector<SDValue, 5> Operands(Node->getNumOperands());
644
645 Operands[0] = DAG.getNode(Opcode: ExtOp, DL, VT: NewVecVT, Operand: Node->getOperand(Num: 0));
646 Operands[1] = DAG.getNode(Opcode: ExtOp, DL, VT: NewVecVT, Operand: Node->getOperand(Num: 1));
647 Operands[2] = Node->getOperand(Num: 2);
648
649 if (Node->getOpcode() == ISD::VP_SETCC) {
650 Operands[3] = Node->getOperand(Num: 3); // mask
651 Operands[4] = Node->getOperand(Num: 4); // evl
652 }
653
654 SDValue Res = DAG.getNode(Opcode: Node->getOpcode(), DL, VT: Node->getSimpleValueType(ResNo: 0),
655 Ops: Operands, Flags: Node->getFlags());
656
657 Results.push_back(Elt: Res);
658}
659
660void VectorLegalizer::PromoteSTRICT(SDNode *Node,
661 SmallVectorImpl<SDValue> &Results) {
662 MVT VecVT = Node->getOperand(Num: 1).getSimpleValueType();
663 MVT NewVecVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT: VecVT);
664
665 assert(VecVT.isFloatingPoint());
666
667 SDLoc DL(Node);
668 SmallVector<SDValue, 5> Operands(Node->getNumOperands());
669 SmallVector<SDValue, 2> Chains;
670
671 for (unsigned j = 1; j != Node->getNumOperands(); ++j)
672 if (Node->getOperand(Num: j).getValueType().isVector() &&
673 !(ISD::isVPOpcode(Opcode: Node->getOpcode()) &&
674 ISD::getVPMaskIdx(Opcode: Node->getOpcode()) == j)) // Skip mask operand.
675 {
676 // promote the vector operand.
677 SDValue Ext =
678 DAG.getNode(Opcode: ISD::STRICT_FP_EXTEND, DL, ResultTys: {NewVecVT, MVT::Other},
679 Ops: {Node->getOperand(Num: 0), Node->getOperand(Num: j)});
680 Operands[j] = Ext.getValue(R: 0);
681 Chains.push_back(Elt: Ext.getValue(R: 1));
682 } else
683 Operands[j] = Node->getOperand(Num: j); // Skip no vector operand.
684
685 SDVTList VTs = DAG.getVTList(VT1: NewVecVT, VT2: Node->getValueType(ResNo: 1));
686
687 Operands[0] = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: Chains);
688
689 SDValue Res =
690 DAG.getNode(Opcode: Node->getOpcode(), DL, VTList: VTs, Ops: Operands, Flags: Node->getFlags());
691
692 SDValue Round =
693 DAG.getNode(Opcode: ISD::STRICT_FP_ROUND, DL, ResultTys: {VecVT, MVT::Other},
694 Ops: {Res.getValue(R: 1), Res.getValue(R: 0),
695 DAG.getIntPtrConstant(Val: 0, DL, /*isTarget=*/true)});
696
697 Results.push_back(Elt: Round.getValue(R: 0));
698 Results.push_back(Elt: Round.getValue(R: 1));
699}
700
701void VectorLegalizer::PromoteFloatVECREDUCE(SDNode *Node,
702 SmallVectorImpl<SDValue> &Results,
703 bool NonArithmetic) {
704 MVT OpVT = Node->getOperand(Num: 0).getSimpleValueType();
705 assert(OpVT.isFloatingPoint() && "Expected floating point reduction!");
706 MVT NewOpVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT: OpVT);
707
708 SDLoc DL(Node);
709 SDValue NewOp = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: NewOpVT, Operand: Node->getOperand(Num: 0));
710 SDValue Rdx =
711 DAG.getNode(Opcode: Node->getOpcode(), DL, VT: NewOpVT.getVectorElementType(), Operand: NewOp,
712 Flags: Node->getFlags());
713 SDValue Res =
714 DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT: Node->getValueType(ResNo: 0), N1: Rdx,
715 N2: DAG.getIntPtrConstant(Val: NonArithmetic, DL, /*isTarget=*/true));
716 Results.push_back(Elt: Res);
717}
718
719void VectorLegalizer::PromoteVECTOR_COMPRESS(
720 SDNode *Node, SmallVectorImpl<SDValue> &Results) {
721 SDLoc DL(Node);
722 EVT VT = Node->getValueType(ResNo: 0);
723 MVT PromotedVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT: VT.getSimpleVT());
724 assert((VT.isInteger() || VT.getSizeInBits() == PromotedVT.getSizeInBits()) &&
725 "Only integer promotion or bitcasts between types is supported");
726
727 SDValue Vec = Node->getOperand(Num: 0);
728 SDValue Mask = Node->getOperand(Num: 1);
729 SDValue Passthru = Node->getOperand(Num: 2);
730 if (VT.isInteger()) {
731 Vec = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: PromotedVT, Operand: Vec);
732 Mask = TLI.promoteTargetBoolean(DAG, Bool: Mask, ValVT: PromotedVT);
733 Passthru = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: PromotedVT, Operand: Passthru);
734 } else {
735 Vec = DAG.getBitcast(VT: PromotedVT, V: Vec);
736 Passthru = DAG.getBitcast(VT: PromotedVT, V: Passthru);
737 }
738
739 SDValue Result =
740 DAG.getNode(Opcode: ISD::VECTOR_COMPRESS, DL, VT: PromotedVT, N1: Vec, N2: Mask, N3: Passthru);
741 Result = VT.isInteger() ? DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Result)
742 : DAG.getBitcast(VT, V: Result);
743 Results.push_back(Elt: Result);
744}
745
746void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
747 // For a few operations there is a specific concept for promotion based on
748 // the operand's type.
749 switch (Node->getOpcode()) {
750 case ISD::SINT_TO_FP:
751 case ISD::UINT_TO_FP:
752 case ISD::STRICT_SINT_TO_FP:
753 case ISD::STRICT_UINT_TO_FP:
754 // "Promote" the operation by extending the operand.
755 PromoteINT_TO_FP(Node, Results);
756 return;
757 case ISD::FP_TO_UINT:
758 case ISD::FP_TO_SINT:
759 case ISD::STRICT_FP_TO_UINT:
760 case ISD::STRICT_FP_TO_SINT:
761 // Promote the operation by extending the operand.
762 PromoteFP_TO_INT(Node, Results);
763 return;
764 case ISD::VP_SETCC:
765 case ISD::SETCC:
766 // Promote the operation by extending the operand.
767 PromoteSETCC(Node, Results);
768 return;
769 case ISD::STRICT_FADD:
770 case ISD::STRICT_FSUB:
771 case ISD::STRICT_FMUL:
772 case ISD::STRICT_FDIV:
773 case ISD::STRICT_FSQRT:
774 case ISD::STRICT_FMA:
775 PromoteSTRICT(Node, Results);
776 return;
777 case ISD::VECREDUCE_FADD:
778 PromoteFloatVECREDUCE(Node, Results, /*NonArithmetic=*/false);
779 return;
780 case ISD::VECREDUCE_FMAX:
781 case ISD::VECREDUCE_FMAXIMUM:
782 case ISD::VECREDUCE_FMIN:
783 case ISD::VECREDUCE_FMINIMUM:
784 PromoteFloatVECREDUCE(Node, Results, /*NonArithmetic=*/true);
785 return;
786 case ISD::VECTOR_COMPRESS:
787 PromoteVECTOR_COMPRESS(Node, Results);
788 return;
789
790 case ISD::FP_ROUND:
791 case ISD::FP_EXTEND:
792 // These operations are used to do promotion so they can't be promoted
793 // themselves.
794 llvm_unreachable("Don't know how to promote this operation!");
795 case ISD::VP_FABS:
796 case ISD::VP_FCOPYSIGN:
797 case ISD::VP_FNEG:
798 // Promoting fabs, fneg, and fcopysign changes their semantics.
799 llvm_unreachable("These operations should not be promoted");
800 }
801
802 // There are currently two cases of vector promotion:
803 // 1) Bitcasting a vector of integers to a different type to a vector of the
804 // same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64.
805 // 2) Extending a vector of floats to a vector of the same number of larger
806 // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32.
807 assert(Node->getNumValues() == 1 &&
808 "Can't promote a vector with multiple results!");
809 MVT VT = Node->getSimpleValueType(ResNo: 0);
810 MVT NVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT);
811 SDLoc dl(Node);
812 SmallVector<SDValue, 4> Operands(Node->getNumOperands());
813
814 for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
815 // Do not promote the mask operand of a VP OP.
816 bool SkipPromote = ISD::isVPOpcode(Opcode: Node->getOpcode()) &&
817 ISD::getVPMaskIdx(Opcode: Node->getOpcode()) == j;
818 if (Node->getOperand(Num: j).getValueType().isVector() && !SkipPromote)
819 if (Node->getOperand(Num: j)
820 .getValueType()
821 .getVectorElementType()
822 .isFloatingPoint() &&
823 NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())
824 if (ISD::isVPOpcode(Opcode: Node->getOpcode())) {
825 unsigned EVLIdx =
826 *ISD::getVPExplicitVectorLengthIdx(Opcode: Node->getOpcode());
827 unsigned MaskIdx = *ISD::getVPMaskIdx(Opcode: Node->getOpcode());
828 Operands[j] =
829 DAG.getNode(Opcode: ISD::VP_FP_EXTEND, DL: dl, VT: NVT, N1: Node->getOperand(Num: j),
830 N2: Node->getOperand(Num: MaskIdx), N3: Node->getOperand(Num: EVLIdx));
831 } else {
832 Operands[j] =
833 DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: NVT, Operand: Node->getOperand(Num: j));
834 }
835 else
836 Operands[j] = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: NVT, Operand: Node->getOperand(Num: j));
837 else
838 Operands[j] = Node->getOperand(Num: j);
839 }
840
841 SDValue Res =
842 DAG.getNode(Opcode: Node->getOpcode(), DL: dl, VT: NVT, Ops: Operands, Flags: Node->getFlags());
843
844 if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
845 (VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
846 NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
847 if (ISD::isVPOpcode(Opcode: Node->getOpcode())) {
848 unsigned EVLIdx = *ISD::getVPExplicitVectorLengthIdx(Opcode: Node->getOpcode());
849 unsigned MaskIdx = *ISD::getVPMaskIdx(Opcode: Node->getOpcode());
850 Res = DAG.getNode(Opcode: ISD::VP_FP_ROUND, DL: dl, VT, N1: Res,
851 N2: Node->getOperand(Num: MaskIdx), N3: Node->getOperand(Num: EVLIdx));
852 } else {
853 Res = DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT, N1: Res,
854 N2: DAG.getIntPtrConstant(Val: 0, DL: dl, /*isTarget=*/true));
855 }
856 else
857 Res = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT, Operand: Res);
858
859 Results.push_back(Elt: Res);
860}
861
862void VectorLegalizer::PromoteINT_TO_FP(SDNode *Node,
863 SmallVectorImpl<SDValue> &Results) {
864 // INT_TO_FP operations may require the input operand be promoted even
865 // when the type is otherwise legal.
866 bool IsStrict = Node->isStrictFPOpcode();
867 MVT VT = Node->getOperand(Num: IsStrict ? 1 : 0).getSimpleValueType();
868 MVT NVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT);
869 assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
870 "Vectors have different number of elements!");
871
872 SDLoc dl(Node);
873 SmallVector<SDValue, 4> Operands(Node->getNumOperands());
874
875 unsigned Opc = (Node->getOpcode() == ISD::UINT_TO_FP ||
876 Node->getOpcode() == ISD::STRICT_UINT_TO_FP)
877 ? ISD::ZERO_EXTEND
878 : ISD::SIGN_EXTEND;
879 for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
880 if (Node->getOperand(Num: j).getValueType().isVector())
881 Operands[j] = DAG.getNode(Opcode: Opc, DL: dl, VT: NVT, Operand: Node->getOperand(Num: j));
882 else
883 Operands[j] = Node->getOperand(Num: j);
884 }
885
886 if (IsStrict) {
887 SDValue Res = DAG.getNode(Opcode: Node->getOpcode(), DL: dl,
888 ResultTys: {Node->getValueType(ResNo: 0), MVT::Other}, Ops: Operands);
889 Results.push_back(Elt: Res);
890 Results.push_back(Elt: Res.getValue(R: 1));
891 return;
892 }
893
894 SDValue Res =
895 DAG.getNode(Opcode: Node->getOpcode(), DL: dl, VT: Node->getValueType(ResNo: 0), Ops: Operands);
896 Results.push_back(Elt: Res);
897}
898
899// For FP_TO_INT we promote the result type to a vector type with wider
900// elements and then truncate the result. This is different from the default
901// PromoteVector which uses bitcast to promote thus assumning that the
902// promoted vector type has the same overall size.
903void VectorLegalizer::PromoteFP_TO_INT(SDNode *Node,
904 SmallVectorImpl<SDValue> &Results) {
905 MVT VT = Node->getSimpleValueType(ResNo: 0);
906 MVT NVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT);
907 bool IsStrict = Node->isStrictFPOpcode();
908 assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
909 "Vectors have different number of elements!");
910
911 unsigned NewOpc = Node->getOpcode();
912 // Change FP_TO_UINT to FP_TO_SINT if possible.
913 // TODO: Should we only do this if FP_TO_UINT itself isn't legal?
914 if (NewOpc == ISD::FP_TO_UINT &&
915 TLI.isOperationLegalOrCustom(Op: ISD::FP_TO_SINT, VT: NVT))
916 NewOpc = ISD::FP_TO_SINT;
917
918 if (NewOpc == ISD::STRICT_FP_TO_UINT &&
919 TLI.isOperationLegalOrCustom(Op: ISD::STRICT_FP_TO_SINT, VT: NVT))
920 NewOpc = ISD::STRICT_FP_TO_SINT;
921
922 SDLoc dl(Node);
923 SDValue Promoted, Chain;
924 if (IsStrict) {
925 Promoted = DAG.getNode(Opcode: NewOpc, DL: dl, ResultTys: {NVT, MVT::Other},
926 Ops: {Node->getOperand(Num: 0), Node->getOperand(Num: 1)});
927 Chain = Promoted.getValue(R: 1);
928 } else
929 Promoted = DAG.getNode(Opcode: NewOpc, DL: dl, VT: NVT, Operand: Node->getOperand(Num: 0));
930
931 // Assert that the converted value fits in the original type. If it doesn't
932 // (eg: because the value being converted is too big), then the result of the
933 // original operation was undefined anyway, so the assert is still correct.
934 if (Node->getOpcode() == ISD::FP_TO_UINT ||
935 Node->getOpcode() == ISD::STRICT_FP_TO_UINT)
936 NewOpc = ISD::AssertZext;
937 else
938 NewOpc = ISD::AssertSext;
939
940 Promoted = DAG.getNode(Opcode: NewOpc, DL: dl, VT: NVT, N1: Promoted,
941 N2: DAG.getValueType(VT.getScalarType()));
942 Promoted = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Promoted);
943 Results.push_back(Elt: Promoted);
944 if (IsStrict)
945 Results.push_back(Elt: Chain);
946}
947
948std::pair<SDValue, SDValue> VectorLegalizer::ExpandLoad(SDNode *N) {
949 LoadSDNode *LD = cast<LoadSDNode>(Val: N);
950 return TLI.scalarizeVectorLoad(LD, DAG);
951}
952
953SDValue VectorLegalizer::ExpandStore(SDNode *N) {
954 StoreSDNode *ST = cast<StoreSDNode>(Val: N);
955 SDValue TF = TLI.scalarizeVectorStore(ST, DAG);
956 return TF;
957}
958
// Expand a vector operation whose legalization action is Expand. Each case
// attempts a DAG-level expansion: on success it pushes the results and
// returns; on failure it breaks out of the switch and falls through to the
// generic per-element unroll at the bottom of the function.
void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
  switch (Node->getOpcode()) {
  case ISD::LOAD: {
    std::pair<SDValue, SDValue> Tmp = ExpandLoad(N: Node);
    Results.push_back(Elt: Tmp.first);
    Results.push_back(Elt: Tmp.second);
    return;
  }
  case ISD::STORE:
    Results.push_back(Elt: ExpandStore(N: Node));
    return;
  case ISD::MERGE_VALUES:
    // Each result is simply the corresponding input operand.
    for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
      Results.push_back(Elt: Node->getOperand(Num: i));
    return;
  case ISD::SIGN_EXTEND_INREG:
    if (SDValue Expanded = ExpandSEXTINREG(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::ANY_EXTEND_VECTOR_INREG:
    Results.push_back(Elt: ExpandANY_EXTEND_VECTOR_INREG(Node));
    return;
  case ISD::SIGN_EXTEND_VECTOR_INREG:
    Results.push_back(Elt: ExpandSIGN_EXTEND_VECTOR_INREG(Node));
    return;
  case ISD::ZERO_EXTEND_VECTOR_INREG:
    Results.push_back(Elt: ExpandZERO_EXTEND_VECTOR_INREG(Node));
    return;
  case ISD::BSWAP:
    if (SDValue Expanded = ExpandBSWAP(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_BSWAP:
    Results.push_back(Elt: TLI.expandVPBSWAP(N: Node, DAG));
    return;
  case ISD::VSELECT:
    if (SDValue Expanded = ExpandVSELECT(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_SELECT:
    if (SDValue Expanded = ExpandVP_SELECT(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_SREM:
  case ISD::VP_UREM:
    if (SDValue Expanded = ExpandVP_REM(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_FNEG:
    if (SDValue Expanded = ExpandVP_FNEG(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_FABS:
    if (SDValue Expanded = ExpandVP_FABS(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_FCOPYSIGN:
    if (SDValue Expanded = ExpandVP_FCOPYSIGN(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::SELECT:
    if (SDValue Expanded = ExpandSELECT(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::SELECT_CC: {
    // Scalable vectors can't be unrolled, so lower SELECT_CC to an explicit
    // SETCC followed by a SELECT on the resulting mask.
    if (Node->getValueType(ResNo: 0).isScalableVector()) {
      EVT CondVT = TLI.getSetCCResultType(
          DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: Node->getValueType(ResNo: 0));
      SDValue SetCC =
          DAG.getNode(Opcode: ISD::SETCC, DL: SDLoc(Node), VT: CondVT, N1: Node->getOperand(Num: 0),
                      N2: Node->getOperand(Num: 1), N3: Node->getOperand(Num: 4));
      Results.push_back(Elt: DAG.getSelect(DL: SDLoc(Node), VT: Node->getValueType(ResNo: 0), Cond: SetCC,
                                       LHS: Node->getOperand(Num: 2),
                                       RHS: Node->getOperand(Num: 3)));
      return;
    }
    break;
  }
  case ISD::FP_TO_UINT:
    ExpandFP_TO_UINT(Node, Results);
    return;
  case ISD::UINT_TO_FP:
    ExpandUINT_TO_FLOAT(Node, Results);
    return;
  case ISD::FNEG:
    if (SDValue Expanded = ExpandFNEG(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::FABS:
    if (SDValue Expanded = ExpandFABS(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::FCOPYSIGN:
    if (SDValue Expanded = ExpandFCOPYSIGN(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::FCANONICALIZE: {
    // If the scalar element type has a
    // Legal/Custom FCANONICALIZE, don't
    // mess with the vector, fall back.
    EVT VT = Node->getValueType(ResNo: 0);
    EVT EltVT = VT.getVectorElementType();
    if (TLI.getOperationAction(Op: ISD::FCANONICALIZE, VT: EltVT.getSimpleVT()) !=
        TargetLowering::Expand)
      break;
    // Otherwise canonicalize the whole vector.
    SDValue Mul = TLI.expandFCANONICALIZE(Node, DAG);
    Results.push_back(Elt: Mul);
    return;
  }
  case ISD::FSUB:
    ExpandFSUB(Node, Results);
    return;
  case ISD::SETCC:
  case ISD::VP_SETCC:
    ExpandSETCC(Node, Results);
    return;
  case ISD::ABS:
    if (SDValue Expanded = TLI.expandABS(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::ABDS:
  case ISD::ABDU:
    if (SDValue Expanded = TLI.expandABD(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::AVGCEILS:
  case ISD::AVGCEILU:
  case ISD::AVGFLOORS:
  case ISD::AVGFLOORU:
    if (SDValue Expanded = TLI.expandAVG(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::BITREVERSE:
    if (SDValue Expanded = ExpandBITREVERSE(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_BITREVERSE:
    if (SDValue Expanded = TLI.expandVPBITREVERSE(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::CTPOP:
    if (SDValue Expanded = TLI.expandCTPOP(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_CTPOP:
    if (SDValue Expanded = TLI.expandVPCTPOP(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::CTLZ:
  case ISD::CTLZ_ZERO_UNDEF:
    if (SDValue Expanded = TLI.expandCTLZ(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_CTLZ:
  case ISD::VP_CTLZ_ZERO_UNDEF:
    if (SDValue Expanded = TLI.expandVPCTLZ(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::CTTZ:
  case ISD::CTTZ_ZERO_UNDEF:
    if (SDValue Expanded = TLI.expandCTTZ(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_CTTZ:
  case ISD::VP_CTTZ_ZERO_UNDEF:
    if (SDValue Expanded = TLI.expandVPCTTZ(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::FSHL:
  case ISD::VP_FSHL:
  case ISD::FSHR:
  case ISD::VP_FSHR:
    if (SDValue Expanded = TLI.expandFunnelShift(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::CLMUL:
  case ISD::CLMULR:
  case ISD::CLMULH:
    if (SDValue Expanded = TLI.expandCLMUL(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::ROTL:
  case ISD::ROTR:
    if (SDValue Expanded = TLI.expandROT(N: Node, AllowVectorOps: false /*AllowVectorOps*/, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::FMINNUM:
  case ISD::FMAXNUM:
    if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::FMINIMUM:
  case ISD::FMAXIMUM:
    Results.push_back(Elt: TLI.expandFMINIMUM_FMAXIMUM(N: Node, DAG));
    return;
  case ISD::FMINIMUMNUM:
  case ISD::FMAXIMUMNUM:
    Results.push_back(Elt: TLI.expandFMINIMUMNUM_FMAXIMUMNUM(N: Node, DAG));
    return;
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:
    if (SDValue Expanded = TLI.expandIntMINMAX(Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::UADDO:
  case ISD::USUBO:
    ExpandUADDSUBO(Node, Results);
    return;
  case ISD::SADDO:
  case ISD::SSUBO:
    ExpandSADDSUBO(Node, Results);
    return;
  case ISD::UMULO:
  case ISD::SMULO:
    ExpandMULO(Node, Results);
    return;
  case ISD::USUBSAT:
  case ISD::SSUBSAT:
  case ISD::UADDSAT:
  case ISD::SADDSAT:
    if (SDValue Expanded = TLI.expandAddSubSat(Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::USHLSAT:
  case ISD::SSHLSAT:
    if (SDValue Expanded = TLI.expandShlSat(Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT:
    // Expand the FP_TO_*INT_SAT if it is scalable to prevent it from
    // unrolling below (scalable vectors cannot be unrolled).
    if (Node->getValueType(ResNo: 0).isScalableVector()) {
      if (SDValue Expanded = TLI.expandFP_TO_INT_SAT(N: Node, DAG)) {
        Results.push_back(Elt: Expanded);
        return;
      }
    }
    break;
  case ISD::SMULFIX:
  case ISD::UMULFIX:
    if (SDValue Expanded = TLI.expandFixedPointMul(Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::SMULFIXSAT:
  case ISD::UMULFIXSAT:
    // FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet, not sure exactly
    // why. Maybe it results in worse codegen compared to the unroll for some
    // targets? This should probably be investigated. And if we still prefer to
    // unroll an explanation could be helpful.
    break;
  case ISD::SDIVFIX:
  case ISD::UDIVFIX:
    ExpandFixedPointDiv(Node, Results);
    return;
  case ISD::SDIVFIXSAT:
  case ISD::UDIVFIXSAT:
    break;
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)               \
  case ISD::STRICT_##DAGN:
#include "llvm/IR/ConstrainedOps.def"
    ExpandStrictFPOp(Node, Results);
    return;
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_MUL:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_UMIN:
  case ISD::VECREDUCE_FADD:
  case ISD::VECREDUCE_FMUL:
  case ISD::VECREDUCE_FMAX:
  case ISD::VECREDUCE_FMIN:
  case ISD::VECREDUCE_FMAXIMUM:
  case ISD::VECREDUCE_FMINIMUM:
    Results.push_back(Elt: TLI.expandVecReduce(Node, DAG));
    return;
  case ISD::PARTIAL_REDUCE_UMLA:
  case ISD::PARTIAL_REDUCE_SMLA:
  case ISD::PARTIAL_REDUCE_SUMLA:
  case ISD::PARTIAL_REDUCE_FMLA:
    Results.push_back(Elt: TLI.expandPartialReduceMLA(Node, DAG));
    return;
  case ISD::VECREDUCE_SEQ_FADD:
  case ISD::VECREDUCE_SEQ_FMUL:
    Results.push_back(Elt: TLI.expandVecReduceSeq(Node, DAG));
    return;
  case ISD::SREM:
  case ISD::UREM:
    ExpandREM(Node, Results);
    return;
  case ISD::VP_MERGE:
    if (SDValue Expanded = ExpandVP_MERGE(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::FREM: {
    // Try a vector-math library call before unrolling.
    RTLIB::Libcall LC = RTLIB::getREM(VT: Node->getValueType(ResNo: 0));
    if (tryExpandVecMathCall(Node, LC, Results))
      return;

    break;
  }
  case ISD::FSINCOS:
  case ISD::FSINCOSPI: {
    EVT VT = Node->getValueType(ResNo: 0);
    RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS
                            ? RTLIB::getSINCOS(RetVT: VT)
                            : RTLIB::getSINCOSPI(RetVT: VT);
    if (LC != RTLIB::UNKNOWN_LIBCALL &&
        TLI.expandMultipleResultFPLibCall(DAG, LC, Node, Results))
      return;

    // TODO: Try to see if there's a narrower call available to use before
    // scalarizing.
    break;
  }
  case ISD::FPOW: {
    RTLIB::Libcall LC = RTLIB::getPOW(RetVT: Node->getValueType(ResNo: 0));
    if (tryExpandVecMathCall(Node, LC, Results))
      return;

    // TODO: Try to see if there's a narrower call available to use before
    // scalarizing.
    break;
  }
  case ISD::FCBRT: {
    RTLIB::Libcall LC = RTLIB::getCBRT(RetVT: Node->getValueType(ResNo: 0));
    if (tryExpandVecMathCall(Node, LC, Results))
      return;

    // TODO: Try to see if there's a narrower call available to use before
    // scalarizing.
    break;
  }
  case ISD::FMODF: {
    EVT VT = Node->getValueType(ResNo: 0);
    RTLIB::Libcall LC = RTLIB::getMODF(VT);
    if (LC != RTLIB::UNKNOWN_LIBCALL &&
        TLI.expandMultipleResultFPLibCall(DAG, LC, Node, Results,
                                          /*CallRetResNo=*/0))
      return;
    break;
  }
  case ISD::VECTOR_COMPRESS:
    Results.push_back(Elt: TLI.expandVECTOR_COMPRESS(Node, DAG));
    return;
  case ISD::CTTZ_ELTS:
  case ISD::CTTZ_ELTS_ZERO_POISON:
    Results.push_back(Elt: TLI.expandCttzElts(Node, DAG));
    return;
  case ISD::VECTOR_FIND_LAST_ACTIVE:
    Results.push_back(Elt: TLI.expandVectorFindLastActive(N: Node, DAG));
    return;
  case ISD::SCMP:
  case ISD::UCMP:
    Results.push_back(Elt: TLI.expandCMP(Node, DAG));
    return;
  case ISD::LOOP_DEPENDENCE_WAR_MASK:
  case ISD::LOOP_DEPENDENCE_RAW_MASK:
    Results.push_back(Elt: ExpandLOOP_DEPENDENCE_MASK(N: Node));
    return;

  case ISD::FADD:
  case ISD::FMUL:
  case ISD::FMA:
  case ISD::FDIV:
  case ISD::FCEIL:
  case ISD::FFLOOR:
  case ISD::FNEARBYINT:
  case ISD::FRINT:
  case ISD::FROUND:
  case ISD::FROUNDEVEN:
  case ISD::FTRUNC:
  case ISD::FSQRT:
    if (SDValue Expanded = TLI.expandVectorNaryOpBySplitting(Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  }

  // No in-DAG expansion applied: fall back to unrolling the operation into
  // scalar operations on each element.
  SDValue Unrolled = DAG.UnrollVectorOp(N: Node);
  if (Node->getNumValues() == 1) {
    Results.push_back(Elt: Unrolled);
  } else {
    assert(Node->getNumValues() == Unrolled->getNumValues() &&
           "VectorLegalizer Expand returned wrong number of results!");
    for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I)
      Results.push_back(Elt: Unrolled.getValue(R: I));
  }
}
1419
1420SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
1421 // Lower a select instruction where the condition is a scalar and the
1422 // operands are vectors. Lower this select to VSELECT and implement it
1423 // using XOR AND OR. The selector bit is broadcasted.
1424 EVT VT = Node->getValueType(ResNo: 0);
1425 SDLoc DL(Node);
1426
1427 SDValue Mask = Node->getOperand(Num: 0);
1428 SDValue Op1 = Node->getOperand(Num: 1);
1429 SDValue Op2 = Node->getOperand(Num: 2);
1430
1431 assert(VT.isVector() && !Mask.getValueType().isVector()
1432 && Op1.getValueType() == Op2.getValueType() && "Invalid type");
1433
1434 // If we can't even use the basic vector operations of
1435 // AND,OR,XOR, we will have to scalarize the op.
1436 // Notice that the operation may be 'promoted' which means that it is
1437 // 'bitcasted' to another type which is handled.
1438 // Also, we need to be able to construct a splat vector using either
1439 // BUILD_VECTOR or SPLAT_VECTOR.
1440 // FIXME: Should we also permit fixed-length SPLAT_VECTOR as a fallback to
1441 // BUILD_VECTOR?
1442 if (TLI.getOperationAction(Op: ISD::AND, VT) == TargetLowering::Expand ||
1443 TLI.getOperationAction(Op: ISD::XOR, VT) == TargetLowering::Expand ||
1444 TLI.getOperationAction(Op: ISD::OR, VT) == TargetLowering::Expand ||
1445 TLI.getOperationAction(Op: VT.isFixedLengthVector() ? ISD::BUILD_VECTOR
1446 : ISD::SPLAT_VECTOR,
1447 VT) == TargetLowering::Expand)
1448 return SDValue();
1449
1450 // Generate a mask operand.
1451 EVT MaskTy = VT.changeVectorElementTypeToInteger();
1452
1453 // What is the size of each element in the vector mask.
1454 EVT BitTy = MaskTy.getScalarType();
1455
1456 Mask = DAG.getSelect(DL, VT: BitTy, Cond: Mask, LHS: DAG.getAllOnesConstant(DL, VT: BitTy),
1457 RHS: DAG.getConstant(Val: 0, DL, VT: BitTy));
1458
1459 // Broadcast the mask so that the entire vector is all one or all zero.
1460 Mask = DAG.getSplat(VT: MaskTy, DL, Op: Mask);
1461
1462 // Bitcast the operands to be the same type as the mask.
1463 // This is needed when we select between FP types because
1464 // the mask is a vector of integers.
1465 Op1 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MaskTy, Operand: Op1);
1466 Op2 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MaskTy, Operand: Op2);
1467
1468 SDValue NotMask = DAG.getNOT(DL, Val: Mask, VT: MaskTy);
1469
1470 Op1 = DAG.getNode(Opcode: ISD::AND, DL, VT: MaskTy, N1: Op1, N2: Mask);
1471 Op2 = DAG.getNode(Opcode: ISD::AND, DL, VT: MaskTy, N1: Op2, N2: NotMask);
1472 SDValue Val = DAG.getNode(Opcode: ISD::OR, DL, VT: MaskTy, N1: Op1, N2: Op2);
1473 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: Node->getValueType(ResNo: 0), Operand: Val);
1474}
1475
1476SDValue VectorLegalizer::ExpandSEXTINREG(SDNode *Node) {
1477 EVT VT = Node->getValueType(ResNo: 0);
1478
1479 // Make sure that the SRA and SHL instructions are available.
1480 if (TLI.getOperationAction(Op: ISD::SRA, VT) == TargetLowering::Expand ||
1481 TLI.getOperationAction(Op: ISD::SHL, VT) == TargetLowering::Expand)
1482 return SDValue();
1483
1484 SDLoc DL(Node);
1485 EVT OrigTy = cast<VTSDNode>(Val: Node->getOperand(Num: 1))->getVT();
1486
1487 unsigned BW = VT.getScalarSizeInBits();
1488 unsigned OrigBW = OrigTy.getScalarSizeInBits();
1489 SDValue ShiftSz = DAG.getConstant(Val: BW - OrigBW, DL, VT);
1490
1491 SDValue Op = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Node->getOperand(Num: 0), N2: ShiftSz);
1492 return DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: Op, N2: ShiftSz);
1493}
1494
1495// Generically expand a vector anyext in register to a shuffle of the relevant
1496// lanes into the appropriate locations, with other lanes left undef.
1497SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node) {
1498 SDLoc DL(Node);
1499 EVT VT = Node->getValueType(ResNo: 0);
1500 int NumElements = VT.getVectorNumElements();
1501 SDValue Src = Node->getOperand(Num: 0);
1502 EVT SrcVT = Src.getValueType();
1503 int NumSrcElements = SrcVT.getVectorNumElements();
1504
1505 // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
1506 // into a larger vector type.
1507 if (SrcVT.bitsLE(VT)) {
1508 assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
1509 "ANY_EXTEND_VECTOR_INREG vector size mismatch");
1510 NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
1511 SrcVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: SrcVT.getScalarType(),
1512 NumElements: NumSrcElements);
1513 Src = DAG.getInsertSubvector(DL, Vec: DAG.getUNDEF(VT: SrcVT), SubVec: Src, Idx: 0);
1514 }
1515
1516 // Build a base mask of undef shuffles.
1517 SmallVector<int, 16> ShuffleMask;
1518 ShuffleMask.resize(N: NumSrcElements, NV: -1);
1519
1520 // Place the extended lanes into the correct locations.
1521 int ExtLaneScale = NumSrcElements / NumElements;
1522 int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
1523 for (int i = 0; i < NumElements; ++i)
1524 ShuffleMask[i * ExtLaneScale + EndianOffset] = i;
1525
1526 return DAG.getNode(
1527 Opcode: ISD::BITCAST, DL, VT,
1528 Operand: DAG.getVectorShuffle(VT: SrcVT, dl: DL, N1: Src, N2: DAG.getPOISON(VT: SrcVT), Mask: ShuffleMask));
1529}
1530
1531SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node) {
1532 SDLoc DL(Node);
1533 EVT VT = Node->getValueType(ResNo: 0);
1534 SDValue Src = Node->getOperand(Num: 0);
1535 EVT SrcVT = Src.getValueType();
1536
1537 // First build an any-extend node which can be legalized above when we
1538 // recurse through it.
1539 SDValue Op = DAG.getNode(Opcode: ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Operand: Src);
1540
1541 // Now we need sign extend. Do this by shifting the elements. Even if these
1542 // aren't legal operations, they have a better chance of being legalized
1543 // without full scalarization than the sign extension does.
1544 unsigned EltWidth = VT.getScalarSizeInBits();
1545 unsigned SrcEltWidth = SrcVT.getScalarSizeInBits();
1546 SDValue ShiftAmount = DAG.getConstant(Val: EltWidth - SrcEltWidth, DL, VT);
1547 return DAG.getNode(Opcode: ISD::SRA, DL, VT,
1548 N1: DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Op, N2: ShiftAmount),
1549 N2: ShiftAmount);
1550}
1551
1552// Generically expand a vector zext in register to a shuffle of the relevant
1553// lanes into the appropriate locations, a blend of zero into the high bits,
1554// and a bitcast to the wider element type.
1555SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node) {
1556 SDLoc DL(Node);
1557 EVT VT = Node->getValueType(ResNo: 0);
1558 int NumElements = VT.getVectorNumElements();
1559 SDValue Src = Node->getOperand(Num: 0);
1560 EVT SrcVT = Src.getValueType();
1561 int NumSrcElements = SrcVT.getVectorNumElements();
1562
1563 // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
1564 // into a larger vector type.
1565 if (SrcVT.bitsLE(VT)) {
1566 assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
1567 "ZERO_EXTEND_VECTOR_INREG vector size mismatch");
1568 NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
1569 SrcVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: SrcVT.getScalarType(),
1570 NumElements: NumSrcElements);
1571 Src = DAG.getInsertSubvector(DL, Vec: DAG.getUNDEF(VT: SrcVT), SubVec: Src, Idx: 0);
1572 }
1573
1574 // Build up a zero vector to blend into this one.
1575 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: SrcVT);
1576
1577 // Shuffle the incoming lanes into the correct position, and pull all other
1578 // lanes from the zero vector.
1579 auto ShuffleMask = llvm::to_vector<16>(Range: llvm::seq<int>(Begin: 0, End: NumSrcElements));
1580
1581 int ExtLaneScale = NumSrcElements / NumElements;
1582 int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
1583 for (int i = 0; i < NumElements; ++i)
1584 ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i;
1585
1586 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT,
1587 Operand: DAG.getVectorShuffle(VT: SrcVT, dl: DL, N1: Zero, N2: Src, Mask: ShuffleMask));
1588}
1589
1590static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
1591 int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
1592 for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
1593 for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
1594 ShuffleMask.push_back(Elt: (I * ScalarSizeInBytes) + J);
1595}
1596
1597SDValue VectorLegalizer::ExpandBSWAP(SDNode *Node) {
1598 EVT VT = Node->getValueType(ResNo: 0);
1599
1600 // Scalable vectors can't use shuffle expansion.
1601 if (VT.isScalableVector())
1602 return TLI.expandBSWAP(N: Node, DAG);
1603
1604 // Generate a byte wise shuffle mask for the BSWAP.
1605 SmallVector<int, 16> ShuffleMask;
1606 createBSWAPShuffleMask(VT, ShuffleMask);
1607 EVT ByteVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: MVT::i8, NumElements: ShuffleMask.size());
1608
1609 // Only emit a shuffle if the mask is legal.
1610 if (TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) {
1611 SDLoc DL(Node);
1612 SDValue Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ByteVT, Operand: Node->getOperand(Num: 0));
1613 Op = DAG.getVectorShuffle(VT: ByteVT, dl: DL, N1: Op, N2: DAG.getPOISON(VT: ByteVT),
1614 Mask: ShuffleMask);
1615 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op);
1616 }
1617
1618 // If we have the appropriate vector bit operations, it is better to use them
1619 // than unrolling and expanding each component.
1620 if (TLI.isOperationLegalOrCustom(Op: ISD::SHL, VT) &&
1621 TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT) &&
1622 TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT) &&
1623 TLI.isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT))
1624 return TLI.expandBSWAP(N: Node, DAG);
1625
1626 // Otherwise let the caller unroll.
1627 return SDValue();
1628}
1629
1630SDValue VectorLegalizer::ExpandBITREVERSE(SDNode *Node) {
1631 EVT VT = Node->getValueType(ResNo: 0);
1632
1633 // We can't unroll or use shuffles for scalable vectors.
1634 if (VT.isScalableVector())
1635 return TLI.expandBITREVERSE(N: Node, DAG);
1636
1637 // If we have the scalar operation, it's probably cheaper to unroll it.
1638 if (TLI.isOperationLegalOrCustom(Op: ISD::BITREVERSE, VT: VT.getScalarType()))
1639 return SDValue();
1640
1641 // If the vector element width is a whole number of bytes, test if its legal
1642 // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte
1643 // vector. This greatly reduces the number of bit shifts necessary.
1644 unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
1645 if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) {
1646 SmallVector<int, 16> BSWAPMask;
1647 createBSWAPShuffleMask(VT, ShuffleMask&: BSWAPMask);
1648
1649 EVT ByteVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: MVT::i8, NumElements: BSWAPMask.size());
1650 if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) &&
1651 (TLI.isOperationLegalOrCustom(Op: ISD::BITREVERSE, VT: ByteVT) ||
1652 (TLI.isOperationLegalOrCustom(Op: ISD::SHL, VT: ByteVT) &&
1653 TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT: ByteVT) &&
1654 TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT: ByteVT) &&
1655 TLI.isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT: ByteVT)))) {
1656 SDLoc DL(Node);
1657 SDValue Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ByteVT, Operand: Node->getOperand(Num: 0));
1658 Op = DAG.getVectorShuffle(VT: ByteVT, dl: DL, N1: Op, N2: DAG.getPOISON(VT: ByteVT),
1659 Mask: BSWAPMask);
1660 Op = DAG.getNode(Opcode: ISD::BITREVERSE, DL, VT: ByteVT, Operand: Op);
1661 Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op);
1662 return Op;
1663 }
1664 }
1665
1666 // If we have the appropriate vector bit operations, it is better to use them
1667 // than unrolling and expanding each component.
1668 if (TLI.isOperationLegalOrCustom(Op: ISD::SHL, VT) &&
1669 TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT) &&
1670 TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT) &&
1671 TLI.isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT))
1672 return TLI.expandBITREVERSE(N: Node, DAG);
1673
1674 // Otherwise unroll.
1675 return SDValue();
1676}
1677
// Expand ISD::VSELECT into bitwise operations:
//   (Op1 & Mask) | (Op2 & ~Mask)
// for targets with no native vector blend. Returns SDValue() to have the
// caller unroll the operation instead.
SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) {
  // Implement VSELECT in terms of XOR, AND, OR
  // on platforms which do not support blend natively.
  SDLoc DL(Node);

  SDValue Mask = Node->getOperand(Num: 0);
  SDValue Op1 = Node->getOperand(Num: 1);
  SDValue Op2 = Node->getOperand(Num: 2);

  EVT VT = Mask.getValueType();

  // If we can't even use the basic vector operations of
  // AND,OR,XOR, we will have to scalarize the op.
  // Notice that the operation may be 'promoted' which means that it is
  // 'bitcasted' to another type which is handled.
  if (TLI.getOperationAction(Op: ISD::AND, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(Op: ISD::XOR, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(Op: ISD::OR, VT) == TargetLowering::Expand)
    return SDValue();

  // This operation also isn't safe with AND, OR, XOR when the boolean type is
  // 0/1 and the select operands aren't also booleans, as we need an all-ones
  // vector constant to mask with.
  // FIXME: Sign extend 1 to all ones if that's legal on the target.
  auto BoolContents = TLI.getBooleanContents(Type: Op1.getValueType());
  if (BoolContents != TargetLowering::ZeroOrNegativeOneBooleanContent &&
      !(BoolContents == TargetLowering::ZeroOrOneBooleanContent &&
        Op1.getValueType().getVectorElementType() == MVT::i1))
    return SDValue();

  // If the mask and the type are different sizes, unroll the vector op. This
  // can occur when getSetCCResultType returns something that is different in
  // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
  if (VT.getSizeInBits() != Op1.getValueSizeInBits())
    return SDValue();

  // Bitcast the operands to be the same type as the mask.
  // This is needed when we select between FP types because
  // the mask is a vector of integers.
  Op1 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op1);
  Op2 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op2);

  SDValue NotMask = DAG.getNOT(DL, Val: Mask, VT);

  // Result = (Op1 & Mask) | (Op2 & ~Mask), then cast back to the result type.
  Op1 = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Op1, N2: Mask);
  Op2 = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Op2, N2: NotMask);
  SDValue Val = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Op1, N2: Op2);
  return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: Node->getValueType(ResNo: 0), Operand: Val);
}
1727
1728SDValue VectorLegalizer::ExpandVP_SELECT(SDNode *Node) {
1729 // Implement VP_SELECT in terms of VP_XOR, VP_AND and VP_OR on platforms which
1730 // do not support it natively.
1731 SDLoc DL(Node);
1732
1733 SDValue Mask = Node->getOperand(Num: 0);
1734 SDValue Op1 = Node->getOperand(Num: 1);
1735 SDValue Op2 = Node->getOperand(Num: 2);
1736 SDValue EVL = Node->getOperand(Num: 3);
1737
1738 EVT VT = Mask.getValueType();
1739
1740 // If we can't even use the basic vector operations of
1741 // VP_AND,VP_OR,VP_XOR, we will have to scalarize the op.
1742 if (TLI.getOperationAction(Op: ISD::VP_AND, VT) == TargetLowering::Expand ||
1743 TLI.getOperationAction(Op: ISD::VP_XOR, VT) == TargetLowering::Expand ||
1744 TLI.getOperationAction(Op: ISD::VP_OR, VT) == TargetLowering::Expand)
1745 return SDValue();
1746
1747 // This operation also isn't safe when the operands aren't also booleans.
1748 if (Op1.getValueType().getVectorElementType() != MVT::i1)
1749 return SDValue();
1750
1751 SDValue Ones = DAG.getAllOnesConstant(DL, VT);
1752 SDValue NotMask = DAG.getNode(Opcode: ISD::VP_XOR, DL, VT, N1: Mask, N2: Ones, N3: Ones, N4: EVL);
1753
1754 Op1 = DAG.getNode(Opcode: ISD::VP_AND, DL, VT, N1: Op1, N2: Mask, N3: Ones, N4: EVL);
1755 Op2 = DAG.getNode(Opcode: ISD::VP_AND, DL, VT, N1: Op2, N2: NotMask, N3: Ones, N4: EVL);
1756 return DAG.getNode(Opcode: ISD::VP_OR, DL, VT, N1: Op1, N2: Op2, N3: Ones, N4: EVL);
1757}
1758
SDValue VectorLegalizer::ExpandVP_MERGE(SDNode *Node) {
  // Implement VP_MERGE in terms of VSELECT. Construct a mask where vector
  // indices less than the EVL/pivot are true. Combine that with the original
  // mask for a full-length mask. Use a full-length VSELECT to select between
  // the true and false values.
  SDLoc DL(Node);

  SDValue Mask = Node->getOperand(Num: 0);
  SDValue Op1 = Node->getOperand(Num: 1);
  SDValue Op2 = Node->getOperand(Num: 2);
  SDValue EVL = Node->getOperand(Num: 3);

  EVT MaskVT = Mask.getValueType();
  bool IsFixedLen = MaskVT.isFixedLengthVector();

  // Vector of EVL's integer type with the same element count as the mask,
  // used to build the "index < EVL" comparison below.
  EVT EVLVecVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: EVL.getValueType(),
                                  EC: MaskVT.getVectorElementCount());

  // If we can't construct the EVL mask efficiently, it's better to unroll.
  if ((IsFixedLen &&
       !TLI.isOperationLegalOrCustom(Op: ISD::BUILD_VECTOR, VT: EVLVecVT)) ||
      (!IsFixedLen &&
       (!TLI.isOperationLegalOrCustom(Op: ISD::STEP_VECTOR, VT: EVLVecVT) ||
        !TLI.isOperationLegalOrCustom(Op: ISD::SPLAT_VECTOR, VT: EVLVecVT))))
    return SDValue();

  // If using a SETCC would result in a different type than the mask type,
  // unroll.
  if (TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(),
                             VT: EVLVecVT) != MaskVT)
    return SDValue();

  // EVLMask[i] = (i < EVL), computed as step_vector < splat(EVL).
  SDValue StepVec = DAG.getStepVector(DL, ResVT: EVLVecVT);
  SDValue SplatEVL = DAG.getSplat(VT: EVLVecVT, DL, Op: EVL);
  SDValue EVLMask =
      DAG.getSetCC(DL, VT: MaskVT, LHS: StepVec, RHS: SplatEVL, Cond: ISD::CondCode::SETULT);

  // Lane i takes Op1 iff it is both below the pivot and set in the mask.
  SDValue FullMask = DAG.getNode(Opcode: ISD::AND, DL, VT: MaskVT, N1: Mask, N2: EVLMask);
  return DAG.getSelect(DL, VT: Node->getValueType(ResNo: 0), Cond: FullMask, LHS: Op1, RHS: Op2);
}
1799
1800SDValue VectorLegalizer::ExpandVP_REM(SDNode *Node) {
1801 // Implement VP_SREM/UREM in terms of VP_SDIV/VP_UDIV, VP_MUL, VP_SUB.
1802 EVT VT = Node->getValueType(ResNo: 0);
1803
1804 unsigned DivOpc = Node->getOpcode() == ISD::VP_SREM ? ISD::VP_SDIV : ISD::VP_UDIV;
1805
1806 if (!TLI.isOperationLegalOrCustom(Op: DivOpc, VT) ||
1807 !TLI.isOperationLegalOrCustom(Op: ISD::VP_MUL, VT) ||
1808 !TLI.isOperationLegalOrCustom(Op: ISD::VP_SUB, VT))
1809 return SDValue();
1810
1811 SDLoc DL(Node);
1812
1813 SDValue Dividend = Node->getOperand(Num: 0);
1814 SDValue Divisor = Node->getOperand(Num: 1);
1815 SDValue Mask = Node->getOperand(Num: 2);
1816 SDValue EVL = Node->getOperand(Num: 3);
1817
1818 // X % Y -> X-X/Y*Y
1819 SDValue Div = DAG.getNode(Opcode: DivOpc, DL, VT, N1: Dividend, N2: Divisor, N3: Mask, N4: EVL);
1820 SDValue Mul = DAG.getNode(Opcode: ISD::VP_MUL, DL, VT, N1: Divisor, N2: Div, N3: Mask, N4: EVL);
1821 return DAG.getNode(Opcode: ISD::VP_SUB, DL, VT, N1: Dividend, N2: Mul, N3: Mask, N4: EVL);
1822}
1823
1824SDValue VectorLegalizer::ExpandVP_FNEG(SDNode *Node) {
1825 EVT VT = Node->getValueType(ResNo: 0);
1826 EVT IntVT = VT.changeVectorElementTypeToInteger();
1827
1828 if (!TLI.isOperationLegalOrCustom(Op: ISD::VP_XOR, VT: IntVT))
1829 return SDValue();
1830
1831 SDValue Mask = Node->getOperand(Num: 1);
1832 SDValue EVL = Node->getOperand(Num: 2);
1833
1834 SDLoc DL(Node);
1835 SDValue Cast = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 0));
1836 SDValue SignMask = DAG.getConstant(
1837 Val: APInt::getSignMask(BitWidth: IntVT.getScalarSizeInBits()), DL, VT: IntVT);
1838 SDValue Xor = DAG.getNode(Opcode: ISD::VP_XOR, DL, VT: IntVT, N1: Cast, N2: SignMask, N3: Mask, N4: EVL);
1839 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Xor);
1840}
1841
1842SDValue VectorLegalizer::ExpandVP_FABS(SDNode *Node) {
1843 EVT VT = Node->getValueType(ResNo: 0);
1844 EVT IntVT = VT.changeVectorElementTypeToInteger();
1845
1846 if (!TLI.isOperationLegalOrCustom(Op: ISD::VP_AND, VT: IntVT))
1847 return SDValue();
1848
1849 SDValue Mask = Node->getOperand(Num: 1);
1850 SDValue EVL = Node->getOperand(Num: 2);
1851
1852 SDLoc DL(Node);
1853 SDValue Cast = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 0));
1854 SDValue ClearSignMask = DAG.getConstant(
1855 Val: APInt::getSignedMaxValue(numBits: IntVT.getScalarSizeInBits()), DL, VT: IntVT);
1856 SDValue ClearSign =
1857 DAG.getNode(Opcode: ISD::VP_AND, DL, VT: IntVT, N1: Cast, N2: ClearSignMask, N3: Mask, N4: EVL);
1858 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: ClearSign);
1859}
1860
// Expand VP_FCOPYSIGN(Mag, Sign) with predicated integer bit operations:
//   (Mag & ~SignMask) | (Sign & SignMask)
// Returns SDValue() to have the caller unroll the operation instead.
SDValue VectorLegalizer::ExpandVP_FCOPYSIGN(SDNode *Node) {
  EVT VT = Node->getValueType(ResNo: 0);

  // Magnitude and sign operands must be the same type for the bit trick.
  if (VT != Node->getOperand(Num: 1).getValueType())
    return SDValue();

  EVT IntVT = VT.changeVectorElementTypeToInteger();
  if (!TLI.isOperationLegalOrCustom(Op: ISD::VP_AND, VT: IntVT) ||
      !TLI.isOperationLegalOrCustom(Op: ISD::VP_XOR, VT: IntVT))
    return SDValue();

  SDValue Mask = Node->getOperand(Num: 2);
  SDValue EVL = Node->getOperand(Num: 3);

  SDLoc DL(Node);
  // Reinterpret both FP operands as integer vectors.
  SDValue Mag = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 0));
  SDValue Sign = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 1));

  // Isolate the sign bit of the Sign operand.
  SDValue SignMask = DAG.getConstant(
      Val: APInt::getSignMask(BitWidth: IntVT.getScalarSizeInBits()), DL, VT: IntVT);
  SDValue SignBit =
      DAG.getNode(Opcode: ISD::VP_AND, DL, VT: IntVT, N1: Sign, N2: SignMask, N3: Mask, N4: EVL);

  // Clear the sign bit of the Mag operand (signed max == ~sign-mask).
  SDValue ClearSignMask = DAG.getConstant(
      Val: APInt::getSignedMaxValue(numBits: IntVT.getScalarSizeInBits()), DL, VT: IntVT);
  SDValue ClearedSign =
      DAG.getNode(Opcode: ISD::VP_AND, DL, VT: IntVT, N1: Mag, N2: ClearSignMask, N3: Mask, N4: EVL);

  // The two halves occupy disjoint bits, so the OR may carry Disjoint.
  SDValue CopiedSign = DAG.getNode(Opcode: ISD::VP_OR, DL, VT: IntVT, N1: ClearedSign, N2: SignBit,
                                   N3: Mask, N4: EVL, Flags: SDNodeFlags::Disjoint);

  return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: CopiedSign);
}
1894
// Expand LOOP_DEPENDENCE_{WAR,RAW}_MASK into pointer arithmetic plus a
// GET_ACTIVE_LANE_MASK: the result enables the lanes that can be processed
// without a memory dependence between the source and sink pointers.
SDValue VectorLegalizer::ExpandLOOP_DEPENDENCE_MASK(SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(ResNo: 0);
  SDValue SourceValue = N->getOperand(Num: 0);
  SDValue SinkValue = N->getOperand(Num: 1);
  SDValue EltSizeInBytes = N->getOperand(Num: 2);

  // Note: The lane offset is scalable if the mask is scalable.
  ElementCount LaneOffsetEC =
      ElementCount::get(MinVal: N->getConstantOperandVal(Num: 3), Scalable: VT.isScalableVT());

  EVT PtrVT = SourceValue->getValueType(ResNo: 0);
  bool IsReadAfterWrite = N->getOpcode() == ISD::LOOP_DEPENDENCE_RAW_MASK;

  // Take the difference between the pointers and divide it by the element
  // size, to see how many lanes separate them.
  SDValue Diff = DAG.getNode(Opcode: ISD::SUB, DL, VT: PtrVT, N1: SinkValue, N2: SourceValue);
  if (IsReadAfterWrite)
    Diff = DAG.getNode(Opcode: ISD::ABS, DL, VT: PtrVT, Operand: Diff);
  Diff = DAG.getNode(Opcode: ISD::SDIV, DL, VT: PtrVT, N1: Diff, N2: EltSizeInBytes);

  // The pointers do not alias if:
  // * Diff <= 0 (WAR_MASK)
  // * Diff == 0 (RAW_MASK)
  EVT CmpVT =
      TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: PtrVT);
  SDValue Zero = DAG.getConstant(Val: 0, DL, VT: PtrVT);
  SDValue Cmp = DAG.getSetCC(DL, VT: CmpVT, LHS: Diff, RHS: Zero,
                             Cond: IsReadAfterWrite ? ISD::SETEQ : ISD::SETLE);

  // The pointers do not alias if:
  // Lane + LaneOffset < Diff (WAR/RAW_MASK)
  // When the no-alias compare is true, use -1 (unsigned max) as the limit so
  // every lane of the resulting active-lane mask is enabled.
  SDValue LaneOffset = DAG.getElementCount(DL, VT: PtrVT, EC: LaneOffsetEC);
  SDValue MaskN =
      DAG.getSelect(DL, VT: PtrVT, Cond: Cmp, LHS: DAG.getConstant(Val: -1, DL, VT: PtrVT), RHS: Diff);

  return DAG.getNode(Opcode: ISD::GET_ACTIVE_LANE_MASK, DL, VT, N1: LaneOffset, N2: MaskN);
}
1933
1934void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node,
1935 SmallVectorImpl<SDValue> &Results) {
1936 // Attempt to expand using TargetLowering.
1937 SDValue Result, Chain;
1938 if (TLI.expandFP_TO_UINT(N: Node, Result, Chain, DAG)) {
1939 Results.push_back(Elt: Result);
1940 if (Node->isStrictFPOpcode())
1941 Results.push_back(Elt: Chain);
1942 return;
1943 }
1944
1945 // Otherwise go ahead and unroll.
1946 if (Node->isStrictFPOpcode()) {
1947 UnrollStrictFPOp(Node, Results);
1948 return;
1949 }
1950
1951 Results.push_back(Elt: DAG.UnrollVectorOp(N: Node));
1952}
1953
// Expand [STRICT_]UINT_TO_FP for vectors. Tries the generic TargetLowering
// expansion first; otherwise splits the source into two half-words, converts
// each half with SINT_TO_FP and recombines as hi * 2^(BW/2) + lo. Falls back
// to unrolling when the required operations are unavailable.
void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node,
                                          SmallVectorImpl<SDValue> &Results) {
  bool IsStrict = Node->isStrictFPOpcode();
  unsigned OpNo = IsStrict ? 1 : 0;
  SDValue Src = Node->getOperand(Num: OpNo);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(ResNo: 0);
  SDLoc DL(Node);

  // Attempt to expand using TargetLowering.
  SDValue Result;
  SDValue Chain;
  if (TLI.expandUINT_TO_FP(N: Node, Result, Chain, DAG)) {
    Results.push_back(Elt: Result);
    if (IsStrict)
      Results.push_back(Elt: Chain);
    return;
  }

  // Make sure that the SINT_TO_FP and SRL instructions are available.
  if (((!IsStrict && TLI.getOperationAction(Op: ISD::SINT_TO_FP, VT: SrcVT) ==
                         TargetLowering::Expand) ||
       (IsStrict && TLI.getOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: SrcVT) ==
                        TargetLowering::Expand)) ||
      TLI.getOperationAction(Op: ISD::SRL, VT: SrcVT) == TargetLowering::Expand) {
    if (IsStrict) {
      UnrollStrictFPOp(Node, Results);
      return;
    }

    Results.push_back(Elt: DAG.UnrollVectorOp(N: Node));
    return;
  }

  unsigned BW = SrcVT.getScalarSizeInBits();
  assert((BW == 64 || BW == 32) &&
         "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");

  // If STRICT_/FMUL is not supported by the target (in case of f16) replace the
  // UINT_TO_FP with a larger float and round to the smaller type
  if ((!IsStrict && !TLI.isOperationLegalOrCustom(Op: ISD::FMUL, VT: DstVT)) ||
      (IsStrict && !TLI.isOperationLegalOrCustom(Op: ISD::STRICT_FMUL, VT: DstVT))) {
    EVT FPVT = BW == 32 ? MVT::f32 : MVT::f64;
    SDValue UIToFP;
    SDValue Result;
    SDValue TargetZero = DAG.getIntPtrConstant(Val: 0, DL, /*isTarget=*/true);
    EVT FloatVecVT = SrcVT.changeVectorElementType(Context&: *DAG.getContext(), EltVT: FPVT);
    if (IsStrict) {
      UIToFP = DAG.getNode(Opcode: ISD::STRICT_UINT_TO_FP, DL, ResultTys: {FloatVecVT, MVT::Other},
                           Ops: {Node->getOperand(Num: 0), Src});
      Result = DAG.getNode(Opcode: ISD::STRICT_FP_ROUND, DL, ResultTys: {DstVT, MVT::Other},
                           Ops: {Node->getOperand(Num: 0), UIToFP, TargetZero});
      Results.push_back(Elt: Result);
      Results.push_back(Elt: Result.getValue(R: 1));
    } else {
      UIToFP = DAG.getNode(Opcode: ISD::UINT_TO_FP, DL, VT: FloatVecVT, Operand: Src);
      Result = DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT: DstVT, N1: UIToFP, N2: TargetZero);
      Results.push_back(Elt: Result);
    }

    return;
  }

  SDValue HalfWord = DAG.getConstant(Val: BW / 2, DL, VT: SrcVT);

  // Constants to clear the upper part of the word.
  // Notice that we can also use SHL+SHR, but using a constant is slightly
  // faster on x86.
  uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
  SDValue HalfWordMask = DAG.getConstant(Val: HWMask, DL, VT: SrcVT);

  // Two to the power of half-word-size.
  SDValue TWOHW = DAG.getConstantFP(Val: 1ULL << (BW / 2), DL, VT: DstVT);

  // Clear upper part of LO, lower HI
  SDValue HI = DAG.getNode(Opcode: ISD::SRL, DL, VT: SrcVT, N1: Src, N2: HalfWord);
  SDValue LO = DAG.getNode(Opcode: ISD::AND, DL, VT: SrcVT, N1: Src, N2: HalfWordMask);

  if (IsStrict) {
    // Convert hi and lo to floats
    // Convert the hi part back to the upper values
    // TODO: Can any fast-math-flags be set on these nodes?
    SDValue fHI = DAG.getNode(Opcode: ISD::STRICT_SINT_TO_FP, DL, ResultTys: {DstVT, MVT::Other},
                              Ops: {Node->getOperand(Num: 0), HI});
    fHI = DAG.getNode(Opcode: ISD::STRICT_FMUL, DL, ResultTys: {DstVT, MVT::Other},
                      Ops: {fHI.getValue(R: 1), fHI, TWOHW});
    SDValue fLO = DAG.getNode(Opcode: ISD::STRICT_SINT_TO_FP, DL, ResultTys: {DstVT, MVT::Other},
                              Ops: {Node->getOperand(Num: 0), LO});

    // Join the per-half strict chains so the final add depends on both.
    SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, N1: fHI.getValue(R: 1),
                             N2: fLO.getValue(R: 1));

    // Add the two halves
    SDValue Result =
        DAG.getNode(Opcode: ISD::STRICT_FADD, DL, ResultTys: {DstVT, MVT::Other}, Ops: {TF, fHI, fLO});

    Results.push_back(Elt: Result);
    Results.push_back(Elt: Result.getValue(R: 1));
    return;
  }

  // Convert hi and lo to floats
  // Convert the hi part back to the upper values
  // TODO: Can any fast-math-flags be set on these nodes?
  SDValue fHI = DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT: DstVT, Operand: HI);
  fHI = DAG.getNode(Opcode: ISD::FMUL, DL, VT: DstVT, N1: fHI, N2: TWOHW);
  SDValue fLO = DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT: DstVT, Operand: LO);

  // Add the two halves
  Results.push_back(Elt: DAG.getNode(Opcode: ISD::FADD, DL, VT: DstVT, N1: fHI, N2: fLO));
}
2065
2066SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
2067 EVT VT = Node->getValueType(ResNo: 0);
2068 EVT IntVT = VT.changeVectorElementTypeToInteger();
2069
2070 if (!TLI.isOperationLegalOrCustom(Op: ISD::XOR, VT: IntVT))
2071 return SDValue();
2072
2073 // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
2074 if (!TLI.isOperationLegalOrCustomOrPromote(Op: ISD::FSUB, VT) &&
2075 !VT.isScalableVector())
2076 return SDValue();
2077
2078 SDLoc DL(Node);
2079 SDValue Cast = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 0));
2080 SDValue SignMask = DAG.getConstant(
2081 Val: APInt::getSignMask(BitWidth: IntVT.getScalarSizeInBits()), DL, VT: IntVT);
2082 SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL, VT: IntVT, N1: Cast, N2: SignMask);
2083 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Xor);
2084}
2085
2086SDValue VectorLegalizer::ExpandFABS(SDNode *Node) {
2087 EVT VT = Node->getValueType(ResNo: 0);
2088 EVT IntVT = VT.changeVectorElementTypeToInteger();
2089
2090 if (!TLI.isOperationLegalOrCustom(Op: ISD::AND, VT: IntVT))
2091 return SDValue();
2092
2093 // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
2094 if (!TLI.isOperationLegalOrCustomOrPromote(Op: ISD::FSUB, VT) &&
2095 !VT.isScalableVector())
2096 return SDValue();
2097
2098 SDLoc DL(Node);
2099 SDValue Cast = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 0));
2100 SDValue ClearSignMask = DAG.getConstant(
2101 Val: APInt::getSignedMaxValue(numBits: IntVT.getScalarSizeInBits()), DL, VT: IntVT);
2102 SDValue ClearedSign = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: Cast, N2: ClearSignMask);
2103 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: ClearedSign);
2104}
2105
2106SDValue VectorLegalizer::ExpandFCOPYSIGN(SDNode *Node) {
2107 EVT VT = Node->getValueType(ResNo: 0);
2108 EVT IntVT = VT.changeVectorElementTypeToInteger();
2109
2110 if (VT != Node->getOperand(Num: 1).getValueType() ||
2111 !TLI.isOperationLegalOrCustom(Op: ISD::AND, VT: IntVT) ||
2112 !TLI.isOperationLegalOrCustom(Op: ISD::OR, VT: IntVT))
2113 return SDValue();
2114
2115 // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
2116 if (!TLI.isOperationLegalOrCustomOrPromote(Op: ISD::FSUB, VT) &&
2117 !VT.isScalableVector())
2118 return SDValue();
2119
2120 SDLoc DL(Node);
2121 SDValue Mag = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 0));
2122 SDValue Sign = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 1));
2123
2124 SDValue SignMask = DAG.getConstant(
2125 Val: APInt::getSignMask(BitWidth: IntVT.getScalarSizeInBits()), DL, VT: IntVT);
2126 SDValue SignBit = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: Sign, N2: SignMask);
2127
2128 SDValue ClearSignMask = DAG.getConstant(
2129 Val: APInt::getSignedMaxValue(numBits: IntVT.getScalarSizeInBits()), DL, VT: IntVT);
2130 SDValue ClearedSign = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: Mag, N2: ClearSignMask);
2131
2132 SDValue CopiedSign = DAG.getNode(Opcode: ISD::OR, DL, VT: IntVT, N1: ClearedSign, N2: SignBit,
2133 Flags: SDNodeFlags::Disjoint);
2134
2135 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: CopiedSign);
2136}
2137
2138void VectorLegalizer::ExpandFSUB(SDNode *Node,
2139 SmallVectorImpl<SDValue> &Results) {
2140 // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal,
2141 // we can defer this to operation legalization where it will be lowered as
2142 // a+(-b).
2143 EVT VT = Node->getValueType(ResNo: 0);
2144 if (TLI.isOperationLegalOrCustom(Op: ISD::FNEG, VT) &&
2145 TLI.isOperationLegalOrCustom(Op: ISD::FADD, VT))
2146 return; // Defer to LegalizeDAG
2147
2148 if (SDValue Expanded = TLI.expandVectorNaryOpBySplitting(Node, DAG)) {
2149 Results.push_back(Elt: Expanded);
2150 return;
2151 }
2152
2153 SDValue Tmp = DAG.UnrollVectorOp(N: Node);
2154 Results.push_back(Elt: Tmp);
2155}
2156
// Expand [VP_|STRICT_F]SETCC whose condition code is not legal for the
// operand type: legalize the condition code (possibly swapping operands or
// inverting the result), or fall back to unrolling / a SELECT_CC.
void VectorLegalizer::ExpandSETCC(SDNode *Node,
                                  SmallVectorImpl<SDValue> &Results) {
  bool NeedInvert = false;
  bool IsVP = Node->getOpcode() == ISD::VP_SETCC;
  bool IsStrict = Node->getOpcode() == ISD::STRICT_FSETCC ||
                  Node->getOpcode() == ISD::STRICT_FSETCCS;
  bool IsSignaling = Node->getOpcode() == ISD::STRICT_FSETCCS;
  // Strict nodes carry the chain as operand 0, shifting everything else.
  unsigned Offset = IsStrict ? 1 : 0;

  SDValue Chain = IsStrict ? Node->getOperand(Num: 0) : SDValue();
  SDValue LHS = Node->getOperand(Num: 0 + Offset);
  SDValue RHS = Node->getOperand(Num: 1 + Offset);
  SDValue CC = Node->getOperand(Num: 2 + Offset);

  MVT OpVT = LHS.getSimpleValueType();
  ISD::CondCode CCCode = cast<CondCodeSDNode>(Val&: CC)->get();

  // If the condition code does not need expansion, just scalarize the node.
  if (TLI.getCondCodeAction(CC: CCCode, VT: OpVT) != TargetLowering::Expand) {
    if (IsStrict) {
      UnrollStrictFPOp(Node, Results);
      return;
    }
    Results.push_back(Elt: UnrollVSETCC(Node));
    return;
  }

  SDValue Mask, EVL;
  if (IsVP) {
    Mask = Node->getOperand(Num: 3 + Offset);
    EVL = Node->getOperand(Num: 4 + Offset);
  }

  SDLoc dl(Node);
  // May rewrite LHS/RHS/CC in place and/or set NeedInvert.
  bool Legalized =
      TLI.LegalizeSetCCCondCode(DAG, VT: Node->getValueType(ResNo: 0), LHS, RHS, CC, Mask,
                                EVL, NeedInvert, dl, Chain, IsSignaling);

  if (Legalized) {
    // If we expanded the SETCC by swapping LHS and RHS, or by inverting the
    // condition code, create a new SETCC node.
    if (CC.getNode()) {
      if (IsStrict) {
        LHS = DAG.getNode(Opcode: Node->getOpcode(), DL: dl, VTList: Node->getVTList(),
                          Ops: {Chain, LHS, RHS, CC}, Flags: Node->getFlags());
        Chain = LHS.getValue(R: 1);
      } else if (IsVP) {
        LHS = DAG.getNode(Opcode: ISD::VP_SETCC, DL: dl, VT: Node->getValueType(ResNo: 0),
                          Ops: {LHS, RHS, CC, Mask, EVL}, Flags: Node->getFlags());
      } else {
        LHS = DAG.getNode(Opcode: ISD::SETCC, DL: dl, VT: Node->getValueType(ResNo: 0), N1: LHS, N2: RHS, N3: CC,
                          Flags: Node->getFlags());
      }
    }

    // If we expanded the SETCC by inverting the condition code, then wrap
    // the existing SETCC in a NOT to restore the intended condition.
    if (NeedInvert) {
      if (!IsVP)
        LHS = DAG.getLogicalNOT(DL: dl, Val: LHS, VT: LHS->getValueType(ResNo: 0));
      else
        LHS = DAG.getVPLogicalNOT(DL: dl, Val: LHS, Mask, EVL, VT: LHS->getValueType(ResNo: 0));
    }
  } else {
    assert(!IsStrict && "Don't know how to expand for strict nodes.");

    // Otherwise, SETCC for the given comparison type must be completely
    // illegal; expand it into a SELECT_CC.
    EVT VT = Node->getValueType(ResNo: 0);
    LHS = DAG.getNode(Opcode: ISD::SELECT_CC, DL: dl, VT, N1: LHS, N2: RHS,
                      N3: DAG.getBoolConstant(V: true, DL: dl, VT, OpVT: LHS.getValueType()),
                      N4: DAG.getBoolConstant(V: false, DL: dl, VT, OpVT: LHS.getValueType()),
                      N5: CC, Flags: Node->getFlags());
  }

  Results.push_back(Elt: LHS);
  if (IsStrict)
    Results.push_back(Elt: Chain);
}
2235
2236void VectorLegalizer::ExpandUADDSUBO(SDNode *Node,
2237 SmallVectorImpl<SDValue> &Results) {
2238 SDValue Result, Overflow;
2239 TLI.expandUADDSUBO(Node, Result, Overflow, DAG);
2240 Results.push_back(Elt: Result);
2241 Results.push_back(Elt: Overflow);
2242}
2243
2244void VectorLegalizer::ExpandSADDSUBO(SDNode *Node,
2245 SmallVectorImpl<SDValue> &Results) {
2246 SDValue Result, Overflow;
2247 TLI.expandSADDSUBO(Node, Result, Overflow, DAG);
2248 Results.push_back(Elt: Result);
2249 Results.push_back(Elt: Overflow);
2250}
2251
2252void VectorLegalizer::ExpandMULO(SDNode *Node,
2253 SmallVectorImpl<SDValue> &Results) {
2254 SDValue Result, Overflow;
2255 if (!TLI.expandMULO(Node, Result, Overflow, DAG))
2256 std::tie(args&: Result, args&: Overflow) = DAG.UnrollVectorOverflowOp(N: Node);
2257
2258 Results.push_back(Elt: Result);
2259 Results.push_back(Elt: Overflow);
2260}
2261
2262void VectorLegalizer::ExpandFixedPointDiv(SDNode *Node,
2263 SmallVectorImpl<SDValue> &Results) {
2264 SDNode *N = Node;
2265 if (SDValue Expanded = TLI.expandFixedPointDiv(Opcode: N->getOpcode(), dl: SDLoc(N),
2266 LHS: N->getOperand(Num: 0), RHS: N->getOperand(Num: 1), Scale: N->getConstantOperandVal(Num: 2), DAG))
2267 Results.push_back(Elt: Expanded);
2268}
2269
2270void VectorLegalizer::ExpandStrictFPOp(SDNode *Node,
2271 SmallVectorImpl<SDValue> &Results) {
2272 if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP) {
2273 ExpandUINT_TO_FLOAT(Node, Results);
2274 return;
2275 }
2276 if (Node->getOpcode() == ISD::STRICT_FP_TO_UINT) {
2277 ExpandFP_TO_UINT(Node, Results);
2278 return;
2279 }
2280
2281 if (Node->getOpcode() == ISD::STRICT_FSETCC ||
2282 Node->getOpcode() == ISD::STRICT_FSETCCS) {
2283 ExpandSETCC(Node, Results);
2284 return;
2285 }
2286
2287 UnrollStrictFPOp(Node, Results);
2288}
2289
2290void VectorLegalizer::ExpandREM(SDNode *Node,
2291 SmallVectorImpl<SDValue> &Results) {
2292 assert((Node->getOpcode() == ISD::SREM || Node->getOpcode() == ISD::UREM) &&
2293 "Expected REM node");
2294
2295 SDValue Result;
2296 if (!TLI.expandREM(Node, Result, DAG))
2297 Result = DAG.UnrollVectorOp(N: Node);
2298 Results.push_back(Elt: Result);
2299}
2300
2301// Try to expand libm nodes into vector math routine calls. Callers provide the
2302// LibFunc equivalent of the passed in Node, which is used to lookup mappings
2303// within TargetLibraryInfo. The only mappings considered are those where the
2304// result and all operands are the same vector type. While predicated nodes are
2305// not supported, we will emit calls to masked routines by passing in an all
2306// true mask.
// Returns true (and appends the call result) if a vector math routine for LC
// exists in the runtime-libcall table; false means the caller should expand
// another way.
bool VectorLegalizer::tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC,
                                           SmallVectorImpl<SDValue> &Results) {
  // Chain must be propagated but currently strict fp operations are down
  // converted to their none strict counterpart.
  assert(!Node->isStrictFPOpcode() && "Unexpected strict fp operation!");

  RTLIB::LibcallImpl LCImpl = DAG.getLibcalls().getLibcallImpl(Call: LC);
  if (LCImpl == RTLIB::Unsupported)
    return false;

  EVT VT = Node->getValueType(ResNo: 0);
  const RTLIB::RuntimeLibcallsInfo &RTLCI = TLI.getRuntimeLibcallsInfo();
  LLVMContext &Ctx = *DAG.getContext();

  // IR-level signature of the routine, used to type-check each argument.
  auto [FuncTy, FuncAttrs] = RTLCI.getFunctionTy(
      Ctx, TT: DAG.getSubtarget().getTargetTriple(), DL: DAG.getDataLayout(), LibcallImpl: LCImpl);

  SDLoc DL(Node);
  TargetLowering::ArgListTy Args;

  bool HasMaskArg = RTLCI.hasVectorMaskArgument(Impl: LCImpl);

  // Sanity check just in case function has unexpected parameters.
  assert(FuncTy->getNumParams() == Node->getNumOperands() + HasMaskArg &&
         EVT::getEVT(FuncTy->getReturnType(), true) == VT &&
         "mismatch in value type and call signature type");

  for (unsigned I = 0, E = FuncTy->getNumParams(); I != E; ++I) {
    Type *ParamTy = FuncTy->getParamType(i: I);

    // Masked routines take the mask as a trailing parameter; since the node
    // itself is unpredicated, pass an all-true mask.
    if (HasMaskArg && I == E - 1) {
      assert(cast<VectorType>(ParamTy)->getElementType()->isIntegerTy(1) &&
             "unexpected vector mask type");
      EVT MaskVT = TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: Ctx, VT);
      Args.emplace_back(args: DAG.getBoolConstant(V: true, DL, VT: MaskVT, OpVT: VT),
                        args: MaskVT.getTypeForEVT(Context&: Ctx));

    } else {
      SDValue Op = Node->getOperand(Num: I);
      assert(Op.getValueType() == EVT::getEVT(ParamTy, true) &&
             "mismatch in value type and call argument type");
      Args.emplace_back(args&: Op, args&: ParamTy);
    }
  }

  // Emit a call to the vector function.
  SDValue Callee =
      DAG.getExternalSymbol(LCImpl, VT: TLI.getPointerTy(DL: DAG.getDataLayout()));
  CallingConv::ID CC = RTLCI.getLibcallImplCallingConv(Call: LCImpl);

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CC, ResultType: FuncTy->getReturnType(), Target: Callee, ArgsList: std::move(Args));

  std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
  Results.push_back(Elt: CallResult.first);
  return true;
}
2366
// Scalarize a strict FP vector operation: emit one scalar strict node per
// element, then rebuild the vector result and merge the per-element chains
// with a TokenFactor. Pushes the result vector and the merged chain.
void VectorLegalizer::UnrollStrictFPOp(SDNode *Node,
                                       SmallVectorImpl<SDValue> &Results) {
  EVT VT = Node->getValueType(ResNo: 0);
  EVT EltVT = VT.getVectorElementType();
  unsigned NumElems = VT.getVectorNumElements();
  unsigned NumOpers = Node->getNumOperands();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Strict compares produce the target's scalar SETCC result type, which is
  // widened back to EltVT via the select below.
  EVT TmpEltVT = EltVT;
  if (Node->getOpcode() == ISD::STRICT_FSETCC ||
      Node->getOpcode() == ISD::STRICT_FSETCCS)
    TmpEltVT = TLI.getSetCCResultType(DL: DAG.getDataLayout(),
                                      Context&: *DAG.getContext(), VT: TmpEltVT);

  EVT ValueVTs[] = {TmpEltVT, MVT::Other};
  SDValue Chain = Node->getOperand(Num: 0);
  SDLoc dl(Node);

  SmallVector<SDValue, 32> OpValues;
  SmallVector<SDValue, 32> OpChains;
  for (unsigned i = 0; i < NumElems; ++i) {
    SmallVector<SDValue, 4> Opers;
    SDValue Idx = DAG.getVectorIdxConstant(Val: i, DL: dl);

    // The Chain is the first operand.
    Opers.push_back(Elt: Chain);

    // Now process the remaining operands.
    for (unsigned j = 1; j < NumOpers; ++j) {
      SDValue Oper = Node->getOperand(Num: j);
      EVT OperVT = Oper.getValueType();

      // Vector operands are scalarized; scalar operands (e.g. condition
      // codes) are passed through unchanged.
      if (OperVT.isVector())
        Oper = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl,
                           VT: OperVT.getVectorElementType(), N1: Oper, N2: Idx);

      Opers.push_back(Elt: Oper);
    }

    SDValue ScalarOp = DAG.getNode(Opcode: Node->getOpcode(), DL: dl, ResultTys: ValueVTs, Ops: Opers);
    SDValue ScalarResult = ScalarOp.getValue(R: 0);
    SDValue ScalarChain = ScalarOp.getValue(R: 1);

    // Map the boolean compare result to all-ones/zero in the element type.
    if (Node->getOpcode() == ISD::STRICT_FSETCC ||
        Node->getOpcode() == ISD::STRICT_FSETCCS)
      ScalarResult = DAG.getSelect(DL: dl, VT: EltVT, Cond: ScalarResult,
                                   LHS: DAG.getAllOnesConstant(DL: dl, VT: EltVT),
                                   RHS: DAG.getConstant(Val: 0, DL: dl, VT: EltVT));

    OpValues.push_back(Elt: ScalarResult);
    OpChains.push_back(Elt: ScalarChain);
  }

  SDValue Result = DAG.getBuildVector(VT, DL: dl, Ops: OpValues);
  SDValue NewChain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: OpChains);

  Results.push_back(Elt: Result);
  Results.push_back(Elt: NewChain);
}
2426
// Scalarize a vector SETCC: compare each element pair with a scalar SETCC,
// widen the boolean result to the result element type (all-ones when true),
// and rebuild the result vector.
SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) {
  EVT VT = Node->getValueType(ResNo: 0);
  unsigned NumElems = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  SDValue LHS = Node->getOperand(Num: 0);
  SDValue RHS = Node->getOperand(Num: 1);
  SDValue CC = Node->getOperand(Num: 2);
  EVT TmpEltVT = LHS.getValueType().getVectorElementType();
  SDLoc dl(Node);
  SmallVector<SDValue, 8> Ops(NumElems);
  for (unsigned i = 0; i < NumElems; ++i) {
    SDValue LHSElem = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: TmpEltVT, N1: LHS,
                                  N2: DAG.getVectorIdxConstant(Val: i, DL: dl));
    SDValue RHSElem = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: TmpEltVT, N1: RHS,
                                  N2: DAG.getVectorIdxConstant(Val: i, DL: dl));
    // FIXME: We should use i1 setcc + boolext here, but it causes regressions.
    Ops[i] = DAG.getNode(Opcode: ISD::SETCC, DL: dl,
                         VT: TLI.getSetCCResultType(DL: DAG.getDataLayout(),
                                                  Context&: *DAG.getContext(), VT: TmpEltVT),
                         N1: LHSElem, N2: RHSElem, N3: CC);
    // Map the scalar boolean to the vector's true/false representation.
    Ops[i] = DAG.getSelect(DL: dl, VT: EltVT, Cond: Ops[i],
                           LHS: DAG.getBoolConstant(V: true, DL: dl, VT: EltVT, OpVT: VT),
                           RHS: DAG.getConstant(Val: 0, DL: dl, VT: EltVT));
  }
  return DAG.getBuildVector(VT, DL: dl, Ops);
}
2453
/// Entry point: run the vector legalizer over this DAG. Returns true if any
/// node was changed.
bool SelectionDAG::LegalizeVectors() {
  return VectorLegalizer(*this).Run();
}
2457